upload.py (customized for use with codereviews.googleopensourceprograms.com)
author Todd Larsen <tlarsen@google.com>
Thu, 13 Nov 2008 19:41:16 +0000
changeset 478 613951c35706
parent 477 8a8b1bd035c4
child 479 50bab5e71a66
upload.py (customized for use with codereviews.googleopensourceprograms.com) updated from latest Rietveld release. Patch by: Todd Larsen
thirdparty/rietveld/upload.py
--- a/thirdparty/rietveld/upload.py	Thu Nov 13 16:40:05 2008 +0000
+++ b/thirdparty/rietveld/upload.py	Thu Nov 13 19:41:16 2008 +0000
@@ -22,9 +22,11 @@
 
 Supported version control systems:
   Git
+  Mercurial
   Subversion
 
-(It is important for Git users to specify a tree-ish to diff against.)
+It is important for Git/Mercurial users to specify a tree/node/branch to diff
+against by using the '--rev' option.
 """
 # This code is derived from appcfg.py in the App Engine SDK (open source),
 # and from ASPN recipe #146306.
@@ -149,6 +151,11 @@
     Returns:
       The authentication token returned by ClientLogin.
     """
+    # account_type = "GOOGLE"
+    account_type = "HOSTED_OR_GOOGLE"
+    if self.host.endswith(".google.com"):
+      # Needed for use inside Google.
+      account_type = "HOSTED"
     req = self._CreateRequest(
         url="https://www.google.com/accounts/ClientLogin",
         data=urllib.urlencode({
@@ -156,8 +163,8 @@
             "Passwd": password,
             "service": "ah",
             "source": "rietveld-codereview-upload",
-            "accountType": "HOSTED_OR_GOOGLE",
-        })
+            "accountType": account_type,
+        }),
     )
     try:
       response = self.opener.open(req)
@@ -370,7 +377,7 @@
                  default="codereviews.googleopensourceprograms.com",
                  metavar="SERVER",
                  help=("The server to upload to. The format is host[:port]. "
-                       "Defaults to 'codereviews.googleopensourceprograms.com'."))
+                       "Defaults to '%default'."))
 group.add_option("-e", "--email", action="store", dest="email",
                  metavar="EMAIL", default=None,
                  help="The username to use. Will prompt if omitted.")
@@ -405,9 +412,13 @@
 group.add_option("-i", "--issue", type="int", action="store",
                  metavar="ISSUE", default=None,
                  help="Issue number to which to add. Defaults to new issue.")
-group.add_option("-l", "--local_base", action="store_true",
-                 dest="local_base", default=False,
-                 help="Base files will be uploaded.")
+group.add_option("--download_base", action="store_true",
+                 dest="download_base", default=False,
+                 help="Base files will be downloaded by the server "
+                 "(side-by-side diffs may not work on files with CRs).")
+group.add_option("--rev", action="store", dest="revision",
+                 metavar="REV", default=None,
+                 help="Branch/tree/revision to diff against (used by DVCS).")
 group.add_option("--send_mail", action="store_true",
                  dest="send_mail", default=False,
                  help="Send notification email to reviewers.")
@@ -426,7 +437,8 @@
     """Prompts the user for a username and password."""
     email = options.email
     if email is None:
-      email = raw_input("Email: ").strip()
+      prompt = "Email (login for uploading to %s): " % options.server
+      email = raw_input(prompt).strip()
     password = getpass.getpass("Password for %s: " % email)
     return (email, password)
 
@@ -496,17 +508,43 @@
 # Use a shell for subcommands on Windows to get a PATH search.
 use_shell = sys.platform.startswith("win")
 
+def RunShellWithReturnCode(command, print_output=False,
+                           universal_newlines=True):
+  """Executes a command and returns the output and the return code.
 
-def RunShell(command, silent_ok=False, universal_newlines=False):
+  Args:
+    command: Command to execute.
+    print_output: If True, the output is printed to stdout.
+    universal_newlines: Use universal_newlines flag (default: True).
+
+  Returns:
+    Tuple (output, return code)
+  """
   logging.info("Running %s", command)
-  p = subprocess.Popen(command, stdout=subprocess.PIPE,
-                       stderr=subprocess.STDOUT, shell=use_shell,
+  p = subprocess.Popen(command, stdout=subprocess.PIPE, shell=use_shell,
                        universal_newlines=universal_newlines)
-  data = p.stdout.read()
+  if print_output:
+    output_array = []
+    while True:
+      line = p.stdout.readline()
+      if not line:
+        break
+      print line.strip("\n")
+      output_array.append(line)
+    output = "".join(output_array)
+  else:
+    output = p.stdout.read()
   p.wait()
   p.stdout.close()
-  if p.returncode:
-    ErrorExit("Got error status from %s" % command)
+  return output, p.returncode
+
+
+def RunShell(command, silent_ok=False, universal_newlines=True,
+             print_output=False):
+  data, retcode = RunShellWithReturnCode(command, print_output,
+                                         universal_newlines)
+  if retcode:
+    ErrorExit("Got error status from %s:\n%s" % (command, data))
   if not silent_ok and not data:
     ErrorExit("No output from %s" % command)
   return data
@@ -515,6 +553,14 @@
 class VersionControlSystem(object):
   """Abstract base class providing an interface to the VCS."""
 
+  def __init__(self, options):
+    """Constructor.
+
+    Args:
+      options: Command line options.
+    """
+    self.options = options
+
   def GenerateDiff(self, args):
     """Return the current diff as a string.
 
@@ -545,49 +591,124 @@
     """Get the content of the upstream version of a file.
 
     Returns:
-      A tuple (content, status) representing the file content and the status of
-      the file.
+      A tuple (base_content, new_content, is_binary, status)
+        base_content: The contents of the base file.
+        new_content: For text files, this is empty.  For binary files, this is
+          the contents of the new file, since the diff output won't contain
+          information to reconstruct the current file.
+        is_binary: True iff the file is binary.
+        status: The status of the file.
     """
 
     raise NotImplementedError(
         "abstract method -- subclass %s must override" % self.__class__)
 
-  def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options):
-    """Uploads the base files."""
-    patches = dict()
-    [patches.setdefault(v, k) for k, v in patch_list]
-    for filename in patches.keys():
-      content, status = self.GetBaseFile(filename)
-      no_base_file = False
+
+  def GetBaseFiles(self, diff):
+    """Helper that calls GetBase file for each file in the patch.
+    
+    Returns:
+      A dictionary that maps from filename to GetBaseFile's tuple.  Filenames
+      are retrieved based on lines that start with "Index:" or
+      "Property changes on:".
+    """
+    files = {}
+    for line in diff.splitlines(True):
+      if line.startswith('Index:') or line.startswith('Property changes on:'):
+        unused, filename = line.split(':', 1)
+        # On Windows if a file has property changes its filename uses '\'
+        # instead of '/'.
+        filename = filename.strip().replace('\\', '/')
+        files[filename] = self.GetBaseFile(filename)
+    return files
+
+
+  def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options,
+                      files):
+    """Uploads the base files (and if necessary, the current ones as well)."""
+
+    def UploadFile(filename, file_id, content, is_binary, status, is_base):
+      """Uploads a file to the server."""
+      file_too_large = False
+      if is_base:
+        type = "base"
+      else:
+        type = "current"
       if len(content) > MAX_UPLOAD_SIZE:
-        print ("Not uploading the base file for " + filename +
-               " because the file is too large.")
-        no_base_file = True
+        print ("Not uploading the %s file for %s because it's too large." %
+               (type, filename))
+        file_too_large = True
         content = ""
       checksum = md5.new(content).hexdigest()
-      if options.verbose > 0:
-        print "Uploading %s" % filename
-      url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset),
-                                          int(patches.get(filename)))
+      if options.verbose > 0 and not file_too_large:
+        print "Uploading %s file for %s" % (type, filename)
+      url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id)
       form_fields = [("filename", filename),
                      ("status", status),
-                     ("checksum", checksum),]
-      if no_base_file:
-        form_fields.append(("no_base_file", "1"))
+                     ("checksum", checksum),
+                     ("is_binary", str(is_binary)),
+                     ("is_current", str(not is_base)),
+                    ]
+      if file_too_large:
+        form_fields.append(("file_too_large", "1"))
       if options.email:
         form_fields.append(("user", options.email))
       ctype, body = EncodeMultipartFormData(form_fields,
                                             [("data", filename, content)])
-      response_body = rpc_server.Send(url, body, content_type=ctype)
+      response_body = rpc_server.Send(url, body,
+                                      content_type=ctype)
       if not response_body.startswith("OK"):
         StatusUpdate("  --> %s" % response_body)
-        sys.exit(False)
+        sys.exit(1)
+
+    patches = dict()
+    [patches.setdefault(v, k) for k, v in patch_list]
+    for filename in patches.keys():
+      base_content, new_content, is_binary, status = files[filename]
+      file_id_str = patches.get(filename)
+      if file_id_str.find("nobase") != -1:
+        base_content = None
+        file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
+      file_id = int(file_id_str)
+      if base_content != None:
+        UploadFile(filename, file_id, base_content, is_binary, status, True)
+      if new_content != None:
+        UploadFile(filename, file_id, new_content, is_binary, status, False)
+
+  def IsImage(self, filename):
+    """Returns true if the filename has an image extension."""
+    mimetype =  mimetypes.guess_type(filename)[0]
+    if not mimetype:
+      return False
+    return mimetype.startswith("image/")
 
 
 class SubversionVCS(VersionControlSystem):
   """Implementation of the VersionControlSystem interface for Subversion."""
 
+  def __init__(self, options):
+    super(SubversionVCS, self).__init__(options)
+    if self.options.revision:
+      match = re.match(r"(\d+)(:(\d+))?", self.options.revision)
+      if not match:
+        ErrorExit("Invalid Subversion revision %s." % self.options.revision)
+      self.rev_start = match.group(1)
+      self.rev_end = match.group(3)
+    else:
+      self.rev_start = self.rev_end = None
+    # Cache output from "svn list -r REVNO dirname".
+    # Keys: dirname, Values: 2-tuple (output for start rev and end rev).
+    self.svnls_cache = {}
+    # SVN base URL is required to fetch files deleted in an older revision.
+    # Result is cached to not guess it over and over again in GetBaseFile().
+    required = self.options.download_base or self.options.revision is not None
+    self.svn_base = self._GuessBase(required)
+
   def GuessBase(self, required):
+    """Wrapper for _GuessBase."""
+    return self.svn_base
+
+  def _GuessBase(self, required):
     """Returns the SVN base URL.
 
     Args:
@@ -633,8 +754,8 @@
 
   def GenerateDiff(self, args):
     cmd = ["svn", "diff"]
-    if not sys.platform.startswith("win"):
-      cmd.append("--diff-cmd=diff")
+    if self.options.revision:
+      cmd += ["-r", self.options.revision]
     cmd.extend(args)
     data = RunShell(cmd)
     count = 0
@@ -667,6 +788,7 @@
       'LastChangedBy':       ['LastChangedBy', 'Author'],
       'URL':                 ['URL', 'HeadURL'],
     }
+
     def repl(m):
        if m.group(2):
          return "$%s::%s$" % (m.group(1), " " * len(m.group(3)))
@@ -684,58 +806,150 @@
         unknown_files.append(line)
     return unknown_files
 
+  def ReadFile(self, filename):
+    """Returns the contents of a file."""
+    file = open(filename, 'rb')
+    result = ""
+    try:
+      result = file.read()
+    finally:
+      file.close()
+    return result
+
+  def GetStatus(self, filename):
+    """Returns the status of a file."""
+    if not self.options.revision:
+      status = RunShell(["svn", "status", "--ignore-externals", filename])
+      if not status:
+        ErrorExit("svn status returned no output for %s" % filename)
+      status_lines = status.splitlines()
+      # If file is in a cl, the output will begin with
+      # "\n--- Changelist 'cl_name':\n".  See
+      # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt
+      if (len(status_lines) == 3 and
+          not status_lines[0] and
+          status_lines[1].startswith("--- Changelist")):
+        status = status_lines[2]
+      else:
+        status = status_lines[0]
+    # If we have a revision to diff against we need to run "svn list"
+    # for the old and the new revision and compare the results to get
+    # the correct status for a file.
+    else:
+      dirname, relfilename = os.path.split(filename)
+      if dirname not in self.svnls_cache:
+        cmd = ["svn", "list", "-r", self.rev_start, dirname or "."]
+        out, returncode = RunShellWithReturnCode(cmd)
+        if returncode:
+          ErrorExit("Failed to get status for %s." % filename)
+        old_files = out.splitlines()
+        args = ["svn", "list"]
+        if self.rev_end:
+          args += ["-r", self.rev_end]
+        cmd = args + [dirname or "."]
+        out, returncode = RunShellWithReturnCode(cmd)
+        if returncode:
+          ErrorExit("Failed to run command %s" % cmd)
+        self.svnls_cache[dirname] = (old_files, out.splitlines())
+      old_files, new_files = self.svnls_cache[dirname]
+      if relfilename in old_files and relfilename not in new_files:
+        status = "D   "
+      elif relfilename in old_files and relfilename in new_files:
+        status = "M   "
+      else:
+        status = "A   "
+    return status
+
   def GetBaseFile(self, filename):
-    status = RunShell(["svn", "status", "--ignore-externals", filename])
-    if not status:
-      StatusUpdate("svn status returned no output for %s" % filename)
-      sys.exit(False)
-    status_lines = status.splitlines()
-    # If file is in a cl, the output will begin with
-    # "\n--- Changelist 'cl_name':\n".  See
-    # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt
-    if (len(status_lines) == 3 and
-        not status_lines[0] and
-        status_lines[1].startswith("--- Changelist")):
-      status = status_lines[2]
-    else:
-      status = status_lines[0]
+    status = self.GetStatus(filename)
+    base_content = None
+    new_content = None
+
     # If a file is copied its status will be "A  +", which signifies
     # "addition-with-history".  See "svn st" for more information.  We need to
     # upload the original file or else diff parsing will fail if the file was
     # edited.
-    if ((status[0] == "A" and status[3] != "+") or
-        (status[0] == " " and status[1] == "M")):  # property changed
-      content = ""
+    if status[0] == "A" and status[3] != "+":
+      # We'll need to upload the new content if we're adding a binary file
+      # since diff's output won't contain it.
+      mimetype = RunShell(["svn", "propget", "svn:mime-type", filename],
+                          silent_ok=True)
+      base_content = ""
+      is_binary = mimetype and not mimetype.startswith("text/")
+      if is_binary and self.IsImage(filename):
+        new_content = self.ReadFile(filename)
     elif (status[0] in ("M", "D", "R") or
-          (status[0] == "A" and status[3] == "+")):
-      mimetype = RunShell(["svn", "-rBASE", "propget", "svn:mime-type",
-                           filename],
-                          silent_ok=True)
-      if mimetype.startswith("application/octet-stream"):
-        content = ""
+          (status[0] == "A" and status[3] == "+") or  # Copied file.
+          (status[0] == " " and status[1] == "M")):  # Property change.
+      args = []
+      if self.options.revision:
+        url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
       else:
-        # On Windows svn cat gives \r\n, and calling subprocess.Popen turns
-        # them into \r\r\n, so use universal newlines to avoid the extra \r.
-        if sys.platform.startswith("win"):
-          nl = True
+        # Don't change filename, it's needed later.
+        url = filename
+        args += ["-r", "BASE"]
+      cmd = ["svn"] + args + ["propget", "svn:mime-type", url]
+      mimetype, returncode = RunShellWithReturnCode(cmd)
+      if returncode:
+        # File does not exist in the requested revision.
+        # Reset mimetype, it contains an error message.
+        mimetype = ""
+      get_base = False
+      is_binary = mimetype and not mimetype.startswith("text/")
+      if status[0] == " ":
+        # Empty base content just to force an upload.
+        base_content = ""
+      elif is_binary:
+        if self.IsImage(filename):
+          get_base = True
+          if status[0] == "M":
+            if not self.rev_end:
+              new_content = self.ReadFile(filename)
+            else:
+              url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end)
+              new_content = RunShell(["svn", "cat", url],
+                                     universal_newlines=True)
         else:
-          nl = False
-        content = RunShell(["svn", "cat", filename], universal_newlines=nl)
-      keywords = RunShell(["svn", "-rBASE", "propget", "svn:keywords",
-                           filename],
-                          silent_ok=True)
-      if keywords:
-        content = self._CollapseKeywords(content, keywords)
+          base_content = ""
+      else:
+        get_base = True
+
+      if get_base:
+        if is_binary:
+          universal_newlines = False
+        else:
+          universal_newlines = True
+        if self.rev_start:
+          # "svn cat -r REV delete_file.txt" doesn't work. cat requires
+          # the full URL with "@REV" appended instead of using "-r" option.
+          url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
+          base_content = RunShell(["svn", "cat", url],
+                                  universal_newlines=universal_newlines)
+        else:
+          base_content = RunShell(["svn", "cat", filename],
+                                  universal_newlines=universal_newlines)
+        if not is_binary:
+          args = []
+          if self.rev_start:
+            url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
+          else:
+            url = filename
+            args += ["-r", "BASE"]
+          cmd = ["svn"] + args + ["propget", "svn:keywords", url]
+          keywords, returncode = RunShellWithReturnCode(cmd)
+          if keywords and not returncode:
+            base_content = self._CollapseKeywords(base_content, keywords)
     else:
       StatusUpdate("svn status returned unexpected output: %s" % status)
-      sys.exit(False)
-    return content, status[0:5]
+      sys.exit(1)
+    return base_content, new_content, is_binary, status[0:5]
 
 
 class GitVCS(VersionControlSystem):
   """Implementation of the VersionControlSystem interface for Git."""
 
-  def __init__(self):
+  def __init__(self, options):
+    super(GitVCS, self).__init__(options)
     # Map of filename -> hash of base file.
     self.base_hashes = {}
 
@@ -743,6 +957,8 @@
     # This is more complicated than svn's GenerateDiff because we must convert
     # the diff output to include an svn-style "Index:" line as well as record
     # the hashes of the base files, so we can upload them along with our diff.
+    if self.options.revision:
+      extra_args = [self.options.revision] + extra_args
     gitdiff = RunShell(["git", "diff", "--full-index"] + extra_args)
     svndiff = []
     filecount = 0
@@ -772,13 +988,117 @@
 
   def GetBaseFile(self, filename):
     hash = self.base_hashes[filename]
+    base_content = None
+    new_content = None
+    is_binary = False
     if hash == "0" * 40:  # All-zero hash indicates no base file.
-      return ("", "A")
+      status = "A"
+      base_content = ""
     else:
-      return (RunShell(["git", "show", hash]), "M")
+      status = "M"
+      base_content = RunShell(["git", "show", hash])
+    return (base_content, new_content, is_binary, status)
 
 
-# NOTE: this function is duplicated in engine.py, keep them in sync.
+class MercurialVCS(VersionControlSystem):
+  """Implementation of the VersionControlSystem interface for Mercurial."""
+
+  def __init__(self, options, repo_dir):
+    super(MercurialVCS, self).__init__(options)
+    # Absolute path to repository (we can be in a subdir)
+    self.repo_dir = os.path.normpath(repo_dir)
+    # Compute the subdir
+    cwd = os.path.normpath(os.getcwd())
+    assert cwd.startswith(self.repo_dir)
+    self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
+    if self.options.revision:
+      self.base_rev = self.options.revision
+    else:
+      self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()
+
+  def _GetRelPath(self, filename):
+    """Get relative path of a file according to the current directory,
+    given its logical path in the repo."""
+    assert filename.startswith(self.subdir), filename
+    return filename[len(self.subdir):].lstrip(r"\/")
+
+  def GenerateDiff(self, extra_args):
+    # If no file specified, restrict to the current subdir
+    extra_args = extra_args or ["."]
+    cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
+    data = RunShell(cmd, silent_ok=True)
+    svndiff = []
+    filecount = 0
+    for line in data.splitlines():
+      m = re.match("diff --git a/(\S+) b/(\S+)", line)
+      if m:
+        # Modify line to make it look as if it comes from svn diff.
+        # With this modification no changes on the server side are required
+        # to make upload.py work with Mercurial repos.
+        # NOTE: for proper handling of moved/copied files, we have to use
+        # the second filename.
+        filename = m.group(2)
+        svndiff.append("Index: %s" % filename)
+        svndiff.append("=" * 67)
+        filecount += 1
+        logging.info(line)
+      else:
+        svndiff.append(line)
+    if not filecount:
+      ErrorExit("No valid patches found in output from hg diff")
+    return "\n".join(svndiff) + "\n"
+
+  def GetUnknownFiles(self):
+    """Return a list of files unknown to the VCS."""
+    args = []
+    status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
+        silent_ok=True)
+    unknown_files = []
+    for line in status.splitlines():
+      st, fn = line.split(" ", 1)
+      if st == "?":
+        unknown_files.append(fn)
+    return unknown_files
+
+  def GetBaseFile(self, filename):
+    # "hg status" and "hg cat" both take a path relative to the current subdir
+    # rather than to the repo root, but "hg diff" has given us the full path
+    # to the repo root.
+    base_content = ""
+    new_content = None
+    is_binary = False
+    oldrelpath = relpath = self._GetRelPath(filename)
+    # "hg status -C" returns two lines for moved/copied files, one otherwise
+    out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
+    out = out.splitlines()
+    # HACK: strip error message about missing file/directory if it isn't in
+    # the working copy
+    if out[0].startswith('%s: ' % relpath):
+      out = out[1:]
+    if len(out) > 1:
+      # Moved/copied => considered as modified, use old filename to 
+      # retrieve base contents
+      oldrelpath = out[1].strip()
+      status = "M"
+    else:
+      status, _ = out[0].split(' ', 1)
+    if status != "A":
+      base_content = RunShell(["hg", "cat", "-r", self.base_rev, oldrelpath],
+        silent_ok=True)
+      is_binary = "\0" in base_content  # Mercurial's heuristic
+    if status != "R":
+      new_content = open(relpath, "rb").read()
+      is_binary = is_binary or "\0" in new_content
+    if is_binary and base_content:
+      # Fetch again without converting newlines
+      base_content = RunShell(["hg", "cat", "-r", self.base_rev, oldrelpath],
+        silent_ok=True, universal_newlines=False)
+    if not is_binary or not self.IsImage(relpath):
+      new_content = None
+    return base_content, new_content, is_binary, status
+
+
+# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync.
 def SplitPatch(data):
   """Splits a patch into separate pieces for each file.
 
@@ -832,7 +1152,7 @@
              " because the file is too large.")
       continue
     form_fields = [("filename", patch[0])]
-    if options.local_base:
+    if not options.download_base:
       form_fields.append(("content_upload", "1"))
     files = [("data", "data.diff", patch[1])]
     ctype, body = EncodeMultipartFormData(form_fields, files)
@@ -842,12 +1162,12 @@
     lines = response_body.splitlines()
     if not lines or lines[0] != "OK":
       StatusUpdate("  --> %s" % response_body)
-      sys.exit(False)
+      sys.exit(1)
     rv.append([lines[1], patch[0]])
   return rv
 
 
-def GuessVCS():
+def GuessVCS(options):
   """Helper to guess the version control system.
 
   This examines the current directory, guesses which VersionControlSystem
@@ -857,18 +1177,29 @@
   Returns:
     A VersionControlSystem instance. Exits if the VCS can't be guessed.
   """
+  # Mercurial has a command to get the base directory of a repository
+  # Try running it, but don't die if we don't have hg installed.
+  # NOTE: we try Mercurial first as it can sit on top of an SVN working copy.
+  try:
+    out, returncode = RunShellWithReturnCode(["hg", "root"])
+    if returncode == 0:
+      return MercurialVCS(options, out.strip())
+  except OSError, (errno, message):
+    if errno != 2:  # ENOENT -- they don't have hg installed.
+      raise
+
   # Subversion has a .svn in all working directories.
   if os.path.isdir('.svn'):
     logging.info("Guessed VCS = Subversion")
-    return SubversionVCS()
+    return SubversionVCS(options)
 
   # Git has a command to test if you're in a git tree.
   # Try running it, but don't die if we don't have git installed.
   try:
-    subproc = subprocess.Popen(["git", "rev-parse", "--is-inside-work-tree"],
-                               stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    if subproc.wait() == 0:
-      return GitVCS()
+    out, returncode = RunShellWithReturnCode(["git", "rev-parse",
+                                              "--is-inside-work-tree"])
+    if returncode == 0:
+      return GitVCS(options)
   except OSError, (errno, message):
     if errno != 2:  # ENOENT -- they don't have git installed.
       raise
@@ -888,20 +1219,21 @@
     logging.getLogger().setLevel(logging.DEBUG)
   elif verbosity >= 2:
     logging.getLogger().setLevel(logging.INFO)
-  vcs = GuessVCS()
+  vcs = GuessVCS(options)
   if isinstance(vcs, SubversionVCS):
     # base field is only allowed for Subversion.
     # Note: Fetching base files may become deprecated in future releases.
-    base = vcs.GuessBase(not options.local_base)
+    base = vcs.GuessBase(options.download_base)
   else:
     base = None
-  if not base and not options.local_base:
-    options.local_base = True
+  if not base and options.download_base:
+    options.download_base = True
     logging.info("Enabled upload of base file")
   if not options.assume_yes:
     vcs.CheckForUnknownFiles()
   if data is None:
     data = vcs.GenerateDiff(args)
+  files = vcs.GetBaseFiles(data)
   if verbosity >= 1:
     print "Upload server:", options.server, "(change with -s/--server)"
   if options.issue:
@@ -921,12 +1253,12 @@
     form_fields.append(("user", options.email))
   if options.reviewers:
     for reviewer in options.reviewers.split(','):
-      if reviewer.count("@") != 1 or "." not in reviewer.split("@")[1]:
+      if "@" in reviewer and not reviewer.split("@")[1].count(".") == 1:
         ErrorExit("Invalid email address: %s" % reviewer)
     form_fields.append(("reviewers", options.reviewers))
   if options.cc:
     for cc in options.cc.split(','):
-      if cc.count("@") != 1 or "." not in cc.split("@")[1]:
+      if "@" in cc and not cc.split("@")[1].count(".") == 1:
         ErrorExit("Invalid email address: %s" % cc)
     form_fields.append(("cc", options.cc))
   description = options.description
@@ -938,21 +1270,31 @@
     file.close()
   if description:
     form_fields.append(("description", description))
+  # Send a hash of each base file so the server can determine if a copy
+  # already exists in an earlier patchset.
+  base_hashes = ""
+  for file, info in files.iteritems():
+    if not info[0] is None:
+      checksum = md5.new(info[0]).hexdigest()
+      if base_hashes:
+        base_hashes += "|"
+      base_hashes += checksum + ":" + file
+  form_fields.append(("base_hashes", base_hashes))
   # If we're uploading base files, don't send the email before the uploads, so
   # that it contains the file status.
-  if options.send_mail and not options.local_base:
+  if options.send_mail and options.download_base:
     form_fields.append(("send_mail", "1"))
-  if options.local_base:
+  if not options.download_base:
     form_fields.append(("content_upload", "1"))
   if len(data) > MAX_UPLOAD_SIZE:
     print "Patch is large, so uploading file patches separately."
-    files = []
+    uploaded_diff_file = []
     form_fields.append(("separate_patches", "1"))
   else:
-    files = [("data", "data.diff", data)]
-  ctype, body = EncodeMultipartFormData(form_fields, files)
+    uploaded_diff_file = [("data", "data.diff", data)]
+  ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file)
   response_body = rpc_server.Send("/upload", body, content_type=ctype)
-  if options.local_base or not files:
+  if not options.download_base or not uploaded_diff_file:
     lines = response_body.splitlines()
     if len(lines) >= 2:
       msg = lines[0]
@@ -968,13 +1310,13 @@
     sys.exit(0)
   issue = msg[msg.rfind("/")+1:]
 
-  if not files:
+  if not uploaded_diff_file:
     result = UploadSeparatePatches(issue, rpc_server, patchset, data, options)
-    if options.local_base:
+    if not options.download_base:
       patches = result
 
-  if options.local_base:
-    vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options)
+  if not options.download_base:
+    vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files)
     if options.send_mail:
       rpc_server.Send("/" + issue + "/mail", payload="")
   return issue