changeset 73:9c1b53abefcb

fetch_command: support svn copy detection in stupid mode
author Patrick Mezard <pmezard@gmail.com>
date Wed, 05 Nov 2008 13:37:08 +0100
parents 9ec2a12c12ae
children 450d5d9d3b80
files fetch_command.py hg_delta_editor.py tests/test_fetch_renames.py
diffstat 3 files changed, 107 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/fetch_command.py
+++ b/fetch_command.py
@@ -217,6 +217,94 @@ def make_diff_path(b):
         return 'trunk'
     return 'branches/' + b
 
+def makecopyfinder(r, branchpath, rootdir):
+    """Return a function detecting copies.
+
+    The returned copyfinder(path) returns None if no copy information can
+    be found, or ((source, sourcerev), sourcepath) where "sourcepath" is the
+    copy source path, "sourcerev" the source svn revision and "source" is the
+    copy record path causing the copy to occur. If a single file was copied,
+    "sourcepath" and "source" are the same, while file copies detected from
+    directory copies return the copied source directory in "source".
+    """
+    # filter copy information for current branch
+    branchpath = branchpath + '/'
+    fullbranchpath = rootdir + branchpath
+    copies = []
+    for path, e in r.paths.iteritems():
+        if not e.copyfrom_path:
+            continue
+        if not path.startswith(branchpath):
+            continue
+        if not e.copyfrom_path.startswith(fullbranchpath):
+            # ignore cross branch copies
+            continue
+        dest = path[len(branchpath):]
+        source = e.copyfrom_path[len(fullbranchpath):]
+        copies.append((dest, (source, e.copyfrom_rev)))
+
+    copies.sort()
+    copies.reverse()
+    exactcopies = dict(copies)
+    
+    def finder(path):
+        if path in exactcopies:
+            return exactcopies[path], exactcopies[path][0]
+        # look for parent directory copy, longest first
+        for dest, (source, sourcerev) in copies:
+            dest = dest + '/'
+            if not path.startswith(dest):
+                continue
+            sourcepath = source + '/' + path[len(dest):]
+            return (source, sourcerev), sourcepath
+        return None
+
+    return finder
+
+def getcopies(svn, hg_editor, branch, branchpath, r, files, parentid):
+    """Return a mapping {dest: source} for every file copied into r.
+    """
+    if parentid == revlog.nullid:
+        return {}
+
+    # Extract svn copy information and group it by copy source.
+    # The idea is to duplicate the replay behaviour where copies are
+    # evaluated per copy event (one event for all files in a directory copy,
+    # one event for a single file copy). We assume that copy events match
+    # copy sources in the revision info.
+    svncopies = {}
+    finder = makecopyfinder(r, branchpath, svn.subdir)
+    for f in files:
+        copy = finder(f)
+        if copy:
+            svncopies.setdefault(copy[0], []).append((f, copy[1]))
+    if not svncopies:
+        return {}
+
+    # cache changeset contexts and map them to source svn revisions
+    parentctx = hg_editor.repo.changectx(parentid)
+    ctxs = {}
+    def getctx(svnrev):
+        if svnrev in ctxs:
+            return ctxs[svnrev]
+        changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
+        ctx = None
+        if changeid != revlog.nullid:
+            ctx = hg_editor.repo.changectx(changeid)
+        ctxs[svnrev] = ctx
+        return ctx
+
+    # check svn copies really make sense in mercurial
+    hgcopies = {}
+    for (sourcepath, rev), copies in svncopies.iteritems():
+        sourcectx = getctx(rev)
+        if sourcectx is None:
+            continue
+        sources = [s[1] for s in copies]
+        if not hg_editor.aresamefiles(sourcectx, parentctx, sources):
+            continue
+        hgcopies.update(copies)
+    return hgcopies
 
 def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
     used_diff = True
@@ -450,6 +538,10 @@ def stupid_svn_server_pull_rev(ui, svn, 
                 # TODO this might not be a required step.
                 if p:
                     files_touched.add(p)
+
+        copies = getcopies(svn, hg_editor, b, branches[b], r, files_touched, 
+                           parent_ha)
+
         date = r.date.replace('T', ' ').replace('Z', '').split('.')[0]
         date += ' -0000'
         def filectxfn(repo, memctx, path):
@@ -462,8 +554,9 @@ def stupid_svn_server_pull_rev(ui, svn, 
             exe = exec_files.get(path, None)
             if exe is None and path in hg_editor.repo[parent_ha]:
                 exe = 'x' in hg_editor.repo[parent_ha].filectx(path).flags()
+            copied = copies.get(path)
             return context.memfilectx(path=path, data=fp.read(), islink=False,
-                                      isexec=exe, copied=False)
+                                      isexec=exe, copied=copied)
         extra = {}
         if b:
             extra['branch'] = b
--- a/hg_delta_editor.py
+++ b/hg_delta_editor.py
@@ -499,7 +499,7 @@ class HgChangeReceiver(delta.Editor):
                 self.base_revision = None
             self.should_edit_most_recent_plaintext = True
 
-    def _aresamefiles(self, parentctx, childctx, files):
+    def aresamefiles(self, parentctx, childctx, files):
         """Assuming all files exist in childctx and parentctx, return True
         if none of them was changed in-between.
         """
@@ -560,7 +560,7 @@ class HgChangeReceiver(delta.Editor):
                                                 branch)
             if parentid != revlog.nullid:
                 parentctx = self.repo.changectx(parentid)
-                if self._aresamefiles(parentctx, ctx, [from_file]):
+                if self.aresamefiles(parentctx, ctx, [from_file]):
                     self.copies[path] = from_file
 
     @stash_exception_on_self
@@ -618,7 +618,7 @@ class HgChangeReceiver(delta.Editor):
             parentid = self.get_parent_revision(self.current_rev.revnum, branch)
             if parentid != revlog.nullid:
                 parentctx = self.repo.changectx(parentid)
-                if self._aresamefiles(parentctx, cp_f_ctx, copies.values()):
+                if self.aresamefiles(parentctx, cp_f_ctx, copies.values()):
                     self.copies.update(copies)
 
     @stash_exception_on_self
--- a/tests/test_fetch_renames.py
+++ b/tests/test_fetch_renames.py
@@ -23,9 +23,9 @@ class TestFetchRenames(unittest.TestCase
         shutil.rmtree(self.tmpdir)
         os.chdir(self.oldwd)
 
-    def _load_fixture_and_fetch(self, fixture_name):
+    def _load_fixture_and_fetch(self, fixture_name, stupid):
         return test_util.load_fixture_and_fetch(fixture_name, self.repo_path,
-                                                self.wc_path)
+                                                self.wc_path, stupid=stupid)
 
     def _debug_print_copies(self, repo):
         w = sys.stderr.write
@@ -36,8 +36,8 @@ class TestFetchRenames(unittest.TestCase
                 fctx = ctx[f]
                 w('%s: %r %r\n' % (f, fctx.data(), fctx.renamed()))
 
-    def test_rename(self):
-        repo = self._load_fixture_and_fetch('renames.svndump')
+    def _test_rename(self, stupid):
+        repo = self._load_fixture_and_fetch('renames.svndump', stupid)
         # self._debug_print_copies(repo)
 
         # Map revnum to mappings of dest name to (source name, dest content)
@@ -71,6 +71,12 @@ class TestFetchRenames(unittest.TestCase
                 self.assertEqual(cp[0], copymap[f][0])
                 self.assertEqual(ctx[f].data(), copymap[f][1])
 
+    def test_rename(self):
+        self._test_rename(False)
+
+    def test_rename_stupid(self):
+        self._test_rename(True)
+
 def suite():
     all = [unittest.TestLoader().loadTestsFromTestCase(TestFetchRenames),
           ]