changeset 962:8648ccfb8325

editor: process missing files with regular files. Missing files were stored directly in RevisionData and resolved after the revision was replayed. This means the set of missing files was not pruned by delete_entry() actions or by the filemap, and some of them were fetched for no reason. Say you convert: A branch/foo/bar (from trunk/foo/bar:123) with a filemap excluding "foo/bar". Since the directory was excluded in trunk, the files cannot be found and were marked as missing even though they were discarded afterwards.
author Patrick Mezard <patrick@mezard.eu>
date Sat, 20 Oct 2012 22:22:02 +0200
parents b729909d3793
children 64d961130a07
files hgsubversion/editor.py hgsubversion/replay.py tests/fixtures/copies.sh tests/fixtures/copies.svndump tests/test_fetch_mappings.py
diffstat 5 files changed, 120 insertions(+), 67 deletions(-) [+]
line wrap: on
line diff
--- a/hgsubversion/editor.py
+++ b/hgsubversion/editor.py
@@ -90,8 +90,8 @@ class RevisionData(object):
 
     __slots__ = [
         'file', 'added', 'deleted', 'rev', 'execfiles', 'symlinks',
-        'copies', 'missing', 'emptybranches', 'base', 'externals', 'ui',
-        'exception', 'store', '_failonmissing',
+        'copies', 'emptybranches', 'base', 'externals', 'ui',
+        'exception', 'store',
     ]
 
     def __init__(self, ui):
@@ -107,10 +107,6 @@ class RevisionData(object):
         self.symlinks = {}
         # Map fully qualified destination file paths to module source path
         self.copies = {}
-        self.missing = set()
-        # Used in tests and debugging
-        self._failonmissing = self.ui.config(
-            'hgsubversion', 'failonmissing', False)
         self.emptybranches = {}
         self.externals = {}
         self.exception = None
@@ -121,8 +117,6 @@ class RevisionData(object):
         self.symlinks[path] = islink
         if path in self.deleted:
             del self.deleted[path]
-        if path in self.missing:
-            self.missing.remove(path)
         if copypath is not None:
             self.copies[path] = copypath
 
@@ -154,49 +148,6 @@ class RevisionData(object):
             files.update(g)
         return sorted(files)
 
-    def addmissing(self, path):
-        if self._failonmissing:
-            raise EditingError('missing entry: %s' % path)
-        self.missing.add(path)
-
-    def findmissing(self, svn):
-
-        if not self.missing:
-            return
-
-        msg = 'fetching %s files that could not use replay.\n'
-        self.ui.debug(msg % len(self.missing))
-        root = svn.subdir and svn.subdir[1:] or ''
-        r = self.rev.revnum
-
-        files = set()
-        for p in self.missing:
-            self.ui.note('.')
-            self.ui.flush()
-            if p[-1] == '/':
-                dir = p[len(root):]
-                new = [p + f for f, k in svn.list_files(dir, r) if k == 'f']
-                files.update(new)
-            else:
-                files.add(p)
-
-        i = 1
-        self.ui.note('\nfetching files...\n')
-        for p in files:
-            if self.ui.debugflag:
-                self.ui.debug('fetching %s\n' % p)
-            else:
-                self.ui.note('.')
-            self.ui.flush()
-            if i % 50 == 0:
-                svn.init_ra_and_client()
-            i += 1
-            data, mode = svn.get_file(p[len(root):], r)
-            self.set(p, data, 'x' in mode, 'l' in mode)
-
-        self.missing = set()
-        self.ui.note('\n')
-
     def close(self):
         self.store.close()
 
@@ -226,6 +177,9 @@ class HgEditor(svnwrap.Editor):
         self.current = RevisionData(meta.ui)
         self._clear()
 
+    def setsvn(self, svn):
+        self._svn = svn
+
     def _clear(self):
         self._filecounter = 0
         # A mapping of svn paths to CopiedFile entries
@@ -240,6 +194,7 @@ class HgEditor(svnwrap.Editor):
         self._getctx = util.lrucachefunc(self.repo.changectx, 3)
         # A stack of opened directory (baton, path) pairs.
         self._opendirs = []
+        self._missing = set()
 
     def _openfile(self, path, data, isexec, islink, copypath, create=False):
         if path in self._openpaths:
@@ -273,6 +228,26 @@ class HgEditor(svnwrap.Editor):
         self._deleted.add(path)
         if path in self._svncopies:
             del self._svncopies[path]
+        self._missing.discard(path)
+
+    def addmissing(self, path, isdir=False):
+        svn = self._svn
+        root = svn.subdir and svn.subdir[1:] or ''
+        if not isdir:
+            self._missing.add(path[len(root):])
+        else:
+            # Resolve missing directories content immediately so the
+            # missing files may be processed by delete actions.
+            rev = self.current.rev.revnum
+            path = path + '/'
+            parentdir = path[len(root):]
+            for f, k in svn.list_files(parentdir, rev):
+                if k != 'f':
+                    continue
+                f = parentdir + f
+                if not self.meta.is_path_valid(f, False):
+                    continue
+                self._missing.add(f)
 
     @svnwrap.ieditor
     def delete_entry(self, path, revision_bogus, parent_baton, pool=None):
@@ -292,6 +267,12 @@ class HgEditor(svnwrap.Editor):
         for f in list(self._svncopies):
             if f.startswith(prefix):
                 self._deletefile(f)
+        if path in self._missing:
+            self._missing.remove(path)
+        else:
+            for f in list(self._missing):
+                if f.startswith(prefix):
+                    self._missing.remove(f)
 
         if br_path is not None:
             ha = self.meta.get_parent_revision(self.current.rev.revnum, branch)
@@ -332,7 +313,7 @@ class HgEditor(svnwrap.Editor):
         parent = self.meta.get_parent_revision(baserev + 1, branch, True)
         ctx = self._getctx(parent)
         if fpath not in ctx:
-            self.current.addmissing(path)
+            self.addmissing(path)
             return None
 
         fctx = ctx.filectx(fpath)
@@ -369,7 +350,7 @@ class HgEditor(svnwrap.Editor):
         (from_file,
          from_branch) = self.meta.split_branch_path(copyfrom_path)[:2]
         if not from_file:
-            self.current.addmissing(path)
+            self.addmissing(path)
             return None
         # Use exact=True because during replacements ('R' action) we select
         # replacing branch as parent, but svn delta editor provides delta
@@ -378,7 +359,7 @@ class HgEditor(svnwrap.Editor):
                                            from_branch, True)
         ctx = self._getctx(ha)
         if from_file not in ctx:
-            self.current.addmissing(path)
+            self.addmissing(path)
             return None
 
         fctx = ctx.filectx(from_file)
@@ -435,7 +416,7 @@ class HgEditor(svnwrap.Editor):
                 # existing=False to guess a possible branch location and
                 # test it against the filemap. The actual path and
                 # revision will be resolved below if necessary.
-                self.current.addmissing('%s/' % path)
+                self.addmissing(path, isdir=True)
                 return baton
         if tag:
             changeid = self.meta.tags[tag]
@@ -446,7 +427,7 @@ class HgEditor(svnwrap.Editor):
             frompath, source_branch = self.meta.split_branch_path(copyfrom_path)[:2]
         new_hash = self.meta.get_parent_revision(source_rev + 1, source_branch, True)
         if new_hash == node.nullid:
-            self.current.addmissing('%s/' % path)
+            self.addmissing(path, isdir=True)
             return baton
         fromctx = self._getctx(new_hash)
         if frompath != '/' and frompath != '':
@@ -596,7 +577,7 @@ class HgEditor(svnwrap.Editor):
                         path, target, isexec, islink, copypath)
             except svnwrap.SubversionException, e: # pragma: no cover
                 if e.args[1] == svnwrap.ERR_INCOMPLETE_DATA:
-                    self.current.addmissing(path)
+                    self.addmissing(path)
                 else: # pragma: no cover
                     raise hgutil.Abort(*e.args)
             except: # pragma: no cover
@@ -625,6 +606,32 @@ class HgEditor(svnwrap.Editor):
                 self.current.set(path, data, isexec, islink, copied)
         self._svncopies.clear()
 
+        # Resolve missing files
+        if self._missing:
+            missing = sorted(self._missing)
+            self.ui.debug('fetching %s files that could not use replay.\n'
+                    % len(missing))
+            if self.ui.configbool('hgsubversion', 'failonmissing', False):
+                raise EditingError('missing entry: %s' % missing[0])
+
+            svn = self._svn
+            rev = self.current.rev.revnum
+            root = svn.subdir and svn.subdir[1:] or ''
+            i = 1
+            for f in missing:
+                if self.ui.debugflag:
+                    self.ui.debug('fetching %s\n' % f)
+                else:
+                    self.ui.note('.')
+                self.ui.flush()
+                if i % 50 == 0:
+                    svn.init_ra_and_client()
+                i += 1
+                data, mode = svn.get_file(f, rev)
+                self.current.set(f, data, 'x' in mode, 'l' in mode)
+            if not self.ui.debugflag:
+                self.ui.note('\n')
+
         for f in self._deleted:
             self.current.delete(f)
         self._deleted.clear()
--- a/hgsubversion/replay.py
+++ b/hgsubversion/replay.py
@@ -72,6 +72,7 @@ def _convert_rev(ui, meta, svn, r, tbdel
     editor = meta.editor
     editor.current.clear()
     editor.current.rev = r
+    editor.setsvn(svn)
 
     if firstrun and meta.revmap.oldest <= 0:
         # We know nothing about this project, so fetch everything before
@@ -83,15 +84,12 @@ def _convert_rev(ui, meta, svn, r, tbdel
     editor.close()
 
     current = editor.current
-    current.findmissing(svn)
 
     updateexternals(ui, meta, current)
 
     if current.exception is not None:  # pragma: no cover
         traceback.print_exception(*current.exception)
         raise ReplayException()
-    if current.missing:
-        raise MissingPlainTextError()
 
     files_to_commit = current.files()
     branch_batches = {}
--- a/tests/fixtures/copies.sh
+++ b/tests/fixtures/copies.sh
@@ -18,6 +18,9 @@ cd project
 svn cp trunk/dir trunk/dir2
 echo b >> trunk/dir2/a
 svn ci -m 'copy/edit trunk/dir/a'
+svn up
+svn cp trunk/dir2 trunk/dir3
+svn ci -m 'copy dir2 to dir3'
 cd ..
 
 svnadmin dump testrepo > ../copies.svndump
--- a/tests/fixtures/copies.svndump
+++ b/tests/fixtures/copies.svndump
@@ -1,6 +1,6 @@
 SVN-fs-dump-format-version: 2
 
-UUID: 707bea87-43e4-45d9-8f28-5d06ca9e3f3b
+UUID: f9962aa6-eec5-4335-8af9-9ae89f4b18b2
 
 Revision-number: 0
 Prop-content-length: 56
@@ -9,7 +9,7 @@ Content-length: 56
 K 8
 svn:date
 V 27
-2012-10-14T12:41:39.387675Z
+2012-10-14T14:22:33.372222Z
 PROPS-END
 
 Revision-number: 1
@@ -23,7 +23,7 @@ pmezard
 K 8
 svn:date
 V 27
-2012-10-14T12:41:39.409053Z
+2012-10-14T14:22:33.393643Z
 K 7
 svn:log
 V 4
@@ -72,7 +72,7 @@ pmezard
 K 8
 svn:date
 V 27
-2012-10-14T12:41:41.048526Z
+2012-10-14T14:22:35.042430Z
 K 7
 svn:log
 V 21
@@ -98,3 +98,28 @@ a
 b
 
 
+Revision-number: 3
+Prop-content-length: 119
+Content-length: 119
+
+K 10
+svn:author
+V 7
+pmezard
+K 8
+svn:date
+V 27
+2012-10-14T14:22:38.041919Z
+K 7
+svn:log
+V 17
+copy dir2 to dir3
+PROPS-END
+
+Node-path: trunk/dir3
+Node-kind: dir
+Node-action: add
+Node-copyfrom-rev: 2
+Node-copyfrom-path: trunk/dir2
+
+
--- a/tests/test_fetch_mappings.py
+++ b/tests/test_fetch_mappings.py
@@ -101,7 +101,8 @@ class MapTests(test_util.TestBase):
         all_tests = set(test)
         self.assertEqual(fromself.symmetric_difference(all_tests), set())
 
-    def _loadwithfilemap(self, svndump, filemapcontent, stupid=False):
+    def _loadwithfilemap(self, svndump, filemapcontent, stupid=False,
+            failonmissing=True):
         repo_path = self.load_svndump(svndump)
         filemap = open(self.filemap, 'w')
         filemap.write(filemapcontent)
@@ -109,7 +110,7 @@ class MapTests(test_util.TestBase):
         ui = self.ui(stupid)
         ui.setconfig('hgsubversion', 'filemap', self.filemap)
         ui.setconfig('hgsubversion', 'failoninvalidreplayfile', 'true')
-        ui.setconfig('hgsubversion', 'failonmissing', 'true')
+        ui.setconfig('hgsubversion', 'failonmissing', failonmissing)
         commands.clone(ui, test_util.fileurl(repo_path),
                        self.wc_path, filemap=self.filemap)
         return self.repo
@@ -146,9 +147,28 @@ class MapTests(test_util.TestBase):
                          ['alpha', 'beta'])
 
     def test_file_map_copy(self):
-        repo = self._loadwithfilemap('copies.svndump', "exclude dir2\n")
+        # Exercise excluding files copied from a non-excluded directory.
+        # There will be missing files as we are copying from an excluded
+        # directory.
+        repo = self._loadwithfilemap('copies.svndump', "exclude dir2\n",
+                failonmissing=False)
+        self.assertEqual(['dir/a', 'dir3/a'], list(repo['tip']))
+
+    def test_file_map_exclude_copy_source_and_dest(self):
+        # dir3 is excluded and copied from dir2 which is also excluded.
+        # dir3 files should not be marked as missing and fetched.
+        repo = self._loadwithfilemap('copies.svndump',
+                "exclude dir2\nexclude dir3\n")
         self.assertEqual(['dir/a'], list(repo['tip']))
 
+    def test_file_map_include_file_exclude_dir(self):
+        # dir3 is excluded but we want dir3/a, which is also copied from
+        # an excluded dir2. dir3/a should be fetched.
+        repo = self._loadwithfilemap('copies.svndump',
+                "include .\nexclude dir2\nexclude dir3\ninclude dir3/a\n",
+                failonmissing=False)
+        self.assertEqual(['dir/a', 'dir3/a'], list(repo['tip']))
+
     def test_branchmap(self, stupid=False):
         repo_path = self.load_svndump('branchmap.svndump')
         branchmap = open(self.branchmap, 'w')