changeset 899:7f90bb48c9de

svn verify: use a custom editor and get_revision(). Previously, we would fetch each file in the revision/changeset individually. With this change, we fetch the entire revision in one request, and use a custom editor to verify its contents. This is quite a lot faster than the previous approach when verifying over the internet -- by an order of magnitude or two, in fact. As data is transferred in a single operation, verifying a revision from PyPy took 30 seconds rather than 30 minutes, and saturated my 10Mbps connection. Please note that the output ordering isn't stable between the two; output will appear in reverse order when using the fast verifier.
author Dan Villiom Podlaski Christiansen <danchr@gmail.com>
date Wed, 14 Dec 2011 00:07:58 +0100
parents 6524260be543
children abd8f2f2c58a
files hgsubversion/verify.py tests/comprehensive/test_verify_and_startrev.py tests/test_fetch_mappings.py tests/test_utility_commands.py
diffstat 4 files changed, 183 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/hgsubversion/verify.py
+++ b/hgsubversion/verify.py
@@ -1,9 +1,12 @@
 import posixpath
 
+from mercurial import util as hgutil
 from mercurial import error
 
+import svnwrap
 import svnrepo
 import util
+import editor
 
 def verify(ui, repo, args=None, **opts):
     '''verify current revision against Subversion repository
@@ -35,43 +38,156 @@ def verify(ui, repo, args=None, **opts):
 
     ui.write('verifying %s against %s@%i\n' % (ctx, branchurl, srev))
 
-    svnfiles = set()
-    result = 0
-
-    hgfiles = set(ctx) - util.ignoredfiles
-
-    svndata = svn.list_files(branchpath, srev)
-    for i, (fn, type) in enumerate(svndata):
-        util.progress(ui, 'verify', i, total=len(hgfiles))
-
-        if type != 'f':
-            continue
-        svnfiles.add(fn)
-        fp = fn
-        if branchpath:
-            fp = branchpath + '/' + fn
-        data, mode = svn.get_file(posixpath.normpath(fp), srev)
-        try:
-            fctx = ctx[fn]
-        except error.LookupError:
-            result = 1
-            continue
-        if not fctx.data() == data:
-            ui.write('difference in: %s\n' % fn)
-            result = 1
-        if not fctx.flags() == mode:
-            ui.write('wrong flags for: %s\n' % fn)
+    if opts.get('stupid', ui.configbool('hgsubversion', 'stupid')):
+        svnfiles = set()
+        result = 0
+
+        hgfiles = set(ctx) - util.ignoredfiles
+
+        svndata = svn.list_files(branchpath, srev)
+        for i, (fn, type) in enumerate(svndata):
+            util.progress(ui, 'verify', i, total=len(hgfiles))
+
+            if type != 'f':
+                continue
+            svnfiles.add(fn)
+            fp = fn
+            if branchpath:
+                fp = branchpath + '/' + fn
+            data, mode = svn.get_file(posixpath.normpath(fp), srev)
+            try:
+                fctx = ctx[fn]
+            except error.LookupError:
+                result = 1
+                continue
+            if not fctx.data() == data:
+                ui.write('difference in: %s\n' % fn)
+                result = 1
+            if not fctx.flags() == mode:
+                ui.write('wrong flags for: %s\n' % fn)
+                result = 1
+
+        if hgfiles != svnfiles:
+            unexpected = hgfiles - svnfiles
+            for f in sorted(unexpected):
+                ui.write('unexpected file: %s\n' % f)
+            missing = svnfiles - hgfiles
+            for f in sorted(missing):
+                ui.write('missing file: %s\n' % f)
             result = 1
 
-    if hgfiles != svnfiles:
-        unexpected = hgfiles - svnfiles
-        for f in sorted(unexpected):
-            ui.write('unexpected file: %s\n' % f)
-        missing = svnfiles - hgfiles
-        for f in sorted(missing):
-            ui.write('missing file: %s\n' % f)
-        result = 1
+        util.progress(ui, 'verify', None, total=len(hgfiles))
 
-    util.progress(ui, 'verify', None, total=len(hgfiles))
+    else:
+        class VerifyEditor(svnwrap.Editor):
+            """editor that verifies a repository against the given context."""
+            def __init__(self, ui, ctx):
+                self.ui = ui
+                self.ctx = ctx
+                self.unexpected = set(ctx) - util.ignoredfiles
+                self.missing = set()
+                self.failed = False
+
+                self.total = len(self.unexpected)
+                self.seen = 0
+
+            def open_root(self, base_revnum, pool=None):
+                pass
+
+            def add_directory(self, path, parent_baton, copyfrom_path,
+                              copyfrom_revision, pool=None):
+                self.file = None
+                self.props = None
+
+            def open_directory(self, path, parent_baton, base_revision, pool=None):
+                self.file = None
+                self.props = None
+
+            def add_file(self, path, parent_baton=None, copyfrom_path=None,
+                         copyfrom_revision=None, file_pool=None):
+
+                if path in self.unexpected:
+                    self.unexpected.remove(path)
+                    self.file = path
+                    self.props = {}
+                else:
+                    self.total += 1
+                    self.missing.add(path)
+                    self.failed = True
+                    self.file = None
+                    self.props = None
+
+                self.seen += 1
+                util.progress(self.ui, 'verify', self.seen, total=self.total)
+
+            def open_file(self, path, base_revnum):
+                raise NotImplementedError()
+
+            def apply_textdelta(self, file_baton, base_checksum, pool=None):
+                stream = editor.NeverClosingStringIO()
+                handler = svnwrap.apply_txdelta('', stream)
+                if not callable(handler):
+                    raise hgutil.Abort('Error in Subversion bindings: '
+                                       'cannot call handler!')
+                def txdelt_window(window):
+                    handler(window)
+                    # window being None means we're done
+                    if window:
+                        return
+
+                    fctx = self.ctx[self.file]
+                    hgdata = fctx.data()
+                    svndata = stream.getvalue()
+
+                    if 'svn:executable' in self.props:
+                        if fctx.flags() != 'x':
+                            self.ui.warn('wrong flags for: %s\n' % self.file)
+                            self.failed = True
+                    elif 'svn:special' in self.props:
+                        hgdata = 'link ' + hgdata
+                        if fctx.flags() != 'l':
+                            self.ui.warn('wrong flags for: %s\n' % self.file)
+                            self.failed = True
+                    elif fctx.flags():
+                        self.ui.warn('wrong flags for: %s\n' % self.file)
+                        self.failed = True
+
+                    if hgdata != svndata:
+                        self.ui.warn('difference in: %s\n' % self.file)
+                        self.failed = True
+
+                if self.file is not None:
+                    return txdelt_window
+
+            def change_dir_prop(self, dir_baton, name, value, pool=None):
+                pass
+
+            def change_file_prop(self, file_baton, name, value, pool=None):
+                if self.props is not None:
+                    self.props[name] = value
+
+            def close_directory(self, dir_baton, pool=None):
+                pass
+
+            def delete_entry(self, path, revnum, pool=None):
+                raise NotImplementedError()
+
+            def check(self):
+                util.progress(self.ui, 'verify', None, total=self.total)
+
+                for f in self.unexpected:
+                    self.ui.warn('unexpected file: %s\n' % f)
+                    self.failed = True
+                for f in self.missing:
+                    self.ui.warn('missing file: %s\n' % f)
+                    self.failed = True
+                return not self.failed
+
+        v = VerifyEditor(ui, ctx)
+        svnrepo.svnremoterepo(ui, branchurl).svn.get_revision(srev, v)
+        if v.check():
+            result = 0
+        else:
+            result = 1
 
     return result
--- a/tests/comprehensive/test_verify_and_startrev.py
+++ b/tests/comprehensive/test_verify_and_startrev.py
@@ -42,7 +42,10 @@ def _do_case(self, name, stupid, layout)
     assert len(self.repo) > 0
     for i in repo:
         ctx = repo[i]
-        self.assertEqual(verify.verify(repo.ui, repo, rev=ctx.node()), 0)
+        self.assertEqual(verify.verify(repo.ui, repo, rev=ctx.node(),
+                                       stupid=True), 0)
+        self.assertEqual(verify.verify(repo.ui, repo, rev=ctx.node(),
+                                       stupid=False), 0)
 
     # check a startrev clone
     if layout == 'single' and name not in _skipshallow:
@@ -59,7 +62,18 @@ def _do_case(self, name, stupid, layout)
 
         repo.ui.pushbuffer()
         self.assertEqual(0, verify.verify(repo.ui, shallowrepo,
-                                          rev=shallowtip.node()))
+                                          rev=shallowtip.node(),
+                                          stupid=True))
+        self.assertEqual(0, verify.verify(repo.ui, shallowrepo,
+                                          rev=shallowtip.node(),
+                                          stupid=False))
+
+        stupidui = ui.ui(repo.ui)
+        stupidui.config('hgsubversion', 'stupid', True)
+        self.assertEqual(verify.verify(stupidui, repo, rev=ctx.node(),
+                                       stupid=True), 0)
+        self.assertEqual(verify.verify(stupidui, repo, rev=ctx.node(),
+                                       stupid=False), 0)
 
         # viewing diff's of lists of files is easier on the eyes
         self.assertMultiLineEqual('\n'.join(fulltip), '\n'.join(shallowtip),
--- a/tests/test_fetch_mappings.py
+++ b/tests/test_fetch_mappings.py
@@ -12,8 +12,8 @@ from mercurial import util as hgutil
 
 from hgsubversion import maps
 from hgsubversion import svncommands
-from hgsubversion import verify
 from hgsubversion import util
+from hgsubversion import verify
 
 class MapTests(test_util.TestBase):
     @property
--- a/tests/test_utility_commands.py
+++ b/tests/test_utility_commands.py
@@ -246,15 +246,16 @@ class UtilityTests(test_util.TestBase):
                                 authors=author_path)
         self.assertMultiLineEqual(open(author_path).read(), 'Augie=\nevil=\n')
 
-    def test_svnverify(self):
+    def test_svnverify(self, stupid=False):
         repo, repo_path = self.load_and_fetch('binaryfiles.svndump',
-                                              noupdate=False)
-        ret = verify.verify(self.ui(), repo, [], rev=1)
+                                              noupdate=False, stupid=stupid)
+        ret = verify.verify(self.ui(), repo, [], rev=1, stupid=stupid)
         self.assertEqual(0, ret)
         repo_path = self.load_svndump('binaryfiles-broken.svndump')
         u = self.ui()
         u.pushbuffer()
-        ret = verify.verify(u, repo, [test_util.fileurl(repo_path)], rev=1)
+        ret = verify.verify(u, repo, [test_util.fileurl(repo_path)],
+                            rev=1, stupid=stupid)
         output = u.popbuffer()
         self.assertEqual(1, ret)
         output = re.sub(r'file://\S+', 'file://', output)
@@ -265,16 +266,20 @@ unexpected file: binary1
 missing file: binary3
 """, output)
 
-    def test_svnverify_corruption(self):
+    def test_svnverify_stupid(self):
+        self.test_svnverify(True)
+
+    def test_corruption(self, stupid=False):
         SUCCESS = 0
         FAILURE = 1
 
         repo, repo_path = self.load_and_fetch('correct.svndump', layout='single',
-                                              subdir='')
+                                              subdir='', stupid=stupid)
 
         ui = self.ui()
 
-        self.assertEqual(SUCCESS, verify.verify(ui, self.repo, rev='tip'))
+        self.assertEqual(SUCCESS, verify.verify(ui, self.repo, rev='tip',
+                                                stupid=stupid))
 
         corrupt_source = test_util.fileurl(self.load_svndump('corrupt.svndump'))
 
@@ -300,6 +305,9 @@ missing file: binary3
 
         self.assertEqual((FAILURE, expected), (code, actual))
 
+    def test_corruption_stupid(self):
+        self.test_corruption(True)
+
 def suite():
     all_tests = [unittest.TestLoader().loadTestsFromTestCase(UtilityTests),
           ]