# HG changeset patch # User Dan Villiom Podlaski Christiansen # Date 1323817678 -3600 # Node ID 7f90bb48c9debcfe4b4f978c3d8d6a4cc3bf8cbc # Parent 6524260be54362767ef25a4366129b6150be753a svn verify: use a custom editor and get_revision() Previously, we would fetch each file in the revision/changeset individually. With this change, we fetch the entire revision in one request, and use a custom editor to verify its contents. This is quite a lot faster than the previous approach when verifying over the internet: by an order of magnitude or two, in fact. As data is transferred in a single operation, verifying a revision from PyPy took 30 seconds rather than 30 minutes, and saturated my 10 Mbps connection. Please note that the output ordering isn't stable between the two verifiers; output will appear in reverse order when using the fast verifier. diff --git a/hgsubversion/verify.py b/hgsubversion/verify.py --- a/hgsubversion/verify.py +++ b/hgsubversion/verify.py @@ -1,9 +1,12 @@ import posixpath +from mercurial import util as hgutil from mercurial import error +import svnwrap import svnrepo import util +import editor def verify(ui, repo, args=None, **opts): '''verify current revision against Subversion repository @@ -35,43 +38,156 @@ def verify(ui, repo, args=None, **opts): ui.write('verifying %s against %s@%i\n' % (ctx, branchurl, srev)) - svnfiles = set() - result = 0 - - hgfiles = set(ctx) - util.ignoredfiles - - svndata = svn.list_files(branchpath, srev) - for i, (fn, type) in enumerate(svndata): - util.progress(ui, 'verify', i, total=len(hgfiles)) - - if type != 'f': - continue - svnfiles.add(fn) - fp = fn - if branchpath: - fp = branchpath + '/' + fn - data, mode = svn.get_file(posixpath.normpath(fp), srev) - try: - fctx = ctx[fn] - except error.LookupError: - result = 1 - continue - if not fctx.data() == data: - ui.write('difference in: %s\n' % fn) - result = 1 - if not fctx.flags() == mode: - ui.write('wrong flags for: %s\n' % fn) + if opts.get('stupid', 
ui.configbool('hgsubversion', 'stupid')): + svnfiles = set() + result = 0 + + hgfiles = set(ctx) - util.ignoredfiles + + svndata = svn.list_files(branchpath, srev) + for i, (fn, type) in enumerate(svndata): + util.progress(ui, 'verify', i, total=len(hgfiles)) + + if type != 'f': + continue + svnfiles.add(fn) + fp = fn + if branchpath: + fp = branchpath + '/' + fn + data, mode = svn.get_file(posixpath.normpath(fp), srev) + try: + fctx = ctx[fn] + except error.LookupError: + result = 1 + continue + if not fctx.data() == data: + ui.write('difference in: %s\n' % fn) + result = 1 + if not fctx.flags() == mode: + ui.write('wrong flags for: %s\n' % fn) + result = 1 + + if hgfiles != svnfiles: + unexpected = hgfiles - svnfiles + for f in sorted(unexpected): + ui.write('unexpected file: %s\n' % f) + missing = svnfiles - hgfiles + for f in sorted(missing): + ui.write('missing file: %s\n' % f) result = 1 - if hgfiles != svnfiles: - unexpected = hgfiles - svnfiles - for f in sorted(unexpected): - ui.write('unexpected file: %s\n' % f) - missing = svnfiles - hgfiles - for f in sorted(missing): - ui.write('missing file: %s\n' % f) - result = 1 + util.progress(ui, 'verify', None, total=len(hgfiles)) - util.progress(ui, 'verify', None, total=len(hgfiles)) + else: + class VerifyEditor(svnwrap.Editor): + """editor that verifies a repository against the given context.""" + def __init__(self, ui, ctx): + self.ui = ui + self.ctx = ctx + self.unexpected = set(ctx) - util.ignoredfiles + self.missing = set() + self.failed = False + + self.total = len(self.unexpected) + self.seen = 0 + + def open_root(self, base_revnum, pool=None): + pass + + def add_directory(self, path, parent_baton, copyfrom_path, + copyfrom_revision, pool=None): + self.file = None + self.props = None + + def open_directory(self, path, parent_baton, base_revision, pool=None): + self.file = None + self.props = None + + def add_file(self, path, parent_baton=None, copyfrom_path=None, + copyfrom_revision=None, 
file_pool=None): + + if path in self.unexpected: + self.unexpected.remove(path) + self.file = path + self.props = {} + else: + self.total += 1 + self.missing.add(path) + self.failed = True + self.file = None + self.props = None + + self.seen += 1 + util.progress(self.ui, 'verify', self.seen, total=self.total) + + def open_file(self, path, base_revnum): + raise NotImplementedError() + + def apply_textdelta(self, file_baton, base_checksum, pool=None): + stream = editor.NeverClosingStringIO() + handler = svnwrap.apply_txdelta('', stream) + if not callable(handler): + raise hgutil.Abort('Error in Subversion bindings: ' + 'cannot call handler!') + def txdelt_window(window): + handler(window) + # window being None means we're done + if window: + return + + fctx = self.ctx[self.file] + hgdata = fctx.data() + svndata = stream.getvalue() + + if 'svn:executable' in self.props: + if fctx.flags() != 'x': + self.ui.warn('wrong flags for: %s\n' % self.file) + self.failed = True + elif 'svn:special' in self.props: + hgdata = 'link ' + hgdata + if fctx.flags() != 'l': + self.ui.warn('wrong flags for: %s\n' % self.file) + self.failed = True + elif fctx.flags(): + self.ui.warn('wrong flags for: %s\n' % self.file) + self.failed = True + + if hgdata != svndata: + self.ui.warn('difference in: %s\n' % self.file) + self.failed = True + + if self.file is not None: + return txdelt_window + + def change_dir_prop(self, dir_baton, name, value, pool=None): + pass + + def change_file_prop(self, file_baton, name, value, pool=None): + if self.props is not None: + self.props[name] = value + + def close_directory(self, dir_baton, pool=None): + pass + + def delete_entry(self, path, revnum, pool=None): + raise NotImplementedError() + + def check(self): + util.progress(self.ui, 'verify', None, total=self.total) + + for f in self.unexpected: + self.ui.warn('unexpected file: %s\n' % f) + self.failed = True + for f in self.missing: + self.ui.warn('missing file: %s\n' % f) + self.failed = True + return 
not self.failed + + v = VerifyEditor(ui, ctx) + svnrepo.svnremoterepo(ui, branchurl).svn.get_revision(srev, v) + if v.check(): + result = 0 + else: + result = 1 return result diff --git a/tests/comprehensive/test_verify_and_startrev.py b/tests/comprehensive/test_verify_and_startrev.py --- a/tests/comprehensive/test_verify_and_startrev.py +++ b/tests/comprehensive/test_verify_and_startrev.py @@ -42,7 +42,10 @@ def _do_case(self, name, stupid, layout) assert len(self.repo) > 0 for i in repo: ctx = repo[i] - self.assertEqual(verify.verify(repo.ui, repo, rev=ctx.node()), 0) + self.assertEqual(verify.verify(repo.ui, repo, rev=ctx.node(), + stupid=True), 0) + self.assertEqual(verify.verify(repo.ui, repo, rev=ctx.node(), + stupid=False), 0) # check a startrev clone if layout == 'single' and name not in _skipshallow: @@ -59,7 +62,18 @@ def _do_case(self, name, stupid, layout) repo.ui.pushbuffer() self.assertEqual(0, verify.verify(repo.ui, shallowrepo, - rev=shallowtip.node())) + rev=shallowtip.node(), + stupid=True)) + self.assertEqual(0, verify.verify(repo.ui, shallowrepo, + rev=shallowtip.node(), + stupid=False)) + + stupidui = ui.ui(repo.ui) + stupidui.config('hgsubversion', 'stupid', True) + self.assertEqual(verify.verify(stupidui, repo, rev=ctx.node(), + stupid=True), 0) + self.assertEqual(verify.verify(stupidui, repo, rev=ctx.node(), + stupid=False), 0) # viewing diff's of lists of files is easier on the eyes self.assertMultiLineEqual('\n'.join(fulltip), '\n'.join(shallowtip), diff --git a/tests/test_fetch_mappings.py b/tests/test_fetch_mappings.py --- a/tests/test_fetch_mappings.py +++ b/tests/test_fetch_mappings.py @@ -12,8 +12,8 @@ from mercurial import util as hgutil from hgsubversion import maps from hgsubversion import svncommands -from hgsubversion import verify from hgsubversion import util +from hgsubversion import verify class MapTests(test_util.TestBase): @property diff --git a/tests/test_utility_commands.py b/tests/test_utility_commands.py --- 
a/tests/test_utility_commands.py +++ b/tests/test_utility_commands.py @@ -246,15 +246,16 @@ class UtilityTests(test_util.TestBase): authors=author_path) self.assertMultiLineEqual(open(author_path).read(), 'Augie=\nevil=\n') - def test_svnverify(self): + def test_svnverify(self, stupid=False): repo, repo_path = self.load_and_fetch('binaryfiles.svndump', - noupdate=False) - ret = verify.verify(self.ui(), repo, [], rev=1) + noupdate=False, stupid=stupid) + ret = verify.verify(self.ui(), repo, [], rev=1, stupid=stupid) self.assertEqual(0, ret) repo_path = self.load_svndump('binaryfiles-broken.svndump') u = self.ui() u.pushbuffer() - ret = verify.verify(u, repo, [test_util.fileurl(repo_path)], rev=1) + ret = verify.verify(u, repo, [test_util.fileurl(repo_path)], + rev=1, stupid=stupid) output = u.popbuffer() self.assertEqual(1, ret) output = re.sub(r'file://\S+', 'file://', output) @@ -265,16 +266,20 @@ unexpected file: binary1 missing file: binary3 """, output) - def test_svnverify_corruption(self): + def test_svnverify_stupid(self): + self.test_svnverify(True) + + def test_corruption(self, stupid=False): SUCCESS = 0 FAILURE = 1 repo, repo_path = self.load_and_fetch('correct.svndump', layout='single', - subdir='') + subdir='', stupid=stupid) ui = self.ui() - self.assertEqual(SUCCESS, verify.verify(ui, self.repo, rev='tip')) + self.assertEqual(SUCCESS, verify.verify(ui, self.repo, rev='tip', + stupid=stupid)) corrupt_source = test_util.fileurl(self.load_svndump('corrupt.svndump')) @@ -300,6 +305,9 @@ missing file: binary3 self.assertEqual((FAILURE, expected), (code, actual)) + def test_corruption_stupid(self): + self.test_corruption(True) + def suite(): all_tests = [unittest.TestLoader().loadTestsFromTestCase(UtilityTests), ]