changeset 889:7a98fbadcae9

revsets: huge speedups for fromsvn and svnrev. I have an hgsubversion repo that contains over 300,000 commits. In that repo, this patch improves performance as follows: `hg --time log -r 'first(fromsvn())'` (before: 40.3 sec; after: 0.8 sec) and `hg --time log -r 'svnrev(350000)'` (before: 40.3 sec; after: 0.1 sec). Note: the performance of these revset implementations is very sensitive to doing as little work as possible per line of the rev_map file. I originally attempted to hide the file format details by hoisting the parsing of each line up into RevMap.readmapfile, but the current, less abstract code is dramatically (10x or more) faster. If the revmap file is missing, we error out and print a message describing what to do.
author Bryan O'Sullivan <bryano@fb.com>
date Sat, 12 May 2012 05:38:34 -0700
parents c6388ed0ec0a
children 78db88de9622 3bfb7e985c47
files hgsubversion/maps.py hgsubversion/util.py
diffstat 2 files changed, 50 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/hgsubversion/maps.py
+++ b/hgsubversion/maps.py
@@ -1,5 +1,6 @@
 ''' Module for self-contained maps. '''
 
+import errno
 import os
 from mercurial import util as hgutil
 from mercurial import node
@@ -182,7 +183,8 @@ class RevMap(dict):
 
     def __init__(self, repo):
         dict.__init__(self)
-        self.path = os.path.join(repo.path, 'svn', 'rev_map')
+        self.path = self.mappath(repo)
+        self.repo = repo
         self.ypath = os.path.join(repo.path, 'svn', 'lastpulled')
         # TODO(durin42): Consider moving management of the youngest
         # file to svnmeta itself rather than leaving it here.
@@ -212,13 +214,26 @@ class RevMap(dict):
         check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum
         return sorted(filter(check, self.iteritems()), reverse=True)
 
-    def _load(self):
-        f = open(self.path)
+    @staticmethod
+    def mappath(repo):
+        return os.path.join(repo.path, 'svn', 'rev_map')
+
+    @classmethod
+    def readmapfile(cls, repo, missingok=True):
+        try:
+            f = open(cls.mappath(repo))
+        except IOError, err:
+            if not missingok or err.errno != errno.ENOENT:
+                raise
+            return iter([])
         ver = int(f.readline())
-        if ver != self.VERSION:
+        if ver != cls.VERSION:
             print 'revmap too new -- please upgrade'
             raise NotImplementedError
-        for l in f:
+        return f
+
+    def _load(self):
+        for l in self.readmapfile(self.repo):
             revnum, ha, branch = l.split(' ', 2)
             if branch == '\n':
                 branch = None
@@ -230,7 +245,6 @@ class RevMap(dict):
             if revnum < self.oldest or not self.oldest:
                 self.oldest = revnum
             dict.__setitem__(self, (revnum, branch), node.bin(ha))
-        f.close()
 
     def _write(self):
         f = open(self.path, 'w')
--- a/hgsubversion/util.py
+++ b/hgsubversion/util.py
@@ -1,3 +1,4 @@
+import errno
 import re
 import os
 import urllib
@@ -13,6 +14,8 @@ try:
 except ImportError:
     pass
 
+import maps
+
 ignoredfiles = set(['.hgtags', '.hgsvnexternals', '.hgsub', '.hgsubstate'])
 
 b_re = re.compile(r'^\+\+\+ b\/([^\n]*)', re.MULTILINE)
@@ -279,11 +282,17 @@ def revset_fromsvn(repo, subset, x):
     '''
     args = revset.getargs(x, 0, 0, "fromsvn takes no arguments")
 
-    def matches(r):
-        convertinfo = repo[r].extra().get('convert_revision', '')
-        return convertinfo[:4] == 'svn:'
-
-    return [r for r in subset if matches(r)]
+    rev = repo.changelog.rev
+    bin = node.bin
+    try:
+        svnrevs = set(rev(bin(l.split(' ', 2)[1]))
+                      for l in maps.RevMap.readmapfile(repo, missingok=False))
+        return filter(svnrevs.__contains__, subset)
+    except IOError, err:
+        if err.errno != errno.ENOENT:
+            raise
+        raise hgutil.Abort("svn metadata is missing - "
+                           "run 'hg svn rebuildmeta' to reconstruct it")
 
 def revset_svnrev(repo, subset, x):
     '''``svnrev(number)``
@@ -294,17 +303,25 @@ def revset_svnrev(repo, subset, x):
     rev = revset.getstring(args[0],
                            "the argument to svnrev() must be a number")
     try:
-        rev = int(rev)
+        revnum = int(rev)
     except ValueError:
         raise error.ParseError("the argument to svnrev() must be a number")
 
-    def matches(r):
-        convertinfo = repo[r].extra().get('convert_revision', '')
-        if convertinfo[:4] != 'svn:':
-            return False
-        return int(convertinfo[40:].rsplit('@', 1)[-1]) == rev
-
-    return [r for r in subset if matches(r)]
+    rev = rev + ' '
+    revs = []
+    try:
+        for l in maps.RevMap.readmapfile(repo, missingok=False):
+            if l.startswith(rev):
+                n = l.split(' ', 2)[1]
+                r = repo[node.bin(n)].rev()
+                if r in subset:
+                    revs.append(r)
+        return revs
+    except IOError, err:
+        if err.errno != errno.ENOENT:
+            raise
+        raise hgutil.Abort("svn metadata is missing - "
+                           "run 'hg svn rebuildmeta' to reconstruct it")
 
 revsets = {
     'fromsvn': revset_fromsvn,