changeset 300:4aba7542f6a9

Various cleanups, cosmetics and removal of superfluous assertions.
author Dan Villiom Podlaski Christiansen <danchr@gmail.com>
date Fri, 27 Mar 2009 03:16:21 +0100
parents 3e27514d575c
children 79440ed81011
files fetch_command.py hg_delta_editor.py svnwrap/svn_swig_wrapper.py
diffstat 3 files changed, 106 insertions(+), 57 deletions(-)
--- a/fetch_command.py
+++ b/fetch_command.py
@@ -28,10 +28,12 @@ def fetch_revisions(ui, svn_url, hg_repo
                     **opts):
     """pull new revisions from Subversion
     """
+
     svn_url = util.normalize_url(svn_url)
     old_encoding = merc_util._encoding
     merc_util._encoding = 'UTF-8'
     skipto_rev=int(skipto_rev)
+
     have_replay = not stupid
     if have_replay and not callable(
         delta.svn_txdelta_apply(None, None, None)[0]): #pragma: no cover
@@ -41,7 +43,6 @@ def fetch_revisions(ui, svn_url, hg_repo
                   ' contribute a patch to use the ctypes bindings instead'
                   ' of SWIG.\n')
         have_replay = False
-    initializing_repo = False
     svn = svnwrap.SubversionRepo(svn_url, username=merc_util.getuser())
     author_host = "@%s" % svn.uuid
     tag_locations = tag_locations.split(',')
@@ -52,7 +53,9 @@ def fetch_revisions(ui, svn_url, hg_repo
                                                  tag_locations=tag_locations,
                                                  authors=authors,
                                                  filemap=filemap)
+
     if os.path.exists(hg_editor.uuid_file):
+        initializing_repo = False
         uuid = open(hg_editor.uuid_file).read()
         assert uuid == svn.uuid
         start = hg_editor.last_known_revision()
--- a/hg_delta_editor.py
+++ b/hg_delta_editor.py
@@ -244,10 +244,12 @@ class HgChangeReceiver(delta.Editor):
     def _path_and_branch_for_path(self, path, existing=True):
         return self._split_branch_path(path, existing=existing)[:2]
 
+    def _branch_for_path(self, path, existing=True):
+        return self._path_and_branch_for_path(path, existing=existing)[1]
+
     def _localname(self, path):
         """Compute the local name for a branch located at path.
         """
-        assert not path.startswith('tags/')
         if path == 'trunk':
             return None
         elif path.startswith('branches/'):
@@ -272,8 +274,6 @@ class HgChangeReceiver(delta.Editor):
         known.
         """
         path = self._normalize_path(path)
-        if path.startswith('tags/'):
-            return None, None, None
         test = ''
         path_comps = path.split('/')
         while self._localname(test) not in self.branches and len(path_comps):
@@ -372,6 +372,10 @@ class HgChangeReceiver(delta.Editor):
 
         Otherwise, returns False.
         """
+        return self._split_tag_path(path)[-1] or False
+
+    def _split_tag_path(self, path):
+        r = (None, None, None)
         path = self._normalize_path(path)
         for tags_path in self.tag_locations:
             if path and (path.startswith(tags_path) and
@@ -885,8 +889,8 @@ class HgChangeReceiver(delta.Editor):
         # parentctx is not an ancestor of childctx, files are unrelated
         return False
 
-    def add_file(self, path, parent_baton, copyfrom_path,
-                 copyfrom_revision, file_pool=None):
+    def add_file(self, path, parent_baton=None, copyfrom_path=None,
+                 copyfrom_revision=None, file_pool=None):
         self.current_file = 'foobaz'
         self.base_revision = None
         if path in self.deleted_files:
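
The hg_delta_editor.py changes above all follow one refactoring shape: the tuple-returning splitter becomes the single source of truth, and the older entry points shrink to thin wrappers that index into its result. A rough standalone sketch of that layering, using hypothetical names rather than the real HgChangeReceiver helpers:

class PathClassifier(object):
    def split_branch_path(self, path):
        # Returns (file_path, branch_name, branch_path); trunk maps to a
        # branch name of None, mirroring the _localname() convention above.
        if path.startswith('branches/'):
            branch, _, filepath = path[len('branches/'):].partition('/')
            return filepath, branch, 'branches/' + branch
        if path == 'trunk' or path.startswith('trunk/'):
            return path[len('trunk/'):], None, 'trunk'
        return None, None, None

    def path_and_branch_for_path(self, path):
        return self.split_branch_path(path)[:2]

    def branch_for_path(self, path):
        # The one-liner added in this changeset takes exactly this form.
        return self.path_and_branch_for_path(path)[1]
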
--- a/svnwrap/svn_swig_wrapper.py
+++ b/svnwrap/svn_swig_wrapper.py
@@ -5,12 +5,16 @@ import shutil
 import sys
 import tempfile
 import hashlib
+import collections
+import gc
 
 from svn import client
 from svn import core
 from svn import delta
 from svn import ra
 
+from mercurial import util as hgutil
+
 def version():
     return '%d.%d.%d' % (core.SVN_VER_MAJOR, core.SVN_VER_MINOR, core.SVN_VER_MICRO)
 
@@ -43,8 +47,8 @@ class RaCallbacks(ra.Callbacks):
     def get_client_string(pool):
         return 'hgsubversion'
 
-
 def user_pass_prompt(realm, default_username, ms, pool): #pragma: no cover
+    # FIXME: should use getpass() and username() from mercurial.ui
     creds = core.svn_auth_cred_simple_t()
     creds.may_save = ms
     if default_username:
@@ -146,6 +150,8 @@ class SubversionRepo(object):
         self.uname = username
         self.auth_baton_pool = core.Pool()
         self.auth_baton = _create_auth_baton(self.auth_baton_pool)
+        # self.init_ra_and_client() assumes that a pool already exists
+        self.pool = core.Pool()
 
         self.init_ra_and_client()
         self.uuid = ra.get_uuid(self.ra, self.pool)
@@ -160,7 +166,25 @@ class SubversionRepo(object):
         """Initializes the RA and client layers, because sometimes getting
         unified diffs runs the remote server out of open files.
         """
-        # while we're in here we'll recreate our pool
+        # Debugging code; retained for possible later use.
+        if False:
+            gc.collect()
+            import pympler.muppy.tracker
+            try:
+                self.memory_tracker
+                try:
+                    self.memory_tracker.print_diff(self.memory_base)
+                except:
+                    print 'HOP'
+                    self.memory_base = self.memory_tracker.create_summary()
+            except:
+                print 'HEP'
+                self.memory_tracker = pympler.muppy.tracker.SummaryTracker()
+
+        # while we're in here we'll recreate our pool, but first, we clear it
+        # and destroy it so that possible leaks cause fatal errors.
+        self.pool.clear()
+        self.pool.destroy()
         self.pool = core.Pool()
         self.client_context = client.create_context()
 
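
The pool handling above relies only on the SWIG Pool object supporting clear() and destroy(), both visible in the hunk itself. A minimal sketch of the recycle-between-batches idea, with the actual RA and client re-initialization elided:

from svn import core

def recycle_pool(old_pool):
    # Tear the old pool down first so that anything still referencing it
    # fails loudly instead of leaking silently, then hand back a fresh
    # pool for the next batch of RA/client calls.
    if old_pool is not None:
        old_pool.clear()
        old_pool.destroy()
    return core.Pool()
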
@@ -185,6 +209,9 @@ class SubversionRepo(object):
 
     def branches(self):
         """Get the branches defined in this repo assuming a standard layout.
+
+        This method should be eliminated; this class does not have
+        sufficient knowledge to yield all known branches.
         """
         branches = self.list_dir('branches').keys()
         branch_info = {}
@@ -207,11 +234,22 @@ class SubversionRepo(object):
         """Get the current tags in this repo assuming a standard layout.
 
         This returns a dictionary of tag: (source path, source rev)
+
+        This method should be eliminated; this class does not have
+        sufficient knowledge to yield all known tags.
         """
         return self.tags_at_rev(self.HEAD)
     tags = property(tags)
 
     def tags_at_rev(self, revision):
+        """Get the tags in this repo at the given revision, assuming a
+        standard layout.
+
+        This returns a dictionary of tag: (source path, source rev)
+
+        This method should be eliminated; this class does not have
+        sufficient knowledge to yield all known tags.
+        """
         try:
             tags = self.list_dir('tags', revision=revision).keys()
         except core.SubversionException, e:
@@ -277,53 +315,62 @@ class SubversionRepo(object):
         # convinced there must be some kind of svn bug here.
         #return self.fetch_history_at_paths(['tags', 'trunk', 'branches'],
         #                                   start=start)
-        # this does the same thing, but at the repo root + filtering. It's
-        # kind of tough cookies, sadly.
-        for r in self.fetch_history_at_paths([''], start=start,
-                                             chunk_size=chunk_size):
-            should_yield = False
-            i = 0
-            paths = list(r.paths.keys())
-            while i < len(paths) and not should_yield:
-                p = paths[i]
-                if (p.startswith('trunk') or p.startswith('tags')
-                    or p.startswith('branches')):
-                    should_yield = True
-                i += 1
-            if should_yield:
-                yield r
-
+        # However, we no longer need such filtering, as we gracefully handle
+        # branches located at arbitrary locations.
+        return self.fetch_history_at_paths([''], start=start, stop=stop,
+                                           chunk_size=chunk_size)
 
     def fetch_history_at_paths(self, paths, start=None, stop=None,
-                               chunk_size=1000):
-        revisions = []
-        def callback(paths, revnum, author, date, message, pool):
-            r = Revision(revnum, author, message, date, paths,
-                         strip_path=self.subdir)
-            revisions.append(r)
+                               chunk_size=_chunk_size):
+        '''TODO: This method should be merged with self.revisions() as
+        they are now functionally equivalent.'''
         if not start:
             start = self.START
         if not stop:
             stop = self.HEAD
         while stop > start:
-            ra.get_log(self.ra,
-                       paths,
-                       start+1,
-                       stop,
-                       chunk_size, #limit of how many log messages to load
-                       True, # don't need to know changed paths
-                       True, # stop on copies
-                       callback,
-                       self.pool)
-            if len(revisions) < chunk_size:
-                # this means there was no history for the path, so force the
-                # loop to exit
-                start = stop
+            def callback(paths, revnum, author, date, message, pool):
+                r = Revision(revnum, author, message, date, paths,
+                             strip_path=self.subdir)
+                revisions.append(r)
+            # use a queue; we only access revisions in a FIFO manner
+            revisions = collections.deque()
+
+            try:
+                # TODO: using min(start + chunk_size, stop) may be preferable;
+                #       ra.get_log(), even with chunk_size set, takes a while
+            #       when converting the 65k+ revisions in LLVM.
+                ra.get_log(self.ra,
+                           paths,
+                           start+1,
+                           stop,
+                           chunk_size, #limit of how many log messages to load
+                           True, # we want to know changed paths
+                           True, # stop on copies
+                           callback,
+                           self.pool)
+            except core.SubversionException, e:
+                if e.apr_err not in [core.SVN_ERR_FS_NOT_FOUND]:
+                    raise
+                else:
+                    raise hgutil.Abort('%s not found at revision %d!'
+                                       % (self.subdir.rstrip('/'), stop))
+
+            while len(revisions) > 1:
+                yield revisions.popleft()
+                # Now is a good time to do a quick garbage collection.
+                gc.collect(0)
+
+            if len(revisions) == 0:
+                # exit the loop; there is no history for the path.
+                break
             else:
-                start = revisions[-1].revnum
-            while len(revisions) > 0:
-                yield revisions[0]
-                revisions.pop(0)
+                r = revisions.popleft()
+                start = r.revnum
+                yield r
+            self.init_ra_and_client()
+            # Now is a good time to do a thorough garbage collection.
+            gc.collect()
 
     def commit(self, paths, message, file_data, base_revision, addeddirs,
                deleteddirs, properties, copies):
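
The rewritten fetch_history_at_paths() above is now a generator that pages through the log: each ra.get_log() call fills a deque, every entry but the last is yielded immediately, and the final entry both gets yielded and supplies the revision number at which the next chunk resumes. A stripped-down sketch of that pagination shape, independent of the SWIG bindings (fetch_chunk is a hypothetical stand-in for ra.get_log(), and the yielded items are assumed to carry a revnum attribute like the Revision objects above):

import collections

def paged_history(fetch_chunk, start, stop, chunk_size=1000):
    while stop > start:
        # fetch_chunk returns at most chunk_size entries covering (start, stop]
        chunk = collections.deque(fetch_chunk(start + 1, stop, chunk_size))
        while len(chunk) > 1:
            yield chunk.popleft()
        if not chunk:
            break                 # no history left for this path
        last = chunk.popleft()
        start = last.revnum       # the next chunk resumes here
        yield last
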
@@ -342,6 +389,7 @@ class SubversionRepo(object):
         checksum = []
         # internal dir batons can fall out of scope and get GCed before svn is
         # done with them. This prevents that (credit to gvn for the idea).
+        # TODO: verify that these are not the cause of our leaks
         batons = [edit_baton, ]
         def driver_cb(parent, path, pool):
             if not parent:
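
The batons list mentioned above guards against a SWIG lifetime hazard: the Python wrappers around directory and file batons can be garbage-collected while the underlying edit is still in progress, taking their pools with them. Keeping every baton in an ordinary Python list pins them until the edit is closed. A minimal illustration of that keep-alive pattern (acquire_baton is a hypothetical stand-in for the real editor.open_directory()/add_file() calls):

def drive_edit(acquire_baton, paths):
    keepalive = []                     # pins every baton for the whole edit
    for path in paths:
        baton = acquire_baton(path)
        # Without this append, CPython could collect the wrapper (and free
        # its pool) before Subversion is done with the underlying baton.
        keepalive.append(baton)
    return keepalive
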
@@ -374,7 +422,7 @@ class SubversionRepo(object):
                         frompath = self.svn_url + '/' + frompath
                     baton = editor.add_file(path, parent, frompath, fromrev, pool)
                 except (core.SubversionException, TypeError), e: #pragma: no cover
-                    print e.message
+                    print e
                     raise
             elif action == 'delete':
                 baton = editor.delete_entry(path, base_revision, parent, pool)
@@ -404,14 +452,11 @@ class SubversionRepo(object):
         editor.close_edit(edit_baton, self.pool)
 
     def get_replay(self, revision, editor, oldest_rev_i_have=0):
-        # this method has a tendency to chew through RAM if you don't re-init
-        self.init_ra_and_client()
         e_ptr, e_baton = delta.make_editor(editor)
         try:
             ra.replay(self.ra, revision, oldest_rev_i_have, True, e_ptr,
                       e_baton, self.pool)
         except core.SubversionException, e: #pragma: no cover
-            # can I depend on this number being constant?
             if (e.apr_err == core.SVN_ERR_RA_NOT_IMPLEMENTED or
                 e.apr_err == core.SVN_ERR_UNSUPPORTED_FEATURE):
                 raise SubversionRepoCanNotReplay, ('This Subversion server '
@@ -425,9 +470,6 @@ class SubversionRepo(object):
         """
         if not self.hasdiff3:
             raise SubversionRepoCanNotDiff()
-        # works around an svn server keeping too many open files (observed
-        # in an svnserve from the 1.2 era)
-        self.init_ra_and_client()
 
         assert path[0] != '/'
         url = self.svn_url + '/' + path
@@ -491,7 +533,7 @@ class SubversionRepo(object):
             notfound = (core.SVN_ERR_FS_NOT_FOUND,
                         core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)
             if e.apr_err in notfound: # File not found
-                raise IOError()
+                raise IOError, e.args[0]
             raise
         if mode  == 'l':
             linkprefix = "link "
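
The hunk above keeps translating the two not-found codes into IOError, and now preserves the original Subversion message when doing so. A compact sketch of that error-translation idiom (get_file_contents is a hypothetical stand-in for the real retrieval call):

from svn import core

NOTFOUND = (core.SVN_ERR_FS_NOT_FOUND, core.SVN_ERR_RA_DAV_PATH_NOT_FOUND)

def read_or_ioerror(get_file_contents, path, revision):
    try:
        return get_file_contents(path, revision)
    except core.SubversionException, e:
        if e.apr_err in NOTFOUND:
            # Surface missing paths as an ordinary IOError, keeping the
            # original Subversion error message.
            raise IOError(e.args[0])
        raise
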
@@ -533,11 +575,11 @@ class SubversionRepo(object):
         revision.
         """
         dirpath = dirpath.strip('/')
-        pool = core.Pool()
         rpath = '/'.join([self.svn_url, dirpath]).strip('/')
         rev = optrev(revision)
         try:
-            entries = client.ls(rpath, rev, True, self.client_context, pool)
+            entries = client.ls(rpath, rev, True, self.client_context,
+                                self.pool)
         except core.SubversionException, e:
             if e.apr_err == core.SVN_ERR_FS_NOT_FOUND:
                 raise IOError('%s cannot be found at r%d' % (dirpath, revision))