# HG changeset patch # User Dan Villiom Podlaski Christiansen # Date 1238120181 -3600 # Node ID 4aba7542f6a9af262abfcc76914d408c67de51eb # Parent 3e27514d575cabd9f0338b4c5ffe34dc59bb2297 Various cleanups, cosmetics and removal of superfluous assertions. diff --git a/fetch_command.py b/fetch_command.py --- a/fetch_command.py +++ b/fetch_command.py @@ -28,10 +28,12 @@ def fetch_revisions(ui, svn_url, hg_repo **opts): """pull new revisions from Subversion """ + svn_url = util.normalize_url(svn_url) old_encoding = merc_util._encoding merc_util._encoding = 'UTF-8' skipto_rev=int(skipto_rev) + have_replay = not stupid if have_replay and not callable( delta.svn_txdelta_apply(None, None, None)[0]): #pragma: no cover @@ -41,7 +43,6 @@ def fetch_revisions(ui, svn_url, hg_repo ' contribute a patch to use the ctypes bindings instead' ' of SWIG.\n') have_replay = False - initializing_repo = False svn = svnwrap.SubversionRepo(svn_url, username=merc_util.getuser()) author_host = "@%s" % svn.uuid tag_locations = tag_locations.split(',') @@ -52,7 +53,9 @@ def fetch_revisions(ui, svn_url, hg_repo tag_locations=tag_locations, authors=authors, filemap=filemap) + if os.path.exists(hg_editor.uuid_file): + initializing_repo = False uuid = open(hg_editor.uuid_file).read() assert uuid == svn.uuid start = hg_editor.last_known_revision() diff --git a/hg_delta_editor.py b/hg_delta_editor.py --- a/hg_delta_editor.py +++ b/hg_delta_editor.py @@ -244,10 +244,12 @@ class HgChangeReceiver(delta.Editor): def _path_and_branch_for_path(self, path, existing=True): return self._split_branch_path(path, existing=existing)[:2] + def _branch_for_path(self, path, existing=True): + return self._path_and_branch_for_path(path, existing=existing)[1] + def _localname(self, path): """Compute the local name for a branch located at path. """ - assert not path.startswith('tags/') if path == 'trunk': return None elif path.startswith('branches/'): @@ -272,8 +274,6 @@ class HgChangeReceiver(delta.Editor): known. """ path = self._normalize_path(path) - if path.startswith('tags/'): - return None, None, None test = '' path_comps = path.split('/') while self._localname(test) not in self.branches and len(path_comps): @@ -372,6 +372,10 @@ class HgChangeReceiver(delta.Editor): Otherwise, returns False. """ + return self._split_tag_path(path)[-1] or False + + def _split_tag_path(self, path): + r = (None, None, None) path = self._normalize_path(path) for tags_path in self.tag_locations: if path and (path.startswith(tags_path) and @@ -885,8 +889,8 @@ class HgChangeReceiver(delta.Editor): # parentctx is not an ancestor of childctx, files are unrelated return False - def add_file(self, path, parent_baton, copyfrom_path, - copyfrom_revision, file_pool=None): + def add_file(self, path, parent_baton=None, copyfrom_path=None, + copyfrom_revision=None, file_pool=None): self.current_file = 'foobaz' self.base_revision = None if path in self.deleted_files: diff --git a/svnwrap/svn_swig_wrapper.py b/svnwrap/svn_swig_wrapper.py --- a/svnwrap/svn_swig_wrapper.py +++ b/svnwrap/svn_swig_wrapper.py @@ -5,12 +5,16 @@ import shutil import sys import tempfile import hashlib +import collections +import gc from svn import client from svn import core from svn import delta from svn import ra +from mercurial import util as hgutil + def version(): return '%d.%d.%d' % (core.SVN_VER_MAJOR, core.SVN_VER_MINOR, core.SVN_VER_MICRO) @@ -43,8 +47,8 @@ class RaCallbacks(ra.Callbacks): def get_client_string(pool): return 'hgsubversion' - def user_pass_prompt(realm, default_username, ms, pool): #pragma: no cover + # FIXME: should use getpass() and username() from mercurial.ui creds = core.svn_auth_cred_simple_t() creds.may_save = ms if default_username: @@ -146,6 +150,8 @@ class SubversionRepo(object): self.uname = username self.auth_baton_pool = core.Pool() self.auth_baton = _create_auth_baton(self.auth_baton_pool) + # self.init_ra_and_client() assumes that a pool already exists + self.pool = core.Pool() self.init_ra_and_client() self.uuid = ra.get_uuid(self.ra, self.pool) @@ -160,7 +166,25 @@ class SubversionRepo(object): """Initializes the RA and client layers, because sometimes getting unified diffs runs the remote server out of open files. """ - # while we're in here we'll recreate our pool + # Debugging code; retained for possible later use. + if False: + gc.collect() + import pympler.muppy.tracker + try: + self.memory_tracker + try: + self.memory_tracker.print_diff(self.memory_base) + except: + print 'HOP' + self.memory_base = self.memory_tracker.create_summary() + except: + print 'HEP' + self.memory_tracker = pympler.muppy.tracker.SummaryTracker() + + # while we're in here we'll recreate our pool, but first, we clear it + # and destroy it to make possible leaks cause fatal errors. + self.pool.clear() + self.pool.destroy() self.pool = core.Pool() self.client_context = client.create_context() @@ -185,6 +209,9 @@ class SubversionRepo(object): def branches(self): """Get the branches defined in this repo assuming a standard layout. + + This method should be eliminated; this class does not have + sufficient knowledge to yield all known tags. """ branches = self.list_dir('branches').keys() branch_info = {} @@ -207,11 +234,22 @@ class SubversionRepo(object): """Get the current tags in this repo assuming a standard layout. This returns a dictionary of tag: (source path, source rev) + + This method should be eliminated; this class does not have + sufficient knowledge to yield all known tags. """ return self.tags_at_rev(self.HEAD) tags = property(tags) def tags_at_rev(self, revision): + """Get the tags in this repo at the given revision, assuming a + standard layout. + + This returns a dictionary of tag: (source path, source rev) + + This method should be eliminated; this class does not have + sufficient knowledge to yield all known tags. + """ try: tags = self.list_dir('tags', revision=revision).keys() except core.SubversionException, e: @@ -277,53 +315,62 @@ class SubversionRepo(object): # convinced there must be some kind of svn bug here. #return self.fetch_history_at_paths(['tags', 'trunk', 'branches'], # start=start) - # this does the same thing, but at the repo root + filtering. It's - # kind of tough cookies, sadly. - for r in self.fetch_history_at_paths([''], start=start, - chunk_size=chunk_size): - should_yield = False - i = 0 - paths = list(r.paths.keys()) - while i < len(paths) and not should_yield: - p = paths[i] - if (p.startswith('trunk') or p.startswith('tags') - or p.startswith('branches')): - should_yield = True - i += 1 - if should_yield: - yield r - + # However, we no longer need such filtering, as we gracefully handle + # branches located at arbitrary locations. + return self.fetch_history_at_paths([''], start=start, stop=stop, + chunk_size=chunk_size) def fetch_history_at_paths(self, paths, start=None, stop=None, - chunk_size=1000): - revisions = [] - def callback(paths, revnum, author, date, message, pool): - r = Revision(revnum, author, message, date, paths, - strip_path=self.subdir) - revisions.append(r) + chunk_size=_chunk_size): + '''TODO: This method should be merged with self.revisions() as + they are now functionally equivalent.''' if not start: start = self.START if not stop: stop = self.HEAD while stop > start: - ra.get_log(self.ra, - paths, - start+1, - stop, - chunk_size, #limit of how many log messages to load - True, # don't need to know changed paths - True, # stop on copies - callback, - self.pool) - if len(revisions) < chunk_size: - # this means there was no history for the path, so force the - # loop to exit - start = stop + def callback(paths, revnum, author, date, message, pool): + r = Revision(revnum, author, message, date, paths, + strip_path=self.subdir) + revisions.append(r) + # use a queue; we only access revisions in a FIFO manner + revisions = collections.deque() + + try: + # TODO: using min(start + chunk_size, stop) may be preferable; + # ra.get_log(), even with chunk_size set, takes a while + # when converting the 65k+ rev. in LLVM. + ra.get_log(self.ra, + paths, + start+1, + stop, + chunk_size, #limit of how many log messages to load + True, # don't need to know changed paths + True, # stop on copies + callback, + self.pool) + except core.SubversionException, e: + if e.apr_err not in [core.SVN_ERR_FS_NOT_FOUND]: + raise + else: + raise hgutil.Abort('%s not found at revision %d!' + % (self.subdir.rstrip('/'), stop)) + + while len(revisions) > 1: + yield revisions.popleft() + # Now is a good time to do a quick garbage collection. + gc.collect(0) + + if len(revisions) == 0: + # exit the loop; there is no history for the path. + break else: - start = revisions[-1].revnum - while len(revisions) > 0: - yield revisions[0] - revisions.pop(0) + r = revisions.popleft() + start = r.revnum + yield r + self.init_ra_and_client() + # Now is a good time to do a thorough garbage colection. + gc.collect() def commit(self, paths, message, file_data, base_revision, addeddirs, deleteddirs, properties, copies): @@ -342,6 +389,7 @@ class SubversionRepo(object): checksum = [] # internal dir batons can fall out of scope and get GCed before svn is # done with them. This prevents that (credit to gvn for the idea). + # TODO: verify that these are not the cause of our leaks batons = [edit_baton, ] def driver_cb(parent, path, pool): if not parent: @@ -374,7 +422,7 @@ class SubversionRepo(object): frompath = self.svn_url + '/' + frompath baton = editor.add_file(path, parent, frompath, fromrev, pool) except (core.SubversionException, TypeError), e: #pragma: no cover - print e.message + print e raise elif action == 'delete': baton = editor.delete_entry(path, base_revision, parent, pool) @@ -404,14 +452,11 @@ class SubversionRepo(object): editor.close_edit(edit_baton, self.pool) def get_replay(self, revision, editor, oldest_rev_i_have=0): - # this method has a tendency to chew through RAM if you don't re-init - self.init_ra_and_client() e_ptr, e_baton = delta.make_editor(editor) try: ra.replay(self.ra, revision, oldest_rev_i_have, True, e_ptr, e_baton, self.pool) except core.SubversionException, e: #pragma: no cover - # can I depend on this number being constant? if (e.apr_err == core.SVN_ERR_RA_NOT_IMPLEMENTED or e.apr_err == core.SVN_ERR_UNSUPPORTED_FEATURE): raise SubversionRepoCanNotReplay, ('This Subversion server ' @@ -425,9 +470,6 @@ class SubversionRepo(object): """ if not self.hasdiff3: raise SubversionRepoCanNotDiff() - # works around an svn server keeping too many open files (observed - # in an svnserve from the 1.2 era) - self.init_ra_and_client() assert path[0] != '/' url = self.svn_url + '/' + path @@ -491,7 +533,7 @@ class SubversionRepo(object): notfound = (core.SVN_ERR_FS_NOT_FOUND, core.SVN_ERR_RA_DAV_PATH_NOT_FOUND) if e.apr_err in notfound: # File not found - raise IOError() + raise IOError, e.args[0] raise if mode == 'l': linkprefix = "link " @@ -533,11 +575,11 @@ class SubversionRepo(object): revision. """ dirpath = dirpath.strip('/') - pool = core.Pool() rpath = '/'.join([self.svn_url, dirpath]).strip('/') rev = optrev(revision) try: - entries = client.ls(rpath, rev, True, self.client_context, pool) + entries = client.ls(rpath, rev, True, self.client_context, + self.pool) except core.SubversionException, e: if e.apr_err == core.SVN_ERR_FS_NOT_FOUND: raise IOError('%s cannot be found at r%d' % (dirpath, revision))