# HG changeset patch # User Augie Fackler # Date 1466989836 14400 # Node ID 9a6bb365786159343633fd8c27bef6cc795fc7ae # Parent 797c7b58a735e6a582db140bca3a3f26255bf7db# Parent 8d8fc10d0d0a49ecac2cf6284dc5b729f1a3b9e8 Merge with stable. diff --git a/hgsubversion/editor.py b/hgsubversion/editor.py --- a/hgsubversion/editor.py +++ b/hgsubversion/editor.py @@ -581,6 +581,12 @@ class HgEditor(svnwrap.Editor): try: if not self.meta.is_path_valid(path): return + + # are we skipping this branch entirely? + br_path, branch = self.meta.split_branch_path(path)[:2] + if self.meta.skipbranch(branch): + return + try: handler(window) except AssertionError, e: # pragma: no cover diff --git a/hgsubversion/help/subversion.rst b/hgsubversion/help/subversion.rst --- a/hgsubversion/help/subversion.rst +++ b/hgsubversion/help/subversion.rst @@ -352,6 +352,25 @@ settings: Password stores are only supported with the SWIG bindings. + ``hgsubversion.revmapimpl`` + + Set the revision map implementation. ``plain`` which is simple and works + well for small repos. ``sqlite`` is a sqlite based implementation that + works better for large repos with a lot of revisions. The default is + ``plain``. + + If it is set to an implementation different from what the repo is using, + a migration will run automatically when the revision map is accessed. + + ``hgsubversion.sqlitepragmas`` + + A list of sqlite PRAGMA statements to tweak sqlite. Each item should be + in the format ``key=value`` without ``PRAGMA``, or spaces, or quotation + marks. Refer to https://www.sqlite.org/pragma.html for possible options. + + For example, setting it to ``synchronous=0, journal_mode=memory`` will + give you better performance at the cost of possible database corruption. + ``hgsubversion.stupid`` Setting this boolean option to true will force using a slower method for pulling revisions from Subversion. This method is compatible with servers diff --git a/hgsubversion/layouts/custom.py b/hgsubversion/layouts/custom.py --- a/hgsubversion/layouts/custom.py +++ b/hgsubversion/layouts/custom.py @@ -18,7 +18,9 @@ class CustomLayout(base.BaseLayout): self.svn_to_hg = {} self.hg_to_svn = {} - for hg_branch, svn_path in meta.ui.configitems('hgsubversionbranch'): + meta._gen_cachedconfig('custombranches', {}, configname='hgsubversionbranch') + + for hg_branch, svn_path in meta.custombranches.iteritems(): hg_branch = hg_branch.strip() if hg_branch == 'default' or not hg_branch: diff --git a/hgsubversion/maps.py b/hgsubversion/maps.py --- a/hgsubversion/maps.py +++ b/hgsubversion/maps.py @@ -1,92 +1,236 @@ ''' Module for self-contained maps. ''' +import collections +import contextlib import errno import os +import re +import sqlite3 +import sys +import weakref +from mercurial import error from mercurial import util as hgutil from mercurial.node import bin, hex, nullid -import svncommands +import subprocess import util -class AuthorMap(dict): - '''A mapping from Subversion-style authors to Mercurial-style - authors, and back. The data is stored persistently on disk. - - If the 'hgsubversion.defaultauthors' configuration option is set to false, - attempting to obtain an unknown author will fail with an Abort. +class BaseMap(dict): + '''A base class for the different type of mappings: author, branch, and + tags.''' + def __init__(self, ui, filepath): + super(BaseMap, self).__init__() + self._ui = ui - If the 'hgsubversion.caseignoreauthors' configuration option is set to true, - the userid from Subversion is always compared lowercase. 
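For illustration, a minimal hgrc combining these options might look like the following; the pragma list repeats the example from the help text above, and the last two values are the documented defaults for the author-map options mentioned here:

    [hgsubversion]
    revmapimpl = sqlite
    sqlitepragmas = synchronous=0, journal_mode=memory
    defaultauthors = True
    caseignoreauthors = False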
- ''' + self._commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*') + self.syntaxes = ('re', 'glob') - def __init__(self, meta): - '''Initialise a new AuthorMap. + self._filepath = filepath + self.load(filepath) - The ui argument is used to print diagnostic messages. + # Append mappings specified from the commandline. A little + # magic here: our name in the config mapping is the same as + # the class name lowercased. + clmap = util.configpath(self._ui, self.mapname()) + if clmap: + self.load(clmap) - The path argument is the location of the backing store, - typically .hg/svn/authors. + @classmethod + def mapname(cls): + return cls.__name__.lower() + + def _findkey(self, key): + '''Takes a string and finds the first corresponding key that matches + via regex''' + if not key: + return None + + # compile a new regex key if we're given a string; can't use + # hgutil.compilere since we need regex.sub + k = key + if isinstance(key, str): + k = re.compile(re.escape(key)) + + # preference goes to matching the exact pattern, i.e. 'foo' should + # first match 'foo' before trying regexes + for regex in self: + if regex.pattern == k.pattern: + return regex + + # if key isn't a string, then we are done; nothing matches + if not isinstance(key, str): + return None + + # now we test the regex; the above loop will be faster and is + # equivalent to not having regexes (i.e. just doing string compares) + for regex in self: + if regex.search(key): + return regex + return None + + def get(self, key, default=None): + '''Similar to dict.get, except we use our own matcher, _findkey.''' + if self._findkey(key): + return self[key] + return default + + def __getitem__(self, key): + '''Similar to dict.get, except we use our own matcher, _findkey. If the key is + a string, then we can use our regex matching to map its value. ''' - self.meta = meta - self.defaulthost = '' - if meta.defaulthost: - self.defaulthost = '@%s' % meta.defaulthost.lstrip('@') + k = self._findkey(key) + val = super(BaseMap, self).__getitem__(k) - self.super = super(AuthorMap, self) - self.super.__init__() - self.load(self.meta.authors_file) + # if key is a string then we can transform it using our regex, else we + # don't have enough information, so we just return the val + if isinstance(key, str): + val = k.sub(val, key) - # append authors specified from the commandline - clmap = util.configpath(self.meta.ui, 'authormap') - if clmap: - self.load(clmap) + return val - def load(self, path): - ''' Load mappings from a file at the specified path. ''' + def __setitem__(self, key, value): + '''Similar to dict.__setitem__, except we compile the string into a regex, if + need be. 
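A minimal sketch of how this regex handling behaves, going by the load() and __getitem__ code above (file contents invented for illustration): an authors file containing

    syntax: re
    jrandom@.* = J. Random <jrandom@example.com>

stores the left-hand side as a compiled pattern, so a lookup such as authors['jrandom@repo-uuid'] finds no exact entry, falls back to regex.search() inside _findkey, and returns the mapped value substituted in for the matched text.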
+ ''' + # try to find the regex already in the map + k = self._findkey(key) + # if we found one, then use it + if k: + key = k + # else make a new regex + if isinstance(key, str): + key = re.compile(re.escape(key)) + super(BaseMap, self).__setitem__(key, value) + + def __contains__(self, key): + '''Similar to dict.get, except we use our own matcher, _findkey.''' + return self._findkey(key) is not None + def load(self, path): + '''Load mappings from a file at the specified path.''' path = os.path.expandvars(path) if not os.path.exists(path): return writing = False - if path != self.meta.authors_file: - writing = open(self.meta.authors_file, 'a') + mapfile = self._filepath + if path != mapfile: + writing = open(mapfile, 'a') - self.meta.ui.debug('reading authormap from %s\n' % path) + self._ui.debug('reading %s from %s\n' % (self.mapname() , path)) f = open(path, 'r') - for number, line_org in enumerate(f): + syntax = '' + for number, line in enumerate(f): - line = line_org.split('#')[0] - if not line.strip(): + if writing: + writing.write(line) + + # strip out comments + if "#" in line: + # remove comments prefixed by an even number of escapes + line = self._commentre.sub(r'\1', line) + # fixup properly escaped comments that survived the above + line = line.replace("\\#", "#") + line = line.rstrip() + if not line: continue + if line.startswith('syntax:'): + s = line[7:].strip() + syntax = '' + if s in self.syntaxes: + syntax = s + continue + pat = syntax + for s in self.syntaxes: + if line.startswith(s + ':'): + pat = s + line = line[len(s) + 1:] + break + + # split on the first '=' try: src, dst = line.split('=', 1) except (IndexError, ValueError): - msg = 'ignoring line %i in author map %s: %s\n' - self.meta.ui.status(msg % (number, path, line.rstrip())) + msg = 'ignoring line %i in %s %s: %s\n' + self._ui.status(msg % (number, self.mapname(), path, + line.rstrip())) continue src = src.strip() dst = dst.strip() - if self.meta.caseignoreauthors: - src = src.lower() - - if writing: - if not src in self: - self.meta.ui.debug('adding author %s to author map\n' % src) - elif dst != self[src]: - msg = 'overriding author: "%s" to "%s" (%s)\n' - self.meta.ui.status(msg % (self[src], dst, src)) - writing.write(line_org) - + if pat != 're': + src = re.escape(src) + if pat == 'glob': + src = src.replace('\\*', '.*') + src = re.compile(src) + + if src not in self: + self._ui.debug('adding %s to %s\n' % (src, self.mapname())) + elif dst != self[src]: + msg = 'overriding %s: "%s" to "%s" (%s)\n' + self._ui.status(msg % (self.mapname(), self[src], dst, src)) self[src] = dst f.close() if writing: writing.close() +class AuthorMap(BaseMap): + '''A mapping from Subversion-style authors to Mercurial-style + authors, and back. The data is stored persistently on disk. + + If the 'hgsubversion.defaultauthors' configuration option is set to false, + attempting to obtain an unknown author will fail with an Abort. + + If the 'hgsubversion.caseignoreauthors' configuration option is set to true, + the userid from Subversion is always compared lowercase. + ''' + + def __init__(self, ui, filepath, defaulthost, caseignoreauthors, + mapauthorscmd, defaultauthors): + '''Initialise a new AuthorMap. + + The ui argument is used to print diagnostic messages. + + The path argument is the location of the backing store, + typically .hg/svn/authors. 
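One of the new constructor arguments, mapauthorscmd, is worth a concrete sketch (it is handled in __getitem__ below; the command name here is invented): the configured command is %-formatted with the Subversion author and its stripped stdout becomes the Mercurial author, so an hgrc entry along the lines of

    [hgsubversion]
    mapauthorscmd = /usr/local/bin/svn-to-hg-author '%s'

lets an external script resolve authors missing from the authors file; a non-zero exit status aborts with 'map author command "%s" exited with error'.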
+ ''' + if defaulthost: + self.defaulthost = '@%s' % defaulthost.lstrip('@') + else: + self.defaulthost = '' + self._caseignoreauthors = caseignoreauthors + self._mapauthorscmd = mapauthorscmd + self._defaulthost = defaulthost + self._defaultauthors = defaultauthors + + super(AuthorMap, self).__init__(ui, filepath) + + def _lowercase(self, key): + '''Determine whether or not to lowercase a str or regex using the + meta.caseignoreauthors.''' + k = key + if self._caseignoreauthors: + if isinstance(key, str): + k = key.lower() + else: + k = re.compile(key.pattern.lower()) + return k + + def __setitem__(self, key, value): + '''Similar to dict.__setitem__, except we check caseignoreauthors to + use lowercase string or not + ''' + super(AuthorMap, self).__setitem__(self._lowercase(key), value) + + def __contains__(self, key): + '''Similar to dict.__contains__, except we check caseignoreauthors to + use lowercase string or not + ''' + return super(AuthorMap, self).__contains__(self._lowercase(key)) + def __getitem__(self, author): ''' Similar to dict.__getitem__, except in case of an unknown author. In such cases, a new value is generated and added to the dictionary @@ -94,20 +238,34 @@ class AuthorMap(dict): if author is None: author = '(no author)' + if not isinstance(author, str): + return super(AuthorMap, self).__getitem__(author) + search_author = author - if self.meta.caseignoreauthors: + if self._caseignoreauthors: search_author = author.lower() + result = None if search_author in self: - result = self.super.__getitem__(search_author) - elif self.meta.defaultauthors: - self[author] = result = '%s%s' % (author, self.defaulthost) - msg = 'substituting author "%s" for default "%s"\n' - self.meta.ui.debug(msg % (author, result)) - else: - msg = 'author %s has no entry in the author map!' - raise hgutil.Abort(msg % author) - self.meta.ui.debug('mapping author "%s" to "%s"\n' % (author, result)) + result = super(AuthorMap, self).__getitem__(search_author) + elif self._mapauthorscmd: + cmd = self._mapauthorscmd % author + process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE) + output, err = process.communicate() + retcode = process.poll() + if retcode: + msg = 'map author command "%s" exited with error' + raise hgutil.Abort(msg % cmd) + self[author] = result = output.strip() + if not result: + if self._defaultauthors: + self[author] = result = '%s%s' % (author, self.defaulthost) + msg = 'substituting author "%s" for default "%s"\n' + self._ui.debug(msg % (author, result)) + else: + msg = 'author %s has no entry in the author map!' 
+ raise hgutil.Abort(msg % author) + self._ui.debug('mapping author "%s" to "%s"\n' % (author, result)) return result def reverselookup(self, author): @@ -127,24 +285,22 @@ class Tags(dict): """ VERSION = 2 - def __init__(self, meta, endrev=None): + def __init__(self, ui, filepath, endrev=None): dict.__init__(self) - self.meta = meta + self._filepath = filepath + self._ui = ui self.endrev = endrev - if os.path.isfile(self.meta.tagfile): + if os.path.isfile(self._filepath): self._load() else: self._write() def _load(self): - f = open(self.meta.tagfile) + f = open(self._filepath) ver = int(f.readline()) if ver < self.VERSION: - self.meta.ui.status('tag map outdated, running rebuildmeta...\n') - f.close() - os.unlink(self.meta.tagfile) - svncommands.rebuildmeta(self.meta.ui, self.meta.repo, ()) - return + raise error.Abort( + 'tag map outdated, please run `hg svn rebuildmeta`') elif ver != self.VERSION: raise hgutil.Abort('tagmap too new -- please upgrade') for l in f: @@ -160,7 +316,7 @@ class Tags(dict): def _write(self): assert self.endrev is None - f = open(self.meta.tagfile, 'w') + f = open(self._filepath, 'w') f.write('%s\n' % self.VERSION) f.close() @@ -181,7 +337,7 @@ class Tags(dict): if not tag: raise hgutil.Abort('tag cannot be empty') ha, revision = info - f = open(self.meta.tagfile, 'a') + f = open(self._filepath, 'a') f.write('%s %s %s\n' % (hex(ha), revision, tag)) f.close() dict.__setitem__(self, tag, ha) @@ -191,44 +347,97 @@ class RevMap(dict): VERSION = 1 - def __init__(self, meta): + lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulled_file, + default=0, deserializer=int) + + def __init__(self, revmap_path, lastpulled_path): dict.__init__(self) - self.meta = meta + self._filepath = revmap_path + self._lastpulled_file = lastpulled_path self._hashes = None + # disable iteration to have a consistent interface with SqliteRevMap + # it's less about performance since RevMap needs iteration internally + self._allowiter = False - if os.path.isfile(self.meta.revmap_file): + self.firstpulled = 0 + if os.path.isfile(self._filepath): self._load() else: self._write() def hashes(self): if self._hashes is None: - self._hashes = dict((v, k) for (k, v) in self.iteritems()) + self._hashes = dict((v, k) for (k, v) in self._origiteritems()) return self._hashes - def branchedits(self, branch, rev): - check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum - return sorted(filter(check, self.iteritems()), reverse=True) + def branchedits(self, branch, revnum): + check = lambda x: x[0][1] == branch and x[0][0] < revnum + return sorted(filter(check, self._origiteritems()), reverse=True) - @classmethod - def readmapfile(cls, path, missingok=True): + def branchmaxrevnum(self, branch, maxrevnum): + result = 0 + for num, br in self._origiterkeys(): + if br == branch and num <= maxrevnum and num > result: + result = num + return result + + @property + def lasthash(self): + lines = list(self._readmapfile()) + if not lines: + return None + return bin(lines[-1].split(' ', 2)[1]) + + def revhashes(self, revnum): + for key, value in self._origiteritems(): + if key[0] == revnum: + yield value + + def clear(self): + self._write() + dict.clear(self) + self._hashes = None + + def batchset(self, items, lastpulled): + '''Set items in batches + + items is an array of (rev num, branch, binary hash) + + For performance reason, internal in-memory state is not updated. + To get an up-to-date RevMap, reconstruct the object. 
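For reference, the plain-text file that batchset() and __setitem__ append to is, as the surrounding code shows, a version header followed by one 'revnum hexnode branch' line per converted revision, for example (revision number made up, node taken from the patch header above):

    1
    104 9a6bb365786159343633fd8c27bef6cc795fc7ae stable

where the branch field is simply left empty for entries whose branch is None.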
+ ''' + with open(self._filepath, 'a') as f: + f.write(''.join('%s %s %s\n' % (revnum, hex(binhash), br or '') + for revnum, br, binhash in items)) + self.lastpulled = lastpulled + + def _readmapfile(self): + path = self._filepath try: f = open(path) except IOError, err: - if not missingok or err.errno != errno.ENOENT: + if err.errno != errno.ENOENT: raise return iter([]) ver = int(f.readline()) - if ver != cls.VERSION: + if ver == SqliteRevMap.VERSION: + revmap = SqliteRevMap(self._filepath, self._lastpulled_file) + tmppath = '%s.tmp' % self._filepath + revmap.exportrevmapv1(tmppath) + os.rename(tmppath, self._filepath) + hgutil.unlinkpath(revmap._dbpath) + hgutil.unlinkpath(revmap._rowcountpath, ignoremissing=True) + return self._readmapfile() + if ver != self.VERSION: raise hgutil.Abort('revmap too new -- please upgrade') return f @util.gcdisable def _load(self): - lastpulled = self.meta.lastpulled - firstpulled = self.meta.firstpulled + lastpulled = self.lastpulled + firstpulled = self.firstpulled setitem = dict.__setitem__ - for l in self.readmapfile(self.meta.revmap_file): + for l in self._readmapfile(): revnum, ha, branch = l.split(' ', 2) if branch == '\n': branch = None @@ -240,34 +449,342 @@ class RevMap(dict): if revnum < firstpulled or not firstpulled: firstpulled = revnum setitem(self, (revnum, branch), bin(ha)) - self.meta.lastpulled = lastpulled - self.meta.firstpulled = firstpulled + if self.lastpulled != lastpulled: + self.lastpulled = lastpulled + self.firstpulled = firstpulled def _write(self): - f = open(self.meta.revmap_file, 'w') - f.write('%s\n' % self.VERSION) - f.close() + with open(self._filepath, 'w') as f: + f.write('%s\n' % self.VERSION) def __setitem__(self, key, ha): revnum, branch = key - f = open(self.meta.revmap_file, 'a') b = branch or '' - f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n') - f.close() - if revnum > self.meta.lastpulled or not self.meta.lastpulled: - self.meta.lastpulled = revnum - if revnum < self.meta.firstpulled or not self.meta.firstpulled: - self.meta.firstpulled = revnum + with open(self._filepath, 'a') as f: + f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n') + if revnum > self.lastpulled or not self.lastpulled: + self.lastpulled = revnum + if revnum < self.firstpulled or not self.firstpulled: + self.firstpulled = revnum dict.__setitem__(self, (revnum, branch), ha) if self._hashes is not None: self._hashes[ha] = (revnum, branch) + @classmethod + def _wrapitermethods(cls): + def wrap(orig): + def wrapper(self, *args, **kwds): + if not self._allowiter: + raise NotImplementedError( + 'Iteration methods on RevMap are disabled ' + + 'to avoid performance issues on SqliteRevMap') + return orig(self, *args, **kwds) + return wrapper + methodre = re.compile(r'^_*(?:iter|view)?(?:keys|items|values)?_*$') + for name in filter(methodre.match, dir(cls)): + orig = getattr(cls, name) + setattr(cls, '_orig%s' % name, orig) + setattr(cls, name, wrap(orig)) + +RevMap._wrapitermethods() + + +class SqliteRevMap(collections.MutableMapping): + """RevMap backed by sqlite3. + + It tries to address performance issues for a very large rev map. + As such iteration is unavailable for both the map itself and the + reverse map (self.hashes). + + It migrates from the old RevMap upon first use. Then it will bump the + version of revmap so RevMap no longer works. The real database is a + separated file which has a ".db" suffix. 
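Both backends present the same small mapping interface to the rest of hgsubversion, so calling code does not need to know which one is configured; a rough sketch, where meta stands for any SVNMeta instance:

    revmap = meta.revmap                      # may trigger the migration above
    revmap[(r.revnum, branch)] = ctx.node()   # record a converted revision
    revnum, branch = revmap.hashes()[ctx.node()]   # reverse lookup
    tip = revmap.lasthash                     # newest converted node, or None

Plain iteration is deliberately disabled on both classes (see _wrapitermethods and __iter__) so nothing accidentally falls back to a full scan of a large SqliteRevMap; the tests re-enable it via _allowiter.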
+ """ + + VERSION = 2 + + TABLESCHEMA = [ + '''CREATE TABLE IF NOT EXISTS revmap ( + rev INTEGER NOT NULL, + branch TEXT NOT NULL DEFAULT '', + hash BLOB NOT NULL)''', + ] + + INDEXSCHEMA = [ + 'CREATE UNIQUE INDEX IF NOT EXISTS revbranch ON revmap (rev,branch);', + 'CREATE INDEX IF NOT EXISTS hash ON revmap (hash);', + ] + + # "bytes" in Python 2 will get truncated at '\0' when storing as sqlite + # blobs. "buffer" does not have this issue. Python 3 does not have "buffer" + # but "bytes" won't get truncated. + sqlblobtype = bytes if sys.version_info >= (3, 0) else buffer + + class ReverseRevMap(object): + # collections.Mapping is not suitable since we don't want 2/3 of + # its required interfaces: __iter__, __len__. + def __init__(self, revmap): + self.revmap = weakref.proxy(revmap) + self._cache = {} + + def get(self, key, default=None): + if key not in self._cache: + result = None + for row in self.revmap._query( + 'SELECT rev, branch FROM revmap WHERE hash=?', + (SqliteRevMap.sqlblobtype(key),)): + result = (row[0], row[1] or None) + break + self._cache[key] = result + return self._cache[key] or default + + def __contains__(self, key): + return self.get(key) != None + + def __getitem__(self, key): + dummy = self._cache + item = self.get(key, dummy) + if item == dummy: + raise KeyError(key) + else: + return item + + def keys(self): + for row in self.revmap._query('SELECT hash FROM revmap'): + yield bytes(row[0]) + + lastpulled = util.fileproperty('_lastpulled', lambda x: x._lastpulledpath, + default=0, deserializer=int) + rowcount = util.fileproperty('_rowcount', lambda x: x._rowcountpath, + default=0, deserializer=int) + + def __init__(self, revmap_path, lastpulled_path, sqlitepragmas=None): + self._filepath = revmap_path + self._dbpath = revmap_path + '.db' + self._rowcountpath = self._dbpath + '.rowcount' + self._lastpulledpath = lastpulled_path + + self._db = None + self._hashes = None + self._sqlitepragmas = sqlitepragmas + self.firstpulled = 0 + self._updatefirstlastpulled() + # __iter__ is expensive and thus disabled by default + # it should only be enabled for testing + self._allowiter = False + + def hashes(self): + if self._hashes is None: + self._hashes = self.ReverseRevMap(self) + return self._hashes + + def branchedits(self, branch, revnum): + return [((r[0], r[1] or None), bytes(r[2])) for r in + self._query('SELECT rev, branch, hash FROM revmap ' + + 'WHERE rev < ? AND branch = ? ' + + 'ORDER BY rev DESC, branch DESC', + (revnum, branch or ''))] + + def branchmaxrevnum(self, branch, maxrev): + for row in self._query('SELECT rev FROM revmap ' + + 'WHERE rev <= ? AND branch = ? 
' + + 'ORDER By rev DESC LIMIT 1', + (maxrev, branch or '')): + return row[0] + return 0 + + @property + def lasthash(self): + for row in self._query('SELECT hash FROM revmap ORDER BY rev DESC'): + return bytes(row[0]) + return None + + def revhashes(self, revnum): + for row in self._query('SELECT hash FROM revmap WHERE rev = ?', + (revnum,)): + yield bytes(row[0]) + + def clear(self): + hgutil.unlinkpath(self._filepath, ignoremissing=True) + hgutil.unlinkpath(self._dbpath, ignoremissing=True) + hgutil.unlinkpath(self._rowcountpath, ignoremissing=True) + self._db = None + self._hashes = None + self._firstpull = None + self._lastpull = None + + def batchset(self, items, lastpulled): + with self._transaction(): + self._insert(items) + self.lastpulled = lastpulled + + def __getitem__(self, key): + for row in self._querybykey('SELECT hash', key): + return bytes(row[0]) + raise KeyError(key) + + def __iter__(self): + if not self._allowiter: + raise NotImplementedError( + 'SqliteRevMap.__iter__ is not implemented intentionally ' + + 'to avoid performance issues') + # collect result to avoid nested transaction issues + rows = [] + for row in self._query('SELECT rev, branch FROM revmap'): + rows.append((row[0], row[1] or None)) + return iter(rows) + + def __len__(self): + # rowcount is faster than "SELECT COUNT(1)". the latter is not O(1) + return self.rowcount + + def __setitem__(self, key, binha): + revnum, branch = key + with self._transaction(): + self._insert([(revnum, branch, binha)]) + if revnum < self.firstpulled or not self.firstpulled: + self.firstpulled = revnum + if revnum > self.lastpulled or not self.lastpulled: + self.lastpulled = revnum + if self._hashes is not None: + self._hashes._cache[binha] = key + + def __delitem__(self, key): + for row in self._querybykey('DELETE', key): + if self.rowcount > 0: + self.rowcount -= 1 + return + # For performance reason, self._hashes is not updated + raise KeyError(key) + + @contextlib.contextmanager + def _transaction(self, mode='IMMEDIATE'): + if self._db is None: + self._opendb() + with self._db as db: + db.execute('BEGIN %s' % mode) + yield db + + def _query(self, sql, params=()): + with self._transaction() as db: + cur = db.execute(sql, params) + try: + for row in cur: + yield row + finally: + cur.close() + + def _querybykey(self, prefix, key): + revnum, branch = key + return self._query( + '%s FROM revmap WHERE rev=? AND branch=?' + % prefix, (revnum, branch or '')) + + def _insert(self, rows): + # convert to a safe type so '\0' does not truncate the blob + if rows and type(rows[0][-1]) is not self.sqlblobtype: + rows = [(r, b, self.sqlblobtype(h)) for r, b, h in rows] + self._db.executemany( + 'INSERT OR REPLACE INTO revmap (rev, branch, hash) ' + + 'VALUES (?, ?, ?)', rows) + # If REPLACE happens, rowcount can be wrong. But it is only used to + # calculate how many revisions pulled, and during pull we don't + # replace rows. So it is fine. 
+ self.rowcount += len(rows) + + def _opendb(self): + '''Open the database and make sure the table is created on demand.''' + version = None + try: + version = int(open(self._filepath).read(2)) + except (ValueError, IOError): + pass + if version and version not in [RevMap.VERSION, self.VERSION]: + raise error.Abort('revmap too new -- please upgrade') + + if self._db: + self._db.close() + + # if version mismatch, the database is considered invalid + if version != self.VERSION: + hgutil.unlinkpath(self._dbpath, ignoremissing=True) + + self._db = sqlite3.connect(self._dbpath) + self._db.text_factory = bytes + + # cache size affects random accessing (e.g. index building) + # performance greatly. default is 2MB (2000 KB), we want to have + # a big enough cache that can hold the entire map. + cachesize = 2000 + for path, ratio in [(self._filepath, 1.7), (self._dbpath, 1)]: + if os.path.exists(path): + cachesize += os.stat(path).st_size * ratio // 1000 + self._db.execute('PRAGMA cache_size=%d' % (-cachesize)) + + # PRAGMA statements provided by the user + for pragma in (self._sqlitepragmas or []): + # drop malicious ones + if re.match(r'\A\w+=\w+\Z', pragma): + self._db.execute('PRAGMA %s' % pragma) + + # disable auto-commit. everything is inside a transaction + self._db.isolation_level = 'DEFERRED' + + with self._transaction('EXCLUSIVE'): + map(self._db.execute, self.TABLESCHEMA) + if version == RevMap.VERSION: + self.rowcount = 0 + self._importrevmapv1() + elif not self.rowcount: + self.rowcount = self._db.execute( + 'SELECT COUNT(1) FROM revmap').fetchone()[0] + + # "bulk insert; then create index" is about 2.4x as fast as + # "create index; then bulk insert" on a large repo + map(self._db.execute, self.INDEXSCHEMA) + + # write a dummy rev map file with just the revision number + if version != self.VERSION: + f = open(self._filepath, 'w') + f.write('%s\n' % self.VERSION) + f.close() + + def _updatefirstlastpulled(self): + sql = 'SELECT rev FROM revmap ORDER BY rev %s LIMIT 1' + for row in self._query(sql % 'ASC'): + self.firstpulled = row[0] + for row in self._query(sql % 'DESC'): + if row[0] > self.lastpulled: + self.lastpulled = row[0] + + @util.gcdisable + def _importrevmapv1(self): + with open(self._filepath, 'r') as f: + # 1st line is version + assert(int(f.readline())) == RevMap.VERSION + data = {} + for line in f: + revnum, ha, branch = line[:-1].split(' ', 2) + # ignore malicious lines + if len(ha) != 40: + continue + data[revnum, branch or None] = bin(ha) + self._insert([(r, b, h) for (r, b), h in data.iteritems()]) + + @util.gcdisable + def exportrevmapv1(self, path): + with open(path, 'w') as f: + f.write('%s\n' % RevMap.VERSION) + for row in self._query('SELECT rev, branch, hash FROM revmap'): + rev, br, ha = row + f.write('%s %s %s\n' % (rev, hex(ha), br)) + class FileMap(object): VERSION = 1 - def __init__(self, meta): + def __init__(self, ui, filepath): '''Initialise a new FileMap. The ui argument is used to print diagnostic messages. @@ -275,16 +792,17 @@ class FileMap(object): The path argument is the location of the backing store, typically .hg/svn/filemap. 
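For context, the file this FileMap reads is a list of include/exclude directives, one per line, as handled by load_fd() below; an invented example:

    include trunk/src
    exclude trunk/src/thirdparty

Anything other than an include or exclude line is reported as an 'unknown filemap command', and rule order is preserved (add() records len(self) so later rules can be distinguished from earlier ones).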
''' - self.meta = meta + self._filename = filepath + self._ui = ui self.include = {} self.exclude = {} - if os.path.isfile(self.meta.filemap_file): + if os.path.isfile(self._filename): self._load() else: self._write() # append file mapping specified from the commandline - clmap = util.configpath(self.meta.ui, 'filemap') + clmap = util.configpath(self._ui, 'filemap') if clmap: self.load(clmap) @@ -326,22 +844,20 @@ class FileMap(object): mapping = getattr(self, m) if path in mapping: msg = 'duplicate %s entry in %s: "%s"\n' - self.meta.ui.status(msg % (m, fn, path)) + self._ui.status(msg % (m, fn, path)) return bits = m.rstrip('e'), path - self.meta.ui.debug('%sing %s\n' % bits) + self._ui.debug('%sing %s\n' % bits) # respect rule order mapping[path] = len(self) - if fn != self.meta.filemap_file: - f = open(self.meta.filemap_file, 'a') - f.write(m + ' ' + path + '\n') - f.close() + if fn != self._filename: + with open(self._filename, 'a') as f: + f.write(m + ' ' + path + '\n') def load(self, fn): - self.meta.ui.debug('reading file map from %s\n' % fn) - f = open(fn, 'r') - self.load_fd(f, fn) - f.close() + self._ui.debug('reading file map from %s\n' % fn) + with open(fn, 'r') as f: + self.load_fd(f, fn) def load_fd(self, f, fn): for line in f: @@ -354,26 +870,24 @@ class FileMap(object): if cmd in ('include', 'exclude'): self.add(fn, cmd, path) continue - self.meta.ui.warn('unknown filemap command %s\n' % cmd) + self._ui.warn('unknown filemap command %s\n' % cmd) except IndexError: msg = 'ignoring bad line in filemap %s: %s\n' - self.meta.ui.warn(msg % (fn, line.rstrip())) + self._ui.warn(msg % (fn, line.rstrip())) def _load(self): - self.meta.ui.debug('reading in-repo file map from %s\n' % self.meta.filemap_file) - f = open(self.meta.filemap_file) - ver = int(f.readline()) - if ver != self.VERSION: - raise hgutil.Abort('filemap too new -- please upgrade') - self.load_fd(f, self.meta.filemap_file) - f.close() + self._ui.debug('reading in-repo file map from %s\n' % self._filename) + with open(self._filename) as f: + ver = int(f.readline()) + if ver != self.VERSION: + raise hgutil.Abort('filemap too new -- please upgrade') + self.load_fd(f, self._filename) def _write(self): - f = open(self.meta.filemap_file, 'w') - f.write('%s\n' % self.VERSION) - f.close() + with open(self._filename, 'w') as f: + f.write('%s\n' % self.VERSION) -class BranchMap(dict): +class BranchMap(BaseMap): '''Facility for controlled renaming of branch names. Example: oldname = newname @@ -383,63 +897,7 @@ class BranchMap(dict): changes on other will now be on default (have no branch name set). 
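Reading this together with the new SVNMeta.skipbranch() elsewhere in the patch (a hedged interpretation, since only the code implies it): because BranchMap now inherits the generic BaseMap loader, a branch can apparently be mapped to an empty name, and skipbranch() treats such a branch as one to drop entirely rather than rename; for example, with invented branch names,

    old-release = release-1.x
    vendor-imports =

would rename the first branch and skip converting commits on the second, via the skipbranch checks added to replay.py and stupid.py.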
''' - def __init__(self, meta): - self.meta = meta - self.super = super(BranchMap, self) - self.super.__init__() - self.load(self.meta.branchmap_file) - - # append branch mapping specified from the commandline - clmap = util.configpath(self.meta.ui, 'branchmap') - if clmap: - self.load(clmap) - - def load(self, path): - '''Load mappings from a file at the specified path.''' - if not os.path.exists(path): - return - - writing = False - if path != self.meta.branchmap_file: - writing = open(self.meta.branchmap_file, 'a') - - self.meta.ui.debug('reading branchmap from %s\n' % path) - f = open(path, 'r') - for number, line in enumerate(f): - - if writing: - writing.write(line) - - line = line.split('#')[0] - if not line.strip(): - continue - - try: - src, dst = line.split('=', 1) - except (IndexError, ValueError): - msg = 'ignoring line %i in branch map %s: %s\n' - self.meta.ui.status(msg % (number, path, line.rstrip())) - continue - - src = src.strip() - dst = dst.strip() - self.meta.ui.debug('adding branch %s to branch map\n' % src) - - if not dst: - # prevent people from assuming such lines are valid - raise hgutil.Abort('removing branches is not supported, yet\n' - '(line %i in branch map %s)' - % (number, path)) - elif src in self and dst != self[src]: - msg = 'overriding branch: "%s" to "%s" (%s)\n' - self.meta.ui.status(msg % (self[src], dst, src)) - self[src] = dst - - f.close() - if writing: - writing.close() - -class TagMap(dict): +class TagMap(BaseMap): '''Facility for controlled renaming of tags. Example: oldname = newname @@ -448,54 +906,3 @@ class TagMap(dict): The oldname tag from SVN will be represented as newname in the hg tags; the other tag will not be reflected in the hg repository. ''' - - def __init__(self, meta): - self.meta = meta - self.super = super(TagMap, self) - self.super.__init__() - self.load(self.meta.tagmap_file) - - # append tag mapping specified from the commandline - clmap = util.configpath(self.meta.ui, 'tagmap') - if clmap: - self.load(clmap) - - def load(self, path): - '''Load mappings from a file at the specified path.''' - if not os.path.exists(path): - return - - writing = False - if path != self.meta.tagmap_file: - writing = open(self.meta.tagmap_file, 'a') - - self.meta.ui.debug('reading tag renames from %s\n' % path) - f = open(path, 'r') - for number, line in enumerate(f): - - if writing: - writing.write(line) - - line = line.split('#')[0] - if not line.strip(): - continue - - try: - src, dst = line.split('=', 1) - except (IndexError, ValueError): - msg = 'ignoring line %i in tag renames %s: %s\n' - self.meta.ui.status(msg % (number, path, line.rstrip())) - continue - - src = src.strip() - dst = dst.strip() - self.meta.ui.debug('adding tag %s to tag renames\n' % src) - - if src in self and dst != self[src]: - msg = 'overriding tag rename: "%s" to "%s" (%s)\n' - self.meta.ui.status(msg % (self[src], dst, src)) - self[src] = dst - - f.close() - if writing: - writing.close() diff --git a/hgsubversion/replay.py b/hgsubversion/replay.py --- a/hgsubversion/replay.py +++ b/hgsubversion/replay.py @@ -65,13 +65,13 @@ def _convert_rev(ui, meta, svn, r, tbdel editor.current.rev = r editor.setsvn(svn) - if firstrun and meta.firstpulled <= 0: + if firstrun and meta.revmap.firstpulled <= 0: # We know nothing about this project, so fetch everything before # trying to apply deltas. 
ui.debug('replay: fetching full revision\n') svn.get_revision(r.revnum, editor) else: - svn.get_replay(r.revnum, editor, meta.firstpulled) + svn.get_replay(r.revnum, editor, meta.revmap.firstpulled) editor.close() current = editor.current @@ -103,7 +103,7 @@ def _convert_rev(ui, meta, svn, r, tbdel closebranches = {} for branch in tbdelta['branches'][1]: - branchedits = meta.revmap.branchedits(branch, rev) + branchedits = meta.revmap.branchedits(branch, rev.revnum) if len(branchedits) < 1: # can't close a branch that never existed continue @@ -121,6 +121,12 @@ def _convert_rev(ui, meta, svn, r, tbdel if branch in current.emptybranches and files: del current.emptybranches[branch] + if meta.skipbranch(branch): + # make sure we also get rid of it from emptybranches + if branch in current.emptybranches: + del current.emptybranches[branch] + continue + files = dict(files) parents = meta.get_parent_revision(rev.revnum, branch), revlog.nullid if parents[0] in closedrevs and branch in meta.closebranches: @@ -195,6 +201,9 @@ def _convert_rev(ui, meta, svn, r, tbdel # 2. handle branches that need to be committed without any files for branch in current.emptybranches: + if meta.skipbranch(branch): + continue + ha = meta.get_parent_revision(rev.revnum, branch) if ha == node.nullid: continue diff --git a/hgsubversion/stupid.py b/hgsubversion/stupid.py --- a/hgsubversion/stupid.py +++ b/hgsubversion/stupid.py @@ -568,7 +568,7 @@ def fetch_branchrev(svn, meta, branch, b return files, filectxfn def checkbranch(meta, r, branch): - branchedits = meta.revmap.branchedits(branch, r) + branchedits = meta.revmap.branchedits(branch, r.revnum) if not branchedits: return None branchtip = branchedits[0][1] @@ -689,6 +689,10 @@ def convert_rev(ui, meta, svn, r, tbdelt date = meta.fixdate(r.date) check_deleted_branches = set(tbdelta['branches'][1]) for b in branches: + + if meta.skipbranch(b): + continue + parentctx = meta.repo[meta.get_parent_revision(r.revnum, b)] tag = meta.get_path_tag(meta.remotename(b)) kind = svn.checkpath(branches[b], r.revnum) @@ -704,7 +708,7 @@ def convert_rev(ui, meta, svn, r, tbdelt # path does not support this case with svn >= 1.7. We can fix # it, or we can force the existing fetch_branchrev() path. Do # the latter for now. 
- incremental = (meta.firstpulled > 0 and + incremental = (meta.revmap.firstpulled > 0 and parentctx.rev() != node.nullrev and not firstrun) diff --git a/hgsubversion/svncommands.py b/hgsubversion/svncommands.py --- a/hgsubversion/svncommands.py +++ b/hgsubversion/svncommands.py @@ -64,8 +64,12 @@ def _buildmeta(ui, repo, args, partial=F youngest = 0 startrev = 0 - sofar = [] branchinfo = {} + + if not partial: + hgutil.unlinkpath(meta.revmap_file, ignoremissing=True) + + revmap = meta.revmap if partial: try: # we can't use meta.lastpulled here because we are bootstraping the @@ -75,9 +79,8 @@ def _buildmeta(ui, repo, args, partial=F youngestpath = os.path.join(meta.metapath, 'lastpulled') if os.path.exists(youngestpath): youngest = util.load(youngestpath) - sofar = list(maps.RevMap.readmapfile(meta.revmap_file)) - if sofar and len(sofar[-1].split(' ', 2)) > 1: - lasthash = sofar[-1].split(' ', 2)[1] + lasthash = revmap.lasthash + if len(revmap) > 0 and lasthash: startrev = repo[lasthash].rev() + 1 branchinfo = util.load(meta.branch_info_file) foundpartialinfo = True @@ -91,9 +94,9 @@ def _buildmeta(ui, repo, args, partial=F except AttributeError: ui.status('no metadata available -- doing a full rebuild\n') - revmap = open(meta.revmap_file, 'w') - revmap.write('%d\n' % maps.RevMap.VERSION) - revmap.writelines(sofar) + if not partial: + revmap.clear() + last_rev = -1 if not partial and os.path.exists(meta.tagfile): os.unlink(meta.tagfile) @@ -107,13 +110,8 @@ def _buildmeta(ui, repo, args, partial=F # it would make us use O(revisions^2) time, so we perform an extra traversal # of the repository instead. During this traversal, we find all converted # changesets that close a branch, and store their first parent - for rev in xrange(startrev, len(repo)): - ui.progress('prepare', rev - startrev, total=numrevs) - try: - ctx = repo[rev] - except error.RepoError: - # this revision is hidden - continue + for ctx in util.get_contexts(repo, startrev): + ui.progress('prepare', ctx.rev() - startrev, total=numrevs) convinfo = util.getsvnrev(ctx, None) if not convinfo: @@ -137,16 +135,11 @@ def _buildmeta(ui, repo, args, partial=F else: closed.add(parentctx.rev()) - meta.lastpulled = youngest ui.progress('prepare', None, total=numrevs) - for rev in xrange(startrev, len(repo)): - ui.progress('rebuild', rev-startrev, total=numrevs) - try: - ctx = repo[rev] - except error.RepoError: - # this revision is hidden - continue + revmapbuf = [] + for ctx in util.get_contexts(repo, startrev): + ui.progress('rebuild', ctx.rev() - startrev, total=numrevs) convinfo = util.getsvnrev(ctx, None) if not convinfo: @@ -226,7 +219,7 @@ def _buildmeta(ui, repo, args, partial=F continue branch = meta.layoutobj.localname(commitpath) - revmap.write('%s %s %s\n' % (revision, ctx.hex(), branch or '')) + revmapbuf.append((revision, branch, ctx.node())) revision = int(revision) if revision > last_rev: @@ -254,7 +247,7 @@ def _buildmeta(ui, repo, args, partial=F branch = meta.layoutobj.localname(parentpath) break - if rev in closed: + if ctx.rev() in closed: # a direct child of this changeset closes the branch; drop it branchinfo.pop(branch, None) elif ctx.extra().get('close'): @@ -276,6 +269,7 @@ def _buildmeta(ui, repo, args, partial=F int(parentrev), revision) + revmap.batchset(revmapbuf, youngest) ui.progress('rebuild', None, total=numrevs) # save off branch info diff --git a/hgsubversion/svnexternals.py b/hgsubversion/svnexternals.py --- a/hgsubversion/svnexternals.py +++ b/hgsubversion/svnexternals.py @@ -88,7 +88,7 @@ class 
BadDefinition(Exception): pass re_defold = re.compile(r'^\s*(.*?)\s+(?:-r\s*(\d+|\{REV\})\s+)?([a-zA-Z+]+://.*)\s*$') -re_defnew = re.compile(r'^\s*(?:-r\s*(\d+|\{REV\})\s+)?((?:[a-zA-Z+]+://|\^/).*)\s+(\S+)\s*$') +re_defnew = re.compile(r'^\s*(?:-r\s*(\d+|\{REV\})\s+)?((?:[a-zA-Z+]+://|\^/)\S*)\s+(\S+)\s*$') re_scheme = re.compile(r'^[a-zA-Z+]+://') def parsedefinition(line): @@ -120,13 +120,84 @@ def parsedefinition(line): class RelativeSourceError(Exception): pass +def resolvedots(url): + """ + Fix references that include .. entries. + Scans a URL for .. type entries and resolves them but will not allow any + number of ..s to take us out of domain so http://.. will raise an exception. + + Tests, (Don't know how to construct a round trip for this so doctest): + >>> # Relative URL within servers svn area + >>> resolvedots( + ... "http://some.svn.server/svn/some_repo/../other_repo") + 'http://some.svn.server/svn/other_repo' + >>> # Complex One + >>> resolvedots( + ... "http://some.svn.server/svn/repo/../other/repo/../../other_repo") + 'http://some.svn.server/svn/other_repo' + >>> # Another Complex One + >>> resolvedots( + ... "http://some.svn.server/svn/repo/dir/subdir/../../../other_repo/dir") + 'http://some.svn.server/svn/other_repo/dir' + >>> # Last Complex One - SVN Allows this & seen it used even if it is BAD! + >>> resolvedots( + ... "http://svn.server/svn/my_repo/dir/subdir/../../other_dir") + 'http://svn.server/svn/my_repo/other_dir' + >>> # Outside the SVN Area might be OK + >>> resolvedots( + ... "http://svn.server/svn/some_repo/../../other_svn_repo") + 'http://svn.server/other_svn_repo' + >>> # Complex One + >>> resolvedots( + ... "http://some.svn.server/svn/repo/../other/repo/../../other_repo") + 'http://some.svn.server/svn/other_repo' + >>> # On another server is not a relative URL should give an exception + >>> resolvedots( + ... "http://some.svn.server/svn/some_repo/../../../other_server") + Traceback (most recent call last): + ... + RelativeSourceError: Relative URL cannot be to another server + """ + orig = url.split('/') + fixed = [] + for item in orig: + if item != '..': + fixed.append(item) + elif len(fixed) > 3: # Don't allow things to go out of domain + fixed.pop() + else: + raise RelativeSourceError( + 'Relative URL cannot be to another server') + return '/'.join(fixed) + + + def resolvesource(ui, svnroot, source): + """ Resolve the source as either matching the scheme re or by resolving + relative URLs which start with ^ and my include relative .. references. + + >>> root = 'http://some.svn.server/svn/some_repo' + >>> resolvesource(None, root, 'http://other.svn.server') + 'http://other.svn.server' + >>> resolvesource(None, root, 'ssh://other.svn.server') + 'ssh://other.svn.server' + >>> resolvesource(None, root, '^/other_repo') + 'http://some.svn.server/svn/some_repo/other_repo' + >>> resolvesource(None, root, '^/sub_repo') + 'http://some.svn.server/svn/some_repo/sub_repo' + >>> resolvesource(None, root, '^/../other_repo') + 'http://some.svn.server/svn/other_repo' + >>> resolvesource(None, root, '^/../../../server/other_repo') + Traceback (most recent call last): + ... 
+ RelativeSourceError: Relative URL cannot be to another server + """ if re_scheme.search(source): return source if source.startswith('^/'): if svnroot is None: raise RelativeSourceError() - return svnroot + source[1:] + return resolvedots(svnroot + source[1:]) ui.warn(_('ignoring unsupported non-fully qualified external: %r\n' % source)) return None @@ -232,7 +303,7 @@ class externalsupdater: if source == exturl: if extrev != rev: self.ui.status(_('updating external on %s@%s\n') % - (wpath, rev or 'HEAD')) + (wpath, rev or pegrev or 'HEAD')) cwd = os.path.join(self.repo.root, path) self.svn(['update'] + revspec, cwd) return @@ -245,7 +316,8 @@ class externalsupdater: pegrev = rev if pegrev: source = '%s@%s' % (source, pegrev) - self.ui.status(_('fetching external %s@%s\n') % (wpath, rev or 'HEAD')) + self.ui.status(_('fetching external %s@%s\n') % + (wpath, rev or pegrev or 'HEAD')) self.svn(['co'] + revspec + [source, dest], cwd) def delete(self, wpath): @@ -268,12 +340,12 @@ class externalsupdater: def svn(self, args, cwd): args = ['svn'] + args - self.ui.debug(_('updating externals: %r, cwd=%s\n') % (args, cwd)) + self.ui.note(_('updating externals: %r, cwd=%s\n') % (args, cwd)) shell = os.name == 'nt' p = subprocess.Popen(args, cwd=cwd, shell=shell, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) for line in p.stdout: - self.ui.note(line) + self.ui.debug(line) p.wait() if p.returncode != 0: raise hgutil.Abort("subprocess '%s' failed" % ' '.join(args)) @@ -424,7 +496,7 @@ class svnsubrepo(subrepo.svnsubrepo): def dirty(self, ignoreupdate=False): # You cannot compare anything with HEAD. Just accept it # can be anything. - if hasattr(self, '_wcrevs'): + if hgutil.safehasattr(self, '_wcrevs'): wcrevs = self._wcrevs() else: wcrev = self._wcrev() @@ -447,3 +519,7 @@ class svnsubrepo(subrepo.svnsubrepo): if self._state[1] == 'HEAD': return 'HEAD' return super(svnsubrepo, self).basestate() + +if __name__ == "__main__": + import doctest + doctest.testmod() diff --git a/hgsubversion/svnmeta.py b/hgsubversion/svnmeta.py --- a/hgsubversion/svnmeta.py +++ b/hgsubversion/svnmeta.py @@ -27,7 +27,6 @@ class SVNMeta(object): self.ui = repo.ui self.repo = repo self.path = os.path.normpath(repo.join('..')) - self.firstpulled = 0 self.lastdate = '1970-01-01 00:00:00 -0000' self.addedtags = {} self.deletedtags = {} @@ -52,9 +51,9 @@ class SVNMeta(object): self.subdir = subdir # generated properties that have a persistent file stored on disk - self._gen_cachedconfig('lastpulled', 0, configname=False) self._gen_cachedconfig('defaultauthors', True) self._gen_cachedconfig('caseignoreauthors', False) + self._gen_cachedconfig('mapauthorscmd', None) self._gen_cachedconfig('defaulthost', self.uuid) self._gen_cachedconfig('usebranchnames', True) self._gen_cachedconfig('defaultmessage', '') @@ -69,7 +68,7 @@ class SVNMeta(object): """Return a cached value for a config option. If the cache is uninitialized then try to read its value from disk. Option can be overridden by the commandline. - name: property name, e.g. 'lastpulled' + name: property name, e.g. 
'defaultauthors' filename: name of file in .hg/svn configname: commandline option name default: default value @@ -94,6 +93,8 @@ class SVNMeta(object): c = self.ui.configint('hgsubversion', configname, default) elif isinstance(default, list): c = self.ui.configlist('hgsubversion', configname, default) + elif isinstance(default, dict): + c = dict(self.ui.configitems(configname)) else: c = self.ui.config('hgsubversion', configname, default) @@ -136,14 +137,14 @@ class SVNMeta(object): filename = name if configname is None: configname = name - prop = property(lambda x: self._get_cachedconfig(name, - filename, - configname, - default, - pre=pre), - lambda x, y: self._set_cachedconfig(y, - name, - filename)) + prop = property(lambda x: x._get_cachedconfig(name, + filename, + configname, + default, + pre=pre), + lambda x, y: x._set_cachedconfig(y, + name, + filename)) setattr(SVNMeta, name, prop) def layout_from_subversion(self, svn, revision=None): @@ -218,7 +219,7 @@ class SVNMeta(object): @property def editor(self): - if not hasattr(self, '_editor'): + if not hgutil.safehasattr(self, '_editor'): self._editor = editor.HgEditor(self) return self._editor @@ -284,13 +285,15 @@ class SVNMeta(object): return os.path.join(self.metapath, 'branch_info') @property - def authors_file(self): + def authormap_file(self): return os.path.join(self.metapath, 'authors') @property def authors(self): if self._authors is None: - self._authors = maps.AuthorMap(self) + self._authors = maps.AuthorMap( + self.ui, self.authormap_file, self.defaulthost, + self.caseignoreauthors, self.mapauthorscmd, self.defaultauthors) return self._authors @property @@ -300,7 +303,7 @@ class SVNMeta(object): @property def filemap(self): if self._filemap is None: - self._filemap = maps.FileMap(self) + self._filemap = maps.FileMap(self.ui, self.filemap_file) return self._filemap @property @@ -310,7 +313,7 @@ class SVNMeta(object): @property def branchmap(self): if self._branchmap is None: - self._branchmap = maps.BranchMap(self) + self._branchmap = maps.BranchMap(self.ui, self.branchmap_file) return self._branchmap @property @@ -321,7 +324,7 @@ class SVNMeta(object): @property def tags(self): if self._tags is None: - self._tags = maps.Tags(self) + self._tags = maps.Tags(self.ui, self.tagfile) return self._tags @property @@ -332,7 +335,7 @@ class SVNMeta(object): @property def tagmap(self): if self._tagmap is None: - self._tagmap = maps.TagMap(self) + self._tagmap = maps.TagMap(self.ui, self.tagmap_file) return self._tagmap @property @@ -342,9 +345,34 @@ class SVNMeta(object): @property def revmap(self): if self._revmap is None: - self._revmap = maps.RevMap(self) + lastpulled_path = os.path.join(self.metapath, 'lastpulled') + opts = {} + if self.revmapclass is maps.SqliteRevMap: + # sqlite revmap takes an optional option: sqlitepragmas + opts['sqlitepragmas'] = self.ui.configlist( + 'hgsubversion', 'sqlitepragmas') + self._revmap = self.revmapclass( + self.revmap_file, lastpulled_path, **opts) return self._revmap + @property + def revmapexists(self): + return os.path.exists(self.revmap_file) + + _defaultrevmapclass = maps.RevMap + + @property + def revmapclass(self): + impl = self.ui.config('hgsubversion', 'revmapimpl') + if impl == 'plain': + return maps.RevMap + elif impl == 'sqlite': + return maps.SqliteRevMap + elif impl is None: + return self._defaultrevmapclass + else: + raise hgutil.Abort('unknown revmapimpl: %s' % impl) + def fixdate(self, date): if date is not None: date = date.replace('T', ' ').replace('Z', '').split('.')[0] @@ 
-388,6 +416,19 @@ class SVNMeta(object): } return extra + def skipbranch(self, name): + '''Returns whether or not we're skipping a branch.''' + # sometimes it's easier to pass the path instead of just the branch + # name, so we test for that here + if name: + bname = self.split_branch_path(name) + if bname != (None, None, None): + name = bname[1] + + # if the mapped branch == '' and the original branch name == '' then we + # won't commit this branch + return name and not self.branchmap.get(name, True) + def mapbranch(self, extra, close=False): if close: extra['close'] = 1 @@ -440,6 +481,13 @@ class SVNMeta(object): path = self.normalize(path) return self.layoutobj.get_path_tag(path, self.layoutobj.taglocations) + def get_tag_path(self, name): + """Return a path corresponding to the given tag name""" + try: + return self.layoutobj.taglocations[0] + '/' + name + except IndexError: + return None + def split_branch_path(self, path, existing=True): """Figure out which branch inside our repo this path represents, and also figure out which path inside that branch it is. @@ -531,12 +579,7 @@ class SVNMeta(object): """ if (number, branch) in self.revmap: return number, branch - real_num = 0 - for num, br in self.revmap.iterkeys(): - if br != branch: - continue - if num <= number and num > real_num: - real_num = num + real_num = self.revmap.branchmaxrevnum(branch, number) if branch in self.branches: parent_branch = self.branches[branch][0] parent_branch_rev = self.branches[branch][1] @@ -576,7 +619,7 @@ class SVNMeta(object): return node.hex(self.revmap[tagged]) tag = fromtag # Reference an existing tag - limitedtags = maps.Tags(self, endrev=number - 1) + limitedtags = maps.Tags(self.ui, self.tagfile, endrev=number - 1) if tag in limitedtags: return limitedtags[tag] r, br = self.get_parent_svn_branch_and_rev(number - 1, branch, exact) diff --git a/hgsubversion/svnrepo.py b/hgsubversion/svnrepo.py --- a/hgsubversion/svnrepo.py +++ b/hgsubversion/svnrepo.py @@ -157,7 +157,7 @@ class svnremoterepo(peerrepository): @property def svnurl(self): - return self.svn.svn_url + return self.svnauth[0] @propertycache def svn(self): diff --git a/hgsubversion/svnwrap/subvertpy_wrapper.py b/hgsubversion/svnwrap/subvertpy_wrapper.py --- a/hgsubversion/svnwrap/subvertpy_wrapper.py +++ b/hgsubversion/svnwrap/subvertpy_wrapper.py @@ -22,6 +22,7 @@ try: from subvertpy import delta from subvertpy import properties from subvertpy import ra + from subvertpy import repos import subvertpy except ImportError: raise ImportError('Subvertpy %d.%d.%d or later required, but not found' @@ -51,6 +52,18 @@ def version(): svnvers += '-' + subversion_version[3] return (svnvers, 'Subvertpy ' + _versionstr(subvertpy.__version__)) +def create_and_load(repopath, dumpfd): + ''' create a new repository at repopath and load the given dump into it ''' + repo = repos.create(repopath) + + nullfd = open(os.devnull, 'w') + + try: + repo.load_fs(dumpfd, nullfd, repos.LOAD_UUID_FORCE) + finally: + dumpfd.close() + nullfd.close() + # exported values ERR_FS_ALREADY_EXISTS = subvertpy.ERR_FS_ALREADY_EXISTS ERR_FS_CONFLICT = subvertpy.ERR_FS_CONFLICT @@ -186,7 +199,8 @@ class SubversionRepo(object): Note that password stores do not work, the parameter is only here to ensure that the API is the same as for the SWIG wrapper. 
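The create_and_load() helper added near the top of this file pairs with an identically named SWIG variant later in the patch; a usage sketch, assuming the function is re-exported through the svnwrap package like the rest of the wrapper API (paths invented):

    from hgsubversion import svnwrap
    dumpfd = open('fixture.svndump', 'rb')
    svnwrap.create_and_load('/tmp/target-repo', dumpfd)  # closes dumpfd

It creates an empty repository at the given path and loads the dump into it, forcing the dump's UUID onto the new repository.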
""" - def __init__(self, url='', username='', password='', head=None, password_stores=None): + def __init__(self, url='', username='', password='', head=None, + password_stores=None): parsed = common.parse_url(url, username, password) # --username and --password override URL credentials self.username = parsed[0] @@ -450,11 +464,17 @@ class SubversionRepo(object): else: # visiting a directory - if path in addeddirs: - direditor = editor.add_directory(path) - elif path in deleteddirs: + if path in deleteddirs: direditor = editor.delete_entry(path, base_revision) - continue + + if path not in addeddirs: + continue + + if path in addeddirs: + frompath, fromrev = copies.get(path, (None, -1)) + if frompath: + frompath = self.path2url(frompath) + direditor = editor.add_directory(path, frompath, fromrev) else: direditor = editor.open_directory(path) diff --git a/hgsubversion/svnwrap/svn_swig_wrapper.py b/hgsubversion/svnwrap/svn_swig_wrapper.py --- a/hgsubversion/svnwrap/svn_swig_wrapper.py +++ b/hgsubversion/svnwrap/svn_swig_wrapper.py @@ -21,6 +21,7 @@ try: from svn import core from svn import delta from svn import ra + from svn import repos subversion_version = (core.SVN_VER_MAJOR, core.SVN_VER_MINOR, core.SVN_VER_MICRO) @@ -36,6 +37,21 @@ if subversion_version < required_binding def version(): return '%d.%d.%d' % subversion_version, 'SWIG' +def create_and_load(repopath, dumpfd): + ''' create a new repository at repopath and load the given dump into it ''' + pool = core.Pool() + r = repos.svn_repos_create(repopath, '', '', None, None, pool) + + try: + repos.svn_repos_load_fs2(r, dumpfd, None, + repos.svn_repos_load_uuid_force, + '', False, False, None, pool) + finally: + dumpfd.close() + + pool.destroy() + + # exported values ERR_FS_ALREADY_EXISTS = core.SVN_ERR_FS_ALREADY_EXISTS ERR_FS_CONFLICT = core.SVN_ERR_FS_CONFLICT @@ -170,7 +186,7 @@ def _create_auth_baton(pool, password_st providers.append(p) else: for p in platform_specific: - if hasattr(core, p): + if getattr(core, p, None) is not None: try: providers.append(getattr(core, p)()) except RuntimeError: @@ -205,7 +221,8 @@ class SubversionRepo(object): It uses the SWIG Python bindings, see above for requirements. 
""" - def __init__(self, url='', username='', password='', head=None, password_stores=None): + def __init__(self, url='', username='', password='', head=None, + password_stores=None): parsed = common.parse_url(url, username, password) # --username and --password override URL credentials self.username = parsed[0] @@ -400,10 +417,16 @@ class SubversionRepo(object): if path in deleteddirs: bat = editor.delete_entry(path, base_revision, parent, pool) batons.append(bat) - return bat + + if path not in addeddirs: + return bat + if path not in file_data: if path in addeddirs: - bat = editor.add_directory(path, parent, None, -1, pool) + frompath, fromrev = copies.get(path, (None, -1)) + if frompath: + frompath = self.path2url(frompath) + bat = editor.add_directory(path, parent, frompath, fromrev, pool) else: bat = editor.open_directory(path, parent, base_revision, pool) batons.append(bat) diff --git a/hgsubversion/util.py b/hgsubversion/util.py --- a/hgsubversion/util.py +++ b/hgsubversion/util.py @@ -44,6 +44,27 @@ def configpath(ui, name): path = ui.config('hgsubversion', name) return path and hgutil.expandpath(path) +def fileproperty(fname, pathfunc, default=None, + serializer=str, deserializer=str): + """define a property that is backed by a file""" + def fget(self): + if not hgutil.safehasattr(self, fname): + path = pathfunc(self) + if os.path.exists(path): + with open(path, 'r') as f: + setattr(self, fname, deserializer(f.read())) + else: + setattr(self, fname, default) + return getattr(self, fname) + + def fset(self, value): + setattr(self, fname, value) + path = pathfunc(self) + with open(path, 'w') as f: + f.write(serializer(value)) + + return property(fget, fset) + def filterdiff(diff, oldrev, newrev): diff = newfile_devnull_re.sub(r'--- \1\t(revision 0)' '\n' r'+++ \1\t(working copy)', @@ -320,16 +341,11 @@ def revset_fromsvn(repo, subset, x): rev = repo.changelog.rev bin = node.bin meta = repo.svnmeta(skiperrorcheck=True) - try: - svnrevs = set(rev(bin(l.split(' ', 2)[1])) - for l in maps.RevMap.readmapfile(meta.revmap_file, - missingok=False)) - return filter(svnrevs.__contains__, subset) - except IOError, err: - if err.errno != errno.ENOENT: - raise + if not meta.revmapexists: raise hgutil.Abort("svn metadata is missing - " "run 'hg svn rebuildmeta' to reconstruct it") + svnrevs = set(rev(h) for h in meta.revmap.hashes().keys()) + return filter(svnrevs.__contains__, subset) def revset_svnrev(repo, subset, x): '''``svnrev(number)`` @@ -344,22 +360,16 @@ def revset_svnrev(repo, subset, x): except ValueError: raise error.ParseError("the argument to svnrev() must be a number") - rev = rev + ' ' - revs = [] meta = repo.svnmeta(skiperrorcheck=True) - try: - for l in maps.RevMap.readmapfile(meta.revmap_file, missingok=False): - if l.startswith(rev): - n = l.split(' ', 2)[1] - r = repo[node.bin(n)].rev() - if r in subset: - revs.append(r) - return revs - except IOError, err: - if err.errno != errno.ENOENT: - raise + if not meta.revmapexists: raise hgutil.Abort("svn metadata is missing - " "run 'hg svn rebuildmeta' to reconstruct it") + revs = [] + for n in meta.revmap.revhashes(revnum): + r = repo[n].rev() + if r in subset: + revs.append(r) + return revs revsets = { 'fromsvn': revset_fromsvn, @@ -388,3 +398,12 @@ def parse_revnum(svnrepo, r): return svnrepo.last_changed_rev else: raise error.RepoLookupError("unknown Subversion revision %r" % r) + +def get_contexts(repo, fromrev=0): + """Generator yielding contexts from the repository.""" + + for rev in xrange(fromrev, len(repo)): + try: + 
yield repo[rev] + except error.RepoLookupError: + pass diff --git a/hgsubversion/wrappers.py b/hgsubversion/wrappers.py --- a/hgsubversion/wrappers.py +++ b/hgsubversion/wrappers.py @@ -103,7 +103,7 @@ def incoming(orig, ui, repo, origsource= meta = repo.svnmeta(svn.uuid, svn.subdir) ui.status('incoming changes from %s\n' % other.svnurl) - svnrevisions = list(svn.revisions(start=meta.lastpulled)) + svnrevisions = list(svn.revisions(start=meta.revmap.lastpulled)) if opts.get('newest_first'): svnrevisions.reverse() # Returns 0 if there are incoming changes, 1 otherwise. @@ -204,6 +204,7 @@ def push(repo, dest, force, revs): hasobsolete = False temporary_commits = [] + obsmarkers = [] try: # TODO: implement --rev/#rev support # TODO: do credentials specified in the URL still work? @@ -300,9 +301,7 @@ def push(repo, dest, force, revs): if meta.get_source_rev(ctx=c)[0] == pushedrev.revnum: # This is corresponds to the changeset we just pushed if hasobsolete: - ui.note('marking %s as obsoleted by %s\n' % - (original_ctx.hex(), c.hex())) - obsolete.createmarkers(repo, [(original_ctx, [c])]) + obsmarkers.append([(original_ctx, [c])]) tip_ctx = c @@ -343,7 +342,14 @@ def push(repo, dest, force, revs): finally: util.swap_out_encoding() - if not hasobsolete: + if hasobsolete: + for marker in obsmarkers: + obsolete.createmarkers(repo, marker) + beforepush = marker[0][0] + afterpush = marker[0][1][0] + ui.note('marking %s as obsoleted by %s\n' % + (beforepush.hex(), afterpush.hex())) + else: # strip the original changesets since the push was # successful and changeset obsolescence is unavailable util.strip(ui, repo, outgoing, "all") @@ -413,8 +419,7 @@ def pull(repo, source, heads=[], force=F meta.branchmap['default'] = meta.branch ui = repo.ui - start = meta.lastpulled - origrevcount = len(meta.revmap) + start = meta.revmap.lastpulled if start <= 0: # we are initializing a new repository @@ -507,7 +512,7 @@ def pull(repo, source, heads=[], force=F util.swap_out_encoding(old_encoding) if lastpulled is not None: - meta.lastpulled = lastpulled + meta.revmap.lastpulled = lastpulled revisions = len(meta.revmap) - oldrevisions if revisions == 0: @@ -590,6 +595,7 @@ def rebase(orig, ui, repo, **opts): optionmap = { 'tagpaths': ('hgsubversion', 'tagpaths'), 'authors': ('hgsubversion', 'authormap'), + 'mapauthorscmd': ('hgsubversion', 'mapauthorscmd'), 'branchdir': ('hgsubversion', 'branchdir'), 'trunkdir': ('hgsubversion', 'trunkdir'), 'infix': ('hgsubversion', 'infix'), diff --git a/tests/comprehensive/test_rebuildmeta.py b/tests/comprehensive/test_rebuildmeta.py --- a/tests/comprehensive/test_rebuildmeta.py +++ b/tests/comprehensive/test_rebuildmeta.py @@ -129,10 +129,14 @@ def _run_assertions(self, name, single, old, new = util.load(stf, resave=False), util.load(dtf, resave=False) if tf == 'lastpulled' and (name, self.stupid, single) in expect_youngest_skew: - self.assertNotEqual(old, new, - 'rebuildmeta unexpected match on youngest rev!') + self.assertNotEqual( + old, new, + 'rebuildmeta unexpected match on lastpulled: ' + 'old %d new %d, case %r %r %r' % ( + old, new, name, self.stupid, single)) continue - self.assertEqual(old, new, tf + ' differs') + self.assertEqual( + old, new, '%s differs old: %r new %r'% (tf, old, new)) try: self.assertEqual(src.branchmap(), dest.branchmap()) except AttributeError: @@ -141,7 +145,12 @@ def _run_assertions(self, name, single, srcbi = util.load(os.path.join(src.path, 'svn', 'branch_info')) destbi = util.load(os.path.join(dest.path, 'svn', 'branch_info')) 
self.assertEqual(sorted(srcbi.keys()), sorted(destbi.keys())) - revkeys = svnmeta.SVNMeta(dest).revmap.keys() + revmap = svnmeta.SVNMeta(dest).revmap + # revmap disables __iter__ intentionally to avoid possible slow code + # (not using database index in SqliteRevMap) + # we need to fetch all keys so enable it by setting _allowiter + revmap._allowiter = True + revkeys = revmap.keys() for branch in destbi: srcinfo = srcbi[branch] destinfo = destbi[branch] diff --git a/tests/comprehensive/test_sqlite_revmap.py b/tests/comprehensive/test_sqlite_revmap.py new file mode 100644 --- /dev/null +++ b/tests/comprehensive/test_sqlite_revmap.py @@ -0,0 +1,77 @@ +import os +import unittest +import sys + +# wrapped in a try/except because of weirdness in how +# run.py works as compared to nose. +try: + import test_util +except ImportError: + sys.path.insert(0, os.path.dirname(os.path.dirname(__file__))) + import test_util + +# interesting and fast tests +import test_fetch_mappings +import test_fetch_renames +import test_pull +import test_template_keywords +import test_utility_commands + +# comprehensive tests +try: + import test_custom_layout +except ImportError: + sys.path.insert(0, os.path.dirname(__file__)) + import test_custom_layout + +import test_rebuildmeta +import test_updatemeta + +from hgsubversion import svnmeta, maps + + +class SqliteRevMapMixIn(object): + # do not double the test size by being wrapped again + obsolete_mode_tests = False + stupid_mode_tests = False + + def setUp(self): + assert svnmeta.SVNMeta._defaultrevmapclass is maps.RevMap + svnmeta.SVNMeta._defaultrevmapclass = maps.SqliteRevMap + super(SqliteRevMapMixIn, self).setUp() + + def tearDown(self): + assert svnmeta.SVNMeta._defaultrevmapclass is maps.SqliteRevMap + svnmeta.SVNMeta._defaultrevmapclass = maps.RevMap + super(SqliteRevMapMixIn, self).tearDown() + + def shortDescription(self): + text = super(SqliteRevMapMixIn, self).shortDescription() + if text: + text += ' (sqlite revmap)' + return text + +def buildtestclass(cls, selector=None): + name = 'SqliteRevMap%s' % cls.__name__ + newcls = type(name, (SqliteRevMapMixIn, cls,), {}) + + # remove test cases not selected by selector + if selector: + for name in dir(newcls): + if name.startswith('test_') and not selector(name[5:]): + setattr(newcls, name, None) + + globals()[name] = newcls + +def svndumpselector(name): + return name in ['branch_rename_to_trunk', + 'tag_name_same_as_branch'] + +buildtestclass(test_fetch_mappings.MapTests) +buildtestclass(test_fetch_renames.TestFetchRenames) +buildtestclass(test_pull.TestPull) +buildtestclass(test_template_keywords.TestLogKeywords) +buildtestclass(test_utility_commands.UtilityTests) + +buildtestclass(test_rebuildmeta.RebuildMetaTests, svndumpselector) +buildtestclass(test_updatemeta.UpdateMetaTests, svndumpselector) diff --git a/tests/fixtures/rename-closed-branch-dir.sh b/tests/fixtures/rename-closed-branch-dir.sh new file mode 100644 --- /dev/null +++ b/tests/fixtures/rename-closed-branch-dir.sh @@ -0,0 +1,69 @@ +#!/bin/sh +# +# Generate rename-closed-branch-dir.svndump +# + +mkdir temp +cd temp + +mkdir project +cd project +mkdir trunk +mkdir branches +mkdir tags +cd .. 
+ +svnadmin create testrepo +CURRENT_DIR=`pwd` +svnurl=file://"$CURRENT_DIR"/testrepo +#svn import project-orig $svnurl -m "init project" + +svn co $svnurl project +cd project +svn add * +svn ci -m "init project" + +cd trunk +echo a > a.txt +svn add a.txt +svn ci -m "add a.txt in trunk" + +# Create a branch +svn up +cd ../branches +svn copy ../trunk async-db +svn ci -m "add branch async-db" +svn up + +# Implement feature +cd async-db +echo b > b.txt +svn add b.txt +svn ci -m "Async functionality" + +# Merge feature branch +cd ../../trunk +svn merge $svnurl/branches/async-db +svn ci -m "Merged branch async-db" +cd .. +svn up + +# Create branch folder for unnecessary branches +svn mkdir $svnurl/branches/dead -m "Create branch folder for unnecessary branches" +svn up + +# We don't need the 'async-db' branch, anymore. +svn copy $svnurl/branches/async-db $svnurl/branches/dead -m "We don't need the 'async-db' branch, anymore." +svn up + +# Rename 'dead' folder to 'closed' +svn move $svnurl/branches/dead $svnurl/branches/closed -m "Renamed 'dead' folder to 'closed'" +svn up + +# Move 'branches/closed' to 'tags/closed' +svn move $svnurl/branches/closed $svnurl/tags/closed -m "Moved 'branches/closed' to 'tags/closed'." +svn up + +# Dump repository +cd .. +svnadmin dump testrepo > ../rename-closed-branch-dir.svndump diff --git a/tests/fixtures/rename-closed-branch-dir.svndump b/tests/fixtures/rename-closed-branch-dir.svndump new file mode 100644 --- /dev/null +++ b/tests/fixtures/rename-closed-branch-dir.svndump @@ -0,0 +1,296 @@ +SVN-fs-dump-format-version: 2 + +UUID: 2efdcfe9-9dfd-40a7-a9cc-bf5b70806ff3 + +Revision-number: 0 +Prop-content-length: 56 +Content-length: 56 + +K 8 +svn:date +V 27 +2016-01-27T15:35:29.673334Z +PROPS-END + +Revision-number: 1 +Prop-content-length: 112 +Content-length: 112 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:30.079847Z +K 7 +svn:log +V 12 +init project +PROPS-END + +Node-path: branches +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Node-path: tags +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Node-path: trunk +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Revision-number: 2 +Prop-content-length: 118 +Content-length: 118 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:31.065912Z +K 7 +svn:log +V 18 +add a.txt in trunk +PROPS-END + +Node-path: trunk/a.txt +Node-kind: file +Node-action: add +Prop-content-length: 10 +Text-content-length: 2 +Text-content-md5: 60b725f10c9c85c70d97880dfe8191b3 +Text-content-sha1: 3f786850e387550fdab836ed7e6dc881de23001b +Content-length: 12 + +PROPS-END +a + + +Revision-number: 3 +Prop-content-length: 119 +Content-length: 119 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:34.051261Z +K 7 +svn:log +V 19 +add branch async-db +PROPS-END + +Node-path: branches/async-db +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 2 +Node-copyfrom-path: trunk + + +Revision-number: 4 +Prop-content-length: 119 +Content-length: 119 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:36.101507Z +K 7 +svn:log +V 19 +Async functionality +PROPS-END + +Node-path: branches/async-db/b.txt +Node-kind: file +Node-action: add +Prop-content-length: 10 +Text-content-length: 2 +Text-content-md5: 3b5d5c3712955042212316173ccf37be +Text-content-sha1: 89e6c98d92887913cadf06b2adb97f26cde4849b +Content-length: 12 + +PROPS-END +b + + 
+Revision-number: 5 +Prop-content-length: 122 +Content-length: 122 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:38.055736Z +K 7 +svn:log +V 22 +Merged branch async-db +PROPS-END + +Node-path: trunk +Node-kind: dir +Node-action: change +Prop-content-length: 57 +Content-length: 57 + +K 13 +svn:mergeinfo +V 22 +/branches/async-db:3-4 +PROPS-END + + +Node-path: trunk/b.txt +Node-kind: file +Node-action: add +Node-copyfrom-rev: 4 +Node-copyfrom-path: branches/async-db/b.txt +Text-copy-source-md5: 3b5d5c3712955042212316173ccf37be +Text-copy-source-sha1: 89e6c98d92887913cadf06b2adb97f26cde4849b + + +Revision-number: 6 +Prop-content-length: 145 +Content-length: 145 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:40.046670Z +K 7 +svn:log +V 45 +Create branch folder for unnecessary branches +PROPS-END + +Node-path: branches/dead +Node-kind: dir +Node-action: add +Prop-content-length: 10 +Content-length: 10 + +PROPS-END + + +Revision-number: 7 +Prop-content-length: 145 +Content-length: 145 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:41.048576Z +K 7 +svn:log +V 45 +We don't need the 'async-db' branch, anymore. +PROPS-END + +Node-path: branches/dead/async-db +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 6 +Node-copyfrom-path: branches/async-db + + +Revision-number: 8 +Prop-content-length: 133 +Content-length: 133 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:42.046536Z +K 7 +svn:log +V 33 +Renamed 'dead' folder to 'closed' +PROPS-END + +Node-path: branches/closed +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 7 +Node-copyfrom-path: branches/dead + + +Node-path: branches/dead +Node-action: delete + + +Revision-number: 9 +Prop-content-length: 141 +Content-length: 141 + +K 10 +svn:author +V 5 +augie +K 8 +svn:date +V 27 +2016-01-27T15:35:43.048056Z +K 7 +svn:log +V 41 +Moved 'branches/closed' to 'tags/closed'. +PROPS-END + +Node-path: branches/closed +Node-action: delete + + +Node-path: tags/closed +Node-kind: dir +Node-action: add +Node-copyfrom-rev: 8 +Node-copyfrom-path: branches/closed + + diff --git a/tests/run.py b/tests/run.py --- a/tests/run.py +++ b/tests/run.py @@ -5,53 +5,6 @@ import os import sys import unittest -import test_util -test_util.SkipTest = None - -def tests(): - import test_binaryfiles - import test_diff - import test_externals - import test_fetch_branches - import test_fetch_command - import test_fetch_command_regexes - import test_fetch_exec - import test_fetch_mappings - import test_fetch_renames - import test_fetch_symlinks - import test_fetch_truncated - import test_hooks - import test_svn_pre_commit_hooks - import test_pull - import test_pull_fallback - import test_push_command - import test_push_renames - import test_push_dirs - import test_push_eol - import test_push_autoprops - import test_single_dir_clone - import test_single_dir_push - import test_svnwrap - import test_tags - import test_template_keywords - import test_utility_commands - import test_unaffected_core - import test_urls - - sys.path.append(os.path.dirname(__file__)) - sys.path.append(os.path.join(os.path.dirname(__file__), 'comprehensive')) - - import test_rebuildmeta - import test_stupid_pull - import test_updatemeta - import test_verify_and_startrev - - return locals() - -def comprehensive(mod): - dir = os.path.basename(os.path.dirname(mod.__file__)) - return dir == 'comprehensive' - if __name__ == '__main__': description = ("This script runs the hgsubversion tests. 
If no tests are " "specified, all known tests are implied.") @@ -100,26 +53,33 @@ if __name__ == '__main__': import tempfile sys.stdout = tempfile.TemporaryFile() - all_tests = tests() - - args = [i.split('.py')[0].replace('-', '_') for i in args] + args = [os.path.basename(os.path.splitext(arg)[0]).replace('-', '_') + for arg in args] loader = unittest.TestLoader() suite = unittest.TestSuite() + if sys.version_info[:2] < (2, 7): + import glob + def discover(start_dir, pattern='test*.py', top_level_dir=None): + tests = [] + sys.path.append(start_dir) + for path in glob.glob(os.path.join(start_dir, pattern)): + name = os.path.splitext(os.path.basename(path))[0] + tests.append(loader.loadTestsFromModule(__import__(name))) + return tests + loader.discover = discover + if not args: - check = lambda x: options.comprehensive or not comprehensive(x) - suite.addTests(loader.loadTestsFromModule(m) - for (n, m) in sorted(all_tests.iteritems()) - if check(m)) + suite.addTests(loader.discover('.')) + + if options.comprehensive: + suite.addTests(loader.discover('comprehensive', + top_level_dir='comprehensive')) else: - for arg in args: - if arg == 'test_util': - continue - elif arg not in all_tests: - print >> sys.stderr, 'test module %s not available' % arg - else: - suite.addTest(loader.loadTestsFromModule(all_tests[arg])) + sys.path.append(os.path.join(os.path.dirname(__file__), 'comprehensive')) + + suite.addTests(loader.loadTestsFromNames(args)) runner = unittest.TextTestRunner(**testargs) result = runner.run(suite) diff --git a/tests/test_fetch_command.py b/tests/test_fetch_command.py --- a/tests/test_fetch_command.py +++ b/tests/test_fetch_command.py @@ -93,7 +93,7 @@ class TestBasicRepoLayout(test_util.Test 'test_files_copied_from_outside_btt.svndump') self.assertEqual(node.hex(repo['tip'].node()), '3c78170e30ddd35f2c32faa0d8646ab75bba4f73') - self.assertEqual(test_util.repolen(repo.changelog), 2) + self.assertEqual(test_util.repolen(repo), 2) def test_file_renamed_in_from_outside_btt(self): repo = self._load_fixture_and_fetch( diff --git a/tests/test_fetch_mappings.py b/tests/test_fetch_mappings.py --- a/tests/test_fetch_mappings.py +++ b/tests/test_fetch_mappings.py @@ -92,7 +92,10 @@ class MapTests(test_util.TestBase): new = open(os.path.join(repopath, 'authors'), 'w') new.write(open(orig).read()) new.close() - test = maps.AuthorMap(self.repo.svnmeta(skiperrorcheck=True)) + meta = self.repo.svnmeta(skiperrorcheck=True) + test = maps.AuthorMap( + meta.ui, meta.authormap_file, meta.defaulthost, + meta.caseignoreauthors, meta.mapauthorscmd, meta.defaultauthors) fromself = set(test) test.load(orig) all_tests = set(test) @@ -114,6 +117,15 @@ class MapTests(test_util.TestBase): self.assertEqual(self.repo['tip'].user(), 'evil@5b65bade-98f3-4993-a01f-b7a6710da339') + def test_author_map_mapauthorscmd(self): + repo_path = self.load_svndump('replace_trunk_with_branch.svndump') + ui = self.ui() + ui.setconfig('hgsubversion', 'mapauthorscmd', 'echo "svn: %s"') + commands.clone(ui, test_util.fileurl(repo_path), + self.wc_path) + self.assertEqual(self.repo[0].user(), 'svn: Augie') + self.assertEqual(self.repo['tip'].user(), 'svn: evil') + def _loadwithfilemap(self, svndump, filemapcontent, failonmissing=True): repo_path = self.load_svndump(svndump) @@ -200,6 +212,22 @@ class MapTests(test_util.TestBase): self.assert_('good-name' in branches) self.assertEquals(self.repo[2].branch(), 'default') + def test_branchmap_regex_and_glob(self): + repo_path = self.load_svndump('branchmap.svndump') + branchmap = 
open(self.branchmap, 'w') + branchmap.write("syntax:re\n") + branchmap.write("bad(.*) = good-\\1 # stuffy\n") + branchmap.write("glob:feat* = default\n") + branchmap.close() + ui = self.ui() + ui.setconfig('hgsubversion', 'branchmap', self.branchmap) + commands.clone(ui, test_util.fileurl(repo_path), + self.wc_path, branchmap=self.branchmap) + branches = set(self.repo[i].branch() for i in self.repo) + self.assert_('badname' not in branches) + self.assert_('good-name' in branches) + self.assertEquals(self.repo[2].branch(), 'default') + def test_branchmap_tagging(self): '''test tagging a renamed branch, which used to raise an exception''' repo_path = self.load_svndump('commit-to-tag.svndump') @@ -290,6 +318,23 @@ class MapTests(test_util.TestBase): for r in repo: self.assertEquals(verify.verify(ui, repo, rev=r), 0) + def test_branchmap_no_replacement(self): + '''test that empty mappings are accepted + + Empty mappings are lines like 'this ='. We check that such branches are + not converted. + ''' + repo_path = self.load_svndump('branchmap.svndump') + branchmap = open(self.branchmap, 'w') + branchmap.write("badname =\n") + branchmap.close() + ui = self.ui() + ui.setconfig('hgsubversion', 'branchmap', self.branchmap) + commands.clone(ui, test_util.fileurl(repo_path), + self.wc_path, branchmap=self.branchmap) + branches = set(self.repo[i].branch() for i in self.repo) + self.assertEquals(sorted(branches), ['default', 'feature']) + def test_tagmap(self): repo_path = self.load_svndump('basic_tag_tests.svndump') tagmap = open(self.tagmap, 'w') diff --git a/tests/test_push_command.py b/tests/test_push_command.py --- a/tests/test_push_command.py +++ b/tests/test_push_command.py @@ -138,7 +138,7 @@ class PushTests(test_util.TestBase): open(os.path.join(repo_path, 'conf', 'svnserve.conf'), 'w').write('[general]\nanon-access=write\n[sasl]\n') self.port = random.randint(socket.IPPORT_USERRESERVED, 65535) - self.host = 'localhost' + self.host = socket.gethostname() args = ['svnserve', '--daemon', '--foreground', '--listen-port=%d' % self.port, '--listen-host=%s' % self.host, diff --git a/tests/test_revmap_migrate.py b/tests/test_revmap_migrate.py new file mode 100644 --- /dev/null +++ b/tests/test_revmap_migrate.py @@ -0,0 +1,68 @@ +import test_util + +from mercurial import util as hgutil +from hgsubversion import svnmeta, maps +from mercurial.node import hex + +class TestRevMapMigrate(test_util.TestBase): + + def _test_revmap_migrate(self, fromclass, toclass): + # revmap interfaces to test + getters = [ + lambda x: x.branchedits('the_branch', 3), + lambda x: x.branchedits('the_branch', 4), + lambda x: x.branchedits('the_branch', 5), + lambda x: x.branchedits('the_branch', 6), + lambda x: x.branchedits(None, 5), + lambda x: x.branchedits('non_existed', 10), + lambda x: x.branchmaxrevnum('the_branch', 3), + lambda x: x.branchmaxrevnum('the_branch', 4), + lambda x: x.branchmaxrevnum('the_branch', 5), + lambda x: x.branchmaxrevnum('the_branch', 6), + lambda x: x.branchmaxrevnum(None, 5), + lambda x: x.branchmaxrevnum('non_existed', 10), + lambda x: list(x.revhashes(3)), + lambda x: list(x.revhashes(4)), + lambda x: list(x.revhashes(42)), + lambda x: list(x.revhashes(105)), + lambda x: x.firstpulled, + lambda x: x.lastpulled, + lambda x: x.lasthash, + ] + + svnmeta.SVNMeta._defaultrevmapclass = fromclass + repo = self._load_fixture_and_fetch('two_heads.svndump') + meta = svnmeta.SVNMeta(repo) + self.assertEqual(meta.revmap.__class__, fromclass) + origrevmap = meta.revmap + + # insert fake special (duplicated, 
with '\0') data + origrevmap[103, None] = b'\0' * 20 + origrevmap[104, None] = b'\0' * 18 + b'cd' + origrevmap[105, None] = b'ab\0cdefghijklmnopqrs' + origrevmap[104, None] = b'\0' * 18 + b'\xff\0' + origrevmap[105, 'ab'] = origrevmap[105, None] + + origvalues = [f(meta.revmap) for f in getters] + + # migrate to another format (transparently) + svnmeta.SVNMeta._defaultrevmapclass = toclass + meta = svnmeta.SVNMeta(repo) + self.assertEqual(meta.revmap.__class__, toclass) + + # enable iteration otherwise we cannot use iteritems + origrevmap._allowiter = True + for k, v in origrevmap.iteritems(): + newv = meta.revmap[k] + self.assertEqual(newv, v) + self.assertEqual(len(newv), 20) + self.assertEqual(meta.revmap[meta.revmap.hashes()[v]], v) + + newvalues = [f(meta.revmap) for f in getters] + self.assertEqual(origvalues, newvalues) + + def test_revmap_migrate_up(self): + self._test_revmap_migrate(maps.RevMap, maps.SqliteRevMap) + + def test_revmap_migrate_down(self): + self._test_revmap_migrate(maps.SqliteRevMap, maps.RevMap) diff --git a/tests/test_svnwrap.py b/tests/test_svnwrap.py --- a/tests/test_svnwrap.py +++ b/tests/test_svnwrap.py @@ -11,15 +11,11 @@ class TestBasicRepoLayout(unittest.TestC def setUp(self): self.tmpdir = tempfile.mkdtemp('svnwrap_test') self.repo_path = '%s/testrepo' % self.tmpdir - subprocess.call(['svnadmin', 'create', self.repo_path, ]) - inp = open(os.path.join(os.path.dirname(__file__), 'fixtures', - 'project_root_at_repo_root.svndump')) - proc = subprocess.call(['svnadmin', 'load', self.repo_path, ], - stdin=inp, - close_fds=test_util.canCloseFds, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) - assert proc == 0 + + with open(os.path.join(test_util.FIXTURES, + 'project_root_at_repo_root.svndump')) as fp: + svnwrap.create_and_load(self.repo_path, fp) + self.repo = svnwrap.SubversionRepo(test_util.fileurl(self.repo_path)) def tearDown(self): diff --git a/tests/test_tags.py b/tests/test_tags.py --- a/tests/test_tags.py +++ b/tests/test_tags.py @@ -4,6 +4,7 @@ import os, sys, cStringIO, difflib import unittest from mercurial import commands +from mercurial import error from mercurial import hg from mercurial import node from mercurial import ui @@ -162,14 +163,12 @@ rename a tag 'magic2': '\xa3\xa2D\x86aM\xc0v\xb9\xb0\x18\x14\xad\xacwBUi}\xe2', }) - def test_old_tag_map_rebuilds(self): + def test_old_tag_map_aborts(self): repo = self._load_fixture_and_fetch('tag_name_same_as_branch.svndump') tm = os.path.join(repo.path, 'svn', 'tagmap') open(tm, 'w').write('1\n') # force tags to load since it is lazily loaded when needed - repo.svnmeta().tags - commands.pull(repo.ui, repo) - self.assertEqual(open(tm).read().splitlines()[0], '2') + self.assertRaises(error.Abort, lambda: repo.svnmeta().tags) def _debug_print_tags(self, repo, ctx, fp): def formatnode(ctx): diff --git a/tests/test_urls.py b/tests/test_urls.py --- a/tests/test_urls.py +++ b/tests/test_urls.py @@ -8,58 +8,61 @@ from hgsubversion import svnrepo class TestSubversionUrls(test_util.TestBase): def test_standard_url(self): - self.assertEqual((None, None, 'file:///var/svn/repo'), - parse_url('file:///var/svn/repo')) + self.check_parse_url((None, None, 'file:///var/svn/repo'), + ('file:///var/svn/repo', )) def test_user_url(self): - self.assertEqual( + self.check_parse_url( ('joe', None, 'https://svn.testurl.com/repo'), - parse_url('https://joe@svn.testurl.com/repo')) - self.assertEqual( + ('https://joe@svn.testurl.com/repo', )) + self.check_parse_url( ('bob', None, 'https://svn.testurl.com/repo'), - 
parse_url('https://joe@svn.testurl.com/repo', 'bob')) + ('https://joe@svn.testurl.com/repo', 'bob', )) def test_password_url(self): - self.assertEqual( + self.check_parse_url( (None, 't3stpw', 'svn+ssh://svn.testurl.com/repo'), - parse_url('svn+ssh://:t3stpw@svn.testurl.com/repo')) - self.assertEqual( + ('svn+ssh://:t3stpw@svn.testurl.com/repo', )) + self.check_parse_url( (None, '123abc', 'svn+ssh://svn.testurl.com/repo'), - parse_url('svn+ssh://:t3stpw@svn.testurl.com/repo', None, '123abc')) + ('svn+ssh://:t3stpw@svn.testurl.com/repo', None, '123abc', )) def test_svnssh_preserve_user(self): - self.assertEqual( + self.check_parse_url( ('user', 't3stpw', 'svn+ssh://user@svn.testurl.com/repo',), - parse_url('svn+ssh://user:t3stpw@svn.testurl.com/repo')) - self.assertEqual( + ('svn+ssh://user:t3stpw@svn.testurl.com/repo', )) + self.check_parse_url( ('bob', '123abc', 'svn+ssh://bob@svn.testurl.com/repo',), - parse_url('svn+ssh://user:t3stpw@svn.testurl.com/repo', 'bob', '123abc')) - self.assertEqual( + ('svn+ssh://user:t3stpw@svn.testurl.com/repo', 'bob', '123abc', )) + self.check_parse_url( ('user2', None, 'svn+ssh://user2@svn.testurl.com/repo',), - parse_url('svn+ssh://user2@svn.testurl.com/repo')) - self.assertEqual( + ('svn+ssh://user2@svn.testurl.com/repo', )) + self.check_parse_url( ('bob', None, 'svn+ssh://bob@svn.testurl.com/repo',), - parse_url('svn+ssh://user2@svn.testurl.com/repo', 'bob')) + ('svn+ssh://user2@svn.testurl.com/repo', 'bob', )) def test_user_password_url(self): - self.assertEqual( + self.check_parse_url( ('joe', 't3stpw', 'https://svn.testurl.com/repo'), - parse_url('https://joe:t3stpw@svn.testurl.com/repo')) - self.assertEqual( + ('https://joe:t3stpw@svn.testurl.com/repo', )) + self.check_parse_url( ('bob', '123abc', 'https://svn.testurl.com/repo'), - parse_url('https://joe:t3stpw@svn.testurl.com/repo', 'bob', '123abc')) + ('https://joe:t3stpw@svn.testurl.com/repo', 'bob', '123abc', )) def test_url_rewriting(self): ui = test_util.ui.ui() ui.setconfig('hgsubversion', 'username', 'bob') repo = svnrepo.svnremoterepo(ui, 'svn+ssh://joe@foo/bar') self.assertEqual('svn+ssh://bob@foo/bar', repo.svnauth[0]) + self.assertEqual('svn+ssh://bob@foo/bar', repo.svnurl) repo = svnrepo.svnremoterepo(ui, 'svn+http://joe@foo/bar') self.assertEqual(('http://foo/bar', 'bob', None), repo.svnauth) + self.assertEqual('http://foo/bar', repo.svnurl) repo = svnrepo.svnremoterepo(ui, 'svn+https://joe@foo/bar') self.assertEqual(('https://foo/bar', 'bob', None), repo.svnauth) + self.assertEqual('https://foo/bar', repo.svnurl) def test_quoting(self): ui = self.ui() @@ -72,3 +75,11 @@ class TestSubversionUrls(test_util.TestB repo1 = svnrepo.svnremoterepo(ui, repo_url + subdir) repo2 = svnrepo.svnremoterepo(ui, repo_url + quoted_subdir) self.assertEqual(repo1.svnurl, repo2.svnurl) + + def check_parse_url(self, expected, args): + self.assertEqual(expected, parse_url(*args)) + if len(args) == 1: + repo = svnrepo.svnremoterepo(self.ui(), path=args[0]) + self.assertEqual(expected[2], repo.svnauth[0]) + self.assertEqual(expected[2], repo.svnurl) + diff --git a/tests/test_util.py b/tests/test_util.py --- a/tests/test_util.py +++ b/tests/test_util.py @@ -28,6 +28,7 @@ from mercurial import util as hgutil from mercurial import extensions from hgsubversion import compathacks +from hgsubversion import svnrepo from hgsubversion import svnwrap try: @@ -39,14 +40,12 @@ except ImportError: try: SkipTest = unittest.SkipTest except AttributeError: - try: - from unittest2 import SkipTest - except ImportError: - try: 
- from nose import SkipTest - except ImportError: - SkipTest = None + if 'nose' in sys.modules: + SkipTest = sys.modules['nose'].SkipTest + else: + SkipTest = None +from hgsubversion import svnwrap from hgsubversion import util from hgsubversion import svnwrap @@ -210,15 +209,25 @@ def getlocalpeer(repo): localrepo = repo return localrepo -def repolen(repo): +def repolen(repo, svnonly=False): """Naively calculate the amount of available revisions in a repository. this is usually equal to len(repo) -- except in the face of obsolete revisions. + + if svnonly is true, only count revisions converted from Subversion. """ # kind of nasty way of calculating the length, but fortunately, # our test repositories tend to be rather small - return len([r for r in repo]) + revs = set(repo) + + if obsolete: + revs -= obsolete.getrevs(repo, 'obsolete') + + if svnonly: + revs = set(r for r in revs if util.getsvnrev(repo[r])) + + return len(revs) def _makeskip(name, message): if SkipTest: @@ -534,12 +543,8 @@ class TestBase(unittest.TestCase): ''' path = self._makerepopath() assert not os.path.exists(path) - subprocess.call(['svnadmin', 'create', path,], - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - inp = open(os.path.join(FIXTURES, fixture_name)) - proc = subprocess.Popen(['svnadmin', 'load', path,], stdin=inp, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - proc.communicate() + with open(os.path.join(FIXTURES, fixture_name)) as inp: + svnwrap.create_and_load(path, inp) return path def load_repo_tarball(self, fixture_name): @@ -596,7 +601,7 @@ class TestBase(unittest.TestCase): return hg.repository(testui(), self.wc_path) - def load_and_fetch(self, fixture_name, *args, **opts): + def load(self, fixture_name): if fixture_name.endswith('.svndump'): repo_path = self.load_svndump(fixture_name) elif fixture_name.endswith('tar.gz'): @@ -604,6 +609,10 @@ class TestBase(unittest.TestCase): else: assert False, 'Unknown fixture type' + return repo_path + + def load_and_fetch(self, fixture_name, *args, **opts): + repo_path = self.load(fixture_name) return self.fetch(repo_path, *args, **opts), repo_path def _load_fixture_and_fetch(self, *args, **kwargs): @@ -704,7 +713,8 @@ class TestBase(unittest.TestCase): changed + removed, filectxfn, 'an_author', - '2008-10-07 20:59:48 -0500') + '2008-10-07 20:59:48 -0500', + {'branch': parentctx.branch()}) nodeid = repo.commitctx(ctx) repo = self.repo hg.clean(repo, nodeid) @@ -773,5 +783,21 @@ files: {files} commands.log(_ui, repo, rev=None, template=templ, graph=True) return _ui.popbuffer() + def svnlog(self, repo=None): + '''log of the remote Subversion repository corresponding to repo + + In order to make the format suitable for direct comparison in + tests, we exclude dates and convert the path operations into + a tuple. + ''' + + if repo is None: + repo = self.repo + + return [(r.revnum, r.message, + dict((p, (op.action, op.copyfrom_path, int(op.copyfrom_rev))) + for (p, op) in r.paths.items())) + for r in svnrepo.svnremoterepo(repo.ui).svn.revisions()] + def draw(self, repo): sys.stdout.write(self.getgraph(repo))
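
Editor's note on the fileproperty helper added to hgsubversion/util.py above: it builds a property whose value is lazily read from a file on first access and written back through on assignment, which is what lets metadata such as lastpulled live on disk without ad-hoc parsing at every call site. The following is a minimal, standalone sketch of the same pattern, not the patched code itself: it drops Mercurial's safehasattr in favour of plain hasattr, and the Meta class, its vfsdir argument, and the lastpulled file name are hypothetical stand-ins used only for illustration.

    import os

    def fileproperty(fname, pathfunc, default=None,
                     serializer=str, deserializer=str):
        """Sketch of a file-backed property (simplified from util.fileproperty)."""
        def fget(self):
            # cache on the instance; fall back to the file, then to the default
            if not hasattr(self, fname):
                path = pathfunc(self)
                if os.path.exists(path):
                    with open(path, 'r') as f:
                        setattr(self, fname, deserializer(f.read()))
                else:
                    setattr(self, fname, default)
            return getattr(self, fname)

        def fset(self, value):
            # update the in-memory cache and persist the serialized value
            setattr(self, fname, value)
            with open(pathfunc(self), 'w') as f:
                f.write(serializer(value))

        return property(fget, fset)

    class Meta(object):
        """Hypothetical holder; the real SVNMeta derives paths from the repo."""
        def __init__(self, vfsdir):
            self.vfsdir = vfsdir

        # cached in memory, persisted to <vfsdir>/lastpulled as text
        lastpulled = fileproperty(
            '_lastpulled',
            lambda self: os.path.join(self.vfsdir, 'lastpulled'),
            default=0, serializer=str, deserializer=int)

Under these assumptions, assigning m.lastpulled = 42 writes "42" to the backing file, and a later read on a fresh instance deserializes it back to the integer 42; subsequent reads on the same instance come from the cached attribute rather than the file.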