view hgsubversion/ @ 1424:a794cbc174a9

maps: document RevMap.batchset will not update internal state. Judging from the name, RevMap.batchset seems to be equivalent to multiple "__setitem__" calls, but it neither calls dict.__setitem__ nor updates "_hashes". Update the docstring to make it clear that the in-memory state is not updated.
author Jun Wu <>
date Tue, 24 May 2016 00:11:50 +0100
parents 372afb75f465
children 5b9002d28418
line wrap: on
line source

''' Module for self-contained maps. '''

import errno
import os
import re
from mercurial import util as hgutil
from mercurial.node import bin, hex, nullid

import subprocess
import svncommands
import util

class BaseMap(dict):
    '''A base class for the different type of mappings: author, branch, and
    tags.

    Keys are stored as compiled regular expressions and values as replacement
    strings. A plain string key is escaped and compiled when it is stored. On
    lookup, a string matches the first key whose pattern equals the escaped
    string or, failing that, the first key whose regex is found in it.
    '''

    def __init__(self, meta):
        '''Initialise the map from its backing file and the command line.

        meta supplies the ui used for messages and the '<mapname>_file'
        attribute that names the backing file.
        '''
        self.meta = meta
        super(BaseMap, self).__init__()

        self._commentre = re.compile(r'((^|[^\\])(\\\\)*)#.*')
        self.syntaxes = ('re', 'glob')

        # trickery: all subclasses have the same name as their file and config
        # names, e.g. AuthorMap is meta.authormap_file for the filename and
        # 'authormap' for the config option
        self.mapname = self.__class__.__name__.lower()
        self.mapfilename = self.mapname + '_file'
        # read the backing file first; load() silently skips a missing file
        self.load(self.meta.__getattribute__(self.mapfilename))

        # append mappings specified from the commandline
        clmap = util.configpath(self.meta.ui, self.mapname)
        if clmap:
            self.load(clmap)

    def _findkey(self, key):
        '''Takes a string and finds the first corresponding key that matches
        via regex

        key may also be a compiled regex, in which case only a stored key
        with the same pattern matches. Returns the stored regex key, or None
        if nothing matches or key is empty.
        '''
        if not key:
            return None

        # compile a new regex key if we're given a string; can't use
        # hgutil.compilere since we need regex.sub
        k = key
        if isinstance(key, str):
            k = re.compile(re.escape(key))

        # preference goes to matching the exact pattern, i.e. 'foo' should
        # first match 'foo' before trying regexes
        for regex in self:
            if regex.pattern == k.pattern:
                return regex

        # if key isn't a string, then we are done; nothing matches
        if not isinstance(key, str):
            return None

        # now we test the regex; the above loop will be faster and is
        # equivalent to not having regexes (i.e. just doing string compares)
        for regex in self:
            if regex.search(key):
                return regex
        return None

    def get(self, key, default=None):
        '''Similar to dict.get, except we use our own matcher, _findkey.'''
        if self._findkey(key):
            return self[key]
        return default

    def __getitem__(self, key):
        '''Similar to dict.__getitem__, except we use our own matcher,
        _findkey. If the key is a string, then we can use our regex matching
        to map its value.

        Raises KeyError if no stored key matches.
        '''
        k = self._findkey(key)
        if k is None:
            # report the key that was asked for rather than None
            raise KeyError(key)
        val = super(BaseMap, self).__getitem__(k)

        # if key is a string then we can transform it using our regex, else we
        # don't have enough information, so we just return the val
        if isinstance(key, str):
            val = k.sub(val, key)

        return val

    def __setitem__(self, key, value):
        '''Similar to dict.__setitem__, except we compile the string into a
        regex, if need be.
        '''
        # try to find the regex already in the map
        k = self._findkey(key)
        # if we found one, then use it
        if k:
            key = k
        # else make a new regex
        if isinstance(key, str):
            key = re.compile(re.escape(key))
        super(BaseMap, self).__setitem__(key, value)

    def __contains__(self, key):
        '''Similar to dict.__contains__, except we use our own matcher,
        _findkey.'''
        return self._findkey(key) is not None

    def load(self, path):
        '''Load mappings from a file at the specified path.

        Environment variables in path are expanded; a path that does not
        exist is ignored. When path is not the map's own backing file, every
        line read is also appended verbatim to the backing file.
        '''
        path = os.path.expandvars(path)
        if not os.path.exists(path):
            return

        writing = None
        mapfile = self.meta.__getattribute__(self.mapfilename)
        if path != mapfile:
            writing = open(mapfile, 'a')

        try:
            self.meta.ui.debug('reading %s from %s\n' % (self.mapname , path))
            with open(path, 'r') as f:
                self._loadlines(f, path, writing)
        finally:
            # close the backing file even if parsing raised
            if writing:
                writing.close()

    def _loadlines(self, f, path, writing):
        '''Parse the lines of f (read from path) into the map, copying each
        raw line to the file object writing when one is given.'''
        syntax = ''
        for number, line in enumerate(f):

            if writing:
                writing.write(line)

            # strip out comments
            if "#" in line:
                # remove comments prefixed by an even number of escapes
                line = self._commentre.sub(r'\1', line)
                # fixup properly escaped comments that survived the above
                line = line.replace("\\#", "#")
            line = line.rstrip()
            if not line:
                continue

            # a 'syntax:' line sets the default syntax for following lines;
            # an unknown syntax name resets it to plain strings
            if line.startswith('syntax:'):
                s = line[7:].strip()
                syntax = ''
                if s in self.syntaxes:
                    syntax = s
                continue

            # a per-line 're:' or 'glob:' prefix overrides the default
            pat = syntax
            for s in self.syntaxes:
                if line.startswith(s + ':'):
                    pat = s
                    line = line[len(s) + 1:]
                    break

            # split on the first '='
            try:
                src, dst = line.split('=', 1)
            except (IndexError, ValueError):
                msg = 'ignoring line %i in %s %s: %s\n'
                self.meta.ui.status(msg % (number, self.mapname, path,
                                           line.rstrip()))
                continue

            src = src.strip()
            dst = dst.strip()

            if pat != 're':
                src = re.escape(src)
            if pat == 'glob':
                # re.escape turned each '*' into '\*'; make it a wildcard
                src = src.replace('\\*', '.*')
            src = re.compile(src)

            if src not in self:
                self.meta.ui.debug('adding %s to %s\n' % (src, self.mapname))
            elif dst != self[src]:
                msg = 'overriding %s: "%s" to "%s" (%s)\n'
                self.meta.ui.status(msg % (self.mapname, self[src], dst, src))
            self[src] = dst

class AuthorMap(BaseMap):
    '''A mapping from Subversion-style authors to Mercurial-style
    authors, and back. The data is stored persistently on disk.

    If the 'hgsubversion.defaultauthors' configuration option is set to false,
    attempting to obtain an unknown author will fail with an Abort.

    If the 'hgsubversion.caseignoreauthors' configuration option is set to true,
    the userid from Subversion is always compared lowercase.
    '''

    def __init__(self, meta):
        '''Initialise a new AuthorMap.

        meta provides the ui used to print diagnostic messages and the
        default host appended to authors that have no mapping. The backing
        store is typically .hg/svn/authors.
        '''
        # set before calling the base initialiser, which runs load() at once
        self.defaulthost = ''
        if meta.defaulthost:
            self.defaulthost = '@%s' % meta.defaulthost.lstrip('@')

        super(AuthorMap, self).__init__(meta)

    def _lowercase(self, key):
        '''Determine whether or not to lowercase a str or regex using the
        meta.caseignoreauthors setting.

        Returns key unchanged unless caseignoreauthors is set; otherwise a
        lowercased string, or a regex compiled from the lowercased pattern.
        '''
        k = key
        if self.meta.caseignoreauthors:
            if isinstance(key, str):
                k = key.lower()
            else:
                k = re.compile(key.pattern.lower())
        return k

    def __setitem__(self, key, value):
        '''Similar to dict.__setitem__, except we check caseignoreauthors to
        use lowercase string or not
        '''
        super(AuthorMap, self).__setitem__(self._lowercase(key), value)

    def __contains__(self, key):
        '''Similar to dict.__contains__, except we check caseignoreauthors to
        use lowercase string or not
        '''
        return super(AuthorMap, self).__contains__(self._lowercase(key))

    def __getitem__(self, author):
        ''' Similar to dict.__getitem__, except in case of an unknown author.
        In such cases, a new value is generated and added to the dictionary
        as well as the backing store.

        An unknown author is resolved by running meta.mapauthorscmd when it
        is set, then by appending the default host when meta.defaultauthors
        is set. Raises Abort if the command fails or nothing resolves the
        author. '''
        if author is None:
            author = '(no author)'

        # non-string keys (compiled regexes) go straight to the base lookup
        if not isinstance(author, str):
            return super(AuthorMap, self).__getitem__(author)

        search_author = author
        if self.meta.caseignoreauthors:
            search_author = author.lower()

        result = None
        if search_author in self:
            result = super(AuthorMap, self).__getitem__(search_author)
        elif self.meta.mapauthorscmd:
            # NOTE(review): the author name is interpolated into a shell
            # command line; quoting is left to the configured command
            # template -- confirm this is acceptable for untrusted names
            cmd = self.meta.mapauthorscmd % author
            process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
            output = process.communicate()[0]
            retcode = process.poll()
            if retcode:
                msg = 'map author command "%s" exited with error'
                raise hgutil.Abort(msg % cmd)
            self[author] = result = output.strip()
        if not result:
            if self.meta.defaultauthors:
                self[author] = result = '%s%s' % (author, self.defaulthost)
                msg = 'substituting author "%s" for default "%s"\n'
                self.meta.ui.debug(msg % (author, result))
            else:
                msg = 'author %s has no entry in the author map!'
                raise hgutil.Abort(msg % author)
        self.meta.ui.debug('mapping author "%s" to "%s"\n' % (author, result))
        return result

    def reverselookup(self, author):
        '''Return the stored key whose value equals the Mercurial author.

        When no entry has that value, fall back to the part of author
        before the last '@'.
        '''
        for svnauthor, hgauthor in self.iteritems():
            if author == hgauthor:
                return svnauthor
        # Mercurial incorrectly splits at e.g. '.', so we roll our own.
        return author.rsplit('@', 1)[0]

class Tags(dict):
    """Map tags to converted node identifier.

    tag names are non-empty strings. Tags are saved in a file
    called tagmap, for backwards compatibility reasons.

    The file starts with a version line, followed by one
    'hexnode revision tag' line per recorded tag.
    """
    VERSION = 2

    def __init__(self, meta, endrev=None):
        '''Load the tag file named by meta.tagfile, creating it if missing.

        When endrev is given, entries recorded for a later revision are
        left out of the in-memory map.
        '''
        dict.__init__(self)
        self.meta = meta
        self.endrev = endrev
        if os.path.isfile(self.meta.tagfile):
            self._load()
        else:
            self._write()

    def _load(self):
        '''Read the tag file into the dict.

        An older file version triggers rebuildmeta; a newer one raises Abort.
        '''
        with open(self.meta.tagfile) as f:
            ver = int(f.readline())
            if ver > self.VERSION:
                raise hgutil.Abort('tagmap too new -- please upgrade')
            if ver == self.VERSION:
                for l in f:
                    ha, revision, tag = l.split(' ', 2)
                    revision = int(revision)
                    # drop the trailing newline
                    tag = tag[:-1]
                    # skip entries recorded after the requested end revision
                    if self.endrev is not None and revision > self.endrev:
                        continue
                    if not tag:
                        continue
                    dict.__setitem__(self, tag, bin(ha))
                return
        # the file is older than VERSION and is closed by now; do not read
        # its entries
        self.meta.ui.status('tag map outdated, running rebuildmeta...\n')
        svncommands.rebuildmeta(self.meta.ui, self.meta.repo, ())

    def _write(self):
        '''Truncate the tag file so it holds only the version line.'''
        assert self.endrev is None
        with open(self.meta.tagfile, 'w') as f:
            f.write('%s\n' % self.VERSION)

    def update(self, other):
        '''Store every item of other through __setitem__, so each one is
        also appended to the tag file.'''
        for k, v in other.iteritems():
            self[k] = v

    def __contains__(self, tag):
        '''True for a non-empty tag that is stored with a node other than
        nullid.'''
        return bool(tag and dict.__contains__(self, tag)
                    and dict.__getitem__(self, tag) != nullid)

    def __getitem__(self, tag):
        '''Return the node stored for tag; raise KeyError when the tag is
        empty, unknown or stored as nullid.'''
        if tag and tag in self:
            return dict.__getitem__(self, tag)
        raise KeyError(tag)

    def __setitem__(self, tag, info):
        '''Record tag with info, a (binary node, revision) pair.

        The entry is appended to the tag file and the node is stored in the
        dict. Raises Abort for an empty tag.
        '''
        if not tag:
            raise hgutil.Abort('tag cannot be empty')
        ha, revision = info
        with open(self.meta.tagfile, 'a') as f:
            f.write('%s %s %s\n' % (hex(ha), revision, tag))
        dict.__setitem__(self, tag, ha)

class RevMap(dict):
    '''Map (revnum, branch) pairs to binary changeset hashes.

    The map is persisted in meta.revmap_file: a version line followed by one
    'revnum hexhash branch' line per entry. An empty branch field stands for
    a branch of None.
    '''

    VERSION = 1

    def __init__(self, meta):
        '''Load the revmap file named by meta.revmap_file, creating it if
        it does not exist.'''
        dict.__init__(self)
        self.meta = meta
        self._hashes = None

        if os.path.isfile(self.meta.revmap_file):
            self._load()
        else:
            self._write()

    def hashes(self):
        '''Return the reverse mapping, hash -> (revnum, branch).

        The reverse dict is built on first use and cached.
        '''
        if self._hashes is None:
            self._hashes = dict((v, k) for (k, v) in self.iteritems())
        return self._hashes

    def branchedits(self, branch, rev):
        '''Return the items on branch with a revnum below rev.revnum,
        sorted in descending order.'''
        check = lambda x: x[0][1] == branch and x[0][0] < rev.revnum
        return sorted(filter(check, self.iteritems()), reverse=True)

    def branchmaxrevnum(self, branch, maxrevnum):
        '''Return the highest revnum on branch that is <= maxrevnum, or 0
        when there is none.'''
        result = 0
        for num, br in self.iterkeys():
            if br == branch and num <= maxrevnum and num > result:
                result = num
        return result

    def lasthash(self):
        '''Return the binary hash on the last line of the revmap file, or
        None when the file has no entries.'''
        lines = list(self._readmapfile())
        if not lines:
            return None
        return bin(lines[-1].split(' ', 2)[1])

    def revhashes(self, revnum):
        '''Yield the hash of every entry whose revnum equals revnum.'''
        for key, value in self.iteritems():
            if key[0] == revnum:
                yield value

    def clear(self):
        '''Empty the map: rewrite the file to hold only the version line,
        drop all in-memory entries and invalidate the reverse cache.'''
        self._write()
        dict.clear(self)
        self._hashes = None

    def batchset(self, items):
        '''Set items in batches

        items is an array of (rev num, branch, binary hash)

        For performance reason, internal in-memory state is not updated.
        To get an up-to-date RevMap, reconstruct the object.
        '''
        with open(self.meta.revmap_file, 'a') as f:
            f.write(''.join('%s %s %s\n' % (revnum, hex(binhash), br or '')
                            for revnum, br, binhash in items))

    def _readmapfile(self):
        '''Return an iterable over the entry lines of the revmap file.

        A missing file yields no lines; other IOErrors propagate. The
        version line is consumed and checked, raising Abort on a mismatch.
        '''
        path = self.meta.revmap_file
        try:
            f = open(path)
        except IOError as err:
            if err.errno != errno.ENOENT:
                raise
            return iter([])
        ver = int(f.readline())
        if ver != self.VERSION:
            # do not leave the handle open when bailing out
            f.close()
            raise hgutil.Abort('revmap too new -- please upgrade')
        return f

    @classmethod
    def exists(cls, meta):
        '''Return whether the revmap file named by meta exists.'''
        return os.path.exists(meta.revmap_file)

    def _load(self):
        '''Read every entry of the revmap file into the dict and widen
        meta.firstpulled / meta.lastpulled to cover the revnums seen.'''
        lastpulled = self.meta.lastpulled
        firstpulled = self.meta.firstpulled
        # bypass our own __setitem__, which would append to the file again
        setitem = dict.__setitem__
        for l in self._readmapfile():
            revnum, ha, branch = l.split(' ', 2)
            if branch == '\n':
                branch = None
            else:
                # drop the trailing newline
                branch = branch[:-1]
            revnum = int(revnum)
            if revnum > lastpulled or not lastpulled:
                lastpulled = revnum
            if revnum < firstpulled or not firstpulled:
                firstpulled = revnum
            setitem(self, (revnum, branch), bin(ha))
        self.meta.lastpulled = lastpulled
        self.meta.firstpulled = firstpulled

    def _write(self):
        '''Truncate the revmap file so it holds only the version line.'''
        with open(self.meta.revmap_file, 'w') as f:
            f.write('%s\n' % self.VERSION)

    def __setitem__(self, key, ha):
        '''Store ha under key, a (revnum, branch) pair.

        The entry is appended to the revmap file, meta.firstpulled and
        meta.lastpulled are widened, and the reverse cache is updated when
        it has already been built.
        '''
        revnum, branch = key
        b = branch or ''
        with open(self.meta.revmap_file, 'a') as f:
            f.write(str(revnum) + ' ' + hex(ha) + ' ' + b + '\n')
        if revnum > self.meta.lastpulled or not self.meta.lastpulled:
            self.meta.lastpulled = revnum
        if revnum < self.meta.firstpulled or not self.meta.firstpulled:
            self.meta.firstpulled = revnum
        dict.__setitem__(self, (revnum, branch), ha)
        if self._hashes is not None:
            self._hashes[ha] = (revnum, branch)

class FileMap(object):
    '''Ordered include/exclude rules deciding which paths are converted.

    Rules are kept in the include and exclude dicts, mapping a path prefix
    to the order in which the rule was added. They are persisted in
    meta.filemap_file after a version line.
    '''

    VERSION = 1

    def __init__(self, meta):
        '''Initialise a new FileMap.

        meta provides the ui used to print diagnostic messages and the
        location of the backing store, typically .hg/svn/filemap.
        '''
        self.meta = meta
        self.include = {}
        self.exclude = {}
        if os.path.isfile(self.meta.filemap_file):
            self._load()
        else:
            self._write()

        # append file mapping specified from the commandline
        clmap = util.configpath(self.meta.ui, 'filemap')
        if clmap:
            self.load(clmap)

    def _rpairs(self, name):
        '''Yield (prefix, suffix) splits of name at each '/', longest prefix
        first, ending with ('.', name).'''
        e = len(name)
        while e != -1:
            yield name[:e], name[e+1:]
            e = name.rfind('/', 0, e)
        yield '.', name

    def check(self, m, path):
        '''Return the order of the most specific rule in mapping m
        ('include' or 'exclude') covering path, or -1 if none does.'''
        m = getattr(self, m)
        for pre, _suf in self._rpairs(path):
            if pre in m:
                return m[pre]
        return -1

    def __contains__(self, path):
        '''Return whether path is selected by the rules.

        The empty path and an empty map select everything.
        '''
        if not len(path):
            return True
        if len(self.include):
            inc = self.check('include', path)
        elif not len(self.exclude):
            return True
        else:
            # only excludes exist: everything is included unless excluded
            inc = 0
        if len(self.exclude):
            exc = self.check('exclude', path)
        else:
            exc = -1
        # respect rule order: newer rules override older
        return inc > exc

    # Needed so empty filemaps are false
    def __len__(self):
        return len(self.include) + len(self.exclude)

    def add(self, fn, m, path):
        '''Add a rule for path to mapping m ('include' or 'exclude').

        fn is the file the rule came from. A duplicate is reported and
        ignored. Rules from any file other than the backing store are also
        appended to the backing store.
        '''
        mapping = getattr(self, m)
        if path in mapping:
            msg = 'duplicate %s entry in %s: "%s"\n'
            self.meta.ui.status(msg % (m, fn, path))
            return
        # 'include' -> 'including', 'exclude' -> 'excluding'
        bits = m.rstrip('e'), path
        self.meta.ui.debug('%sing %s\n' % bits)
        # respect rule order
        mapping[path] = len(self)
        if fn != self.meta.filemap_file:
            with open(self.meta.filemap_file, 'a') as f:
                f.write(m + ' ' + path + '\n')

    def load(self, fn):
        '''Read rules from the file named fn.'''
        self.meta.ui.debug('reading file map from %s\n' % fn)
        with open(fn, 'r') as f:
            self.load_fd(f, fn)

    def load_fd(self, f, fn):
        '''Read rules from the lines of f; fn names their origin.

        Blank lines and lines starting with '#' are skipped. Unknown
        commands and lines that cannot be split are reported as warnings.
        '''
        for line in f:
            if line.strip() == '' or line.strip()[0] == '#':
                continue
            try:
                cmd, path = line.split(' ', 1)
                cmd = cmd.strip()
                path = path.strip()
                if cmd in ('include', 'exclude'):
                    self.add(fn, cmd, path)
                    continue
                self.meta.ui.warn('unknown filemap command %s\n' % cmd)
            except (IndexError, ValueError):
                # unpacking a line without a space raises ValueError
                msg = 'ignoring bad line in filemap %s: %s\n'
                self.meta.ui.warn(msg % (fn, line.rstrip()))

    def _load(self):
        '''Read the backing store, raising Abort on a version mismatch.'''
        self.meta.ui.debug('reading in-repo file map from %s\n' % self.meta.filemap_file)
        with open(self.meta.filemap_file) as f:
            ver = int(f.readline())
            if ver != self.VERSION:
                raise hgutil.Abort('filemap too new -- please upgrade')
            self.load_fd(f, self.meta.filemap_file)

    def _write(self):
        '''Truncate the backing store so it holds only the version line.'''
        with open(self.meta.filemap_file, 'w') as f:
            f.write('%s\n' % self.VERSION)

class BranchMap(BaseMap):
    '''Facility for controlled renaming of branch names. Example:

    oldname = newname
    other = default

    All changes on the oldname branch will now be on the newname branch; all
    changes on other will now be on default (have no branch name set).
    '''

    def __init__(self, meta):
        '''Initialise the map through BaseMap, which derives the file and
        config names from the class name.'''
        super(BranchMap, self).__init__(meta)

class TagMap(BaseMap):
    '''Facility for controlled renaming of tags. Example:

    oldname = newname
    other =

    The oldname tag from SVN will be represented as newname in the hg tags;
    the other tag will not be reflected in the hg repository.
    '''

    def __init__(self, meta):
        '''Initialise the map through BaseMap, which derives the file and
        config names from the class name.'''
        super(TagMap, self).__init__(meta)