view hgsubversion/util.py @ 1045:67e11b650e94

handle invalid UTF-8 in commit messages consistently 312f36a425f0 fixed this for replay, but not for stupid mode; we address this by consolidating our means of generating a Mercurial commit message in a single function in 'util.py'.
author Dan Villiom Podlaski Christiansen <dan@cabo.dk>
date Thu, 08 Aug 2013 09:38:55 +0200
parents ada2400241c4
children 37989fbbab5c
line wrap: on
line source

import cPickle as pickle
import errno
import re
import os
import urllib

from mercurial import cmdutil
from mercurial import error
from mercurial import hg
from mercurial import node
from mercurial import repair
from mercurial import util as hgutil

try:
    from collections import deque
except:
    from mercurial.util import deque

try:
    from mercurial import revset
except ImportError:
    pass

import maps

ignoredfiles = set(['.hgtags', '.hgsvnexternals', '.hgsub', '.hgsubstate'])

b_re = re.compile(r'^\+\+\+ b\/([^\n]*)', re.MULTILINE)
a_re = re.compile(r'^--- a\/([^\n]*)', re.MULTILINE)
devnull_re = re.compile(r'^([-+]{3}) /dev/null', re.MULTILINE)
header_re = re.compile(r'^diff --git .* b\/(.*)', re.MULTILINE)
newfile_devnull_re = re.compile(r'^--- /dev/null\n\+\+\+ b/([^\n]*)',
                                re.MULTILINE)


def formatrev(rev):
    if rev == -1:
        return '\t(working copy)'
    return '\t(revision %d)' % rev

def configpath(ui, name):
    path = ui.config('hgsubversion', name)
    return path and hgutil.expandpath(path)

def filterdiff(diff, oldrev, newrev):
    diff = newfile_devnull_re.sub(r'--- \1\t(revision 0)' '\n'
                                  r'+++ \1\t(working copy)',
                                  diff)
    oldrev = formatrev(oldrev)
    newrev = formatrev(newrev)
    diff = a_re.sub(r'--- \1' + oldrev, diff)
    diff = b_re.sub(r'+++ \1' + newrev, diff)
    diff = devnull_re.sub(r'\1 /dev/null\t(working copy)', diff)
    diff = header_re.sub(r'Index: \1' + '\n' + ('=' * 67), diff)
    return diff


def parentrev(ui, repo, meta, hashes):
    """Find the svn parent revision of the repo's dirstate.
    """
    workingctx = repo.parents()[0]
    outrev = outgoing_revisions(repo, hashes, workingctx.node())
    if outrev:
        workingctx = repo[outrev[-1]].parents()[0]
    return workingctx


def islocalrepo(url):
    path = str(url) # convert once up front
    if path.startswith('file:///'):
        prefixlen = len('file://')
    elif path.startswith('file:/'):
        prefixlen = len('file:')
    else:
        return False
    if '#' in path.split('/')[-1]: # strip off #anchor
        path = path[:path.rfind('#')]
    path = url[prefixlen:]
    path = urllib.url2pathname(path).replace(os.sep, '/')
    while '/' in path:
        if reduce(lambda x, y: x and y,
                  map(lambda p: os.path.exists(os.path.join(path, p)),
                      ('hooks', 'format', 'db',))):
            return True
        path = path.rsplit('/', 1)[0]
    return False

def strip(ui, repo, changesets, *args , **opts):
    try:
        repair.strip(ui, repo, changesets, *args, **opts)
    except TypeError:
        # only 2.1.2 and later allow strip to take a list of nodes
        for changeset in changesets:
            repair.strip(ui, repo, changeset, *args, **opts)


def version(ui):
    """Return version information if available."""
    try:
        import __version__
        return __version__.version
    except ImportError:
        try:
            dn = os.path.dirname
            repo = hg.repository(ui, dn(dn(__file__)))
            ver = repo.dirstate.parents()[0]
            return node.hex(ver)[:12]
        except:
            return 'unknown'


def normalize_url(url):
    if not url:
        return url
    if url.startswith('svn+http://') or url.startswith('svn+https://'):
        url = url[4:]
    url, revs, checkout = parseurl(url)
    url = url.rstrip('/')
    if checkout:
        url = '%s#%s' % (url, checkout)
    return url


def load_string(file_path, default=None, limit=1024):
    if not os.path.exists(file_path):
        return default
    try:
        f = open(file_path, 'r')
        ret = f.read(limit)
        f.close()
    except:
        return default
    if ret == '':
        return default
    return ret


def save_string(file_path, string):
    if string is None:
        string = ""
    f = open(file_path, 'wb')
    f.write(str(string))
    f.close()

def pickle_atomic(data, file_path):
    """pickle some data to a path atomically.

    This is present because I kept corrupting my revmap by managing to hit ^C
    during the pickle of that file.
    """
    f = hgutil.atomictempfile(file_path, 'w+b', 0644)
    pickle.dump(data, f)
    # Older versions of hg have .rename() instead of .close on
    # atomictempfile.
    if getattr(hgutil.atomictempfile, 'rename', False):
        f.rename()
    else:
        f.close()

def parseurl(url, heads=[]):
    parsed = hg.parseurl(url, heads)
    if len(parsed) == 3:
        # old hg, remove when we can be 1.5-only
        svn_url, heads, checkout = parsed
    else:
        svn_url, heads = parsed
        if isinstance(heads, tuple) and len(heads) == 2:
            # hg 1.6 or later
            _junk, heads = heads
        if heads:
            checkout = heads[0]
        else:
            checkout = None
    return svn_url, heads, checkout


class PrefixMatch(object):
    def __init__(self, prefix):
        self.p = prefix

    def files(self):
        return []

    def __call__(self, fn):
        return fn.startswith(self.p)

def outgoing_revisions(repo, reverse_map, sourcerev):
    """Given a repo and an hg_editor, determines outgoing revisions for the
    current working copy state.
    """
    outgoing_rev_hashes = []
    if sourcerev in reverse_map:
        return
    sourcerev = repo[sourcerev]
    while (not sourcerev.node() in reverse_map
           and sourcerev.node() != node.nullid):
        outgoing_rev_hashes.append(sourcerev.node())
        sourcerev = sourcerev.parents()
        if len(sourcerev) != 1:
            raise hgutil.Abort("Sorry, can't find svn parent of a merge revision.")
        sourcerev = sourcerev[0]
    if sourcerev.node() != node.nullid:
        return outgoing_rev_hashes

def outgoing_common_and_heads(repo, reverse_map, sourcerev):
    """Given a repo and an hg_editor, determines outgoing revisions for the
    current working copy state. Returns a tuple (common, heads) like
    discovery.findcommonoutgoing does.
    """
    if sourcerev in reverse_map:
        return ([sourcerev], [sourcerev]) # nothing outgoing
    sourcecx = repo[sourcerev]
    while (not sourcecx.node() in reverse_map
           and sourcecx.node() != node.nullid):
        ps = sourcecx.parents()
        if len(ps) != 1:
            raise hgutil.Abort("Sorry, can't find svn parent of a merge revision.")
        sourcecx = ps[0]
    if sourcecx.node() != node.nullid:
        return ([sourcecx.node()], [sourcerev])
    return ([sourcerev], [sourcerev]) # nothing outgoing

def getmessage(ui, rev):
    msg = rev.message

    if msg:
        try:
            msg.decode('utf-8')
            return msg

        except UnicodeDecodeError:
            # ancient svn failed to enforce utf8 encoding
            return msg.decode('iso-8859-1').encode('utf-8')
    else:
        return ui.config('hgsubversion', 'defaultmessage', '')

def describe_commit(ui, h, b):
    ui.note(' committed to "%s" as %s\n' % ((b or 'default'), node.short(h)))


def swap_out_encoding(new_encoding="UTF-8"):
    from mercurial import encoding
    old = encoding.encoding
    encoding.encoding = new_encoding
    return old

def isancestor(ctx, ancestorctx):
    """Return True if ancestorctx is equal or an ancestor of ctx."""
    if ctx == ancestorctx:
        return True
    for actx in ctx.ancestors():
        if actx == ancestorctx:
            return True
    return False

def issamefile(parentctx, childctx, f):
    """Return True if f exists and is the same in childctx and parentctx"""
    if f not in parentctx or f not in childctx:
        return False
    if parentctx == childctx:
        return True
    if parentctx.rev() > childctx.rev():
        parentctx, childctx = childctx, parentctx

    def selfandancestors(selfctx):
        yield selfctx
        for ctx in selfctx.ancestors():
            yield ctx

    for pctx in selfandancestors(childctx):
        if pctx.rev() <= parentctx.rev():
            return True
        if f in pctx.files():
            return False
    # parentctx is not an ancestor of childctx, files are unrelated
    return False


def getsvnrev(ctx, defval=None):
    '''Extract SVN revision from commit metadata'''
    return ctx.extra().get('convert_revision', defval)


def _templatehelper(ctx, kw):
    '''
    Helper function for displaying information about converted changesets.
    '''
    convertinfo = getsvnrev(ctx, '')

    if not convertinfo or not convertinfo.startswith('svn:'):
        return ''

    if kw == 'svnuuid':
        return convertinfo[4:40]
    elif kw == 'svnpath':
        return convertinfo[40:].rsplit('@', 1)[0]
    elif kw == 'svnrev':
        return convertinfo[40:].rsplit('@', 1)[-1]
    else:
        raise hgutil.Abort('unrecognized hgsubversion keyword %s' % kw)

def svnrevkw(**args):
    """:svnrev: String. Converted subversion revision number."""
    return _templatehelper(args['ctx'], 'svnrev')

def svnpathkw(**args):
    """:svnpath: String. Converted subversion revision project path."""
    return _templatehelper(args['ctx'], 'svnpath')

def svnuuidkw(**args):
    """:svnuuid: String. Converted subversion revision repository identifier."""
    return _templatehelper(args['ctx'], 'svnuuid')

templatekeywords = {
    'svnrev': svnrevkw,
    'svnpath': svnpathkw,
    'svnuuid': svnuuidkw,
}

def revset_fromsvn(repo, subset, x):
    '''``fromsvn()``
    Select changesets that originate from Subversion.
    '''
    args = revset.getargs(x, 0, 0, "fromsvn takes no arguments")

    rev = repo.changelog.rev
    bin = node.bin
    try:
        svnrevs = set(rev(bin(l.split(' ', 2)[1]))
                      for l in maps.RevMap.readmapfile(repo, missingok=False))
        return filter(svnrevs.__contains__, subset)
    except IOError, err:
        if err.errno != errno.ENOENT:
            raise
        raise hgutil.Abort("svn metadata is missing - "
                           "run 'hg svn rebuildmeta' to reconstruct it")

def revset_svnrev(repo, subset, x):
    '''``svnrev(number)``
    Select changesets that originate in the given Subversion revision.
    '''
    args = revset.getargs(x, 1, 1, "svnrev takes one argument")

    rev = revset.getstring(args[0],
                           "the argument to svnrev() must be a number")
    try:
        revnum = int(rev)
    except ValueError:
        raise error.ParseError("the argument to svnrev() must be a number")

    rev = rev + ' '
    revs = []
    try:
        for l in maps.RevMap.readmapfile(repo, missingok=False):
            if l.startswith(rev):
                n = l.split(' ', 2)[1]
                r = repo[node.bin(n)].rev()
                if r in subset:
                    revs.append(r)
        return revs
    except IOError, err:
        if err.errno != errno.ENOENT:
            raise
        raise hgutil.Abort("svn metadata is missing - "
                           "run 'hg svn rebuildmeta' to reconstruct it")

revsets = {
    'fromsvn': revset_fromsvn,
    'svnrev': revset_svnrev,
}

def revset_stringset(orig, repo, subset, x):
    if x.startswith('r') and x[1:].isdigit():
        return revset_svnrev(repo, subset, ('string', x[1:]))
    return orig(repo, subset, x)

def getfilestoresize(ui):
    """Return the replay or stupid file memory store size in megabytes or -1"""
    size = ui.configint('hgsubversion', 'filestoresize', 200)
    if size >= 0:
        size = size*(2**20)
    else:
        size = -1
    return size

def parse_revnum(svnrepo, r):
    try:
        return int(r or 0)
    except ValueError:
        if isinstance(r, str) and r.lower() in ('head', 'tip'):
            return svnrepo.last_changed_rev
        else:
            raise error.RepoLookupError("unknown Subversion revision %r" % r)