view hgsubversion/replay.py @ 787:4bbc6bf947f5 1.2.1

replay: fetch full revision at most once per run (issue252) Before this change, hgsubversion was fetching full revisions from the first revision the project was created to the first revision containing converted data. Unfortunately, some projects exhibits such spans longer than 500 revisions, during which hgsubversion was uselessly scanning the whole tree. The fix is not technically perfect, we could record somewhere that while no data was converted we scanned the project already, instead of scanning once at every hgsubversion run until a revision is converted. But it should be good enough unless someone runs hgsubversion once for every target revision. One repository exhibiting this behaviour: svn://svn.zankasoftware.com/zanka
author Patrick Mezard <pmezard@gmail.com>
date Sun, 13 Feb 2011 20:10:52 +0100
parents 607f43a0f09c
children be5bbb2f2d68
line wrap: on
line source

import errno
import traceback

from mercurial import revlog
from mercurial import node
from mercurial import context
from mercurial import util as hgutil

import svnexternals
import util


class MissingPlainTextError(Exception):
    """Exception raised when the repo lacks a source file required for replaying
    a txdelta.
    """

class ReplayException(Exception):
    """Exception raised when you try and commit but the replay encountered an
    exception.
    """

def updateexternals(ui, meta, current):
    # TODO fix and re-enable externals for single-directory clones
    if not current.externals or meta.layout == 'single':
        return

    # accumulate externals records for all branches
    revnum = current.rev.revnum
    branches = {}
    for path, entry in current.externals.iteritems():
        if not meta.is_path_valid(path):
            ui.warn('WARNING: Invalid path %s in externals\n' % path)
            continue

        p, b, bp = meta.split_branch_path(path)
        if bp not in branches:
            parent = meta.get_parent_revision(revnum, b)
            pctx = meta.repo[parent]
            branches[bp] = (svnexternals.parse(ui, pctx), pctx)
        branches[bp][0][p] = entry

    # register externals file changes
    for bp, (external, pctx) in branches.iteritems():
        if bp and bp[-1] != '/':
            bp += '/'
        updates = svnexternals.getchanges(ui, meta.repo, pctx, external)
        for fn, data in updates.iteritems():
            path = (bp and bp + fn) or fn
            if data is not None:
                current.set(path, data, False, False)
            else:
                current.delete(path)

def convert_rev(ui, meta, svn, r, tbdelta, firstrun):

    editor = meta.editor
    editor.current.clear()
    editor.current.rev = r

    if firstrun and meta.revmap.oldest <= 0:
        # We know nothing about this project, so fetch everything before
        # trying to apply deltas.
        ui.debug('replay: fetching full revision\n')
        svn.get_revision(r.revnum, editor)
    else:
        svn.get_replay(r.revnum, editor, meta.revmap.oldest)

    current = editor.current
    current.findmissing(svn)

    updateexternals(ui, meta, current)

    if current.exception is not None:  #pragma: no cover
        traceback.print_exception(*current.exception)
        raise ReplayException()
    if current.missing:
        raise MissingPlainTextError()

    # paranoidly generate the list of files to commit
    files_to_commit = set(current.files.keys())
    files_to_commit.update(current.symlinks.keys())
    files_to_commit.update(current.execfiles.keys())
    files_to_commit.update(current.deleted.keys())
    # back to a list and sort so we get sane behavior
    files_to_commit = list(files_to_commit)
    files_to_commit.sort()
    branch_batches = {}
    rev = current.rev
    date = meta.fixdate(rev.date)

    # build up the branches that have files on them
    for f in files_to_commit:
        if not meta.is_path_valid(f):
            continue
        p, b = meta.split_branch_path(f)[:2]
        if b not in branch_batches:
            branch_batches[b] = []
        branch_batches[b].append((p, f))

    closebranches = {}
    for branch in tbdelta['branches'][1]:
        branchedits = meta.revmap.branchedits(branch, rev)
        if len(branchedits) < 1:
            # can't close a branch that never existed
            continue
        ha = branchedits[0][1]
        closebranches[branch] = ha

    extraempty = (set(tbdelta['branches'][0]) -
                  (set(current.emptybranches) | set(branch_batches.keys())))
    current.emptybranches.update([(x, False) for x in extraempty])

    # 1. handle normal commits
    closedrevs = closebranches.values()
    for branch, files in branch_batches.iteritems():

        if branch in current.emptybranches and files:
            del current.emptybranches[branch]

        files = dict(files)
        parents = meta.get_parent_revision(rev.revnum, branch), revlog.nullid
        if parents[0] in closedrevs and branch in meta.closebranches:
            continue

        extra = meta.genextra(rev.revnum, branch)
        tag = None
        if branch is not None:
            # New regular tag without modifications, it will be committed by
            # svnmeta.committag(), we can skip the whole branch for now
            tag = meta.get_path_tag(meta.remotename(branch))
            if (tag and tag not in meta.tags
                and branch not in meta.branches
                and branch not in meta.repo.branchtags()
                and not files):
                continue

        parentctx = meta.repo.changectx(parents[0])
        if tag:
            if parentctx.node() == node.nullid:
                continue
            extra.update({'branch': parentctx.extra().get('branch', None),
                          'close': 1})

        def filectxfn(repo, memctx, path):
            current_file = files[path]
            if current_file in current.deleted:
                raise IOError(errno.ENOENT, '%s is deleted' % path)
            copied = current.copies.get(current_file)
            flags = parentctx.flags(path)
            is_exec = current.execfiles.get(current_file, 'x' in flags)
            is_link = current.symlinks.get(current_file, 'l' in flags)
            if current_file in current.files:
                data = current.files[current_file]
                if is_link and data.startswith('link '):
                    data = data[len('link '):]
                elif is_link:
                    ui.debug('file marked as link, but may contain data: '
                             '%s (%r)\n' % (current_file, flags))
            else:
                data = parentctx.filectx(path).data()
            return context.memfilectx(path=path,
                                      data=data,
                                      islink=is_link, isexec=is_exec,
                                      copied=copied)

        meta.mapbranch(extra)
        current_ctx = context.memctx(meta.repo,
                                     parents,
                                     rev.message or util.default_commit_msg(ui),
                                     files.keys(),
                                     filectxfn,
                                     meta.authors[rev.author],
                                     date,
                                     extra)

        new_hash = meta.repo.commitctx(current_ctx)
        util.describe_commit(ui, new_hash, branch)
        if (rev.revnum, branch) not in meta.revmap and not tag:
            meta.revmap[rev.revnum, branch] = new_hash
        if tag:
            meta.movetag(tag, new_hash, rev, date)
            meta.addedtags.pop(tag, None)

    # 2. handle branches that need to be committed without any files
    for branch in current.emptybranches:

        ha = meta.get_parent_revision(rev.revnum, branch)
        if ha == node.nullid:
            continue

        parent_ctx = meta.repo.changectx(ha)
        def del_all_files(*args):
            raise IOError(errno.ENOENT, 'deleting all files')

        # True here meant nuke all files, shouldn't happen with branch closing
        if current.emptybranches[branch]: #pragma: no cover
            raise hgutil.Abort('Empty commit to an open branch attempted. '
                               'Please report this issue.')

        extra = meta.genextra(rev.revnum, branch)
        meta.mapbranch(extra)

        current_ctx = context.memctx(meta.repo,
                                     (ha, node.nullid),
                                     rev.message or ' ',
                                     [],
                                     del_all_files,
                                     meta.authors[rev.author],
                                     date,
                                     extra)
        new_hash = meta.repo.commitctx(current_ctx)
        util.describe_commit(ui, new_hash, branch)
        if (rev.revnum, branch) not in meta.revmap:
            meta.revmap[rev.revnum, branch] = new_hash

    return closebranches