Mercurial > hgsubversion
view hgsubversion/replay.py @ 787:4bbc6bf947f5 1.2.1
replay: fetch full revision at most once per run (issue252)
Before this change, hgsubversion was fetching full revisions from the
revision in which the project was created up to the first revision containing
converted data. Unfortunately, some projects exhibit such spans longer than 500
revisions, during which hgsubversion was uselessly scanning the whole tree. The
fix is not technically perfect: we could record somewhere that, while no data
was converted, we scanned the project already, instead of scanning once at every
hgsubversion run until a revision is converted. But it should be good enough
unless someone runs hgsubversion once for every target revision.
One repository exhibiting this behaviour:
svn://svn.zankasoftware.com/zanka
author | Patrick Mezard <pmezard@gmail.com> |
---|---|
date | Sun, 13 Feb 2011 20:10:52 +0100 |
parents | 607f43a0f09c |
children | be5bbb2f2d68 |
line wrap: on
line source
import errno
import traceback

from mercurial import revlog
from mercurial import node
from mercurial import context
from mercurial import util as hgutil

import svnexternals
import util


class MissingPlainTextError(Exception):
    """Raised by convert_rev() when the replayed revision still reports
    missing source files after the editor looked for them.
    """


class ReplayException(Exception):
    """Raised by convert_rev() when the editor recorded an exception while
    replaying, so nothing must be committed.
    """


def updateexternals(ui, meta, current):
    """Turn the externals recorded on ``current`` into file changes.

    For every valid path in ``current.externals`` the entry is overlaid on
    the externals parsed from the parent changeset of the owning branch;
    the resulting per-branch changes are then applied to ``current`` with
    ``current.set()`` / ``current.delete()``.

    Does nothing when there are no externals or when ``meta.layout`` is
    'single'.
    """
    # TODO fix and re-enable externals for single-directory clones
    if meta.layout == 'single' or not current.externals:
        return

    # Pass 1: collect externals records, grouped by branch path.
    revnum = current.rev.revnum
    perbranch = {}
    for extpath, entry in current.externals.iteritems():
        if not meta.is_path_valid(extpath):
            ui.warn('WARNING: Invalid path %s in externals\n' % extpath)
            continue

        relpath, branch, branchpath = meta.split_branch_path(extpath)
        if branchpath not in perbranch:
            parentrev = meta.get_parent_revision(revnum, branch)
            parentctx = meta.repo[parentrev]
            parsed = svnexternals.parse(ui, parentctx)
            perbranch[branchpath] = (parsed, parentctx)
        perbranch[branchpath][0][relpath] = entry

    # Pass 2: register the externals file changes for each branch.
    for prefix, (external, parentctx) in perbranch.iteritems():
        if prefix and prefix[-1] != '/':
            prefix += '/'
        changes = svnexternals.getchanges(ui, meta.repo, parentctx, external)
        for name, data in changes.iteritems():
            target = prefix + name if prefix else name
            if data is None:
                current.delete(target)
            else:
                current.set(target, data, False, False)


def convert_rev(ui, meta, svn, r, tbdelta, firstrun):
    """Replay Subversion revision ``r`` and commit the result to ``meta.repo``.

    ui       -- object used for debug/warning output
    meta     -- conversion state (editor, repo, revmap, tags, authors, ...)
    svn      -- remote accessor providing get_revision() and get_replay()
    r        -- revision to convert; its ``revnum`` is passed to ``svn``
    tbdelta  -- mapping whose 'branches' item is a pair: index 0 lists
                branches that may need an empty commit, index 1 lists
                branches to close
    firstrun -- when true and ``meta.revmap.oldest <= 0`` the full revision
                is fetched instead of a replay delta

    Returns a dict mapping each branch to close that has at least one
    recorded edit to the second item of its first ``branchedits`` entry
    (presumably the node of that changeset -- confirm against revmap).

    Raises ReplayException if the editor recorded an exception,
    MissingPlainTextError if files are still missing, and hgutil.Abort if
    an empty commit is requested on a branch flagged with a true value.
    """
    editor = meta.editor
    editor.current.clear()
    editor.current.rev = r

    if not (firstrun and meta.revmap.oldest <= 0):
        svn.get_replay(r.revnum, editor, meta.revmap.oldest)
    else:
        # We know nothing about this project, so fetch everything before
        # trying to apply deltas.
        ui.debug('replay: fetching full revision\n')
        svn.get_revision(r.revnum, editor)

    current = editor.current
    current.findmissing(svn)

    updateexternals(ui, meta, current)

    if current.exception is not None: #pragma: no cover
        traceback.print_exception(*current.exception)
        raise ReplayException()
    if current.missing:
        raise MissingPlainTextError()

    # Paranoidly gather every touched path, then sort so that the
    # processing order is deterministic.
    touched = set()
    for mapping in (current.files, current.symlinks,
                    current.execfiles, current.deleted):
        touched.update(mapping.keys())
    files_to_commit = sorted(touched)

    rev = current.rev
    date = meta.fixdate(rev.date)

    # Group the touched files by the branch they live on.
    branch_batches = {}
    for fullpath in files_to_commit:
        if not meta.is_path_valid(fullpath):
            continue
        relpath, owner = meta.split_branch_path(fullpath)[:2]
        branch_batches.setdefault(owner, []).append((relpath, fullpath))

    # Work out which branches get closed; a branch that never existed
    # (no recorded edits) cannot be closed.
    closebranches = {}
    for branch in tbdelta['branches'][1]:
        edits = meta.revmap.branchedits(branch, rev)
        if len(edits) >= 1:
            closebranches[branch] = edits[0][1]

    # Branches listed in tbdelta that have neither files nor an existing
    # empty-branch record are registered as empty (flag False).
    accounted = set(current.emptybranches) | set(branch_batches.keys())
    extraempty = set(tbdelta['branches'][0]) - accounted
    current.emptybranches.update([(name, False) for name in extraempty])

    # 1. handle normal commits
    closedrevs = closebranches.values()
    for branch, pairs in branch_batches.iteritems():

        # A branch that received files is no longer an empty branch.
        if branch in current.emptybranches and pairs:
            del current.emptybranches[branch]

        # Maps path relative to the branch -> full Subversion path.
        files = dict(pairs)

        firstparent = meta.get_parent_revision(rev.revnum, branch)
        parents = firstparent, revlog.nullid
        if firstparent in closedrevs and branch in meta.closebranches:
            continue

        extra = meta.genextra(rev.revnum, branch)

        tag = None
        if branch is not None:
            # New regular tag without modifications, it will be committed by
            # svnmeta.committag(), we can skip the whole branch for now
            tag = meta.get_path_tag(meta.remotename(branch))
            if (tag and tag not in meta.tags
                and branch not in meta.branches
                and branch not in meta.repo.branchtags()
                and not files):
                continue

        parentctx = meta.repo.changectx(firstparent)
        if tag:
            if parentctx.node() == node.nullid:
                continue
            extra.update({'branch': parentctx.extra().get('branch', None),
                          'close': 1})

        def filectxfn(repo, memctx, path):
            """Build the memfilectx for ``path``; IOError(ENOENT) if deleted."""
            source = files[path]
            if source in current.deleted:
                raise IOError(errno.ENOENT, '%s is deleted' % path)
            copied = current.copies.get(source)
            # Flags not set by this revision are inherited from the parent.
            flags = parentctx.flags(path)
            is_exec = current.execfiles.get(source, 'x' in flags)
            is_link = current.symlinks.get(source, 'l' in flags)
            if source not in current.files:
                # Content untouched by this revision: reuse the parent's.
                data = parentctx.filectx(path).data()
            else:
                data = current.files[source]
                if is_link:
                    if data.startswith('link '):
                        data = data[len('link '):]
                    else:
                        ui.debug('file marked as link, but may contain data: '
                                 '%s (%r)\n' % (source, flags))
            return context.memfilectx(path=path,
                                      data=data,
                                      islink=is_link, isexec=is_exec,
                                      copied=copied)

        meta.mapbranch(extra)
        message = rev.message or util.default_commit_msg(ui)
        current_ctx = context.memctx(meta.repo,
                                     parents,
                                     message,
                                     files.keys(),
                                     filectxfn,
                                     meta.authors[rev.author],
                                     date,
                                     extra)

        new_hash = meta.repo.commitctx(current_ctx)
        util.describe_commit(ui, new_hash, branch)
        revkey = (rev.revnum, branch)
        if revkey not in meta.revmap and not tag:
            meta.revmap[revkey] = new_hash
        if tag:
            meta.movetag(tag, new_hash, rev, date)
            meta.addedtags.pop(tag, None)

    # 2. handle branches that need to be committed without any files
    def del_all_files(*args):
        """File callback for empty commits: every lookup reports ENOENT."""
        raise IOError(errno.ENOENT, 'deleting all files')

    for branch in current.emptybranches:

        ha = meta.get_parent_revision(rev.revnum, branch)
        if ha == node.nullid:
            continue

        # The context itself is not used below; the lookup is kept so the
        # sequence of repository calls stays the same.
        parent_ctx = meta.repo.changectx(ha)

        # True here meant nuke all files, shouldn't happen with branch closing
        if current.emptybranches[branch]: #pragma: no cover
            raise hgutil.Abort('Empty commit to an open branch attempted. '
                               'Please report this issue.')

        extra = meta.genextra(rev.revnum, branch)
        meta.mapbranch(extra)

        current_ctx = context.memctx(meta.repo,
                                     (ha, node.nullid),
                                     rev.message or ' ',
                                     [],
                                     del_all_files,
                                     meta.authors[rev.author],
                                     date,
                                     extra)
        new_hash = meta.repo.commitctx(current_ctx)
        util.describe_commit(ui, new_hash, branch)
        revkey = (rev.revnum, branch)
        if revkey not in meta.revmap:
            meta.revmap[revkey] = new_hash

    return closebranches