Mercurial > hgsubversion
diff fetch_command.py @ 239:e2214c8fc91f
Put all stupid stuff in it's own module (separate from fetch-command).
author | Dirkjan Ochtman <dirkjan@ochtman.nl> |
---|---|
date | Wed, 08 Apr 2009 17:34:01 +0200 |
parents | c90cfa665b81 |
children |
line wrap: on
line diff
--- a/fetch_command.py +++ b/fetch_command.py @@ -1,26 +1,15 @@ -import cStringIO -import re import os -from mercurial import patch -from mercurial import node -from mercurial import context -from mercurial import revlog from mercurial import util as merc_util from svn import core from svn import delta import hg_delta_editor import svnwrap -import svnexternals +import stupid as stupidmod import util -def print_your_svn_is_old_message(ui): #pragma: no cover - ui.status("In light of that, I'll fall back and do diffs, but it won't do " - "as good a job. You should really upgrade your server.\n") - - def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None, tag_locations='tags', authors=None, @@ -87,11 +76,11 @@ def fetch_revisions(ui, svn_url, hg_repo replay_convert_rev(hg_editor, svn, r) except svnwrap.SubversionRepoCanNotReplay, e: #pragma: no cover ui.status('%s\n' % e.message) - print_your_svn_is_old_message(ui) + stupidmod.print_your_svn_is_old_message(ui) have_replay = False - stupid_svn_server_pull_rev(ui, svn, hg_editor, r) + stupidmod.svn_server_pull_rev(ui, svn, hg_editor, r) else: - stupid_svn_server_pull_rev(ui, svn, hg_editor, r) + stupidmod.svn_server_pull_rev(ui, svn, hg_editor, r) converted = True except core.SubversionException, e: #pragma: no cover if (e.apr_err == core.SVN_ERR_RA_DAV_REQUEST_FAILED @@ -141,552 +130,3 @@ def replay_convert_rev(hg_editor, svn, r hg_editor.missing_plaintexts = set() hg_editor.ui.note('\n') hg_editor.commit_current_delta() - - -binary_file_re = re.compile(r'''Index: ([^\n]*) -=* -Cannot display: file marked as a binary type.''') - -property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*) -_* -(?:Added|Name): svn:executable - \+''') - -property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*) -_* -(?:Deleted|Name): svn:executable - -''') - -empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''') - -any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE) - -property_special_set_re = re.compile(r'''Property changes on: ([^\n]*) -_* -(?:Added|Name): svn:special - \+''') - -property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*) -_* -(?:Deleted|Name): svn:special - \-''') - -def mempatchproxy(parentctx, files): - # Avoid circular references patch.patchfile -> mempatch - patchfile = patch.patchfile - - class mempatch(patchfile): - def __init__(self, ui, fname, opener, missing=False): - patchfile.__init__(self, ui, fname, None, False) - - def readlines(self, fname): - if fname not in parentctx: - raise IOError('Cannot find %r to patch' % fname) - fctx = parentctx[fname] - data = fctx.data() - if 'l' in fctx.flags(): - data = 'link ' + data - return cStringIO.StringIO(data).readlines() - - def writelines(self, fname, lines): - files[fname] = ''.join(lines) - - def unlink(self, fname): - files[fname] = None - - return mempatch - - -def filteriterhunks(hg_editor): - iterhunks = patch.iterhunks - def filterhunks(ui, fp, sourcefile=None): - applycurrent = False - for data in iterhunks(ui, fp, sourcefile): - if data[0] == 'file': - if hg_editor._is_file_included(data[1][1]): - applycurrent = True - else: - applycurrent = False - assert data[0] != 'git', 'Filtering git hunks not supported.' - if applycurrent: - yield data - return filterhunks - -def stupid_diff_branchrev(ui, svn, hg_editor, branch, r, parentctx): - """Extract all 'branch' content at a given revision. - - Return a tuple (files, filectxfn) where 'files' is the list of all files - in the branch at the given revision, and 'filectxfn' is a memctx compatible - callable to retrieve individual file information. Raise BadPatchApply upon - error. - """ - def make_diff_path(branch): - if branch == 'trunk' or branch is None: - return 'trunk' - elif branch.startswith('../'): - return branch[3:] - return 'branches/%s' % branch - parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch) - diff_path = make_diff_path(branch) - try: - if br_p == branch: - # letting patch handle binaries sounded - # cool, but it breaks patch in sad ways - d = svn.get_unified_diff(diff_path, r.revnum, deleted=False, - ignore_type=False) - else: - d = svn.get_unified_diff(diff_path, r.revnum, - other_path=make_diff_path(br_p), - other_rev=parent_rev, - deleted=True, ignore_type=True) - if d: - raise BadPatchApply('branch creation with mods') - except svnwrap.SubversionRepoCanNotDiff: - raise BadPatchApply('subversion diffing code is not supported') - except core.SubversionException, e: - if (hasattr(e, 'apr_err') and e.apr_err != core.SVN_ERR_FS_NOT_FOUND): - raise - raise BadPatchApply('previous revision does not exist') - if '\0' in d: - raise BadPatchApply('binary diffs are not supported') - files_data = {} - binary_files = {} - touched_files = {} - for m in binary_file_re.findall(d): - # we have to pull each binary file by hand as a fulltext, - # which sucks but we've got no choice - binary_files[m] = 1 - touched_files[m] = 1 - d2 = empty_file_patch_wont_make_re.sub('', d) - d2 = property_exec_set_re.sub('', d2) - d2 = property_exec_removed_re.sub('', d2) - for f in any_file_re.findall(d): - # Here we ensure that all files, including the new empty ones - # are marked as touched. Content is loaded on demand. - touched_files[f] = 1 - if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0: - try: - oldpatchfile = patch.patchfile - olditerhunks = patch.iterhunks - patch.patchfile = mempatchproxy(parentctx, files_data) - patch.iterhunks = filteriterhunks(hg_editor) - try: - # We can safely ignore the changed list since we are - # handling non-git patches. Touched files are known - # by our memory patcher. - patch_st = patch.applydiff(ui, cStringIO.StringIO(d2), - {}, strip=0) - finally: - patch.patchfile = oldpatchfile - patch.iterhunks = olditerhunks - except patch.PatchError: - # TODO: this happens if the svn server has the wrong mime - # type stored and doesn't know a file is binary. It would - # be better to do one file at a time and only do a - # full fetch on files that had problems. - raise BadPatchApply('patching failed') - for x in files_data.iterkeys(): - ui.note('M %s\n' % x) - # if this patch didn't apply right, fall back to exporting the - # entire rev. - if patch_st == -1: - assert False, ('This should only happen on case-insensitive' - ' volumes.') - elif patch_st == 1: - # When converting Django, I saw fuzz on .po files that was - # causing revisions to end up failing verification. If that - # can be fixed, maybe this won't ever be reached. - raise BadPatchApply('patching succeeded with fuzz') - else: - ui.status('Not using patch for %s, diff had no hunks.\n' % - r.revnum) - - exec_files = {} - for m in property_exec_removed_re.findall(d): - exec_files[m] = False - for m in property_exec_set_re.findall(d): - exec_files[m] = True - for m in exec_files: - touched_files[m] = 1 - link_files = {} - for m in property_special_set_re.findall(d): - # TODO(augie) when a symlink is removed, patching will fail. - # We're seeing that above - there's gotta be a better - # workaround than just bailing like that. - assert m in files_data - link_files[m] = True - for m in property_special_removed_re.findall(d): - assert m in files_data - link_files[m] = False - - for p in r.paths: - if p.startswith(diff_path) and r.paths[p].action == 'D': - p2 = p[len(diff_path)+1:].strip('/') - if p2 in parentctx: - files_data[p2] = None - continue - # If this isn't in the parent ctx, it must've been a dir - files_data.update([(f, None) for f in parentctx if f.startswith(p2 + '/')]) - - for f in files_data: - touched_files[f] = 1 - - copies = getcopies(svn, hg_editor, branch, diff_path, r, touched_files, - parentctx) - - def filectxfn(repo, memctx, path): - if path in files_data and files_data[path] is None: - raise IOError() - - if path in binary_files: - data, mode = svn.get_file(diff_path + '/' + path, r.revnum) - isexe = 'x' in mode - islink = 'l' in mode - else: - isexe = exec_files.get(path, 'x' in parentctx.flags(path)) - islink = link_files.get(path, 'l' in parentctx.flags(path)) - data = '' - if path in files_data: - data = files_data[path] - if islink: - data = data[len('link '):] - elif path in parentctx: - data = parentctx[path].data() - - copied = copies.get(path) - return context.memfilectx(path=path, data=data, islink=islink, - isexec=isexe, copied=copied) - - return list(touched_files), filectxfn - -def makecopyfinder(r, branchpath, rootdir): - """Return a function detecting copies. - - Returned copyfinder(path) returns None if no copy information can - be found or ((source, sourcerev), sourcepath) where "sourcepath" is the - copy source path, "sourcerev" the source svn revision and "source" is the - copy record path causing the copy to occur. If a single file was copied - "sourcepath" and "source" are the same, while file copies dectected from - directory copies return the copied source directory in "source". - """ - # filter copy information for current branch - branchpath = branchpath + '/' - fullbranchpath = rootdir + branchpath - copies = [] - for path, e in r.paths.iteritems(): - if not e.copyfrom_path: - continue - if not path.startswith(branchpath): - continue - if not e.copyfrom_path.startswith(fullbranchpath): - # ignore cross branch copies - continue - dest = path[len(branchpath):] - source = e.copyfrom_path[len(fullbranchpath):] - copies.append((dest, (source, e.copyfrom_rev))) - - copies.sort(reverse=True) - exactcopies = dict(copies) - - def finder(path): - if path in exactcopies: - return exactcopies[path], exactcopies[path][0] - # look for parent directory copy, longest first - for dest, (source, sourcerev) in copies: - dest = dest + '/' - if not path.startswith(dest): - continue - sourcepath = source + '/' + path[len(dest):] - return (source, sourcerev), sourcepath - return None - - return finder - -def getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx): - """Return a mapping {dest: source} for every file copied into r. - """ - if parentctx.node() == revlog.nullid: - return {} - - # Extract svn copy information, group them by copy source. - # The idea is to duplicate the replay behaviour where copies are - # evaluated per copy event (one event for all files in a directory copy, - # one event for single file copy). We assume that copy events match - # copy sources in revision info. - svncopies = {} - finder = makecopyfinder(r, branchpath, svn.subdir) - for f in files: - copy = finder(f) - if copy: - svncopies.setdefault(copy[0], []).append((f, copy[1])) - if not svncopies: - return {} - - # cache changeset contexts and map them to source svn revisions - ctxs = {} - def getctx(svnrev): - if svnrev in ctxs: - return ctxs[svnrev] - changeid = hg_editor.get_parent_revision(svnrev + 1, branch) - ctx = None - if changeid != revlog.nullid: - ctx = hg_editor.repo.changectx(changeid) - ctxs[svnrev] = ctx - return ctx - - # check svn copies really make sense in mercurial - hgcopies = {} - for (sourcepath, rev), copies in svncopies.iteritems(): - sourcectx = getctx(rev) - if sourcectx is None: - continue - sources = [s[1] for s in copies] - if not hg_editor.aresamefiles(sourcectx, parentctx, sources): - continue - hgcopies.update(copies) - return hgcopies - -def stupid_fetch_externals(svn, branchpath, r, parentctx): - """Extract svn:externals for the current revision and branch - - Return an externalsfile instance or None if there are no externals - to convert and never were. - """ - externals = svnexternals.externalsfile() - if '.hgsvnexternals' in parentctx: - externals.read(parentctx['.hgsvnexternals'].data()) - # Detect property additions only, changes are handled by checking - # existing entries individually. Projects are unlikely to store - # externals on many different root directories, so we trade code - # duplication and complexity for a constant lookup price at every - # revision in the common case. - dirs = set(externals) - if parentctx.node() == revlog.nullid: - dirs.update([p for p,k in svn.list_files(branchpath, r.revnum) if k == 'd']) - dirs.add('') - else: - branchprefix = branchpath + '/' - for path, e in r.paths.iteritems(): - if e.action == 'D': - continue - if not path.startswith(branchprefix) and path != branchpath: - continue - kind = svn.checkpath(path, r.revnum) - if kind != 'd': - continue - path = path[len(branchprefix):] - dirs.add(path) - if e.action == 'M' or (e.action == 'A' and e.copyfrom_path): - # Do not recurse in copied directories, changes are marked - # as 'M', except for the copied one. - continue - for child, k in svn.list_files(branchprefix + path, r.revnum): - if k == 'd': - dirs.add((path + '/' + child).strip('/')) - - # Retrieve new or updated values - for dir in dirs: - try: - values = svn.list_props(branchpath + '/' + dir, r.revnum) - externals[dir] = values.get('svn:externals', '') - except IOError: - externals[dir] = '' - - if not externals and '.hgsvnexternals' not in parentctx: - # Do not create empty externals files - return None - return externals - -def stupid_fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx): - """Extract all 'branch' content at a given revision. - - Return a tuple (files, filectxfn) where 'files' is the list of all files - in the branch at the given revision, and 'filectxfn' is a memctx compatible - callable to retrieve individual file information. - """ - files = [] - if parentctx.node() == revlog.nullid: - # Initial revision, fetch all files - for path, kind in svn.list_files(branchpath, r.revnum): - if kind == 'f': - files.append(path) - else: - branchprefix = branchpath + '/' - for path, e in r.paths.iteritems(): - if not path.startswith(branchprefix): - continue - if not hg_editor._is_path_valid(path): - continue - kind = svn.checkpath(path, r.revnum) - path = path[len(branchprefix):] - if kind == 'f': - files.append(path) - elif kind == 'd': - if e.action == 'M': - continue - dirpath = branchprefix + path - for child, k in svn.list_files(dirpath, r.revnum): - if k == 'f': - files.append(path + '/' + child) - else: - if path in parentctx: - files.append(path) - continue - # Assume it's a deleted directory - path = path + '/' - deleted = [f for f in parentctx if f.startswith(path)] - files += deleted - - copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx) - - def filectxfn(repo, memctx, path): - data, mode = svn.get_file(branchpath + '/' + path, r.revnum) - isexec = 'x' in mode - islink = 'l' in mode - copied = copies.get(path) - return context.memfilectx(path=path, data=data, islink=islink, - isexec=isexec, copied=copied) - - return files, filectxfn - -def stupid_svn_server_pull_rev(ui, svn, hg_editor, r): - # this server fails at replay - branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath, svn.list_files) - deleted_branches = {} - brpaths = branches.values() - bad_branch_paths = {} - for br, bp in branches.iteritems(): - bad_branch_paths[br] = [] - - # This next block might be needed, but for now I'm omitting it until it can be - # proven necessary. - # for bad in brpaths: - # if bad.startswith(bp) and len(bad) > len(bp): - # bad_branch_paths[br].append(bad[len(bp)+1:]) - - # We've go a branch that contains other branches. We have to be careful to - # get results similar to real replay in this case. - for existingbr in hg_editor.branches: - bad = hg_editor._remotename(existingbr) - if bad.startswith(bp) and len(bad) > len(bp): - bad_branch_paths[br].append(bad[len(bp)+1:]) - for p in r.paths: - if hg_editor._is_path_tag(p): - continue - branch = hg_editor._localname(p) - if r.paths[p].action == 'R' and branch in hg_editor.branches: - branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum, - hg_editor.revmap.iteritems()), reverse=True) - is_closed = False - if len(branchedits) > 0: - branchtip = branchedits[0][1] - for child in hg_editor.repo[branchtip].children(): - if child.branch() == 'closed-branches': - is_closed = True - break - if not is_closed: - deleted_branches[branch] = branchtip - - date = hg_editor.fixdate(r.date) - check_deleted_branches = set() - for b in branches: - parentctx = hg_editor.repo[hg_editor.get_parent_revision(r.revnum, b)] - if parentctx.branch() != (b or 'default'): - check_deleted_branches.add(b) - kind = svn.checkpath(branches[b], r.revnum) - if kind != 'd': - # Branch does not exist at this revision. Get parent revision and - # remove everything. - deleted_branches[b] = parentctx.node() - continue - else: - try: - files_touched, filectxfn2 = stupid_diff_branchrev( - ui, svn, hg_editor, b, r, parentctx) - except BadPatchApply, e: - # Either this revision or the previous one does not exist. - ui.status("Fetching entire revision: %s.\n" % e.args[0]) - files_touched, filectxfn2 = stupid_fetch_branchrev( - svn, hg_editor, b, branches[b], r, parentctx) - - externals = stupid_fetch_externals(svn, branches[b], r, parentctx) - if externals is not None: - files_touched.append('.hgsvnexternals') - - def filectxfn(repo, memctx, path): - if path == '.hgsvnexternals': - if not externals: - raise IOError() - return context.memfilectx(path=path, data=externals.write(), - islink=False, isexec=False, copied=None) - for bad in bad_branch_paths[b]: - if path.startswith(bad): - raise IOError() - return filectxfn2(repo, memctx, path) - - extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir) - if '' in files_touched: - files_touched.remove('') - excluded = [f for f in files_touched - if not hg_editor._is_file_included(f)] - for f in excluded: - files_touched.remove(f) - if parentctx.node() != node.nullid or files_touched: - # TODO(augie) remove this debug code? Or maybe it's sane to have it. - for f in files_touched: - if f: - assert f[0] != '/' - current_ctx = context.memctx(hg_editor.repo, - [parentctx.node(), revlog.nullid], - r.message or util.default_commit_msg, - files_touched, - filectxfn, - hg_editor.authorforsvnauthor(r.author), - date, - extra) - ha = hg_editor.repo.commitctx(current_ctx) - branch = extra.get('branch', None) - if not branch in hg_editor.branches: - hg_editor.branches[branch] = None, 0, r.revnum - hg_editor.add_to_revmap(r.revnum, b, ha) - hg_editor._save_metadata() - util.describe_commit(ui, ha, b) - # These are branches which would have an 'R' status in svn log. This means they were - # replaced by some other branch, so we need to verify they get marked as closed. - for branch in check_deleted_branches: - branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum, - hg_editor.revmap.iteritems()), reverse=True) - is_closed = False - if len(branchedits) > 0: - branchtip = branchedits[0][1] - for child in hg_editor.repo[branchtip].children(): - if child.branch() == 'closed-branches': - is_closed = True - break - if not is_closed: - deleted_branches[branch] = branchtip - for b, parent in deleted_branches.iteritems(): - if parent == node.nullid: - continue - parentctx = hg_editor.repo[parent] - files_touched = parentctx.manifest().keys() - def filectxfn(repo, memctx, path): - raise IOError() - closed = node.nullid - if 'closed-branches' in hg_editor.repo.branchtags(): - closed = hg_editor.repo['closed-branches'].node() - parents = (parent, closed) - current_ctx = context.memctx(hg_editor.repo, - parents, - r.message or util.default_commit_msg, - files_touched, - filectxfn, - hg_editor.authorforsvnauthor(r.author), - date, - {'branch': 'closed-branches'}) - ha = hg_editor.repo.commitctx(current_ctx) - ui.status('Marked branch %s as closed.\n' % (b or 'default')) - hg_editor._save_metadata() - -class BadPatchApply(Exception): - pass