Mercurial > hgsubversion
diff fetch_command.py @ 0:f2636cfed115
Initial import of hgsubversion into a public repository.
author | Augie Fackler <durin42@gmail.com> |
---|---|
date | Tue, 30 Sep 2008 11:42:52 -0500 |
parents | |
children | 1a5bb173170b |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/fetch_command.py @@ -0,0 +1,447 @@ +import cStringIO +import re +import operator +import os +import shutil +import stat +import tempfile + +from mercurial import patch +from mercurial import node +from mercurial import context +from mercurial import revlog +from svn import core +from svn import delta + +import hg_delta_editor +import svnwrap +import util + + +def print_your_svn_is_old_message(ui): + ui.status("In light of that, I'll fall back and do diffs, but it won't do " + "as good a job. You should really upgrade your server.") + + +@util.register_subcommand('pull') +def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None, + tag_locations='tags', + **opts): + """Pull new revisions from Subversion. + """ + skipto_rev=int(skipto_rev) + have_replay = not stupid + if have_replay and not callable(delta.svn_txdelta_apply(None, None, + None)[0]): + ui.status('You are using old Subversion SWIG bindings. Replay will not' + ' work until you upgrade to 1.5.0 or newer. Falling back to' + ' a slower method that may be buggier. Please upgrade, or' + ' contribute a patch to use the ctypes bindings instead' + ' of SWIG.') + have_replay = False + initializing_repo = False + svn = svnwrap.SubversionRepo(svn_url) + author_host = "@%s" % svn.uuid + tag_locations = tag_locations.split(',') + hg_editor = hg_delta_editor.HgChangeReceiver(hg_repo_path, + ui_=ui, + subdir=svn.subdir, + author_host=author_host, + tag_locations=tag_locations) + if os.path.exists(hg_editor.uuid_file): + uuid = open(hg_editor.uuid_file).read() + assert uuid == svn.uuid + start = int(open(hg_editor.last_revision_handled_file, 'r').read()) + else: + open(hg_editor.uuid_file, 'w').write(svn.uuid) + open(hg_editor.svn_url_file, 'w').write(svn_url) + open(hg_editor.last_revision_handled_file, 'w').write(str(0)) + initializing_repo = True + start = skipto_rev + + # start converting revisions + for r in svn.revisions(start=start): + valid = False + hg_editor.update_branch_tag_map_for_rev(r) + for p in r.paths: + if hg_editor._is_path_valid(p): + valid = True + continue + if initializing_repo and start > 0: + assert False, 'This feature not ready yet.' + if valid: + # got a 502? Try more than once! + tries = 0 + converted = False + while not converted and tries < 3: + try: + ui.status('converting %s\n' % r) + if have_replay: + try: + replay_convert_rev(hg_editor, svn, r) + except svnwrap.SubversionRepoCanNotReplay, e: + ui.status('%s\n' % e.message) + print_your_svn_is_old_message(ui) + have_replay = False + stupid_svn_server_pull_rev(ui, svn, hg_editor, r) + else: + stupid_svn_server_pull_rev(ui, svn, hg_editor, r) + converted = True + open(hg_editor.last_revision_handled_file, + 'w').write(str(r.revnum)) + except core.SubversionException, e: + if hasattr(e, 'message') and ( + 'Server sent unexpected return value (502 Bad Gateway)' + ' in response to PROPFIND') in e.message: + tries += 1 + ui.status('Got a 502, retrying (%s)\n' % tries) + else: + raise + + +def replay_convert_rev(hg_editor, svn, r): + hg_editor.set_current_rev(r) + svn.get_replay(r.revnum, hg_editor) + if hg_editor.missing_plaintexts: + files_to_grab = set() + dirs_to_list = [] + props = {} + for p in hg_editor.missing_plaintexts: + p2 = p + if svn.subdir: + p2 = p2[len(svn.subdir)-1:] + # this *sometimes* raises on me, and I have + # no idea why. TODO(augie) figure out the why. + try: + pl = svn.proplist(p2, r.revnum, recurse=True) + except core.SubversionException, e: + pass + props.update(pl) + if p[-1] == '/': + dirs_to_list.append(p) + else: + files_to_grab.add(p) + while dirs_to_list: + p = dirs_to_list.pop(0) + l = svn.list_dir(p[:-1], r.revnum) + for f in l: + + if l[f].kind == core.svn_node_dir: + dirs_to_list.append(p+f+'/') + elif l[f].kind == core.svn_node_file: + files_to_grab.add(p+f) + for p in files_to_grab: + p2 = p + if svn.subdir: + p2 = p2[len(svn.subdir)-1:] + hg_editor.current_files[p] = svn.get_file(p2, r.revnum) + hg_editor.current_files_exec[p] = False + if p in props: + if 'svn:executable' in props[p]: + hg_editor.current_files_exec[p] = True + if 'svn:special' in props[p]: + hg_editor.current_files_symlink[p] = True + hg_editor.missing_plaintexts = set() + hg_editor.commit_current_delta() + + +binary_file_re = re.compile(r'''Index: ([^\n]*) +=* +Cannot display: file marked as a binary type.''') + +property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*) +_* +Added: svn:executable + \+ \* +''') + +property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*) +_* +Deleted: svn:executable + - \* +''') + +empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''') + +any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE) + +property_special_set_re = re.compile(r'''Property changes on: ([^\n]*) +_* +Added: svn:special + \+ \* +''') + +property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*) +_* +Added: svn:special + \- \* +''') + +def make_diff_path(b): + if b == None: + return 'trunk' + return 'branches/' + b + + +def stupid_svn_server_pull_rev(ui, svn, hg_editor, r): + used_diff = True + delete_all_files = False + # this server fails at replay + branches = hg_editor.branches_in_paths(r.paths) + temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp') + if not os.path.exists(temp_location): + os.makedirs(temp_location) + for b in branches: + our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location) + diff_path = make_diff_path(b) + parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, b) + parent_ha = hg_editor.get_parent_revision(r.revnum, b) + files_touched = set() + link_files = {} + exec_files = {} + try: + if br_p == b: + d = svn.get_unified_diff(diff_path, r.revnum, deleted=False, + # letting patch handle binaries sounded + # cool, but it breaks patch in sad ways + ignore_type=False) + else: + d = svn.get_unified_diff(diff_path, r.revnum, + other_path=make_diff_path(br_p), + other_rev=parent_rev, + deleted=True, ignore_type=True) + if d: + ui.status('Branch creation with mods, pulling full rev.\n') + raise BadPatchApply() + for m in binary_file_re.findall(d): + # we have to pull each binary file by hand as a fulltext, + # which sucks but we've got no choice + file_path = os.path.join(our_tempdir, m) + files_touched.add(m) + try: + try: + os.makedirs(os.path.dirname(file_path)) + except OSError, e: + pass + f = open(file_path, 'w') + f.write(svn.get_file(diff_path+'/'+m, r.revnum)) + f.close() + except core.SubversionException, e: + if (e.message.endswith("' path not found") + or e.message.startswith("File not found: revision")): + pass + else: + raise + d2 = empty_file_patch_wont_make_re.sub('', d) + d2 = property_exec_set_re.sub('', d2) + d2 = property_exec_removed_re.sub('', d2) + old_cwd = os.getcwd() + os.chdir(our_tempdir) + for f in any_file_re.findall(d): + files_touched.add(f) + # this check is here because modified binary files will get + # created before here. + if os.path.exists(f): + continue + dn = os.path.dirname(f) + if dn and not os.path.exists(dn): + os.makedirs(dn) + if f in hg_editor.repo[parent_ha].manifest(): + data = hg_editor.repo[parent_ha].filectx(f).data() + fi = open(f, 'w') + fi.write(data) + fi.close() + else: + open(f, 'w').close() + if f.startswith(our_tempdir): + f = f[len(our_tempdir)+1:] + os.chdir(old_cwd) + if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0: + old_cwd = os.getcwd() + os.chdir(our_tempdir) + changed = {} + try: + patch_st = patch.applydiff(ui, cStringIO.StringIO(d2), + changed, strip=0) + except patch.PatchError: + # TODO: this happens if the svn server has the wrong mime + # type stored and doesn't know a file is binary. It would + # be better to do one file at a time and only do a + # full fetch on files that had problems. + os.chdir(old_cwd) + raise BadPatchApply() + for x in changed.iterkeys(): + ui.status('M %s\n' % x) + files_touched.add(x) + os.chdir(old_cwd) + # if this patch didn't apply right, fall back to exporting the + # entire rev. + if patch_st == -1: + parent_ctx = hg_editor.repo[parent_ha] + parent_manifest = parent_ctx.manifest() + for fn in files_touched: + if (fn in parent_manifest and + 'l' in parent_ctx.filectx(fn).flags()): + # I think this might be an underlying bug in svn - + # I get diffs of deleted symlinks even though I + # specifically said no deletes above. + ui.status('Pulling whole rev because of a deleted' + 'symlink') + raise BadPatchApply() + assert False, ('This should only happen on case-insensitive' + ' volumes.') + elif patch_st == 1: + # When converting Django, I saw fuzz on .po files that was + # causing revisions to end up failing verification. If that + # can be fixed, maybe this won't ever be reached. + ui.status('There was some fuzz, not using diff after all.') + raise BadPatchApply() + else: + ui.status('Not using patch for %s, diff had no hunks.\n' % + r.revnum) + + # we create the files if they don't exist here because we know + # that we'll never have diff info for a deleted file, so if the + # property is set, we should force the file to exist no matter what. + for m in property_exec_removed_re.findall(d): + f = os.path.join(our_tempdir, m) + if not os.path.exists(f): + d = os.path.dirname(f) + if not os.path.exists(d): + os.makedirs(d) + if not m in hg_editor.repo[parent_ha].manifest(): + open(f, 'w').close() + else: + data = hg_editor.repo[parent_ha].filectx(m).data() + fp = open(f, 'w') + fp.write(data) + fp.close() + exec_files[m] = False + files_touched.add(m) + for m in property_exec_set_re.findall(d): + f = os.path.join(our_tempdir, m) + if not os.path.exists(f): + d = os.path.dirname(f) + if not os.path.exists(d): + os.makedirs(d) + if m not in hg_editor.repo[parent_ha].manifest(): + open(f, 'w').close() + else: + data = hg_editor.repo[parent_ha].filectx(m).data() + fp = open(f, 'w') + fp.write(data) + fp.close() + exec_files[m] = True + files_touched.add(m) + for m in property_special_set_re.findall(d): + # TODO(augie) when a symlink is removed, patching will fail. + # We're seeing that above - there's gotta be a better + # workaround than just bailing like that. + path = os.path.join(our_tempdir, m) + assert os.path.exists(path) + link_path = open(path).read() + link_path = link_path[len('link '):] + os.remove(path) + link_files[m] = link_path + files_touched.add(m) + except core.SubversionException, e: + if (e.apr_err == 160013 or (hasattr(e, 'message') and + 'was not found in the repository at revision ' in e.message)): + # Either this revision or the previous one does not exist. + try: + ui.status("fetching entire rev previous rev does not exist.\n") + used_diff = False + svn.fetch_all_files_to_dir(diff_path, r.revnum, our_tempdir) + except core.SubversionException, e: + if e.apr_err == 170000 or (e.message.startswith("URL '") + and e.message.endswith("' doesn't exist")): + delete_all_files = True + else: + raise + + except BadPatchApply, e: + # previous rev didn't exist, so this is most likely the first + # revision. We'll have to pull all files by hand. + try: + ui.status("fetching entire rev because raised.\n") + used_diff = False + shutil.rmtree(our_tempdir) + os.makedirs(our_tempdir) + svn.fetch_all_files_to_dir(diff_path, r.revnum, our_tempdir) + except core.SubversionException, e: + if e.apr_err == 170000 or (e.message.startswith("URL '") + and e.message.endswith("' doesn't exist")): + delete_all_files = True + else: + raise + for p in r.paths: + if p.startswith(diff_path) and r.paths[p].action == 'D': + p2 = p[len(diff_path)+1:] + files_touched.add(p2) + p3 = os.path.join(our_tempdir, p2) + if os.path.exists(p3) and not os.path.isdir(p3): + os.unlink(p3) + if p2 and p2[0] == '/': + p2 = p2[1:] + # If this isn't in the parent ctx, it must've been a dir + if not p2 in hg_editor.repo[parent_ha]: + d_files = [f for f in hg_editor.repo[parent_ha].manifest().iterkeys() + if f.startswith(p2 + '/')] + for d in d_files: + files_touched.add(d) + if delete_all_files: + for p in hg_editor.repo[parent_ha].manifest().iterkeys(): + files_touched.add(p) + if not used_diff: + for p in reduce(operator.add, [[os.path.join(x[0], y) for y in x[2]] + for x in + list(os.walk(our_tempdir))]): + p_real = p[len(our_tempdir)+1:] + if os.path.islink(p): + link_files[p_real] = os.readlink(p) + exec_files[p_real] = (os.lstat(p).st_mode & 0100 != 0) + files_touched.add(p_real) + for p in hg_editor.repo[parent_ha].manifest().iterkeys(): + # TODO this might not be a required step. + files_touched.add(p) + date = r.date.replace('T', ' ').replace('Z', '').split('.')[0] + date += ' -0000' + def filectxfn(repo, memctx, path): + disk_path = os.path.join(our_tempdir, path) + if path in link_files: + return context.memfilectx(path=path, data=link_files[path], + islink=True, isexec=False, + copied=False) + fp = open(disk_path) + exe = exec_files.get(path, None) + if exe is None and path in hg_editor.repo[parent_ha]: + exe = 'x' in hg_editor.repo[parent_ha].filectx(path).flags() + return context.memfilectx(path=path, data=fp.read(), islink=False, + isexec=exe, copied=False) + extra = {} + if b: + extra['branch'] = b + if parent_ha != node.nullid or files_touched: + # TODO(augie) remove this debug code? Or maybe it's sane to have it. + for f in files_touched: + if f: + assert f[0] != '/' + current_ctx = context.memctx(hg_editor.repo, + [parent_ha, revlog.nullid], + r.message or '...', + files_touched, + filectxfn, + '%s%s' % (r.author, + hg_editor.author_host), + date, + extra) + ha = hg_editor.repo.commitctx(current_ctx) + hg_editor.revmap[r.revnum, b] = ha + hg_editor._save_metadata() + ui.status('committed as %s on branch %s\n' % + (node.hex(ha), b or 'default')) + shutil.rmtree(our_tempdir) + + +class BadPatchApply(Exception): + pass