Mercurial > hgsubversion
diff hg_delta_editor.py @ 0:f2636cfed115
Initial import of hgsubversion into a public repository.
author | Augie Fackler <durin42@gmail.com> |
---|---|
date | Tue, 30 Sep 2008 11:42:52 -0500 |
parents | |
children | 1a5bb173170b |
line wrap: on
line diff
new file mode 100644 --- /dev/null +++ b/hg_delta_editor.py @@ -0,0 +1,614 @@ +import cStringIO +import cPickle as pickle +import os +import sys +import tempfile +import traceback + +from mercurial import context +from mercurial import hg +from mercurial import ui +from mercurial import revlog +from mercurial import node +from svn import delta +from svn import core + +def pickle_atomic(data, file_path, dir=None): + """pickle some data to a path atomically. + + This is present because I kept corrupting my revmap by managing to hit ^C + during the pickle of that file. + """ + try: + f, path = tempfile.mkstemp(prefix='pickling', dir=dir) + f = os.fdopen(f, 'w') + pickle.dump(data, f) + f.close() + except: + raise + else: + os.rename(path, file_path) + +def stash_exception_on_self(fn): + """Stash any exception raised in the method on self. + + This is required because the SWIG bindings just mutate any exception into + a generic Subversion exception with no way of telling what the original was. + This allows the editor object to notice when you try and commit and really + got an exception in the replay process. + """ + def fun(self, *args, **kwargs): + try: + return fn(self, *args, **kwargs) + except: + if not hasattr(self, '_exception_info'): + self._exception_info = sys.exc_info() + raise + return fun + + +class HgChangeReceiver(delta.Editor): + def __init__(self, path, ui_=None, subdir='', author_host='', + tag_locations=['tags']): + """path is the path to the target hg repo. + + subdir is the subdirectory of the edits *on the svn server*. + It is needed for stripping paths off in certain cases. + """ + if not ui_: + ui_ = ui.ui() + self.ui = ui_ + self.path = path + self.__setup_repo(path) + self.subdir = subdir + if self.subdir and self.subdir[0] == '/': + self.subdir = self.subdir[1:] + self.revmap = {} + if os.path.exists(self.revmap_file): + f = open(self.revmap_file) + self.revmap = pickle.load(f) + f.close() + self.branches = {} + if os.path.exists(self.branch_info_file): + f = open(self.branch_info_file) + self.branches = pickle.load(f) + f.close() + self.tags = {} + if os.path.exists(self.tag_info_file): + f = open(self.tag_info_file) + self.tags = pickle.load(f) + f.close() + if os.path.exists(self.tag_locations_file): + f = open(self.tag_locations_file) + self.tag_locations = pickle.load(f) + else: + self.tag_locations = tag_locations + pickle_atomic(self.tag_locations, self.tag_locations_file, + self.meta_data_dir) + + self.clear_current_info() + self.author_host = author_host + + def __setup_repo(self, repo_path): + '''Verify the repo is going to work out for us. + + This method will fail an assertion if the repo exists but doesn't have + the Subversion metadata. + ''' + if os.path.isdir(repo_path) and len(os.listdir(repo_path)): + self.repo = hg.repository(self.ui, repo_path) + assert os.path.isfile(self.revmap_file) + assert os.path.isfile(self.svn_url_file) + assert os.path.isfile(self.uuid_file) + assert os.path.isfile(self.last_revision_handled_file) + else: + self.repo = hg.repository(self.ui, repo_path, create=True) + os.makedirs(os.path.dirname(self.uuid_file)) + + def clear_current_info(self): + '''Clear the info relevant to a replayed revision so that the next + revision can be replayed. + ''' + self.current_files = {} + self.deleted_files = {} + self.current_rev = None + self.current_files_exec = {} + self.current_files_symlink = {} + self.missing_plaintexts = set() + self.commit_branches_empty = {} + self.base_revision = None + + def _save_metadata(self): + '''Save the Subversion metadata. This should really be called after + every revision is created. + ''' + pickle_atomic(self.revmap, self.revmap_file, self.meta_data_dir) + pickle_atomic(self.branches, self.branch_info_file, self.meta_data_dir) + pickle_atomic(self.tags, self.tag_info_file, self.meta_data_dir) + + def branches_in_paths(self, paths): + '''Given a list of paths, return the set of branches that are touched. + ''' + branches = set([]) + for p in paths: + if self._is_path_valid(p): + junk, branch = self._path_and_branch_for_path(p) + branches.add(branch) + return branches + + def _path_and_branch_for_path(self, path): + '''Figure out which branch inside our repo this path represents, and + also figure out which path inside that branch it is. + + Raises an exception if it can't perform its job. + ''' + path = self._normalize_path(path) + if path.startswith('trunk'): + p = path[len('trunk'):] + if p and p[0] == '/': + p = p[1:] + return p, None + elif path.startswith('branches/'): + p = path[len('branches/'):] + br = p.split('/')[0] + p = p[len(br)+1:] + if p and p[0] == '/': + p = p[1:] + return p, br + raise Exception,'Things went boom: ' + path + + def set_current_rev(self, rev): + '''Set the revision we're currently converting. + ''' + self.current_rev = rev + + def _normalize_path(self, path): + '''Normalize a path to strip of leading slashes and our subdir if we + have one. + ''' + if path and path[0] == '/': + path = path[1:] + if path and path.startswith(self.subdir): + path = path[len(self.subdir):] + if path and path[0] == '/': + path = path[1:] + return path + + def _is_path_valid(self, path): + path = self._normalize_path(path) + if path.startswith('trunk'): + return True + elif path.startswith('branches/'): + br = path.split('/')[1] + return len(br) > 0 + return False + + def _is_path_tag(self, path): + """If path represents the path to a tag, returns the tag name. + + Otherwise, returns False. + """ + path = self._normalize_path(path) + for tags_path in self.tag_locations: + if path and (path.startswith(tags_path) and + len(path) > len('%s/' % tags_path)): + return path[len(tags_path)+1:].split('/')[0] + return False + + def get_parent_svn_branch_and_rev(self, number, branch): + number -= 1 + if (number, branch) in self.revmap: + return number, branch + real_num = 0 + for num, br in self.revmap.iterkeys(): + if br != branch: + continue + if num <= number and num > real_num: + real_num = num + if real_num == 0: + if branch in self.branches: + parent_branch = self.branches[branch][0] + parent_branch_rev = self.branches[branch][1] + branch_created_rev = self.branches[branch][2] + if parent_branch == 'trunk': + parent_branch = None + if branch_created_rev <= number+1 and branch != parent_branch: + return self.get_parent_svn_branch_and_rev( + parent_branch_rev+1, + parent_branch) + if real_num != 0: + return real_num, branch + return None, None + + def get_parent_revision(self, number, branch): + '''Get the parent revision hash for a commit on a specific branch. + ''' + r, br = self.get_parent_svn_branch_and_rev(number, branch) + if r is not None: + return self.revmap[r, br] + return revlog.nullid + + def update_branch_tag_map_for_rev(self, revision): + paths = revision.paths + added_branches = {} + added_tags = {} + tags_to_delete = set() + for p in paths: + if self._is_path_valid(p): + fi, br = self._path_and_branch_for_path(p) + if fi == '' and br not in self.branches: + # TODO handle creating a branch from a tag + src_p = paths[p].copyfrom_path + src_rev = paths[p].copyfrom_rev + src_tag = self._is_path_tag(src_p) + + if not src_p or not (self._is_path_valid(src_p) or src_tag): + # we'll imply you're a branch off of trunk + # if you have no path, but if you do, it must be valid + # or else we assume trunk as well + src_branch = None + src_rev = revision.revnum + elif src_tag: + # this is a branch created from a tag. Note that this + # really does happen (see Django) + src_branch, src_rev = self.tags[src_tag] + added_branches[br] = (src_branch, src_rev, + revision.revnum) + else: + # Not from a tag, and from a valid repo path + (src_p, + src_branch) = self._path_and_branch_for_path(src_p) + added_branches[br] = src_branch, src_rev, revision.revnum + elif br in added_branches: + if paths[p].copyfrom_rev > added_branches[br][1]: + x,y,z = added_branches[br] + added_branches[br] = x, paths[p].copyfrom_rev, z + else: + t_name = self._is_path_tag(p) + if t_name == False: + continue + src_p, src_rev = paths[p].copyfrom_path, paths[p].copyfrom_rev + # if you commit to a tag, I'm calling you stupid and ignoring + # you. + if src_p is not None and src_rev is not None: + if self._is_path_valid(src_p): + file, branch = self._path_and_branch_for_path(src_p) + else: + # some crazy people make tags from other tags + file = '' + from_tag = self._is_path_tag(src_p) + if not from_tag: + continue + branch, src_rev = self.tags[from_tag] + if t_name not in added_tags: + added_tags[t_name] = branch, src_rev + elif file and src_rev > added_tags[t_name][1]: + added_tags[t_name] = branch, src_rev + elif (paths[p].action == 'D' and p.endswith(t_name) + and t_name in self.tags): + tags_to_delete.add(t_name) + for t in tags_to_delete: + del self.tags[t] + self.tags.update(added_tags) + self.branches.update(added_branches) + + def commit_current_delta(self): + if hasattr(self, '_exception_info'): + traceback.print_exception(*self._exception_info) + raise ReplayException() + if self.missing_plaintexts: + raise MissingPlainTextError() + files_to_commit = self.current_files.keys() + files_to_commit.extend(self.current_files_symlink.keys()) + files_to_commit.extend(self.current_files_exec.keys()) + files_to_commit = sorted(list(set(files_to_commit))) + branch_batches = {} + rev = self.current_rev + date = rev.date.replace('T', ' ').replace('Z', '').split('.')[0] + date += ' -0000' + + # build up the branches that have files on them + for f in files_to_commit: + if not self._is_path_valid(f): + continue + p, b = self._path_and_branch_for_path(f) + if b not in branch_batches: + branch_batches[b] = [] + branch_batches[b].append((p, f)) + + for branch, files in branch_batches.iteritems(): + if branch in self.commit_branches_empty and files: + del self.commit_branches_empty[branch] + extra = {} + files = dict(files) + + parents = (self.get_parent_revision(rev.revnum, branch), + revlog.nullid) + if branch is not None: + if branch not in self.branches: + continue + if parents == (revlog.nullid, revlog.nullid): + assert False, ('a non-trunk branch should probably have' + ' parents figured out by this point') + extra['branch'] = branch + parent_ctx = self.repo.changectx(parents[0]) + def filectxfn(repo, memctx, path): + is_link = False + is_exec = False + copied = None + current_file = files[path] + if current_file in self.deleted_files: + raise IOError() + # TODO(augie) tag copies from files + if path in parent_ctx: + is_exec = 'x' in parent_ctx.flags(path) + is_link = 'l' in parent_ctx.flags(path) + if current_file in self.current_files_exec: + is_exec = self.current_files_exec[current_file] + if current_file in self.current_files_symlink: + is_link = self.current_files_symlink[current_file] + if current_file in self.current_files: + data = self.current_files[current_file] + if is_link: + assert data.startswith('link ') + data = data[len('link '):] + else: + data = parent_ctx.filectx(path).data() + return context.memfilectx(path=path, + data=data, + islink=is_link, isexec=is_exec, + copied=copied) + current_ctx = context.memctx(self.repo, + parents, + rev.message or '...', + files.keys(), + filectxfn, + '%s%s' %(rev.author, self.author_host), + date, + extra) + new_hash = self.repo.commitctx(current_ctx) + self.ui.status('committed as %s on branch %s\n' % + (node.hex(new_hash), (branch or 'default'))) + if (rev.revnum, branch) not in self.revmap: + self.revmap[rev.revnum, branch] = new_hash + self._save_metadata() + # now we handle branches that need to be committed without any files + for branch in self.commit_branches_empty: + ha = self.get_parent_revision(rev.revnum, branch) + if ha == node.nullid: + continue + parent_ctx = self.repo.changectx(ha) + def del_all_files(*args): + raise IOError + extra = {} + if branch: + extra['branch'] = branch + # True here means nuke all files + files = [] + if self.commit_branches_empty[branch]: + files = parent_ctx.manifest().keys() + current_ctx = context.memctx(self.repo, + (ha, node.nullid), + rev.message or ' ', + files, + del_all_files, + '%s%s' % (rev.author, + self.author_host), + date, + extra) + new_hash = self.repo.commitctx(current_ctx) + self.ui.status('committed as %s on branch %s\n' % + (node.hex(new_hash), (branch or 'default'))) + if (rev.revnum, branch) not in self.revmap: + self.revmap[rev.revnum, branch] = new_hash + self._save_metadata() + self.clear_current_info() + + @property + def meta_data_dir(self): + return os.path.join(self.path, '.hg', 'svn') + + def meta_file_named(self, name): + return os.path.join(self.meta_data_dir, name) + + @property + def revmap_file(self): + return self.meta_file_named('rev_map') + + @property + def svn_url_file(self): + return self.meta_file_named('url') + + @property + def uuid_file(self): + return self.meta_file_named('uuid') + + @property + def last_revision_handled_file(self): + return self.meta_file_named('last_rev') + + @property + def branch_info_file(self): + return self.meta_file_named('branch_info') + + @property + def tag_info_file(self): + return self.meta_file_named('tag_info') + + @property + def tag_locations_file(self): + return self.meta_file_named('tag_locations') + + @property + def url(self): + return open(self.svn_url_file).read() + + @stash_exception_on_self + def delete_entry(self, path, revision_bogus, parent_baton, pool=None): + if self._is_path_valid(path): + br_path, branch = self._path_and_branch_for_path(path) + ha = self.get_parent_revision(self.current_rev.revnum, branch) + if ha == revlog.nullid: + return + ctx = self.repo.changectx(ha) + if br_path not in ctx: + br_path2 = '' + if br_path != '': + br_path2 = br_path + '/' + # assuming it is a directory + for f in ctx: + if f.startswith(br_path2): + f_p = '%s/%s' % (path, f[len(br_path2):]) + self.deleted_files[f_p] = True + self.current_files[f_p] = '' + self.ui.status('D %s\n' % f_p) + self.deleted_files[path] = True + self.current_files[path] = '' + self.ui.status('D %s\n' % path) + + @stash_exception_on_self + def open_file(self, path, parent_baton, base_revision, p=None): + self.current_file = 'foobaz' + if self._is_path_valid(path): + self.current_file = path + self.ui.status('M %s\n' % path) + if base_revision != -1: + self.base_revision = base_revision + else: + self.base_revision = None + self.should_edit_most_recent_plaintext = True + + @stash_exception_on_self + def add_file(self, path, parent_baton, copyfrom_path, + copyfrom_revision, file_pool=None): + self.current_file = 'foobaz' + self.base_revision = None + if path in self.deleted_files: + del self.deleted_files[path] + if self._is_path_valid(path): + self.current_file = path + self.should_edit_most_recent_plaintext = False + if copyfrom_path: + self.ui.status('A+ %s\n' % path) + # TODO(augie) handle this better, actually mark a copy + (from_file, + from_branch) = self._path_and_branch_for_path(copyfrom_path) + ha = self.get_parent_revision(copyfrom_revision + 1, + from_branch) + ctx = self.repo.changectx(ha) + if from_file in ctx: + fctx = ctx.filectx(from_file) + cur_file = self.current_file + self.current_files[cur_file] = fctx.data() + self.current_files_symlink[cur_file] = 'l' in fctx.flags() + self.current_files_exec[cur_file] = 'x' in fctx.flags() + else: + self.ui.status('A %s\n' % path) + + + @stash_exception_on_self + def add_directory(self, path, parent_baton, copyfrom_path, + copyfrom_revision, dir_pool=None): + if self._is_path_valid(path): + junk, branch = self._path_and_branch_for_path(path) + if not copyfrom_path and not junk: + self.commit_branches_empty[branch] = True + else: + self.commit_branches_empty[branch] = False + if not (self._is_path_valid(path) and copyfrom_path and + self._is_path_valid(copyfrom_path)): + return + + cp_f, br_from = self._path_and_branch_for_path(copyfrom_path) + new_hash = self.get_parent_revision(copyfrom_revision + 1, br_from) + if new_hash == node.nullid: + self.missing_plaintexts.add('%s/' % path) + return + cp_f_ctx = self.repo.changectx(new_hash) + if cp_f != '/' and cp_f != '': + cp_f = '%s/' % cp_f + else: + cp_f = '' + for f in cp_f_ctx: + if f.startswith(cp_f): + f2 = f[len(cp_f):] + fctx = cp_f_ctx.filectx(f) + fp_c = path + '/' + f2 + self.current_files[fp_c] = fctx.data() + self.current_files_exec[fp_c] = 'x' in fctx.flags() + self.current_files_symlink[fp_c] = 'l' in fctx.flags() + # TODO(augie) tag copies from files + + @stash_exception_on_self + def change_file_prop(self, file_baton, name, value, pool=None): + if name == 'svn:executable': + self.current_files_exec[self.current_file] = bool(value) + elif name == 'svn:special': + self.current_files_symlink[self.current_file] = bool(value) + + @stash_exception_on_self + def open_directory(self, path, parent_baton, base_revision, dir_pool=None): + if self._is_path_valid(path): + p_, branch = self._path_and_branch_for_path(path) + if p_ == '': + self.commit_branches_empty[branch] = False + + @stash_exception_on_self + def apply_textdelta(self, file_baton, base_checksum, pool=None): + base = '' + if not self._is_path_valid(self.current_file): + return lambda x: None + if (self.current_file in self.current_files + and not self.should_edit_most_recent_plaintext): + base = self.current_files[self.current_file] + elif (base_checksum is not None or + self.should_edit_most_recent_plaintext): + p_, br = self._path_and_branch_for_path(self.current_file) + par_rev = self.current_rev.revnum + if self.base_revision: + par_rev = self.base_revision + 1 + ha = self.get_parent_revision(par_rev, br) + if ha != revlog.nullid: + ctx = self.repo.changectx(ha) + if not p_ in ctx: + self.missing_plaintexts.add(self.current_file) + # short circuit exit since we can't do anything anyway + return lambda x: None + base = ctx.filectx(p_).data() + source = cStringIO.StringIO(base) + target = cStringIO.StringIO() + self.stream = target + + handler, baton = delta.svn_txdelta_apply(source, target, None) + if not callable(handler): + # TODO(augie) Raise a real exception, don't just fail an assertion. + assert False, 'handler not callable, bindings are broken' + def txdelt_window(window): + try: + if not self._is_path_valid(self.current_file): + return + handler(window, baton) + # window being None means commit this file + if not window: + self.current_files[self.current_file] = target.getvalue() + except core.SubversionException, e: + if e.message == 'Delta source ended unexpectedly': + self.missing_plaintexts.add(self.current_file) + else: + self._exception_info = sys.exc_info() + raise + except: + print len(base), self.current_file + self._exception_info = sys.exc_info() + raise + return txdelt_window + +class MissingPlainTextError(Exception): + """Exception raised when the repo lacks a source file required for replaying + a txdelta. + """ + +class ReplayException(Exception): + """Exception raised when you try and commit but the replay encountered an + exception. + """