diff hg_delta_editor.py @ 0:f2636cfed115

Initial import of hgsubversion into a public repository.
author Augie Fackler <durin42@gmail.com>
date Tue, 30 Sep 2008 11:42:52 -0500
parents
children 1a5bb173170b
line wrap: on
line diff
new file mode 100644
--- /dev/null
+++ b/hg_delta_editor.py
@@ -0,0 +1,614 @@
+import cStringIO
+import cPickle as pickle
+import os
+import sys
+import tempfile
+import traceback
+
+from mercurial import context
+from mercurial import hg
+from mercurial import ui
+from mercurial import revlog
+from mercurial import node
+from svn import delta
+from svn import core
+
def pickle_atomic(data, file_path, dir=None):
    """Pickle some data to a path atomically.

    This is present because I kept corrupting my revmap by managing to hit ^C
    during the pickle of that file.  The data is written to a temporary file
    and renamed into place, so file_path never holds a partial pickle.

    data: any picklable object.
    file_path: destination path.
    dir: directory for the temporary file; should be on the same
        filesystem as file_path so the rename is atomic.
    """
    f, path = tempfile.mkstemp(prefix='pickling', dir=dir)
    try:
        # binary mode: a pickle is a byte stream, not text
        f = os.fdopen(f, 'wb')
        pickle.dump(data, f)
        f.close()
    except:
        # don't leave the half-written temp file behind on failure
        try:
            os.unlink(path)
        except OSError:
            pass
        raise
    else:
        os.rename(path, file_path)
+
def stash_exception_on_self(fn):
    """Decorator: record any exception raised by fn on the instance.

    The SWIG Subversion bindings replace whatever a callback raises with a
    generic Subversion error, losing the original exception.  Saving
    sys.exc_info() onto self before re-raising lets the editor notice at
    commit time that the replay really failed.  Only the first exception
    is kept.
    """
    def wrapper(self, *args, **kwargs):
        try:
            return fn(self, *args, **kwargs)
        except:
            # keep only the first stashed exception
            if not hasattr(self, '_exception_info'):
                self._exception_info = sys.exc_info()
            raise
    return wrapper
+
+
class HgChangeReceiver(delta.Editor):
    def __init__(self, path, ui_=None, subdir='', author_host='',
                 tag_locations=None):
        """path is the path to the target hg repo.

        subdir is the subdirectory of the edits *on the svn server*.
        It is needed for stripping paths off in certain cases.

        ui_: mercurial ui object; a default one is created if omitted.
        author_host: suffix appended to svn usernames when building hg
            author strings.
        tag_locations: list of repo-relative paths that hold tags
            (defaults to ['tags']); persisted so later runs reuse the
            same locations.
        """
        # avoid the shared-mutable-default-argument trap
        if tag_locations is None:
            tag_locations = ['tags']
        if not ui_:
            ui_ = ui.ui()
        self.ui = ui_
        self.path = path
        self.__setup_repo(path)
        self.subdir = subdir
        if self.subdir and self.subdir[0] == '/':
            self.subdir = self.subdir[1:]

        def load_pickle(meta_path, default):
            # load persisted metadata if present, else use the default
            if os.path.exists(meta_path):
                f = open(meta_path)
                try:
                    return pickle.load(f)
                finally:
                    f.close()
            return default

        self.revmap = load_pickle(self.revmap_file, {})
        self.branches = load_pickle(self.branch_info_file, {})
        self.tags = load_pickle(self.tag_info_file, {})
        self.tag_locations = load_pickle(self.tag_locations_file,
                                         tag_locations)
        # remember the tag locations for subsequent runs
        pickle_atomic(self.tag_locations, self.tag_locations_file,
                      self.meta_data_dir)

        self.clear_current_info()
        self.author_host = author_host
+
    def __setup_repo(self, repo_path):
        '''Verify the repo is going to work out for us.

        This method will fail an assertion if the repo exists but doesn't have
        the Subversion metadata.

        NOTE: validation is done with assert, so it is skipped when Python
        runs with -O.
        '''
        if os.path.isdir(repo_path) and len(os.listdir(repo_path)):
            # non-empty directory: must be an existing converted repo,
            # complete with all of our metadata files
            self.repo = hg.repository(self.ui, repo_path)
            assert os.path.isfile(self.revmap_file)
            assert os.path.isfile(self.svn_url_file)
            assert os.path.isfile(self.uuid_file)
            assert os.path.isfile(self.last_revision_handled_file)
        else:
            # fresh start: create the repo and the .hg/svn metadata dir
            self.repo = hg.repository(self.ui, repo_path, create=True)
            os.makedirs(os.path.dirname(self.uuid_file))
+
+    def clear_current_info(self):
+        '''Clear the info relevant to a replayed revision so that the next
+        revision can be replayed.
+        '''
+        self.current_files = {}
+        self.deleted_files = {}
+        self.current_rev = None
+        self.current_files_exec = {}
+        self.current_files_symlink = {}
+        self.missing_plaintexts = set()
+        self.commit_branches_empty = {}
+        self.base_revision = None
+
+    def _save_metadata(self):
+        '''Save the Subversion metadata. This should really be called after
+        every revision is created.
+        '''
+        pickle_atomic(self.revmap, self.revmap_file, self.meta_data_dir)
+        pickle_atomic(self.branches, self.branch_info_file, self.meta_data_dir)
+        pickle_atomic(self.tags, self.tag_info_file, self.meta_data_dir)
+
+    def branches_in_paths(self, paths):
+        '''Given a list of paths, return the set of branches that are touched.
+        '''
+        branches = set([])
+        for p in paths:
+            if self._is_path_valid(p):
+                junk, branch = self._path_and_branch_for_path(p)
+                branches.add(branch)
+        return branches
+
+    def _path_and_branch_for_path(self, path):
+        '''Figure out which branch inside our repo this path represents, and
+        also figure out which path inside that branch it is.
+
+        Raises an exception if it can't perform its job.
+        '''
+        path = self._normalize_path(path)
+        if path.startswith('trunk'):
+            p = path[len('trunk'):]
+            if p and p[0] == '/':
+                p = p[1:]
+            return p, None
+        elif path.startswith('branches/'):
+            p = path[len('branches/'):]
+            br = p.split('/')[0]
+            p = p[len(br)+1:]
+            if p and p[0] == '/':
+                p = p[1:]
+            return p, br
+        raise Exception,'Things went boom: ' + path
+
    def set_current_rev(self, rev):
        '''Set the revision we're currently converting.

        rev is an svn revision object; its .revnum, .date, .author and
        .message attributes are consumed when the delta is committed.
        '''
        self.current_rev = rev
+
+    def _normalize_path(self, path):
+        '''Normalize a path to strip of leading slashes and our subdir if we
+        have one.
+        '''
+        if path and path[0] == '/':
+            path = path[1:]
+        if path and path.startswith(self.subdir):
+            path = path[len(self.subdir):]
+        if path and path[0] == '/':
+            path = path[1:]
+        return path
+
+    def _is_path_valid(self, path):
+        path = self._normalize_path(path)
+        if path.startswith('trunk'):
+            return True
+        elif path.startswith('branches/'):
+            br = path.split('/')[1]
+            return len(br) > 0
+        return False
+
+    def _is_path_tag(self, path):
+        """If path represents the path to a tag, returns the tag name.
+
+        Otherwise, returns False.
+        """
+        path = self._normalize_path(path)
+        for tags_path in self.tag_locations:
+            if path and (path.startswith(tags_path) and
+                         len(path) > len('%s/' % tags_path)):
+                return path[len(tags_path)+1:].split('/')[0]
+        return False
+
    def get_parent_svn_branch_and_rev(self, number, branch):
        '''Find the (svn revision, branch) pair in the revmap that should
        parent a commit for revision `number` on `branch`.

        Returns (None, None) when no suitable parent exists.
        '''
        # start looking at the revision just before `number`
        number -= 1
        if (number, branch) in self.revmap:
            return number, branch
        real_num = 0
        # otherwise take the highest mapped revision on this branch
        # that is still <= number
        for num, br in self.revmap.iterkeys():
            if br != branch:
                continue
            if num <= number and num > real_num:
                real_num = num
        if real_num == 0:
            # nothing on this branch yet; fall back to the branch's
            # recorded parent branch, if we know of one
            if branch in self.branches:
                parent_branch = self.branches[branch][0]
                parent_branch_rev = self.branches[branch][1]
                branch_created_rev = self.branches[branch][2]
                if parent_branch == 'trunk':
                    # trunk is represented internally as branch None
                    parent_branch = None
                if branch_created_rev <= number+1 and branch != parent_branch:
                    return self.get_parent_svn_branch_and_rev(
                                                    parent_branch_rev+1,
                                                    parent_branch)
        if real_num != 0:
            return real_num, branch
        return None, None
+
+    def get_parent_revision(self, number, branch):
+        '''Get the parent revision hash for a commit on a specific branch.
+        '''
+        r, br = self.get_parent_svn_branch_and_rev(number, branch)
+        if r is not None:
+            return self.revmap[r, br]
+        return revlog.nullid
+
    def update_branch_tag_map_for_rev(self, revision):
        '''Record branch and tag creations/copies/deletions found in the
        changed paths of one svn revision.

        Updates self.branches (branch -> (parent branch, parent rev,
        created rev)) and self.tags (tag -> (branch, rev)) in place.
        '''
        paths = revision.paths
        added_branches = {}
        added_tags = {}
        tags_to_delete = set()
        for p in paths:
            if self._is_path_valid(p):
                fi, br = self._path_and_branch_for_path(p)
                if fi == '' and br not in self.branches:
                    # p is the root of a branch we haven't seen before:
                    # a branch is being created here
                    # TODO handle creating a branch from a tag
                    src_p = paths[p].copyfrom_path
                    src_rev = paths[p].copyfrom_rev
                    src_tag = self._is_path_tag(src_p)

                    if not src_p or not (self._is_path_valid(src_p) or src_tag):
                        # we'll imply you're a branch off of trunk
                        # if you have no path, but if you do, it must be valid
                        # or else we assume trunk as well
                        src_branch = None
                        src_rev = revision.revnum
                    elif src_tag:
                        # this is a branch created from a tag. Note that this
                        # really does happen (see Django)
                        src_branch, src_rev = self.tags[src_tag]
                        added_branches[br] = (src_branch, src_rev,
                                              revision.revnum)
                    else:
                        # Not from a tag, and from a valid repo path
                        (src_p,
                        src_branch) = self._path_and_branch_for_path(src_p)
                    added_branches[br] = src_branch, src_rev, revision.revnum
                elif br in added_branches:
                    # another path on a branch added in this same revision:
                    # keep the highest copyfrom revision we've seen
                    if paths[p].copyfrom_rev > added_branches[br][1]:
                        x,y,z = added_branches[br]
                        added_branches[br] = x, paths[p].copyfrom_rev, z
            else:
                t_name = self._is_path_tag(p)
                if t_name == False:
                    continue
                src_p, src_rev = paths[p].copyfrom_path, paths[p].copyfrom_rev
                # if you commit to a tag, I'm calling you stupid and ignoring
                # you.
                if src_p is not None and src_rev is not None:
                    if self._is_path_valid(src_p):
                        file, branch = self._path_and_branch_for_path(src_p)
                    else:
                        # some crazy people make tags from other tags
                        file = ''
                        from_tag = self._is_path_tag(src_p)
                        if not from_tag:
                            continue
                        branch, src_rev = self.tags[from_tag]
                    if t_name not in added_tags:
                        added_tags[t_name] = branch, src_rev
                    elif file and src_rev > added_tags[t_name][1]:
                        added_tags[t_name] = branch, src_rev
                elif (paths[p].action == 'D' and p.endswith(t_name)
                      and t_name in self.tags):
                        # the tag directory itself was deleted
                        tags_to_delete.add(t_name)
        for t in tags_to_delete:
            del self.tags[t]
        self.tags.update(added_tags)
        self.branches.update(added_branches)
+
    def commit_current_delta(self):
        '''Build and commit hg changesets for the replayed revision, one
        per touched branch, then reset the per-revision state.

        Raises ReplayException if an exception was stashed during replay,
        and MissingPlainTextError if some source text was unavailable.
        '''
        if hasattr(self, '_exception_info'):
            # an editor callback blew up during replay; surface it now
            traceback.print_exception(*self._exception_info)
            raise ReplayException()
        if self.missing_plaintexts:
            raise MissingPlainTextError()
        files_to_commit = self.current_files.keys()
        files_to_commit.extend(self.current_files_symlink.keys())
        files_to_commit.extend(self.current_files_exec.keys())
        files_to_commit = sorted(list(set(files_to_commit)))
        branch_batches = {}
        rev = self.current_rev
        # reshape the svn ISO-8601 date (e.g. ...T11:42:52.000000Z) into
        # the 'YYYY-MM-DD HH:MM:SS -0000' form hg expects
        date = rev.date.replace('T', ' ').replace('Z', '').split('.')[0]
        date += ' -0000'

        # build up the branches that have files on them
        for f in files_to_commit:
            if not  self._is_path_valid(f):
                continue
            p, b = self._path_and_branch_for_path(f)
            if b not in branch_batches:
                branch_batches[b] = []
            branch_batches[b].append((p, f))

        for branch, files in branch_batches.iteritems():
            if branch in self.commit_branches_empty and files:
                del self.commit_branches_empty[branch]
            extra = {}
            # branch-relative path -> full svn path
            files = dict(files)

            parents = (self.get_parent_revision(rev.revnum, branch),
                       revlog.nullid)
            if branch is not None:
                if branch not in self.branches:
                    # unknown branch: skip rather than commit blindly
                    continue
                if parents == (revlog.nullid, revlog.nullid):
                    assert False, ('a non-trunk branch should probably have'
                                   ' parents figured out by this point')
                extra['branch'] = branch
            parent_ctx = self.repo.changectx(parents[0])
            def filectxfn(repo, memctx, path):
                # memctx callback: build the new file context for path;
                # raising IOError marks the file as deleted
                is_link = False
                is_exec = False
                copied = None
                current_file = files[path]
                if current_file in self.deleted_files:
                    raise IOError()
                # TODO(augie) tag copies from files
                if path in parent_ctx:
                    # inherit flags from the parent unless svn told us
                    # otherwise below
                    is_exec = 'x' in parent_ctx.flags(path)
                    is_link = 'l' in parent_ctx.flags(path)
                if current_file in self.current_files_exec:
                    is_exec = self.current_files_exec[current_file]
                if current_file in self.current_files_symlink:
                    is_link = self.current_files_symlink[current_file]
                if current_file in self.current_files:
                    data = self.current_files[current_file]
                    if is_link:
                        # strip hg's internal 'link ' prefix for symlinks
                        assert data.startswith('link ')
                        data = data[len('link '):]
                else:
                    # content unchanged: reuse the parent's plaintext
                    data = parent_ctx.filectx(path).data()
                return context.memfilectx(path=path,
                                          data=data,
                                          islink=is_link, isexec=is_exec,
                                          copied=copied)
            current_ctx = context.memctx(self.repo,
                                         parents,
                                         rev.message or '...',
                                         files.keys(),
                                         filectxfn,
                                         '%s%s' %(rev.author, self.author_host),
                                         date,
                                         extra)
            new_hash = self.repo.commitctx(current_ctx)
            self.ui.status('committed as %s on branch %s\n' %
                           (node.hex(new_hash), (branch or 'default')))
            if (rev.revnum, branch) not in self.revmap:
                self.revmap[rev.revnum, branch] = new_hash
                self._save_metadata()
        # now we handle branches that need to be committed without any files
        for branch in self.commit_branches_empty:
            ha = self.get_parent_revision(rev.revnum, branch)
            if ha == node.nullid:
                continue
            parent_ctx = self.repo.changectx(ha)
            def del_all_files(*args):
                # filectxfn that deletes every file it is asked about
                raise IOError
            extra = {}
            if branch:
                extra['branch'] = branch
            # True here means nuke all files
            files = []
            if self.commit_branches_empty[branch]:
                files = parent_ctx.manifest().keys()
            current_ctx = context.memctx(self.repo,
                                         (ha, node.nullid),
                                         rev.message or ' ',
                                         files,
                                         del_all_files,
                                         '%s%s' % (rev.author,
                                                   self.author_host),
                                         date,
                                         extra)
            new_hash = self.repo.commitctx(current_ctx)
            self.ui.status('committed as %s on branch %s\n' %
                           (node.hex(new_hash), (branch or 'default')))
            if (rev.revnum, branch) not in self.revmap:
                self.revmap[rev.revnum, branch] = new_hash
                self._save_metadata()
        self.clear_current_info()
+
    @property
    def meta_data_dir(self):
        '''Directory under .hg where our Subversion metadata lives.'''
        return os.path.join(self.path, '.hg', 'svn')
+
    def meta_file_named(self, name):
        '''Return the full path of the metadata file called `name`.'''
        return os.path.join(self.meta_data_dir, name)
+
    @property
    def revmap_file(self):
        '''Path of the pickled (svn rev, branch) -> hg node map.'''
        return self.meta_file_named('rev_map')
+
    @property
    def svn_url_file(self):
        '''Path of the file recording the source svn URL.'''
        return self.meta_file_named('url')
+
    @property
    def uuid_file(self):
        '''Path of the file recording the svn repository UUID.'''
        return self.meta_file_named('uuid')
+
    @property
    def last_revision_handled_file(self):
        '''Path of the 'last_rev' metadata file (presumably the last svn
        revision converted; it is written by code outside this class).
        '''
        return self.meta_file_named('last_rev')
+
    @property
    def branch_info_file(self):
        '''Path of the pickled branch info map (see self.branches).'''
        return self.meta_file_named('branch_info')
+
    @property
    def tag_info_file(self):
        '''Path of the pickled tag info map (see self.tags).'''
        return self.meta_file_named('tag_info')
+
    @property
    def tag_locations_file(self):
        '''Path of the pickled list of tag locations.'''
        return self.meta_file_named('tag_locations')
+
+    @property
+    def url(self):
+        return open(self.svn_url_file).read()
+
    @stash_exception_on_self
    def delete_entry(self, path, revision_bogus, parent_baton, pool=None):
        '''svn editor callback: record deletion of path (and, if it names
        a directory, of every file under it) in the current revision.
        '''
        if self._is_path_valid(path):
            br_path, branch = self._path_and_branch_for_path(path)
            ha = self.get_parent_revision(self.current_rev.revnum, branch)
            if ha == revlog.nullid:
                # the branch has no parent commit; nothing to delete
                return
            ctx = self.repo.changectx(ha)
            if br_path not in ctx:
                br_path2 = ''
                if br_path != '':
                    br_path2 = br_path + '/'
                # assuming it is a directory
                for f in ctx:
                    if f.startswith(br_path2):
                        f_p = '%s/%s' % (path, f[len(br_path2):])
                        self.deleted_files[f_p] = True
                        self.current_files[f_p] = ''
                        self.ui.status('D %s\n' % f_p)
            self.deleted_files[path] = True
            self.current_files[path] = ''
            self.ui.status('D %s\n' % path)
+
+    @stash_exception_on_self
+    def open_file(self, path, parent_baton, base_revision, p=None):
+        self.current_file = 'foobaz'
+        if self._is_path_valid(path):
+            self.current_file = path
+            self.ui.status('M %s\n' % path)
+            if base_revision != -1:
+                self.base_revision = base_revision
+            else:
+                self.base_revision = None
+            self.should_edit_most_recent_plaintext = True
+
    @stash_exception_on_self
    def add_file(self, path, parent_baton, copyfrom_path,
                 copyfrom_revision, file_pool=None):
        '''svn editor callback: begin adding a file, seeding its content
        and flags from the copy source when one is given.
        '''
        # sentinel value marking "no valid file being edited"
        self.current_file = 'foobaz'
        self.base_revision = None
        if path in self.deleted_files:
            # re-added after a deletion within the same revision
            del self.deleted_files[path]
        if self._is_path_valid(path):
            self.current_file = path
            self.should_edit_most_recent_plaintext = False
            if copyfrom_path:
                self.ui.status('A+ %s\n' % path)
                # TODO(augie) handle this better, actually mark a copy
                (from_file,
                 from_branch) = self._path_and_branch_for_path(copyfrom_path)
                ha = self.get_parent_revision(copyfrom_revision + 1,
                                              from_branch)
                ctx = self.repo.changectx(ha)
                if from_file in ctx:
                    # seed content and flags from the copy source
                    fctx = ctx.filectx(from_file)
                    cur_file = self.current_file
                    self.current_files[cur_file] = fctx.data()
                    self.current_files_symlink[cur_file] = 'l' in fctx.flags()
                    self.current_files_exec[cur_file] = 'x' in fctx.flags()
            else:
                self.ui.status('A %s\n' % path)
+
+
    @stash_exception_on_self
    def add_directory(self, path, parent_baton, copyfrom_path,
                      copyfrom_revision, dir_pool=None):
        '''svn editor callback: add a directory, copying file contents over
        when the directory was copied from a valid repo path.
        '''
        if self._is_path_valid(path):
            junk, branch = self._path_and_branch_for_path(path)
            if not copyfrom_path and not junk:
                # branch root added with no copy source: an empty commit
                # may be needed on this branch
                self.commit_branches_empty[branch] = True
            else:
                self.commit_branches_empty[branch] = False
        if not (self._is_path_valid(path) and copyfrom_path and
                self._is_path_valid(copyfrom_path)):
            return

        cp_f, br_from = self._path_and_branch_for_path(copyfrom_path)
        new_hash = self.get_parent_revision(copyfrom_revision + 1, br_from)
        if new_hash == node.nullid:
            # the copy source isn't in the repo yet; note the missing
            # plaintexts under this directory
            self.missing_plaintexts.add('%s/' % path)
            return
        cp_f_ctx = self.repo.changectx(new_hash)
        if cp_f != '/' and cp_f != '':
            cp_f = '%s/' % cp_f
        else:
            cp_f = ''
        # stage every file under the copy source into this revision
        for f in cp_f_ctx:
            if f.startswith(cp_f):
                f2 = f[len(cp_f):]
                fctx = cp_f_ctx.filectx(f)
                fp_c = path + '/' + f2
                self.current_files[fp_c] = fctx.data()
                self.current_files_exec[fp_c] = 'x' in fctx.flags()
                self.current_files_symlink[fp_c] = 'l' in fctx.flags()
                # TODO(augie) tag copies from files
+
+    @stash_exception_on_self
+    def change_file_prop(self, file_baton, name, value, pool=None):
+        if name == 'svn:executable':
+            self.current_files_exec[self.current_file] = bool(value)
+        elif name == 'svn:special':
+            self.current_files_symlink[self.current_file] = bool(value)
+
+    @stash_exception_on_self
+    def open_directory(self, path, parent_baton, base_revision, dir_pool=None):
+        if self._is_path_valid(path):
+            p_, branch = self._path_and_branch_for_path(path)
+            if p_ == '':
+                self.commit_branches_empty[branch] = False
+
    @stash_exception_on_self
    def apply_textdelta(self, file_baton, base_checksum, pool=None):
        '''svn editor callback: return a window handler that applies a
        text delta on top of the current file's base text.
        '''
        base = ''
        if not self._is_path_valid(self.current_file):
            # no-op handler for files we aren't tracking
            return lambda x: None
        if (self.current_file in self.current_files
            and not self.should_edit_most_recent_plaintext):
            # delta applies on top of content already staged this revision
            base = self.current_files[self.current_file]
        elif (base_checksum is not None or
              self.should_edit_most_recent_plaintext):
                # delta applies on top of the plaintext from the parent
                # revision in the repo
                p_, br = self._path_and_branch_for_path(self.current_file)
                par_rev = self.current_rev.revnum
                if self.base_revision:
                    par_rev = self.base_revision + 1
                ha = self.get_parent_revision(par_rev, br)
                if ha != revlog.nullid:
                    ctx = self.repo.changectx(ha)
                    if not p_ in ctx:
                        self.missing_plaintexts.add(self.current_file)
                        # short circuit exit since we can't do anything anyway
                        return lambda x: None
                    base = ctx.filectx(p_).data()
        source = cStringIO.StringIO(base)
        target = cStringIO.StringIO()
        self.stream = target

        handler, baton = delta.svn_txdelta_apply(source, target, None)
        if not callable(handler):
            # TODO(augie) Raise a real exception, don't just fail an assertion.
            assert False, 'handler not callable, bindings are broken'
        def txdelt_window(window):
            # applies one txdelta window; svn calls this repeatedly and
            # finally with window=None when the delta is complete
            try:
                if not self._is_path_valid(self.current_file):
                    return
                handler(window, baton)
                # window being None means commit this file
                if not window:
                    self.current_files[self.current_file] = target.getvalue()
            except core.SubversionException, e:
                if e.message == 'Delta source ended unexpectedly':
                    # the base text we fed in wasn't the one svn expected;
                    # record the file as a missing plaintext
                    self.missing_plaintexts.add(self.current_file)
                else:
                    self._exception_info = sys.exc_info()
                    raise
            except:
                print len(base), self.current_file
                self._exception_info = sys.exc_info()
                raise
        return txdelt_window
+
class MissingPlainTextError(Exception):
    """Raised when replaying a txdelta needs a source plaintext that the
    repo does not have.
    """
+
class ReplayException(Exception):
    """Raised at commit time when the replay process stashed an exception
    on the editor.
    """