changeset 943:c49c3c418f9d

editor: move RevisionData on the filesystem over a given threshold. The implementation is similar to the one in mercurial.patch except the mode and copy information are currently kept outside. It minimizes changes to RevisionData and helps with files whose properties are modified but not their contents, which filestore was not designed to handle. Besides, CopiedFile pushed from the editor may later be handled separately to resolve them at commit time, in which case we would store the metadata outside of the file stores.
author Patrick Mezard <patrick@mezard.eu>
date Sat, 06 Oct 2012 09:59:55 +0200
parents 4d9e80f6ba43
children d6db289f1548
files hgsubversion/editor.py hgsubversion/replay.py
diffstat 2 files changed, 107 insertions(+), 29 deletions(-) [+]
line wrap: on
line diff
--- a/hgsubversion/editor.py
+++ b/hgsubversion/editor.py
@@ -1,6 +1,9 @@
 import errno
 import cStringIO
 import sys
+import tempfile
+import shutil
+import os
 
 from mercurial import util as hgutil
 from mercurial import revlog
@@ -22,12 +25,69 @@ class NeverClosingStringIO(object):
         # object which prevent us from calling getvalue() afterwards.
         pass
 
+class FileStore(object):
+    def __init__(self, maxsize=None):
+        self._tempdir = None
+        self._files = {}
+        self._created = 0
+        self._maxsize = maxsize
+        if self._maxsize is None:
+            self._maxsize = 100*(2**20)
+        self._size = 0
+        self._data = {}
+
+    def setfile(self, fname, data):
+        if self._maxsize < 0 or (len(data) + self._size) <= self._maxsize:
+            self._data[fname] = data
+            self._size += len(data)
+        else:
+            if self._tempdir is None:
+                self._tempdir = tempfile.mkdtemp(prefix='hg-subversion-')
+            # Avoid filename issues with these simple names
+            fn = str(self._created)
+            fp = hgutil.posixfile(os.path.join(self._tempdir, fn), 'wb')
+            try:
+                fp.write(data)
+            finally:
+                fp.close()
+            self._created += 1
+            self._files[fname] = fn
+
+    def delfile(self, fname):
+        if fname in self._data:
+            del self._data[fname]
+        elif fname in self._files:
+            path = os.path.join(self._tempdir, self._files.pop(fname))
+            os.unlink(path)
+
+    def getfile(self, fname):
+        if fname in self._data:
+            return self._data[fname]
+        if self._tempdir is None or fname not in self._files:
+            raise IOError
+        path = os.path.join(self._tempdir, self._files[fname])
+        fp = hgutil.posixfile(path, 'rb')
+        try:
+            return fp.read()
+        finally:
+            fp.close()
+
+    def files(self):
+        return list(self._files) + list(self._data)
+
+    def close(self):
+        if self._tempdir is not None:
+            tempdir, self._tempdir = self._tempdir, None
+            shutil.rmtree(tempdir)
+        self._files = None
+        self._data = None
+
 class RevisionData(object):
 
     __slots__ = [
-        'file', 'added', 'files', 'deleted', 'rev', 'execfiles', 'symlinks', 'batons',
+        'file', 'added', 'deleted', 'rev', 'execfiles', 'symlinks', 'batons',
         'copies', 'missing', 'emptybranches', 'base', 'externals', 'ui',
-        'exception',
+        'exception', 'store',
     ]
 
     def __init__(self, ui):
@@ -35,8 +95,8 @@ class RevisionData(object):
         self.clear()
 
     def clear(self):
+        self.store = FileStore()
         self.added = set()
-        self.files = {}
         self.deleted = {}
         self.rev = None
         self.execfiles = {}
@@ -50,7 +110,7 @@ class RevisionData(object):
         self.exception = None
 
     def set(self, path, data, isexec=False, islink=False, copypath=None):
-        self.files[path] = data
+        self.store.setfile(path, data)
         self.execfiles[path] = isexec
         self.symlinks[path] = islink
         if path in self.deleted:
@@ -60,14 +120,29 @@ class RevisionData(object):
         if copypath is not None:
             self.copies[path] = copypath
 
+    def get(self, path):
+        if path in self.deleted:
+            raise IOError(errno.ENOENT, '%s is deleted' % path)
+        data = self.store.getfile(path)
+        isexec = self.execfiles.get(path)
+        islink = self.symlinks.get(path)
+        copied = self.copies.get(path)
+        return data, isexec, islink, copied
+
     def delete(self, path):
         self.deleted[path] = True
-        if path in self.files:
-            del self.files[path]
+        self.store.delfile(path)
         self.execfiles[path] = False
         self.symlinks[path] = False
         self.ui.note('D %s\n' % path)
 
+    def files(self):
+        """Return a sorted list of changed files."""
+        files = set(self.store.files())
+        for g in (self.symlinks, self.execfiles, self.deleted):
+            files.update(g)
+        return sorted(files)
+
     def findmissing(self, svn):
 
         if not self.missing:
@@ -103,6 +178,9 @@ class RevisionData(object):
         self.missing = set()
         self.ui.note('\n')
 
+    def close(self):
+        self.store.close()
+
 class EditingError(Exception):
     pass
 
--- a/hgsubversion/replay.py
+++ b/hgsubversion/replay.py
@@ -62,8 +62,13 @@ def _safe_message(msg):
           return msg.decode('iso-8859-1').encode('utf-8')
   return msg
 
-
 def convert_rev(ui, meta, svn, r, tbdelta, firstrun):
+    try:
+        return _convert_rev(ui, meta, svn, r, tbdelta, firstrun)
+    finally:
+        meta.editor.current.close()
+
+def _convert_rev(ui, meta, svn, r, tbdelta, firstrun):
 
     editor = meta.editor
     editor.current.clear()
@@ -89,14 +94,7 @@ def convert_rev(ui, meta, svn, r, tbdelt
     if current.missing:
         raise MissingPlainTextError()
 
-    # paranoidly generate the list of files to commit
-    files_to_commit = set(current.files.keys())
-    files_to_commit.update(current.symlinks.keys())
-    files_to_commit.update(current.execfiles.keys())
-    files_to_commit.update(current.deleted.keys())
-    # back to a list and sort so we get sane behavior
-    files_to_commit = list(files_to_commit)
-    files_to_commit.sort()
+    files_to_commit = current.files()
     branch_batches = {}
     rev = current.rev
     date = meta.fixdate(rev.date)
@@ -156,24 +154,26 @@ def convert_rev(ui, meta, svn, r, tbdelt
 
         def filectxfn(repo, memctx, path):
             current_file = files[path]
-            if current_file in current.deleted:
-                raise IOError(errno.ENOENT, '%s is deleted' % path)
-            copied = current.copies.get(current_file)
-            flags = parentctx.flags(path)
-            is_exec = current.execfiles.get(current_file, 'x' in flags)
-            is_link = current.symlinks.get(current_file, 'l' in flags)
-            if current_file in current.files:
-                data = current.files[current_file]
-                if is_link and data.startswith('link '):
-                    data = data[len('link '):]
-                elif is_link:
-                    ui.debug('file marked as link, but may contain data: '
-                             '%s (%r)\n' % (current_file, flags))
+            data, isexec, islink, copied = current.get(current_file)
+            if isexec is None or islink is None:
+                flags = parentctx.flags(path)
+                if isexec is None:
+                    isexec = 'x' in flags
+                if islink is None:
+                    islink = 'l' in flags
+
+            if data is not None:
+                if islink:
+                    if data.startswith('link '):
+                        data = data[len('link '):]
+                    else:
+                        ui.debug('file marked as link, but may contain data: '
+                            '%s\n' % current_file)
             else:
                 data = parentctx.filectx(path).data()
             return context.memfilectx(path=path,
                                       data=data,
-                                      islink=is_link, isexec=is_exec,
+                                      islink=islink, isexec=isexec,
                                       copied=copied)
 
         message = _safe_message(rev.message)