# HG changeset patch # User Patrick Mezard # Date 1349510395 -7200 # Node ID c49c3c418f9dce09919e5c97bbea63940a7420cb # Parent 4d9e80f6ba43ff36475982efd8039a915128985d editor: move RevisionData on the filesystem over a given threshold The implementation is similar to the one in mercurial.patch except the mode and copy information are currently kept outside. It minimizes changes to RevisionData and helps with files whose properties are modified but not their contents, which filestore was not designed to handle. Besides, CopiedFile pushed from the editor may later be handled separately to resolve them at commit time, in which case we would store the metadata outside of the file stores. diff --git a/hgsubversion/editor.py b/hgsubversion/editor.py --- a/hgsubversion/editor.py +++ b/hgsubversion/editor.py @@ -1,6 +1,9 @@ import errno import cStringIO import sys +import tempfile +import shutil +import os from mercurial import util as hgutil from mercurial import revlog @@ -22,12 +25,69 @@ class NeverClosingStringIO(object): # object which prevent us from calling getvalue() afterwards. 
pass +class FileStore(object): + def __init__(self, maxsize=None): + self._tempdir = None + self._files = {} + self._created = 0 + self._maxsize = maxsize + if self._maxsize is None: + self._maxsize = 100*(2**20) + self._size = 0 + self._data = {} + + def setfile(self, fname, data): + if self._maxsize < 0 or (len(data) + self._size) <= self._maxsize: + self._data[fname] = data + self._size += len(data) + else: + if self._tempdir is None: + self._tempdir = tempfile.mkdtemp(prefix='hg-subversion-') + # Avoid filename issues with these simple names + fn = str(self._created) + fp = hgutil.posixfile(os.path.join(self._tempdir, fn), 'wb') + try: + fp.write(data) + finally: + fp.close() + self._created += 1 + self._files[fname] = fn + + def delfile(self, fname): + if fname in self._data: + del self._data[fname] + elif fname in self._files: + path = os.path.join(self._tempdir, self._files.pop(fname)) + os.unlink(path) + + def getfile(self, fname): + if fname in self._data: + return self._data[fname] + if self._tempdir is None or fname not in self._files: + raise IOError + path = os.path.join(self._tempdir, self._files[fname]) + fp = hgutil.posixfile(path, 'rb') + try: + return fp.read() + finally: + fp.close() + + def files(self): + return list(self._files) + list(self._data) + + def close(self): + if self._tempdir is not None: + tempdir, self._tempdir = self._tempdir, None + shutil.rmtree(tempdir) + self._files = None + self._data = None + class RevisionData(object): __slots__ = [ - 'file', 'added', 'files', 'deleted', 'rev', 'execfiles', 'symlinks', 'batons', + 'file', 'added', 'deleted', 'rev', 'execfiles', 'symlinks', 'batons', 'copies', 'missing', 'emptybranches', 'base', 'externals', 'ui', - 'exception', + 'exception', 'store', ] def __init__(self, ui): @@ -35,8 +95,8 @@ class RevisionData(object): self.clear() def clear(self): + self.store = FileStore() self.added = set() - self.files = {} self.deleted = {} self.rev = None self.execfiles = {} @@ -50,7 +110,7 @@ 
class RevisionData(object): self.exception = None def set(self, path, data, isexec=False, islink=False, copypath=None): - self.files[path] = data + self.store.setfile(path, data) self.execfiles[path] = isexec self.symlinks[path] = islink if path in self.deleted: @@ -60,14 +120,29 @@ class RevisionData(object): if copypath is not None: self.copies[path] = copypath + def get(self, path): + if path in self.deleted: + raise IOError(errno.ENOENT, '%s is deleted' % path) + data = self.store.getfile(path) + isexec = self.execfiles.get(path) + islink = self.symlinks.get(path) + copied = self.copies.get(path) + return data, isexec, islink, copied + def delete(self, path): self.deleted[path] = True - if path in self.files: - del self.files[path] + self.store.delfile(path) self.execfiles[path] = False self.symlinks[path] = False self.ui.note('D %s\n' % path) + def files(self): + """Return a sorted list of changed files.""" + files = set(self.store.files()) + for g in (self.symlinks, self.execfiles, self.deleted): + files.update(g) + return sorted(files) + def findmissing(self, svn): if not self.missing: @@ -103,6 +178,9 @@ class RevisionData(object): self.missing = set() self.ui.note('\n') + def close(self): + self.store.close() + class EditingError(Exception): pass diff --git a/hgsubversion/replay.py b/hgsubversion/replay.py --- a/hgsubversion/replay.py +++ b/hgsubversion/replay.py @@ -62,8 +62,13 @@ def _safe_message(msg): return msg.decode('iso-8859-1').encode('utf-8') return msg - def convert_rev(ui, meta, svn, r, tbdelta, firstrun): + try: + return _convert_rev(ui, meta, svn, r, tbdelta, firstrun) + finally: + meta.editor.current.close() + +def _convert_rev(ui, meta, svn, r, tbdelta, firstrun): editor = meta.editor editor.current.clear() @@ -89,14 +94,7 @@ def convert_rev(ui, meta, svn, r, tbdelt if current.missing: raise MissingPlainTextError() - # paranoidly generate the list of files to commit - files_to_commit = set(current.files.keys()) - 
files_to_commit.update(current.symlinks.keys()) - files_to_commit.update(current.execfiles.keys()) - files_to_commit.update(current.deleted.keys()) - # back to a list and sort so we get sane behavior - files_to_commit = list(files_to_commit) - files_to_commit.sort() + files_to_commit = current.files() branch_batches = {} rev = current.rev date = meta.fixdate(rev.date) @@ -156,24 +154,26 @@ def convert_rev(ui, meta, svn, r, tbdelt def filectxfn(repo, memctx, path): current_file = files[path] - if current_file in current.deleted: - raise IOError(errno.ENOENT, '%s is deleted' % path) - copied = current.copies.get(current_file) - flags = parentctx.flags(path) - is_exec = current.execfiles.get(current_file, 'x' in flags) - is_link = current.symlinks.get(current_file, 'l' in flags) - if current_file in current.files: - data = current.files[current_file] - if is_link and data.startswith('link '): - data = data[len('link '):] - elif is_link: - ui.debug('file marked as link, but may contain data: ' - '%s (%r)\n' % (current_file, flags)) + data, isexec, islink, copied = current.get(current_file) + if isexec is None or islink is None: + flags = parentctx.flags(path) + if isexec is None: + isexec = 'x' in flags + if islink is None: + islink = 'l' in flags + + if data is not None: + if islink: + if data.startswith('link '): + data = data[len('link '):] + else: + ui.debug('file marked as link, but may contain data: ' + '%s\n' % current_file) else: data = parentctx.filectx(path).data() return context.memfilectx(path=path, data=data, - islink=is_link, isexec=is_exec, + islink=islink, isexec=isexec, copied=copied) message = _safe_message(rev.message)