Mercurial > hgsubversion
changeset 943:c49c3c418f9d
editor: move RevisionData on the filesystem over a given threshold
The implementation is similar to the one in mercurial.patch, except that the
mode and copy information are currently kept outside the store. This minimizes
changes to RevisionData and helps with files whose properties are
modified but not their contents, which filestore was not designed to
handle. Besides, CopiedFile entries pushed from the editor may later be handled
separately, to resolve them at commit time, in which case we would store
the metadata outside of the file stores.
author | Patrick Mezard <patrick@mezard.eu> |
---|---|
date | Sat, 06 Oct 2012 09:59:55 +0200 |
parents | 4d9e80f6ba43 |
children | d6db289f1548 |
files | hgsubversion/editor.py hgsubversion/replay.py |
diffstat | 2 files changed, 107 insertions(+), 29 deletions(-) [+] |
line wrap: on
line diff
--- a/hgsubversion/editor.py
+++ b/hgsubversion/editor.py
@@ -1,6 +1,9 @@
 import errno
 import cStringIO
 import sys
+import tempfile
+import shutil
+import os
 
 from mercurial import util as hgutil
 from mercurial import revlog
@@ -22,12 +25,69 @@ class NeverClosingStringIO(object):
         # object which prevent us from calling getvalue() afterwards.
         pass
 
+class FileStore(object):
+    def __init__(self, maxsize=None):
+        self._tempdir = None
+        self._files = {}
+        self._created = 0
+        self._maxsize = maxsize
+        if self._maxsize is None:
+            self._maxsize = 100*(2**20)
+        self._size = 0
+        self._data = {}
+
+    def setfile(self, fname, data):
+        if self._maxsize < 0 or (len(data) + self._size) <= self._maxsize:
+            self._data[fname] = data
+            self._size += len(data)
+        else:
+            if self._tempdir is None:
+                self._tempdir = tempfile.mkdtemp(prefix='hg-subversion-')
+            # Avoid filename issues with these simple names
+            fn = str(self._created)
+            fp = hgutil.posixfile(os.path.join(self._tempdir, fn), 'wb')
+            try:
+                fp.write(data)
+            finally:
+                fp.close()
+            self._created += 1
+            self._files[fname] = fn
+
+    def delfile(self, fname):
+        if fname in self._data:
+            del self._data[fname]
+        elif fname in self._files:
+            path = os.path.join(self._tempdir, self._files.pop(fname))
+            os.unlink(path)
+
+    def getfile(self, fname):
+        if fname in self._data:
+            return self._data[fname]
+        if self._tempdir is None or fname not in self._files:
+            raise IOError
+        path = os.path.join(self._tempdir, self._files[fname])
+        fp = hgutil.posixfile(path, 'rb')
+        try:
+            return fp.read()
+        finally:
+            fp.close()
+
+    def files(self):
+        return list(self._files) + list(self._data)
+
+    def close(self):
+        if self._tempdir is not None:
+            tempdir, self._tempdir = self._tempdir, None
+            shutil.rmtree(tempdir)
+        self._files = None
+        self._data = None
+
 class RevisionData(object):
 
     __slots__ = [
-        'file', 'added', 'files', 'deleted', 'rev', 'execfiles', 'symlinks', 'batons',
+        'file', 'added', 'deleted', 'rev', 'execfiles', 'symlinks', 'batons',
         'copies', 'missing', 'emptybranches', 'base', 'externals', 'ui',
-        'exception',
+        'exception', 'store',
     ]
 
     def __init__(self, ui):
@@ -35,8 +95,8 @@ class RevisionData(object):
         self.clear()
 
     def clear(self):
+        self.store = FileStore()
         self.added = set()
-        self.files = {}
         self.deleted = {}
         self.rev = None
         self.execfiles = {}
@@ -50,7 +110,7 @@ class RevisionData(object):
         self.exception = None
 
     def set(self, path, data, isexec=False, islink=False, copypath=None):
-        self.files[path] = data
+        self.store.setfile(path, data)
         self.execfiles[path] = isexec
         self.symlinks[path] = islink
         if path in self.deleted:
@@ -60,14 +120,29 @@ class RevisionData(object):
         if copypath is not None:
             self.copies[path] = copypath
 
+    def get(self, path):
+        if path in self.deleted:
+            raise IOError(errno.ENOENT, '%s is deleted' % path)
+        data = self.store.getfile(path)
+        isexec = self.execfiles.get(path)
+        islink = self.symlinks.get(path)
+        copied = self.copies.get(path)
+        return data, isexec, islink, copied
+
     def delete(self, path):
         self.deleted[path] = True
-        if path in self.files:
-            del self.files[path]
+        self.store.delfile(path)
         self.execfiles[path] = False
         self.symlinks[path] = False
         self.ui.note('D %s\n' % path)
 
+    def files(self):
+        """Return a sorted list of changed files."""
+        files = set(self.store.files())
+        for g in (self.symlinks, self.execfiles, self.deleted):
+            files.update(g)
+        return sorted(files)
+
     def findmissing(self, svn):
 
         if not self.missing:
@@ -103,6 +178,9 @@ class RevisionData(object):
         self.missing = set()
         self.ui.note('\n')
 
+    def close(self):
+        self.store.close()
+
 class EditingError(Exception):
     pass
 
--- a/hgsubversion/replay.py
+++ b/hgsubversion/replay.py
@@ -62,8 +62,13 @@ def _safe_message(msg):
         return msg.decode('iso-8859-1').encode('utf-8')
     return msg
 
-
 def convert_rev(ui, meta, svn, r, tbdelta, firstrun):
+    try:
+        return _convert_rev(ui, meta, svn, r, tbdelta, firstrun)
+    finally:
+        meta.editor.current.close()
+
+def _convert_rev(ui, meta, svn, r, tbdelta, firstrun):
 
     editor = meta.editor
     editor.current.clear()
@@ -89,14 +94,7 @@ def convert_rev(ui, meta, svn, r, tbdelt
     if current.missing:
         raise MissingPlainTextError()
 
-    # paranoidly generate the list of files to commit
-    files_to_commit = set(current.files.keys())
-    files_to_commit.update(current.symlinks.keys())
-    files_to_commit.update(current.execfiles.keys())
-    files_to_commit.update(current.deleted.keys())
-    # back to a list and sort so we get sane behavior
-    files_to_commit = list(files_to_commit)
-    files_to_commit.sort()
+    files_to_commit = current.files()
     branch_batches = {}
     rev = current.rev
     date = meta.fixdate(rev.date)
@@ -156,24 +154,26 @@ def convert_rev(ui, meta, svn, r, tbdelt
 
         def filectxfn(repo, memctx, path):
             current_file = files[path]
-            if current_file in current.deleted:
-                raise IOError(errno.ENOENT, '%s is deleted' % path)
-            copied = current.copies.get(current_file)
-            flags = parentctx.flags(path)
-            is_exec = current.execfiles.get(current_file, 'x' in flags)
-            is_link = current.symlinks.get(current_file, 'l' in flags)
-            if current_file in current.files:
-                data = current.files[current_file]
-                if is_link and data.startswith('link '):
-                    data = data[len('link '):]
-                elif is_link:
-                    ui.debug('file marked as link, but may contain data: '
-                             '%s (%r)\n' % (current_file, flags))
+            data, isexec, islink, copied = current.get(current_file)
+            if isexec is None or islink is None:
+                flags = parentctx.flags(path)
+                if isexec is None:
+                    isexec = 'x' in flags
+                if islink is None:
+                    islink = 'l' in flags
+
+            if data is not None:
+                if islink:
+                    if data.startswith('link '):
+                        data = data[len('link '):]
+                    else:
+                        ui.debug('file marked as link, but may contain data: '
+                                 '%s\n' % current_file)
             else:
                 data = parentctx.filectx(path).data()
             return context.memfilectx(path=path,
                                       data=data,
-                                      islink=is_link, isexec=is_exec,
+                                      islink=islink, isexec=isexec,
                                       copied=copied)
 
         message = _safe_message(rev.message)