changeset 859:1d07e86f5797

stupid: handle changes in svn 1.7 diff format Metadata changes are now represented like: Property changes on: a ___________________________________________________________________ Added: svn:executable ## -0,0 +1 ## +* instead of: Property changes on: a ___________________________________________________________________ Added: svn:executable + * Also, I got tired of massaging the diff with regexps, so I extracted the parsing logic in parsediff(). This is no small refactoring but it makes things cleaner and the test suite pass on 1.6 and 1.7 so...
author Patrick Mezard <patrick@mezard.eu>
date Thu, 19 Apr 2012 14:59:50 +0200
parents bb6a013abaed
children 4cf20a687bff c24130e9ddb7
files hgsubversion/stupid.py tests/test_fetch_command_regexes.py tests/test_fetch_symlinks.py
diffstat 3 files changed, 188 insertions(+), 63 deletions(-) [+]
line wrap: on
line diff
--- a/hgsubversion/stupid.py
+++ b/hgsubversion/stupid.py
@@ -12,40 +12,109 @@ import svnwrap
 import svnexternals
 import util
 
+# Here is a diff mixing content and property changes in svn >= 1.7
+#
+# Index: a
+# ===================================================================
+# --- a	(revision 12)
+# +++ a	(working copy)
+# @@ -1,2 +1,3 @@
+#  a
+#  a
+# +a
+# 
+# Property changes on: a
+# ___________________________________________________________________
+# Added: svn:executable
+# ## -0,0 +1 ##
+# +*
+
+class ParseError(Exception):
+    pass
 
-binary_file_re = re.compile(r'''Index: ([^\n]*)
+index_header = r'''Index: ([^\n]*)
 =*
-Cannot display: file marked as a binary type.''')
-
-property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
-_*
-(?:Added|Name): svn:executable
-   \+''')
-
-property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
-_*
-(?:Deleted|Name): svn:executable
-   -''')
-
-empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')
-
-any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)
+'''
 
-property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
+property_header = r'''Property changes on: ([^\n]*)
 _*
-(?:Added|Name): svn:special
-   \+''')
-
-property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
-_*
-(?:Deleted|Name): svn:special
-   \-''')
+'''
+
+headers_re = re.compile('(?:' + '|'.join([
+            index_header,
+            property_header,
+            ]) + ')')
+
+property_special_added = r'''(?:Added|Name): (svn:special)
+(?:   \+|## -0,0 \+1 ##
+\+)'''
+
+property_special_deleted = r'''(?:Deleted|Name): (svn:special)
+(?:   \-|## -1 \+0,0 ##
+\-)'''
+
+property_exec_added = r'''(?:Added|Name): (svn:executable)
+(?:   \+|## -0,0 \+1 ##
+\+)'''
+
+property_exec_deleted = r'''(?:Deleted|Name): (svn:executable)
+(?:   \-|## -1 \+0,0 ##
+\-)'''
+
+properties_re = re.compile('(?:' + '|'.join([
+            property_special_added,
+            property_special_deleted,
+            property_exec_added,
+            property_exec_deleted,
+            ]) + ')')
+
+class filediff:
+    def __init__(self, name):
+        self.name = name
+        self.diff = None
+        self.binary = False
+        self.executable = None
+        self.symlink = None
+        self.hasprops = False
+
+    def isempty(self):
+        return (not self.diff and not self.binary and not self.hasprops)
+
+def parsediff(diff):
+    changes = {}
+    headers = headers_re.split(diff)[1:]
+    if (len(headers) % 3) != 0:
+        # headers should be a sequence of (index file, property file, data)
+        raise ParseError('unexpected diff format')
+    files = []
+    for i in xrange(len(headers)/3):
+        iname, pname, data = headers[3*i:3*i+3]
+        fname = iname or pname
+        if fname not in changes:
+            changes[fname] = filediff(fname)
+            files.append(changes[fname])
+        f = changes[fname]
+        if iname is not None:
+            if data.strip():
+                f.binary = data.lstrip().startswith(
+                    'Cannot display: file marked as a binary type.')
+                if not f.binary and '@@' in data:
+                    # Non-empty diff
+                    f.diff = data
+        else:
+            f.hasprops = True
+            for m in properties_re.finditer(data):
+                p = m.group(1, 2, 3, 4)
+                if p[0] or p[1]:
+                    f.symlink = bool(p[0])
+                elif p[2] or p[3]:
+                    f.executable = bool(p[2])
+    return files
 
 
 class BadPatchApply(Exception):
     pass
 
-
 def print_your_svn_is_old_message(ui): # pragma: no cover
     ui.status("In light of that, I'll fall back and do diffs, but it won't do "
               "as good a job. You should really upgrade your server.\n")
@@ -215,17 +284,12 @@ def diff_branchrev(ui, svn, meta, branch
     if '\0' in d:
         raise BadPatchApply('binary diffs are not supported')
     files_data = {}
-    # we have to pull each binary file by hand as a fulltext,
-    # which sucks but we've got no choice
-    binary_files = set(binary_file_re.findall(d))
-    touched_files = set(binary_files)
-    d2 = empty_file_patch_wont_make_re.sub('', d)
-    d2 = property_exec_set_re.sub('', d2)
-    d2 = property_exec_removed_re.sub('', d2)
+    changed = parsediff(d)
     # Here we ensure that all files, including the new empty ones
     # are marked as touched. Content is loaded on demand.
-    touched_files.update(any_file_re.findall(d))
-    if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
+    touched_files = set(f.name for f in changed)
+    d2 = '\n'.join(f.diff for f in changed if f.diff)
+    if changed:
         files_data = patchrepo(ui, meta, parentctx, cStringIO.StringIO(d2))
         for x in files_data.iterkeys():
             ui.note('M  %s\n' % x)
@@ -233,23 +297,6 @@ def diff_branchrev(ui, svn, meta, branch
         ui.status('Not using patch for %s, diff had no hunks.\n' %
                   r.revnum)
 
-    exec_files = {}
-    for m in property_exec_removed_re.findall(d):
-        exec_files[m] = False
-    for m in property_exec_set_re.findall(d):
-        exec_files[m] = True
-    touched_files.update(exec_files)
-    link_files = {}
-    for m in property_special_set_re.findall(d):
-        # TODO(augie) when a symlink is removed, patching will fail.
-        # We're seeing that above - there's gotta be a better
-        # workaround than just bailing like that.
-        assert m in files_data
-        link_files[m] = True
-    for m in property_special_removed_re.findall(d):
-        assert m in files_data
-        link_files[m] = False
-
     unknown_files = set()
     for p in r.paths:
         action = r.paths[p].action
@@ -276,6 +323,11 @@ def diff_branchrev(ui, svn, meta, branch
     copies = getcopies(svn, meta, branch, branchpath, r, touched_files,
                        parentctx)
 
+    binary_files = set(f.name for f in changed if f.binary)
+    exec_files = dict((f.name, f.executable) for f in changed
+                      if f.executable is not None)
+    link_files = dict((f.name, f.symlink) for f in changed
+                      if f.symlink is not None)
     def filectxfn(repo, memctx, path):
         if path in files_data and files_data[path] is None:
             raise IOError(errno.ENOENT, '%s is deleted' % path)
--- a/tests/test_fetch_command_regexes.py
+++ b/tests/test_fetch_command_regexes.py
@@ -48,28 +48,99 @@ Name: svn:special
 
 class RegexTests(unittest.TestCase):
     def test_empty_file_re(self):
-        matches = stupid.empty_file_patch_wont_make_re.findall(two_empties)
-        assert sorted(matches) == ['__init__.py', 'bar/__init__.py']
+        changed = stupid.parsediff(two_empties)
+        self.assertEqual(3, len(changed))
+        self.assertEqual('__init__.py', changed[0].name)
+        self.assert_(changed[0].isempty())
+        self.assertEqual('bar/__init__.py', changed[1].name)
+        self.assert_(changed[1].isempty())
+        self.assertEqual('bar/test_muhaha.py', changed[2].name)
+        self.assert_(not changed[2].isempty())
 
     def test_any_matches_just_one(self):
         pat = '''Index: trunk/django/contrib/admin/urls/__init__.py
 ===================================================================
 '''
-        matches = stupid.any_file_re.findall(pat)
-        assert len(matches) == 1
+        changed = stupid.parsediff(pat)
+        self.assertEqual(['trunk/django/contrib/admin/urls/__init__.py'],
+                         [f.name for f in changed])
 
     def test_special_re(self):
-        matches = stupid.property_special_set_re.findall(special_delta)
-        assert len(matches) == 1
+        changed = stupid.parsediff(special_delta)
+        self.assertEqual(1, len(changed))
+        self.assert_(changed[0].symlink)
 
     def test_any_file_re(self):
-        matches = stupid.any_file_re.findall(two_empties)
-        assert sorted(matches) == ['__init__.py', 'bar/__init__.py',
-                                   'bar/test_muhaha.py']
+        changed = stupid.parsediff(two_empties)
+        self.assertEqual(['__init__.py', 'bar/__init__.py', 'bar/test_muhaha.py'],
+                         [f.name for f in changed])
 
     def test_binary_file_re(self):
-        matches = stupid.binary_file_re.findall(binary_delta)
-        assert matches == ['trunk/functional_tests/doc_tests/test_doctest_fixtures/doctest_fixtures_fixtures.pyc']
+        changed = stupid.parsediff(binary_delta)
+        binaries = [f.name for f in changed if f.binary]
+        self.assertEqual(['trunk/functional_tests/doc_tests/test_doctest_fixtures/doctest_fixtures_fixtures.pyc'],
+                         binaries)
+
+    def test_diff16(self):
+        data = """Index: d3/d
+===================================================================
+--- d3/d        (revision 0)
++++ d3/d        (revision 6)
+@@ -0,0 +1 @@
++d
+
+Property changes on: d3
+___________________________________________________________________
+Added: svn:externals
+   + ^/trunk/common/ext ext3
+
+
+
+Property changes on: .
+___________________________________________________________________
+Added: svn:mergeinfo
+   Merged /branches/branch:r4-5
+"""
+        changed = stupid.parsediff(data)
+        self.assertEqual(['d3/d', 'd3', '.'], [f.name for f in changed])
+        data = """Property changes on: empty1
+___________________________________________________________________
+Deleted: svn:executable
+   - *
+
+
+Property changes on: empty2
+___________________________________________________________________
+Added: svn:executable
+   + *
+
+
+Property changes on: binary1
+___________________________________________________________________
+Deleted: svn:executable
+   - *
+
+
+Property changes on: text1
+___________________________________________________________________
+Deleted: svn:executable
+   - *
+
+
+Property changes on: binary2
+___________________________________________________________________
+Added: svn:executable
+   + *
+
+
+Property changes on: text2
+___________________________________________________________________
+Added: svn:executable
+   + *
+"""
+        changed = stupid.parsediff(data)
+        self.assertEqual(['empty1', 'empty2', 'binary1', 'text1', 'binary2', 'text2'],
+                         [f.name for f in changed])
 
 def suite():
     return unittest.TestLoader().loadTestsFromTestCase(RegexTests)
--- a/tests/test_fetch_symlinks.py
+++ b/tests/test_fetch_symlinks.py
@@ -43,7 +43,9 @@ class TestFetchSymlinks(test_util.TestBa
         for rev in repo:
             ctx = repo[rev]
             for f in ctx.manifest():
-                self.assertEqual(f in links[rev], 'l' in ctx[f].flags())
+                l = 'l' in ctx[f].flags()
+                lref = f in links[rev]
+                self.assertEqual(lref, l, '%r != %r for %s@%r' % (lref, l, f, rev))
                 if f in links[rev]:
                     self.assertEqual(links[rev][f], ctx[f].data())
             for f in links[rev]: