comparison stupid.py @ 239:e2214c8fc91f

Put all stupid stuff in its own module (separate from fetch-command).
author Dirkjan Ochtman <dirkjan@ochtman.nl>
date Wed, 08 Apr 2009 17:34:01 +0200
parents fetch_command.py@c90cfa665b81
children 4d3bcd2f26ed
comparison
equal deleted inserted replaced
238:e8b3ca865f93 239:e2214c8fc91f
1 import cStringIO
2 import re
3 import os
4
5 from mercurial import patch
6 from mercurial import node
7 from mercurial import context
8 from mercurial import revlog
9 from mercurial import util as merc_util
10 from svn import core
11 from svn import delta
12
13 import hg_delta_editor
14 import svnwrap
15 import svnexternals
16 import util
17
18
# The regexes below recognize blocks in svn's unified diff output. Each
# multiline pattern matches a literal header block svn emits; the single
# capture group is always the affected file path.

# Header svn prints for a file it refuses to diff as text (binary).
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

# Property-change block recording svn:executable being added...
property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:executable
\+''')

# ...and removed.
property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:executable
-''')

# An 'Index:' header immediately followed by another 'Index:' header,
# i.e. a diff entry with no hunks at all.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

# Any 'Index:' header; used to collect every file mentioned in a diff.
any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

# svn:special (symlink) property being added...
property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:special
\+''')

# ...and removed.
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:special
\-''')
46
47
class BadPatchApply(Exception):
    """Raised when a revision cannot be reconstructed by patching.

    The message (args[0]) explains why; callers catch this and fall back
    to fetching the entire revision instead.
    """
    pass
50
51
def print_your_svn_is_old_message(ui): #pragma: no cover
    """Tell the user we are falling back to the slower diff-based fetch."""
    msg = ("In light of that, I'll fall back and do diffs, but it won't do "
           "as good a job. You should really upgrade your server.\n")
    ui.status(msg)
55
56
def mempatchproxy(parentctx, files):
    """Return a patch.patchfile subclass that patches in memory.

    File contents are read from 'parentctx' instead of the working
    directory, and results are written into the 'files' dict as
    {path: data}, with None marking a deleted file.
    """
    # Avoid circular references patch.patchfile -> mempatch
    patchfile = patch.patchfile

    class mempatch(patchfile):
        def __init__(self, ui, fname, opener, missing=False):
            # 'opener' and 'missing' are accepted for signature
            # compatibility but ignored; data never comes from disk.
            patchfile.__init__(self, ui, fname, None, False)

        def readlines(self, fname):
            # Serve the file's content from the parent changeset.
            if fname not in parentctx:
                raise IOError('Cannot find %r to patch' % fname)
            fctx = parentctx[fname]
            data = fctx.data()
            if 'l' in fctx.flags():
                # Symlinks are represented as text prefixed with 'link '.
                data = 'link ' + data
            return cStringIO.StringIO(data).readlines()

        def writelines(self, fname, lines):
            # Capture the patched result in memory.
            files[fname] = ''.join(lines)

        def unlink(self, fname):
            # None marks the file as removed.
            files[fname] = None

    return mempatch
81
82
def filteriterhunks(hg_editor):
    """Wrap patch.iterhunks, dropping hunks for files hg_editor excludes."""
    iterhunks = patch.iterhunks

    def filterhunks(ui, fp, sourcefile=None):
        emit = False
        for data in iterhunks(ui, fp, sourcefile):
            assert data[0] != 'git', 'Filtering git hunks not supported.'
            if data[0] == 'file':
                # A 'file' entry switches filtering for everything that
                # follows until the next 'file' entry.
                emit = hg_editor._is_file_included(data[1][1])
            if emit:
                yield data
    return filterhunks
97
98
def diff_branchrev(ui, svn, hg_editor, branch, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information. Raise BadPatchApply upon
    error.
    """
    def make_diff_path(branch):
        # Map the local branch name back to its svn repository path.
        if branch == 'trunk' or branch is None:
            return 'trunk'
        elif branch.startswith('../'):
            return branch[3:]
        return 'branches/%s' % branch
    parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
    diff_path = make_diff_path(branch)
    try:
        if br_p == branch:
            # letting patch handle binaries sounded
            # cool, but it breaks patch in sad ways
            d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                     ignore_type=False)
        else:
            # Branch created from another branch: diff against the parent
            # branch at the parent revision.
            d = svn.get_unified_diff(diff_path, r.revnum,
                                     other_path=make_diff_path(br_p),
                                     other_rev=parent_rev,
                                     deleted=True, ignore_type=True)
            if d:
                raise BadPatchApply('branch creation with mods')
    except svnwrap.SubversionRepoCanNotDiff:
        raise BadPatchApply('subversion diffing code is not supported')
    except core.SubversionException, e:
        if (hasattr(e, 'apr_err') and e.apr_err != core.SVN_ERR_FS_NOT_FOUND):
            raise
        raise BadPatchApply('previous revision does not exist')
    if '\0' in d:
        raise BadPatchApply('binary diffs are not supported')
    files_data = {}
    binary_files = {}
    touched_files = {}
    for m in binary_file_re.findall(d):
        # we have to pull each binary file by hand as a fulltext,
        # which sucks but we've got no choice
        binary_files[m] = 1
        touched_files[m] = 1
    # Strip hunk-less entries and executable-property noise so patch only
    # sees real text changes.
    d2 = empty_file_patch_wont_make_re.sub('', d)
    d2 = property_exec_set_re.sub('', d2)
    d2 = property_exec_removed_re.sub('', d2)
    for f in any_file_re.findall(d):
        # Here we ensure that all files, including the new empty ones
        # are marked as touched. Content is loaded on demand.
        touched_files[f] = 1
    # Only run patch when the filtered diff still contains +/- lines.
    if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
        try:
            # Temporarily swap mercurial's patch internals for our
            # in-memory versions; restored in the finally block.
            oldpatchfile = patch.patchfile
            olditerhunks = patch.iterhunks
            patch.patchfile = mempatchproxy(parentctx, files_data)
            patch.iterhunks = filteriterhunks(hg_editor)
            try:
                # We can safely ignore the changed list since we are
                # handling non-git patches. Touched files are known
                # by our memory patcher.
                patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                           {}, strip=0)
            finally:
                patch.patchfile = oldpatchfile
                patch.iterhunks = olditerhunks
        except patch.PatchError:
            # TODO: this happens if the svn server has the wrong mime
            # type stored and doesn't know a file is binary. It would
            # be better to do one file at a time and only do a
            # full fetch on files that had problems.
            raise BadPatchApply('patching failed')
        for x in files_data.iterkeys():
            ui.note('M %s\n' % x)
        # if this patch didn't apply right, fall back to exporting the
        # entire rev.
        if patch_st == -1:
            assert False, ('This should only happen on case-insensitive'
                           ' volumes.')
        elif patch_st == 1:
            # When converting Django, I saw fuzz on .po files that was
            # causing revisions to end up failing verification. If that
            # can be fixed, maybe this won't ever be reached.
            raise BadPatchApply('patching succeeded with fuzz')
    else:
        ui.status('Not using patch for %s, diff had no hunks.\n' %
                  r.revnum)

    # Collect executable-bit changes recorded as svn properties.
    exec_files = {}
    for m in property_exec_removed_re.findall(d):
        exec_files[m] = False
    for m in property_exec_set_re.findall(d):
        exec_files[m] = True
    for m in exec_files:
        touched_files[m] = 1
    # Collect symlink (svn:special) changes.
    link_files = {}
    for m in property_special_set_re.findall(d):
        # TODO(augie) when a symlink is removed, patching will fail.
        # We're seeing that above - there's gotta be a better
        # workaround than just bailing like that.
        assert m in files_data
        link_files[m] = True
    for m in property_special_removed_re.findall(d):
        assert m in files_data
        link_files[m] = False

    # Record deletions for this branch; a None entry means "removed".
    for p in r.paths:
        if p.startswith(diff_path) and r.paths[p].action == 'D':
            p2 = p[len(diff_path)+1:].strip('/')
            if p2 in parentctx:
                files_data[p2] = None
                continue
            # If this isn't in the parent ctx, it must've been a dir
            files_data.update([(f, None) for f in parentctx if f.startswith(p2 + '/')])

    for f in files_data:
        touched_files[f] = 1

    copies = getcopies(svn, hg_editor, branch, diff_path, r, touched_files,
                       parentctx)

    def filectxfn(repo, memctx, path):
        # Deleted files are signalled by raising IOError.
        if path in files_data and files_data[path] is None:
            raise IOError()

        if path in binary_files:
            # Binary fulltexts were not patched; fetch them from svn.
            data, mode = svn.get_file(diff_path + '/' + path, r.revnum)
            isexe = 'x' in mode
            islink = 'l' in mode
        else:
            isexe = exec_files.get(path, 'x' in parentctx.flags(path))
            islink = link_files.get(path, 'l' in parentctx.flags(path))
            data = ''
            if path in files_data:
                data = files_data[path]
                if islink:
                    # Drop the 'link ' prefix added for patching.
                    data = data[len('link '):]
            elif path in parentctx:
                # Untouched by the patch: reuse the parent's content.
                data = parentctx[path].data()

        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexe, copied=copied)

    return list(touched_files), filectxfn
245
def makecopyfinder(r, branchpath, rootdir):
    """Build and return a copy-detection function for revision r.

    The returned finder(path) gives None when no copy information
    applies, or ((source, sourcerev), sourcepath) where 'sourcepath' is
    the path the content was copied from, 'sourcerev' the source svn
    revision and 'source' the recorded copy entry that caused the copy.
    For a plain file copy 'source' and 'sourcepath' coincide; for files
    brought in by a directory copy, 'source' is the copied directory.
    """
    prefix = branchpath + '/'
    fullprefix = rootdir + prefix
    # Keep only copy records that stay entirely within this branch.
    records = []
    for path, entry in r.paths.iteritems():
        if (entry.copyfrom_path
            and path.startswith(prefix)
            and entry.copyfrom_path.startswith(fullprefix)):
            target = path[len(prefix):]
            origin = entry.copyfrom_path[len(fullprefix):]
            records.append((target, (origin, entry.copyfrom_rev)))

    records.sort(reverse=True)
    exact = dict(records)

    def finder(path):
        hit = exact.get(path)
        if hit is not None:
            return hit, hit[0]
        # No exact match: look for the longest copied ancestor directory.
        for target, (origin, rev) in records:
            subdir = target + '/'
            if path.startswith(subdir):
                return (origin, rev), origin + '/' + path[len(subdir):]
        return None

    return finder
288
def getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx):
    """Return a mapping {dest: source} for every file copied into r."""
    # Without a parent there is nothing to have copied from.
    if parentctx.node() == revlog.nullid:
        return {}

    # Group svn copy records by copy event, mirroring replay behaviour:
    # a directory copy is one event covering many files, a single file
    # copy is its own event. We assume copy events match copy sources in
    # revision info.
    grouped = {}
    finder = makecopyfinder(r, branchpath, svn.subdir)
    for fname in files:
        found = finder(fname)
        if found:
            grouped.setdefault(found[0], []).append((fname, found[1]))
    if not grouped:
        return {}

    # Lazily resolve and memoize the changeset context matching each
    # source svn revision.
    ctxcache = {}
    def getctx(svnrev):
        if svnrev not in ctxcache:
            changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
            if changeid != revlog.nullid:
                ctxcache[svnrev] = hg_editor.repo.changectx(changeid)
            else:
                ctxcache[svnrev] = None
        return ctxcache[svnrev]

    # Keep only copies whose sources really are the same files in
    # mercurial terms.
    result = {}
    for (sourcepath, rev), pairs in grouped.iteritems():
        srcctx = getctx(rev)
        if srcctx is None:
            continue
        if hg_editor.aresamefiles(srcctx, parentctx, [p[1] for p in pairs]):
            result.update(pairs)
    return result
332
def fetch_externals(svn, branchpath, r, parentctx):
    """Extract svn:externals for the current revision and branch

    Return an externalsfile instance or None if there are no externals
    to convert and never were.
    """
    externals = svnexternals.externalsfile()
    if '.hgsvnexternals' in parentctx:
        # Start from the externals already recorded on the parent.
        externals.read(parentctx['.hgsvnexternals'].data())
    # Detect property additions only, changes are handled by checking
    # existing entries individually. Projects are unlikely to store
    # externals on many different root directories, so we trade code
    # duplication and complexity for a constant lookup price at every
    # revision in the common case.
    dirs = set(externals)
    if parentctx.node() == revlog.nullid:
        # Initial revision: scan every directory of the branch, plus the
        # branch root itself ('').
        dirs.update([p for p,k in svn.list_files(branchpath, r.revnum) if k == 'd'])
        dirs.add('')
    else:
        # Later revisions: only consider directories touched by r.
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if e.action == 'D':
                continue
            if not path.startswith(branchprefix) and path != branchpath:
                continue
            kind = svn.checkpath(path, r.revnum)
            if kind != 'd':
                continue
            path = path[len(branchprefix):]
            dirs.add(path)
            if e.action == 'M' or (e.action == 'A' and e.copyfrom_path):
                # Do not recurse in copied directories, changes are marked
                # as 'M', except for the copied one.
                continue
            for child, k in svn.list_files(branchprefix + path, r.revnum):
                if k == 'd':
                    dirs.add((path + '/' + child).strip('/'))

    # Retrieve new or updated values
    for dir in dirs:
        try:
            values = svn.list_props(branchpath + '/' + dir, r.revnum)
            externals[dir] = values.get('svn:externals', '')
        except IOError:
            # list_props failed (directory presumably gone); record an
            # empty value so any stale entry is cleared.
            externals[dir] = ''

    if not externals and '.hgsvnexternals' not in parentctx:
        # Do not create empty externals files
        return None
    return externals
383
384
def fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn): 'files' lists every file affected
    in the branch at that revision, and 'filectxfn' is a memctx
    compatible callable giving access to individual file data.
    """
    if parentctx.node() == revlog.nullid:
        # First revision for this branch: everything must be fetched.
        files = [p for p, kind in svn.list_files(branchpath, r.revnum)
                 if kind == 'f']
    else:
        files = []
        prefix = branchpath + '/'
        for fullpath, entry in r.paths.iteritems():
            if not fullpath.startswith(prefix):
                continue
            if not hg_editor._is_path_valid(fullpath):
                continue
            kind = svn.checkpath(fullpath, r.revnum)
            relpath = fullpath[len(prefix):]
            if kind == 'f':
                files.append(relpath)
            elif kind == 'd':
                if entry.action == 'M':
                    # Changes inside the directory appear as their own
                    # path entries.
                    continue
                for child, childkind in svn.list_files(prefix + relpath,
                                                       r.revnum):
                    if childkind == 'f':
                        files.append(relpath + '/' + child)
            elif relpath in parentctx:
                files.append(relpath)
            else:
                # Assume it's a deleted directory
                files.extend(f for f in parentctx
                             if f.startswith(relpath + '/'))

    copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx)

    def filectxfn(repo, memctx, path):
        data, mode = svn.get_file(branchpath + '/' + path, r.revnum)
        copied = copies.get(path)
        return context.memfilectx(path=path, data=data,
                                  islink='l' in mode,
                                  isexec='x' in mode, copied=copied)

    return files, filectxfn
436
437 def svn_server_pull_rev(ui, svn, hg_editor, r):
438 # this server fails at replay
439 branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath, svn.list_files)
440 deleted_branches = {}
441 brpaths = branches.values()
442 bad_branch_paths = {}
443 for br, bp in branches.iteritems():
444 bad_branch_paths[br] = []
445
446 # This next block might be needed, but for now I'm omitting it until it can be
447 # proven necessary.
448 # for bad in brpaths:
449 # if bad.startswith(bp) and len(bad) > len(bp):
450 # bad_branch_paths[br].append(bad[len(bp)+1:])
451
452 # We've go a branch that contains other branches. We have to be careful to
453 # get results similar to real replay in this case.
454 for existingbr in hg_editor.branches:
455 bad = hg_editor._remotename(existingbr)
456 if bad.startswith(bp) and len(bad) > len(bp):
457 bad_branch_paths[br].append(bad[len(bp)+1:])
458 for p in r.paths:
459 if hg_editor._is_path_tag(p):
460 continue
461 branch = hg_editor._localname(p)
462 if r.paths[p].action == 'R' and branch in hg_editor.branches:
463 branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
464 hg_editor.revmap.iteritems()), reverse=True)
465 is_closed = False
466 if len(branchedits) > 0:
467 branchtip = branchedits[0][1]
468 for child in hg_editor.repo[branchtip].children():
469 if child.branch() == 'closed-branches':
470 is_closed = True
471 break
472 if not is_closed:
473 deleted_branches[branch] = branchtip
474
475 date = hg_editor.fixdate(r.date)
476 check_deleted_branches = set()
477 for b in branches:
478 parentctx = hg_editor.repo[hg_editor.get_parent_revision(r.revnum, b)]
479 if parentctx.branch() != (b or 'default'):
480 check_deleted_branches.add(b)
481 kind = svn.checkpath(branches[b], r.revnum)
482 if kind != 'd':
483 # Branch does not exist at this revision. Get parent revision and
484 # remove everything.
485 deleted_branches[b] = parentctx.node()
486 continue
487 else:
488 try:
489 files_touched, filectxfn2 = diff_branchrev(
490 ui, svn, hg_editor, b, r, parentctx)
491 except BadPatchApply, e:
492 # Either this revision or the previous one does not exist.
493 ui.status("Fetching entire revision: %s.\n" % e.args[0])
494 files_touched, filectxfn2 = fetch_branchrev(
495 svn, hg_editor, b, branches[b], r, parentctx)
496
497 externals = fetch_externals(svn, branches[b], r, parentctx)
498 if externals is not None:
499 files_touched.append('.hgsvnexternals')
500
501 def filectxfn(repo, memctx, path):
502 if path == '.hgsvnexternals':
503 if not externals:
504 raise IOError()
505 return context.memfilectx(path=path, data=externals.write(),
506 islink=False, isexec=False, copied=None)
507 for bad in bad_branch_paths[b]:
508 if path.startswith(bad):
509 raise IOError()
510 return filectxfn2(repo, memctx, path)
511
512 extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir)
513 if '' in files_touched:
514 files_touched.remove('')
515 excluded = [f for f in files_touched
516 if not hg_editor._is_file_included(f)]
517 for f in excluded:
518 files_touched.remove(f)
519 if parentctx.node() != node.nullid or files_touched:
520 # TODO(augie) remove this debug code? Or maybe it's sane to have it.
521 for f in files_touched:
522 if f:
523 assert f[0] != '/'
524 current_ctx = context.memctx(hg_editor.repo,
525 [parentctx.node(), revlog.nullid],
526 r.message or util.default_commit_msg,
527 files_touched,
528 filectxfn,
529 hg_editor.authorforsvnauthor(r.author),
530 date,
531 extra)
532 ha = hg_editor.repo.commitctx(current_ctx)
533 branch = extra.get('branch', None)
534 if not branch in hg_editor.branches:
535 hg_editor.branches[branch] = None, 0, r.revnum
536 hg_editor.add_to_revmap(r.revnum, b, ha)
537 hg_editor._save_metadata()
538 util.describe_commit(ui, ha, b)
539 # These are branches which would have an 'R' status in svn log. This means they were
540 # replaced by some other branch, so we need to verify they get marked as closed.
541 for branch in check_deleted_branches:
542 branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
543 hg_editor.revmap.iteritems()), reverse=True)
544 is_closed = False
545 if len(branchedits) > 0:
546 branchtip = branchedits[0][1]
547 for child in hg_editor.repo[branchtip].children():
548 if child.branch() == 'closed-branches':
549 is_closed = True
550 break
551 if not is_closed:
552 deleted_branches[branch] = branchtip
553 for b, parent in deleted_branches.iteritems():
554 if parent == node.nullid:
555 continue
556 parentctx = hg_editor.repo[parent]
557 files_touched = parentctx.manifest().keys()
558 def filectxfn(repo, memctx, path):
559 raise IOError()
560 closed = node.nullid
561 if 'closed-branches' in hg_editor.repo.branchtags():
562 closed = hg_editor.repo['closed-branches'].node()
563 parents = (parent, closed)
564 current_ctx = context.memctx(hg_editor.repo,
565 parents,
566 r.message or util.default_commit_msg,
567 files_touched,
568 filectxfn,
569 hg_editor.authorforsvnauthor(r.author),
570 date,
571 {'branch': 'closed-branches'})
572 ha = hg_editor.repo.commitctx(current_ctx)
573 ui.status('Marked branch %s as closed.\n' % (b or 'default'))
574 hg_editor._save_metadata()