comparison stupid.py @ 304:ce676eff002b

First merge, totally untested.
author Dan Villiom Podlaski Christiansen <danchr@gmail.com>
date Fri, 01 May 2009 10:28:59 +0200
parents fetch_command.py@79440ed81011 fetch_command.py@d978192f0d63
children 1d48d9a34c19
comparison
equal deleted inserted replaced
303:f423a8780832 304:ce676eff002b
1 import cStringIO
2 import re
3
4 from mercurial import patch
5 from mercurial import node
6 from mercurial import context
7 from mercurial import revlog
8 from svn import core
9
10 import svnwrap
11 import svnexternals
12 import util
13
14
# Regular expressions matched against svn's unified diff output.  They are
# used both to strip sections the plain-text patcher cannot handle and to
# collect per-file metadata (binary contents, exec/symlink property flags).

# File marked with a binary mime type: svn cannot show a diff for it, so
# the file must be pulled by hand as a fulltext.
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

# svn:executable property added: set the file's 'x' flag.
property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:executable
\+''')

# svn:executable property removed: clear the file's 'x' flag.
property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:executable
-''')

# An "Index:" header immediately followed by another "Index:" header, i.e.
# a file entry with no hunks at all; stripped before patching.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

# Any file mentioned in the diff; used to mark every file as touched.
any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

# svn:special property added: the entry became a special file (symlink).
property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:special
\+''')

# svn:special property removed: the entry is no longer special.
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:special
\-''')
42
43
class BadPatchApply(Exception):
    """Signal that a revision cannot be built by patching.

    Callers catch this and fall back to fetching the branch contents
    wholesale; the single argument is a human-readable reason shown to
    the user (read back via args[0]).
    """
    pass
46
47
def print_your_svn_is_old_message(ui): #pragma: no cover
    """Warn the user that the server lacks replay and diffs will be used."""
    msg = ("In light of that, I'll fall back and do diffs, but it won't do "
           "as good a job. You should really upgrade your server.\n")
    ui.status(msg)
51
52
def mempatchproxy(parentctx, files):
    """Build an in-memory substitute for patch.patchfile.

    The returned class reads original file contents out of parentctx and
    records patched results into the files dict (None meaning deletion)
    instead of touching a working directory.
    """
    # Bind the real class now: patch.patchfile is monkeypatched to point
    # at the proxy while a patch is applied, so a later lookup would be
    # circular.
    patchfile = patch.patchfile

    class mempatch(patchfile):
        def __init__(self, ui, fname, opener, missing=False):
            patchfile.__init__(self, ui, fname, None, False)

        def readlines(self, fname):
            if fname not in parentctx:
                raise IOError('Cannot find %r to patch' % fname)
            ctx = parentctx[fname]
            if 'l' in ctx.flags():
                # Symlinks are diffed/patched as 'link <target>' text.
                contents = 'link ' + ctx.data()
            else:
                contents = ctx.data()
            return cStringIO.StringIO(contents).readlines()

        def writelines(self, fname, lines):
            files[fname] = ''.join(lines)

        def unlink(self, fname):
            # Deletion is recorded as a None entry.
            files[fname] = None

    return mempatch
77
78
def filteriterhunks(hg_editor):
    """Return a replacement for patch.iterhunks that drops all hunks
    belonging to files excluded by hg_editor's file filter."""
    iterhunks = patch.iterhunks

    def filterhunks(ui, fp, sourcefile=None):
        include = False
        for chunk in iterhunks(ui, fp, sourcefile):
            assert chunk[0] != 'git', 'Filtering git hunks not supported.'
            if chunk[0] == 'file':
                # A 'file' event starts a new file; decide once whether
                # the following hunks should be kept.
                include = hg_editor._is_file_included(chunk[1][1])
            if include:
                yield chunk
    return filterhunks
93
94
def diff_branchrev(ui, svn, hg_editor, branch, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information. Raise BadPatchApply upon
    error, signalling the caller to fall back to a full fetch.
    """
    def make_diff_path(branch):
        # Map a local branch name back to its repository-relative path.
        if branch == 'trunk' or branch is None:
            return 'trunk'
        elif branch.startswith('../'):
            return branch[3:]
        return 'branches/%s' % branch
    parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
    diff_path = make_diff_path(branch)
    try:
        if br_p == branch:
            # letting patch handle binaries sounded
            # cool, but it breaks patch in sad ways
            d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                     ignore_type=False)
        else:
            # Parent lives on a different branch: diff across the copy.
            d = svn.get_unified_diff(diff_path, r.revnum,
                                     other_path=make_diff_path(br_p),
                                     other_rev=parent_rev,
                                     deleted=True, ignore_type=True)
            if d:
                raise BadPatchApply('branch creation with mods')
    except svnwrap.SubversionRepoCanNotDiff:
        raise BadPatchApply('subversion diffing code is not supported')
    except core.SubversionException, e:
        if (hasattr(e, 'apr_err') and e.apr_err != core.SVN_ERR_FS_NOT_FOUND):
            raise
        raise BadPatchApply('previous revision does not exist')
    if '\0' in d:
        raise BadPatchApply('binary diffs are not supported')
    # files_data: patched text keyed by path (None == deleted)
    files_data = {}
    # binary_files: paths that must be fetched as fulltexts
    binary_files = {}
    # touched_files: every path affected in this revision (used as a set)
    touched_files = {}
    for m in binary_file_re.findall(d):
        # we have to pull each binary file by hand as a fulltext,
        # which sucks but we've got no choice
        binary_files[m] = 1
        touched_files[m] = 1
    # Strip the sections the plain-text patcher cannot digest: hunkless
    # file entries and svn:executable property change blocks.
    d2 = empty_file_patch_wont_make_re.sub('', d)
    d2 = property_exec_set_re.sub('', d2)
    d2 = property_exec_removed_re.sub('', d2)
    for f in any_file_re.findall(d):
        # Here we ensure that all files, including the new empty ones
        # are marked as touched. Content is loaded on demand.
        touched_files[f] = 1
    if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
        try:
            # Temporarily route mercurial's patcher through our in-memory
            # proxy and hunk filter; always restore the originals.
            oldpatchfile = patch.patchfile
            olditerhunks = patch.iterhunks
            patch.patchfile = mempatchproxy(parentctx, files_data)
            patch.iterhunks = filteriterhunks(hg_editor)
            try:
                # We can safely ignore the changed list since we are
                # handling non-git patches. Touched files are known
                # by our memory patcher.
                patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                           {}, strip=0)
            finally:
                patch.patchfile = oldpatchfile
                patch.iterhunks = olditerhunks
        except patch.PatchError:
            # TODO: this happens if the svn server has the wrong mime
            # type stored and doesn't know a file is binary. It would
            # be better to do one file at a time and only do a
            # full fetch on files that had problems.
            raise BadPatchApply('patching failed')
        for x in files_data.iterkeys():
            ui.note('M %s\n' % x)
        # if this patch didn't apply right, fall back to exporting the
        # entire rev.
        if patch_st == -1:
            assert False, ('This should only happen on case-insensitive'
                           ' volumes.')
        elif patch_st == 1:
            # When converting Django, I saw fuzz on .po files that was
            # causing revisions to end up failing verification. If that
            # can be fixed, maybe this won't ever be reached.
            raise BadPatchApply('patching succeeded with fuzz')
    else:
        ui.status('Not using patch for %s, diff had no hunks.\n' %
                  r.revnum)

    # Recover executable-bit changes from the property blocks stripped
    # out of the diff above.
    exec_files = {}
    for m in property_exec_removed_re.findall(d):
        exec_files[m] = False
    for m in property_exec_set_re.findall(d):
        exec_files[m] = True
    for m in exec_files:
        touched_files[m] = 1
    # Same for svn:special (symlink) property changes.
    link_files = {}
    for m in property_special_set_re.findall(d):
        # TODO(augie) when a symlink is removed, patching will fail.
        # We're seeing that above - there's gotta be a better
        # workaround than just bailing like that.
        assert m in files_data
        link_files[m] = True
    for m in property_special_removed_re.findall(d):
        assert m in files_data
        link_files[m] = False

    # Record deletions listed in the revision's changed-path info.
    for p in r.paths:
        if p.startswith(diff_path) and r.paths[p].action == 'D':
            p2 = p[len(diff_path)+1:].strip('/')
            if p2 in parentctx:
                files_data[p2] = None
                continue
            # If this isn't in the parent ctx, it must've been a dir
            files_data.update([(f, None) for f in parentctx if f.startswith(p2 + '/')])

    for f in files_data:
        touched_files[f] = 1

    copies = getcopies(svn, hg_editor, branch, diff_path, r, touched_files,
                       parentctx)

    def filectxfn(repo, memctx, path):
        # Raising IOError tells memctx the file was deleted.
        if path in files_data and files_data[path] is None:
            raise IOError()

        if path in binary_files:
            # Binary file: fetch the fulltext and flags from svn directly.
            data, mode = svn.get_file(diff_path + '/' + path, r.revnum)
            isexe = 'x' in mode
            islink = 'l' in mode
        else:
            # Flags come from the property changes, defaulting to the
            # parent context's flags for untouched properties.
            isexe = exec_files.get(path, 'x' in parentctx.flags(path))
            islink = link_files.get(path, 'l' in parentctx.flags(path))
            data = ''
            if path in files_data:
                data = files_data[path]
                if islink:
                    # Drop the 'link ' prefix added for patching.
                    data = data[len('link '):]
            elif path in parentctx:
                data = parentctx[path].data()

        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexe, copied=copied)

    return list(touched_files), filectxfn
241
def makecopyfinder(r, branchpath, rootdir):
    """Return a function detecting copies.

    The returned copyfinder(path) gives None when no copy information is
    available, and ((source, sourcerev), sourcepath) otherwise, where
    "sourcepath" is the copy source path, "sourcerev" the source svn
    revision and "source" the copy record path causing the copy to occur.
    For a single copied file "sourcepath" and "source" are identical,
    while file copies detected from directory copies report the copied
    source directory in "source".
    """
    # Keep only copy records with both endpoints inside the current
    # branch; cross-branch copies are ignored.
    branchpath = branchpath + '/'
    fullbranchpath = rootdir + branchpath
    copies = []
    for path, entry in r.paths.iteritems():
        if not entry.copyfrom_path:
            continue
        if not (path.startswith(branchpath) and
                entry.copyfrom_path.startswith(fullbranchpath)):
            continue
        copies.append((path[len(branchpath):],
                       (entry.copyfrom_path[len(fullbranchpath):],
                        entry.copyfrom_rev)))

    # Longest destinations first, so the directory scan below prefers the
    # deepest matching copied directory.
    copies.sort(reverse=True)
    exactcopies = dict(copies)

    def finder(path):
        exact = exactcopies.get(path)
        if exact is not None:
            return exact, exact[0]
        # Fall back to an enclosing copied directory, longest first.
        for dest, (source, sourcerev) in copies:
            prefix = dest + '/'
            if path.startswith(prefix):
                return (source, sourcerev), source + '/' + path[len(prefix):]
        return None

    return finder
284
def getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx):
    """Return a mapping {dest: source} for every file copied into r."""
    if parentctx.node() == revlog.nullid:
        # Nothing to copy from: this is the first revision.
        return {}

    # Group svn copy records by their (source, revision) copy event,
    # mirroring replay behaviour where a directory copy is one event for
    # all files in the directory and a file copy is its own event.  We
    # assume copy events match copy sources in the revision info.
    finder = makecopyfinder(r, branchpath, svn.subdir)
    svncopies = {}
    for f in files:
        found = finder(f)
        if found:
            svncopies.setdefault(found[0], []).append((f, found[1]))
    if not svncopies:
        return {}

    # Map source svn revisions to changeset contexts, caching lookups
    # (None when the source revision has no mercurial counterpart).
    ctxs = {}
    def getctx(svnrev):
        if svnrev not in ctxs:
            changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
            if changeid == revlog.nullid:
                ctxs[svnrev] = None
            else:
                ctxs[svnrev] = hg_editor.repo.changectx(changeid)
        return ctxs[svnrev]

    # Keep only copy events whose source files are unchanged between the
    # source context and the parent context, so the copies make sense in
    # mercurial terms.
    hgcopies = {}
    for (sourcepath, rev), entries in svncopies.iteritems():
        sourcectx = getctx(rev)
        if sourcectx is None:
            continue
        if not hg_editor.aresamefiles(sourcectx, parentctx,
                                      [s for _, s in entries]):
            continue
        hgcopies.update(entries)
    return hgcopies
328
def fetch_externals(svn, branchpath, r, parentctx):
    """Extract svn:externals for the current revision and branch.

    Return an externalsfile instance or None if there are no externals
    to convert and never were.
    """
    externals = svnexternals.externalsfile()
    if '.hgsvnexternals' in parentctx:
        # Start from the externals already recorded in the parent.
        externals.read(parentctx['.hgsvnexternals'].data())
    # Detect property additions only, changes are handled by checking
    # existing entries individually. Projects are unlikely to store
    # externals on many different root directories, so we trade code
    # duplication and complexity for a constant lookup price at every
    # revision in the common case.
    dirs = set(externals)
    if parentctx.node() == revlog.nullid:
        # Initial revision: consider every directory in the branch.
        dirs.update([p for p,k in svn.list_files(branchpath, r.revnum) if k == 'd'])
        dirs.add('')
    else:
        # Otherwise only consider directories touched by this revision.
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if e.action == 'D':
                continue
            if not path.startswith(branchprefix) and path != branchpath:
                continue
            kind = svn.checkpath(path, r.revnum)
            if kind != 'd':
                continue
            path = path[len(branchprefix):]
            dirs.add(path)
            if e.action == 'M' or (e.action == 'A' and e.copyfrom_path):
                # Do not recurse in copied directories, changes are marked
                # as 'M', except for the copied one.
                continue
            for child, k in svn.list_files(branchprefix + path, r.revnum):
                if k == 'd':
                    dirs.add((path + '/' + child).strip('/'))

    # Retrieve new or updated values
    for dir in dirs:
        try:
            values = svn.list_props(branchpath + '/' + dir, r.revnum)
            externals[dir] = values.get('svn:externals', '')
        except IOError:
            # Unreadable path: record the directory with empty externals.
            externals[dir] = ''

    if not externals and '.hgsvnexternals' not in parentctx:
        # Do not create empty externals files
        return None
    return externals
379
380
def fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information.
    """
    files = []
    if parentctx.node() == revlog.nullid:
        # Initial revision: every file in the branch is new.
        files = [path for path, kind in svn.list_files(branchpath, r.revnum)
                 if kind == 'f']
    else:
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if not path.startswith(branchprefix):
                continue
            if not hg_editor._is_path_valid(path):
                continue
            kind = svn.checkpath(path, r.revnum)
            path = path[len(branchprefix):]
            if kind == 'f':
                files.append(path)
            elif kind == 'd':
                if e.action == 'M':
                    # Directory modification: its files show up as their
                    # own changed-path entries.
                    continue
                dirpath = branchprefix + path
                files.extend(path + '/' + child
                             for child, k in svn.list_files(dirpath, r.revnum)
                             if k == 'f')
            else:
                # Path no longer exists at this revision.
                if path in parentctx:
                    files.append(path)
                    continue
                # Assume it's a deleted directory
                prefix = path + '/'
                files.extend(f for f in parentctx if f.startswith(prefix))

    copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx)

    def filectxfn(repo, memctx, path):
        # Fetch the fulltext and flags straight from svn.
        data, mode = svn.get_file(branchpath + '/' + path, r.revnum)
        return context.memfilectx(path=path, data=data,
                                  islink='l' in mode,
                                  isexec='x' in mode,
                                  copied=copies.get(path))

    return files, filectxfn
432
def svn_server_pull_rev(ui, svn, hg_editor, r):
    """Convert svn revision 'r' by diffing/fetching instead of replay.

    Used when the server fails at replay.  For every branch touched by
    the revision this computes the changed files (via diff_branchrev,
    falling back to fetch_branchrev), commits a changeset, and finally
    commits 'closed-branches' changesets for branches deleted or
    replaced in this revision.
    """
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath, svn.list_files)
    # deleted_branches: {branch: tip node} for branches to close below.
    deleted_branches = {}
    brpaths = branches.values()
    # bad_branch_paths: per-branch prefixes belonging to nested branches,
    # whose files must be excluded from this branch's commit.
    bad_branch_paths = {}
    for br, bp in branches.iteritems():
        bad_branch_paths[br] = []

        # This next block might be needed, but for now I'm omitting it until it can be
        # proven necessary.
        # for bad in brpaths:
        #     if bad.startswith(bp) and len(bad) > len(bp):
        #         bad_branch_paths[br].append(bad[len(bp)+1:])

        # We've got a branch that contains other branches. We have to be careful to
        # get results similar to real replay in this case.
        for existingbr in hg_editor.branches:
            bad = hg_editor._remotename(existingbr)
            if bad.startswith(bp) and len(bad) > len(bp):
                bad_branch_paths[br].append(bad[len(bp)+1:])
    # Branches replaced ('R' action) in this revision must be closed
    # unless their tip is already followed by a closed-branches commit.
    for p in r.paths:
        if hg_editor._is_path_tag(p):
            continue
        branch = hg_editor._localname(p)
        if r.paths[p].action == 'R' and branch in hg_editor.branches:
            branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
                                        hg_editor.revmap.iteritems()), reverse=True)
            is_closed = False
            if len(branchedits) > 0:
                branchtip = branchedits[0][1]
                for child in hg_editor.repo[branchtip].children():
                    if child.branch() == 'closed-branches':
                        is_closed = True
                        break
                if not is_closed:
                    deleted_branches[branch] = branchtip

    date = hg_editor.fixdate(r.date)
    check_deleted_branches = set()
    for b in branches:
        parentctx = hg_editor.repo[hg_editor.get_parent_revision(r.revnum, b)]
        if parentctx.branch() != (b or 'default'):
            # Parent lives on another named branch: this branch may have
            # been replaced; verified after the commit loop.
            check_deleted_branches.add(b)
        kind = svn.checkpath(branches[b], r.revnum)
        if kind != 'd':
            # Branch does not exist at this revision. Get parent revision and
            # remove everything.
            deleted_branches[b] = parentctx.node()
            continue
        else:
            try:
                files_touched, filectxfn2 = diff_branchrev(
                    ui, svn, hg_editor, b, r, parentctx)
            except BadPatchApply, e:
                # Either this revision or the previous one does not exist.
                ui.status("Fetching entire revision: %s.\n" % e.args[0])
                files_touched, filectxfn2 = fetch_branchrev(
                    svn, hg_editor, b, branches[b], r, parentctx)

        externals = fetch_externals(svn, branches[b], r, parentctx)
        if externals is not None:
            files_touched.append('.hgsvnexternals')

        def filectxfn(repo, memctx, path):
            # Wraps the branch's filectxfn to serve .hgsvnexternals and to
            # drop files that actually belong to nested branches.
            if path == '.hgsvnexternals':
                if not externals:
                    raise IOError()
                return context.memfilectx(path=path, data=externals.write(),
                                          islink=False, isexec=False, copied=None)
            for bad in bad_branch_paths[b]:
                if path.startswith(bad):
                    raise IOError()
            return filectxfn2(repo, memctx, path)

        extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir)
        if '' in files_touched:
            files_touched.remove('')
        # Drop files excluded by the user's file filter.
        excluded = [f for f in files_touched
                    if not hg_editor._is_file_included(f)]
        for f in excluded:
            files_touched.remove(f)
        if parentctx.node() != node.nullid or files_touched:
            for f in files_touched:
                if f:
                    # this is a case that really shouldn't ever happen, it means something
                    # is very wrong
                    assert f[0] != '/'
            current_ctx = context.memctx(hg_editor.repo,
                                         [parentctx.node(), revlog.nullid],
                                         r.message or util.default_commit_msg,
                                         files_touched,
                                         filectxfn,
                                         hg_editor.authorforsvnauthor(r.author),
                                         date,
                                         extra)
            ha = hg_editor.repo.commitctx(current_ctx)
            branch = extra.get('branch', None)
            if not branch in hg_editor.branches:
                hg_editor.branches[branch] = None, 0, r.revnum
            hg_editor.add_to_revmap(r.revnum, b, ha)
            hg_editor._save_metadata()
            util.describe_commit(ui, ha, b)
    # These are branches which would have an 'R' status in svn log. This means they were
    # replaced by some other branch, so we need to verify they get marked as closed.
    for branch in check_deleted_branches:
        branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
                                    hg_editor.revmap.iteritems()), reverse=True)
        is_closed = False
        if len(branchedits) > 0:
            branchtip = branchedits[0][1]
            for child in hg_editor.repo[branchtip].children():
                if child.branch() == 'closed-branches':
                    is_closed = True
                    break
            if not is_closed:
                deleted_branches[branch] = branchtip
    # Commit a 'closed-branches' changeset for each deleted branch,
    # removing every file from its former tip.
    for b, parent in deleted_branches.iteritems():
        if parent == node.nullid:
            continue
        parentctx = hg_editor.repo[parent]
        files_touched = parentctx.manifest().keys()
        def filectxfn(repo, memctx, path):
            # Every file is deleted in a branch-closing commit.
            raise IOError()
        closed = node.nullid
        if 'closed-branches' in hg_editor.repo.branchtags():
            closed = hg_editor.repo['closed-branches'].node()
        parents = (parent, closed)
        current_ctx = context.memctx(hg_editor.repo,
                                     parents,
                                     r.message or util.default_commit_msg,
                                     files_touched,
                                     filectxfn,
                                     hg_editor.authorforsvnauthor(r.author),
                                     date,
                                     {'branch': 'closed-branches'})
        ha = hg_editor.repo.commitctx(current_ctx)
        ui.status('Marked branch %s as closed.\n' % (b or 'default'))
        hg_editor._save_metadata()