comparison fetch_command.py @ 239:e2214c8fc91f

Put all stupid stuff in its own module (separate from fetch-command).
author Dirkjan Ochtman <dirkjan@ochtman.nl>
date Wed, 08 Apr 2009 17:34:01 +0200
parents c90cfa665b81
children
comparison
equal deleted inserted replaced
238:e8b3ca865f93 239:e2214c8fc91f
1 import cStringIO
2 import re
3 import os 1 import os
4 2
5 from mercurial import patch
6 from mercurial import node
7 from mercurial import context
8 from mercurial import revlog
9 from mercurial import util as merc_util 3 from mercurial import util as merc_util
10 from svn import core 4 from svn import core
11 from svn import delta 5 from svn import delta
12 6
13 import hg_delta_editor 7 import hg_delta_editor
14 import svnwrap 8 import svnwrap
15 import svnexternals 9 import stupid as stupidmod
16 import util 10 import util
17
18
def print_your_svn_is_old_message(ui): #pragma: no cover
    """Report through 'ui' that conversion falls back to diffs."""
    message = ("In light of that, I'll fall back and do diffs, but it won't do "
               "as good a job. You should really upgrade your server.\n")
    ui.status(message)
22 11
23 12
24 def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None, 13 def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None,
25 tag_locations='tags', 14 tag_locations='tags',
26 authors=None, 15 authors=None,
85 if have_replay: 74 if have_replay:
86 try: 75 try:
87 replay_convert_rev(hg_editor, svn, r) 76 replay_convert_rev(hg_editor, svn, r)
88 except svnwrap.SubversionRepoCanNotReplay, e: #pragma: no cover 77 except svnwrap.SubversionRepoCanNotReplay, e: #pragma: no cover
89 ui.status('%s\n' % e.message) 78 ui.status('%s\n' % e.message)
90 print_your_svn_is_old_message(ui) 79 stupidmod.print_your_svn_is_old_message(ui)
91 have_replay = False 80 have_replay = False
92 stupid_svn_server_pull_rev(ui, svn, hg_editor, r) 81 stupidmod.svn_server_pull_rev(ui, svn, hg_editor, r)
93 else: 82 else:
94 stupid_svn_server_pull_rev(ui, svn, hg_editor, r) 83 stupidmod.svn_server_pull_rev(ui, svn, hg_editor, r)
95 converted = True 84 converted = True
96 except core.SubversionException, e: #pragma: no cover 85 except core.SubversionException, e: #pragma: no cover
97 if (e.apr_err == core.SVN_ERR_RA_DAV_REQUEST_FAILED 86 if (e.apr_err == core.SVN_ERR_RA_DAV_REQUEST_FAILED
98 and '502' in str(e) 87 and '502' in str(e)
99 and tries < 3): 88 and tries < 3):
139 data, mode = svn.get_file(p, r.revnum) 128 data, mode = svn.get_file(p, r.revnum)
140 hg_editor.set_file(p, data, 'x' in mode, 'l' in mode) 129 hg_editor.set_file(p, data, 'x' in mode, 'l' in mode)
141 hg_editor.missing_plaintexts = set() 130 hg_editor.missing_plaintexts = set()
142 hg_editor.ui.note('\n') 131 hg_editor.ui.note('\n')
143 hg_editor.commit_current_delta() 132 hg_editor.commit_current_delta()
144
145
# Patterns used to pick apart the text returned by svn.get_unified_diff().
# Each one captures the path named on the 'Index:' / 'Property changes on:'
# header line.

# A file svn refused to diff because it is flagged as binary.
binary_file_re = re.compile(
    r'Index: ([^\n]*)\n'
    r'=*\n'
    r'Cannot display: file marked as a binary type.')

# svn:executable was set. The value marker line is indented by svn
# ("   + *"), so leading spaces before the '+' are accepted; a marker at
# column 0 still matches.
property_exec_set_re = re.compile(
    r'Property changes on: ([^\n]*)\n'
    r'_*\n'
    r'(?:Added|Name): svn:executable\n'
    r' *\+')

# svn:executable was removed (same layout, '-' marker).
property_exec_removed_re = re.compile(
    r'Property changes on: ([^\n]*)\n'
    r'_*\n'
    r'(?:Deleted|Name): svn:executable\n'
    r' *-')

# An 'Index:' header with no hunks that is directly followed by the next
# 'Index:' header, i.e. an empty file that patch would not create.
empty_file_patch_wont_make_re = re.compile(
    r'Index: ([^\n]*)\n=*\n(?=Index:)')

# Any 'Index:' header at the start of a line.
any_file_re = re.compile(r'^Index: ([^\n]*)\n=*\n', re.MULTILINE)

# svn:special was set (the file became a symlink).
property_special_set_re = re.compile(
    r'Property changes on: ([^\n]*)\n'
    r'_*\n'
    r'(?:Added|Name): svn:special\n'
    r' *\+')

# svn:special was removed.
property_special_removed_re = re.compile(
    r'Property changes on: ([^\n]*)\n'
    r'_*\n'
    r'(?:Deleted|Name): svn:special\n'
    r' *\-')
173
def mempatchproxy(parentctx, files):
    """Return a patch.patchfile subclass bound to 'parentctx' and 'files'.

    The returned class reads the content to patch from 'parentctx' and
    records its results in the 'files' dict: patched content is stored as
    a single string under the file name, and an unlinked file is stored
    as None.
    """
    # Avoid circular references patch.patchfile -> mempatch
    basepatchfile = patch.patchfile

    class mempatch(basepatchfile):
        def __init__(self, ui, fname, opener, missing=False):
            # 'opener' and 'missing' are accepted but not forwarded; the
            # base class always receives None and False.
            basepatchfile.__init__(self, ui, fname, None, False)

        def readlines(self, fname):
            if fname in parentctx:
                filectx = parentctx[fname]
                content = filectx.data()
                if 'l' in filectx.flags():
                    # Files flagged 'l' get a 'link ' prefix on their data.
                    content = 'link ' + content
                return cStringIO.StringIO(content).readlines()
            raise IOError('Cannot find %r to patch' % fname)

        def writelines(self, fname, lines):
            files[fname] = ''.join(lines)

        def unlink(self, fname):
            files[fname] = None

    return mempatch
198
199
def filteriterhunks(hg_editor):
    """Return a wrapper around patch.iterhunks that filters by file.

    The wrapper yields the events of the wrapped iterator only while the
    most recent 'file' event named a path accepted by
    hg_editor._is_file_included(). Git events are rejected by assertion.
    """
    upstream = patch.iterhunks

    def filterhunks(ui, fp, sourcefile=None):
        keep = False
        for event in upstream(ui, fp, sourcefile):
            if event[0] == 'file':
                # A new file section starts: decide once whether it and
                # the events that follow are passed through.
                keep = bool(hg_editor._is_file_included(event[1][1]))
            assert event[0] != 'git', 'Filtering git hunks not supported.'
            if keep:
                yield event

    return filterhunks
214
def stupid_diff_branchrev(ui, svn, hg_editor, branch, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information. Raise BadPatchApply upon
    error.

    The content is obtained by asking svn for a unified diff and applying it
    in memory on top of 'parentctx'. While the diff is applied,
    patch.patchfile and patch.iterhunks are temporarily replaced and always
    restored afterwards.
    """
    def make_diff_path(branch):
        if branch == 'trunk' or branch is None:
            return 'trunk'
        elif branch.startswith('../'):
            return branch[3:]
        return 'branches/%s' % branch
    parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
    diff_path = make_diff_path(branch)
    try:
        if br_p == branch:
            # letting patch handle binaries sounded
            # cool, but it breaks patch in sad ways
            d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                     ignore_type=False)
        else:
            d = svn.get_unified_diff(diff_path, r.revnum,
                                     other_path=make_diff_path(br_p),
                                     other_rev=parent_rev,
                                     deleted=True, ignore_type=True)
            if d:
                raise BadPatchApply('branch creation with mods')
    except svnwrap.SubversionRepoCanNotDiff:
        raise BadPatchApply('subversion diffing code is not supported')
    except core.SubversionException as e:
        # Only a "not found" error is turned into BadPatchApply; anything
        # else carrying an apr_err is propagated unchanged.
        if (hasattr(e, 'apr_err') and e.apr_err != core.SVN_ERR_FS_NOT_FOUND):
            raise
        raise BadPatchApply('previous revision does not exist')
    if '\0' in d:
        raise BadPatchApply('binary diffs are not supported')
    files_data = {}
    binary_files = {}
    touched_files = {}
    for m in binary_file_re.findall(d):
        # we have to pull each binary file by hand as a fulltext,
        # which sucks but we've got no choice
        binary_files[m] = 1
        touched_files[m] = 1
    d2 = empty_file_patch_wont_make_re.sub('', d)
    d2 = property_exec_set_re.sub('', d2)
    d2 = property_exec_removed_re.sub('', d2)
    for f in any_file_re.findall(d):
        # Here we ensure that all files, including the new empty ones
        # are marked as touched. Content is loaded on demand.
        touched_files[f] = 1
    if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
        try:
            oldpatchfile = patch.patchfile
            olditerhunks = patch.iterhunks
            patch.patchfile = mempatchproxy(parentctx, files_data)
            patch.iterhunks = filteriterhunks(hg_editor)
            try:
                # We can safely ignore the changed list since we are
                # handling non-git patches. Touched files are known
                # by our memory patcher.
                patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                           {}, strip=0)
            finally:
                patch.patchfile = oldpatchfile
                patch.iterhunks = olditerhunks
        except patch.PatchError:
            # TODO: this happens if the svn server has the wrong mime
            # type stored and doesn't know a file is binary. It would
            # be better to do one file at a time and only do a
            # full fetch on files that had problems.
            raise BadPatchApply('patching failed')
        for x in files_data:
            ui.note('M %s\n' % x)
        # if this patch didn't apply right, fall back to exporting the
        # entire rev.
        if patch_st == -1:
            assert False, ('This should only happen on case-insensitive'
                           ' volumes.')
        elif patch_st == 1:
            # When converting Django, I saw fuzz on .po files that was
            # causing revisions to end up failing verification. If that
            # can be fixed, maybe this won't ever be reached.
            raise BadPatchApply('patching succeeded with fuzz')
    else:
        ui.status('Not using patch for %s, diff had no hunks.\n' %
                  r.revnum)

    exec_files = {}
    for m in property_exec_removed_re.findall(d):
        exec_files[m] = False
    for m in property_exec_set_re.findall(d):
        exec_files[m] = True
    for m in exec_files:
        touched_files[m] = 1
    link_files = {}
    for m in property_special_set_re.findall(d):
        # TODO(augie) when a symlink is removed, patching will fail.
        # We're seeing that above - there's gotta be a better
        # workaround than just bailing like that.
        assert m in files_data
        link_files[m] = True
    for m in property_special_removed_re.findall(d):
        assert m in files_data
        link_files[m] = False

    # Record deletions listed in the revision. Only paths strictly below
    # the branch directory are considered: a bare prefix test would also
    # accept siblings such as 'branches/foobar/x' for 'branches/foo' and
    # slice them into a bogus relative path.
    branchprefix = diff_path + '/'
    for p in r.paths:
        if not p.startswith(branchprefix) or r.paths[p].action != 'D':
            continue
        p2 = p[len(branchprefix):].strip('/')
        if p2 in parentctx:
            files_data[p2] = None
            continue
        # If this isn't in the parent ctx, it must've been a dir
        files_data.update([(f, None) for f in parentctx if f.startswith(p2 + '/')])

    for f in files_data:
        touched_files[f] = 1

    copies = getcopies(svn, hg_editor, branch, diff_path, r, touched_files,
                       parentctx)

    def filectxfn(repo, memctx, path):
        # A None entry marks a removed file; memctx expects IOError then.
        if path in files_data and files_data[path] is None:
            raise IOError()

        if path in binary_files:
            data, mode = svn.get_file(diff_path + '/' + path, r.revnum)
            isexe = 'x' in mode
            islink = 'l' in mode
        else:
            isexe = exec_files.get(path, 'x' in parentctx.flags(path))
            islink = link_files.get(path, 'l' in parentctx.flags(path))
            data = ''
            if path in files_data:
                data = files_data[path]
                if islink:
                    # Drop the 'link ' prefix carried by symlink content.
                    data = data[len('link '):]
            elif path in parentctx:
                data = parentctx[path].data()

        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexe, copied=copied)

    return list(touched_files), filectxfn
361
def makecopyfinder(r, branchpath, rootdir):
    """Return a function detecting copies.

    Returned copyfinder(path) returns None if no copy information can
    be found or ((source, sourcerev), sourcepath) where "sourcepath" is the
    copy source path, "sourcerev" the source svn revision and "source" is the
    copy record path causing the copy to occur. If a single file was copied
    "sourcepath" and "source" are the same, while file copies detected from
    directory copies return the copied source directory in "source".

    'r.paths' maps changed paths to entries exposing 'copyfrom_path' and
    'copyfrom_rev'. Destination paths are matched against 'branchpath' and
    copy sources against 'rootdir' + 'branchpath'; all returned paths are
    relative to the branch.
    """
    # filter copy information for current branch
    branchpath = branchpath + '/'
    fullbranchpath = rootdir + branchpath
    copies = []
    # items() gives the same pairs as iteritems() for a dict and also
    # works where iteritems() does not exist.
    for path, e in r.paths.items():
        if not e.copyfrom_path:
            continue
        if not path.startswith(branchpath):
            continue
        if not e.copyfrom_path.startswith(fullbranchpath):
            # ignore cross branch copies
            continue
        dest = path[len(branchpath):]
        source = e.copyfrom_path[len(fullbranchpath):]
        copies.append((dest, (source, e.copyfrom_rev)))

    # Reverse order puts a nested destination ahead of its parent
    # directory, so the directory scan below hits the longest match first.
    copies.sort(reverse=True)
    exactcopies = dict(copies)

    def finder(path):
        if path in exactcopies:
            return exactcopies[path], exactcopies[path][0]
        # look for parent directory copy, longest first
        for dest, (source, sourcerev) in copies:
            dest = dest + '/'
            if not path.startswith(dest):
                continue
            sourcepath = source + '/' + path[len(dest):]
            return (source, sourcerev), sourcepath
        return None

    return finder
404
def getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx):
    """Return a mapping {dest: source} for every file copied into r.

    Nothing is reported when 'parentctx' is the null revision. A group of
    copies is kept only if hg_editor.aresamefiles() accepts its sources
    between the source changeset and 'parentctx'.
    """
    if parentctx.node() == revlog.nullid:
        return {}

    # Extract svn copy information, group them by copy source.
    # The idea is to duplicate the replay behaviour where copies are
    # evaluated per copy event (one event for all files in a directory copy,
    # one event for single file copy). We assume that copy events match
    # copy sources in revision info.
    lookup = makecopyfinder(r, branchpath, svn.subdir)
    events = {}
    for target in files:
        found = lookup(target)
        if not found:
            continue
        events.setdefault(found[0], []).append((target, found[1]))
    if not events:
        return {}

    # cache changeset contexts and map them to source svn revisions
    ctxcache = {}
    def getctx(svnrev):
        try:
            return ctxcache[svnrev]
        except KeyError:
            pass
        changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
        if changeid != revlog.nullid:
            found_ctx = hg_editor.repo.changectx(changeid)
        else:
            found_ctx = None
        ctxcache[svnrev] = found_ctx
        return found_ctx

    # check svn copies really make sense in mercurial
    result = {}
    for (sourcepath, rev), pairs in events.iteritems():
        sourcectx = getctx(rev)
        if sourcectx is None:
            continue
        sources = [pair[1] for pair in pairs]
        if hg_editor.aresamefiles(sourcectx, parentctx, sources):
            result.update(pairs)
    return result
448
def stupid_fetch_externals(svn, branchpath, r, parentctx):
    """Extract svn:externals for the current revision and branch

    Return an externalsfile instance or None if there are no externals
    to convert and never were.
    """
    externals = svnexternals.externalsfile()
    had_externals = '.hgsvnexternals' in parentctx
    if had_externals:
        externals.read(parentctx['.hgsvnexternals'].data())

    # Detect property additions only, changes are handled by checking
    # existing entries individually. Projects are unlikely to store
    # externals on many different root directories, so we trade code
    # duplication and complexity for a constant lookup price at every
    # revision in the common case.
    candidates = set(externals)
    if parentctx.node() == revlog.nullid:
        # No parent changeset: look at every directory of the branch,
        # the branch root ('') included.
        for name, kind in svn.list_files(branchpath, r.revnum):
            if kind == 'd':
                candidates.add(name)
        candidates.add('')
    else:
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if e.action == 'D':
                continue
            if path != branchpath and not path.startswith(branchprefix):
                continue
            if svn.checkpath(path, r.revnum) != 'd':
                continue
            relpath = path[len(branchprefix):]
            candidates.add(relpath)
            if e.action == 'M' or (e.action == 'A' and e.copyfrom_path):
                # Do not recurse in copied directories, changes are marked
                # as 'M', except for the copied one.
                continue
            for child, k in svn.list_files(branchprefix + relpath, r.revnum):
                if k == 'd':
                    candidates.add((relpath + '/' + child).strip('/'))

    # Retrieve new or updated values
    for dirpath in candidates:
        try:
            props = svn.list_props(branchpath + '/' + dirpath, r.revnum)
            externals[dirpath] = props.get('svn:externals', '')
        except IOError:
            externals[dirpath] = ''

    if not externals and not had_externals:
        # Do not create empty externals files
        return None
    return externals
499
def stupid_fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information.
    """
    files = []
    if parentctx.node() == revlog.nullid:
        # Initial revision, fetch all files
        files.extend([name for name, kind
                      in svn.list_files(branchpath, r.revnum)
                      if kind == 'f'])
    else:
        branchprefix = branchpath + '/'
        for fullpath, e in r.paths.iteritems():
            if not fullpath.startswith(branchprefix):
                continue
            if not hg_editor._is_path_valid(fullpath):
                continue
            kind = svn.checkpath(fullpath, r.revnum)
            relpath = fullpath[len(branchprefix):]
            if kind == 'f':
                files.append(relpath)
            elif kind == 'd':
                # A directory marked 'M' is skipped; any other action
                # pulls in every file listed below it.
                if e.action != 'M':
                    listing = svn.list_files(branchprefix + relpath, r.revnum)
                    for child, k in listing:
                        if k == 'f':
                            files.append(relpath + '/' + child)
            elif relpath in parentctx:
                # Neither file nor directory at this revision, but known
                # to the parent changeset.
                files.append(relpath)
            else:
                # Assume it's a deleted directory
                dirprefix = relpath + '/'
                files.extend([f for f in parentctx if f.startswith(dirprefix)])

    copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx)

    def filectxfn(repo, memctx, path):
        data, mode = svn.get_file(branchpath + '/' + path, r.revnum)
        return context.memfilectx(path=path, data=data,
                                  islink='l' in mode,
                                  isexec='x' in mode,
                                  copied=copies.get(path))

    return files, filectxfn
551
def _stupid_unclosed_branchtip(hg_editor, branch, revnum):
    """Return the changeset that must be closed for 'branch', or None.

    Picks the newest revmap entry recorded for 'branch' at an svn revision
    strictly below 'revnum'. None is returned when no such entry exists or
    when that changeset already has a child on 'closed-branches'.
    """
    branchedits = sorted([item for item in hg_editor.revmap.iteritems()
                          if item[0][1] == branch and item[0][0] < revnum],
                         reverse=True)
    if not branchedits:
        return None
    branchtip = branchedits[0][1]
    for child in hg_editor.repo[branchtip].children():
        if child.branch() == 'closed-branches':
            return None
    return branchtip


def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
    """Convert svn revision 'r' without relying on replay.

    For every branch reported by hg_editor.branches_in_paths() the branch
    content is rebuilt from a diff (stupid_diff_branchrev), falling back to
    a full fetch (stupid_fetch_branchrev) when BadPatchApply is raised, and
    committed as a memctx that is recorded in the revmap. Branches found to
    be deleted or replaced are then closed by committing a changeset that
    removes all their files onto the 'closed-branches' branch.
    """
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath, svn.list_files)
    deleted_branches = {}
    bad_branch_paths = {}
    for br, bp in branches.iteritems():
        bad_branch_paths[br] = []

        # Excluding paths of the other branches touched by this revision
        # (branches.values()) in the same way might be needed as well, but
        # it is omitted until it can be proven necessary.

        # We've got a branch that contains other branches. We have to be
        # careful to get results similar to real replay in this case.
        for existingbr in hg_editor.branches:
            bad = hg_editor._remotename(existingbr)
            if bad.startswith(bp) and len(bad) > len(bp):
                bad_branch_paths[br].append(bad[len(bp)+1:])
    for p in r.paths:
        if hg_editor._is_path_tag(p):
            continue
        branch = hg_editor._localname(p)
        if r.paths[p].action == 'R' and branch in hg_editor.branches:
            branchtip = _stupid_unclosed_branchtip(hg_editor, branch, r.revnum)
            if branchtip is not None:
                deleted_branches[branch] = branchtip

    date = hg_editor.fixdate(r.date)
    check_deleted_branches = set()
    for b in branches:
        parentctx = hg_editor.repo[hg_editor.get_parent_revision(r.revnum, b)]
        if parentctx.branch() != (b or 'default'):
            check_deleted_branches.add(b)
        kind = svn.checkpath(branches[b], r.revnum)
        if kind != 'd':
            # Branch does not exist at this revision. Get parent revision and
            # remove everything.
            deleted_branches[b] = parentctx.node()
            continue

        try:
            files_touched, filectxfn2 = stupid_diff_branchrev(
                ui, svn, hg_editor, b, r, parentctx)
        except BadPatchApply as e:
            # Either this revision or the previous one does not exist.
            ui.status("Fetching entire revision: %s.\n" % e.args[0])
            files_touched, filectxfn2 = stupid_fetch_branchrev(
                svn, hg_editor, b, branches[b], r, parentctx)

        externals = stupid_fetch_externals(svn, branches[b], r, parentctx)
        if externals is not None:
            files_touched.append('.hgsvnexternals')

        # The closure reads 'externals', 'b' and 'filectxfn2' of the current
        # iteration; it is consumed by commitctx() below before the loop
        # moves on.
        def filectxfn(repo, memctx, path):
            if path == '.hgsvnexternals':
                if not externals:
                    raise IOError()
                return context.memfilectx(path=path, data=externals.write(),
                                          islink=False, isexec=False, copied=None)
            for bad in bad_branch_paths[b]:
                if path.startswith(bad):
                    raise IOError()
            return filectxfn2(repo, memctx, path)

        extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir)
        if '' in files_touched:
            files_touched.remove('')
        excluded = [f for f in files_touched
                    if not hg_editor._is_file_included(f)]
        for f in excluded:
            files_touched.remove(f)
        if parentctx.node() != node.nullid or files_touched:
            # TODO(augie) remove this debug code? Or maybe it's sane to have it.
            for f in files_touched:
                if f:
                    assert f[0] != '/'
            current_ctx = context.memctx(hg_editor.repo,
                                         [parentctx.node(), revlog.nullid],
                                         r.message or util.default_commit_msg,
                                         files_touched,
                                         filectxfn,
                                         hg_editor.authorforsvnauthor(r.author),
                                         date,
                                         extra)
            ha = hg_editor.repo.commitctx(current_ctx)
            branch = extra.get('branch', None)
            if branch not in hg_editor.branches:
                hg_editor.branches[branch] = None, 0, r.revnum
            hg_editor.add_to_revmap(r.revnum, b, ha)
            hg_editor._save_metadata()
            util.describe_commit(ui, ha, b)
    # These are branches which would have an 'R' status in svn log. This means they were
    # replaced by some other branch, so we need to verify they get marked as closed.
    for branch in check_deleted_branches:
        branchtip = _stupid_unclosed_branchtip(hg_editor, branch, r.revnum)
        if branchtip is not None:
            deleted_branches[branch] = branchtip
    for b, parent in deleted_branches.iteritems():
        if parent == node.nullid:
            continue
        parentctx = hg_editor.repo[parent]
        files_touched = parentctx.manifest().keys()
        # Every file of the parent is reported as missing, which removes it.
        def filectxfn(repo, memctx, path):
            raise IOError()
        closed = node.nullid
        if 'closed-branches' in hg_editor.repo.branchtags():
            closed = hg_editor.repo['closed-branches'].node()
        parents = (parent, closed)
        current_ctx = context.memctx(hg_editor.repo,
                                     parents,
                                     r.message or util.default_commit_msg,
                                     files_touched,
                                     filectxfn,
                                     hg_editor.authorforsvnauthor(r.author),
                                     date,
                                     {'branch': 'closed-branches'})
        ha = hg_editor.repo.commitctx(current_ctx)
        ui.status('Marked branch %s as closed.\n' % (b or 'default'))
        hg_editor._save_metadata()
690
class BadPatchApply(Exception):
    """Raised when a revision cannot be converted by applying its diff."""