Mercurial > hgsubversion
comparison stupid.py @ 239:e2214c8fc91f
Put all stupid stuff in its own module (separate from fetch-command).
author | Dirkjan Ochtman <dirkjan@ochtman.nl> |
---|---|
date | Wed, 08 Apr 2009 17:34:01 +0200 |
parents | fetch_command.py@c90cfa665b81 |
children | 4d3bcd2f26ed |
comparison
equal
deleted
inserted
replaced
238:e8b3ca865f93 | 239:e2214c8fc91f |
---|---|
1 import cStringIO | |
2 import re | |
3 import os | |
4 | |
5 from mercurial import patch | |
6 from mercurial import node | |
7 from mercurial import context | |
8 from mercurial import revlog | |
9 from mercurial import util as merc_util | |
10 from svn import core | |
11 from svn import delta | |
12 | |
13 import hg_delta_editor | |
14 import svnwrap | |
15 import svnexternals | |
16 import util | |
17 | |
18 | |
# Patterns used to pick apart the textual unified diff returned by
# svn.get_unified_diff(). In every pattern, group 1 is the file path.

# "Index:" header of a file svn declined to diff because it is marked binary.
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

# "Property changes" section in which svn:executable is followed by a '+'.
property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:executable
\+''')

# "Property changes" section in which svn:executable is followed by a '-'.
property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:executable
-''')

# An "Index:" header directly followed by the next "Index:" header, i.e. a
# file entry that carries no hunks. The lookahead leaves the next header in
# place so consecutive entries can each be matched.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

# Any "Index:" header at the start of a line.
any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

# "Property changes" section in which svn:special is followed by a '+'.
property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:special
\+''')

# "Property changes" section in which svn:special is followed by a '-'.
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:special
\-''')
46 | |
47 | |
class BadPatchApply(Exception):
    """Signal that a revision could not be converted from its diff.

    The first argument is a short human readable reason.
    """
50 | |
51 | |
def print_your_svn_is_old_message(ui): #pragma: no cover
    """Report through ``ui.status`` that conversion falls back to diffs."""
    message = ("In light of that, I'll fall back and do diffs, but it won't do "
               "as good a job. You should really upgrade your server.\n")
    ui.status(message)
55 | |
56 | |
def mempatchproxy(parentctx, files):
    """Build a ``patch.patchfile`` subclass that reads and writes in memory.

    File contents are read from ``parentctx``. Patched contents are stored in
    the ``files`` dict keyed by file name, and removed files are recorded
    there as None.
    """
    # Avoid circular references patch.patchfile -> mempatch
    basepatchfile = patch.patchfile

    class mempatch(basepatchfile):
        def __init__(self, ui, fname, opener, missing=False):
            # The opener and missing arguments are deliberately not forwarded.
            basepatchfile.__init__(self, ui, fname, None, False)

        def readlines(self, fname):
            if fname in parentctx:
                filectx = parentctx[fname]
                text = filectx.data()
                if 'l' in filectx.flags():
                    # Symlinks get a 'link ' prefix in front of their data.
                    text = 'link ' + text
                return cStringIO.StringIO(text).readlines()
            raise IOError('Cannot find %r to patch' % fname)

        def writelines(self, fname, lines):
            files[fname] = ''.join(lines)

        def unlink(self, fname):
            files[fname] = None

    return mempatch
81 | |
82 | |
def filteriterhunks(hg_editor):
    """Return a ``patch.iterhunks`` wrapper that drops excluded files.

    The wrapper yields only the items belonging to files accepted by
    ``hg_editor._is_file_included``. Git hunks are not supported.
    """
    wrapped = patch.iterhunks

    def filterhunks(ui, fp, sourcefile=None):
        keep = False
        for item in wrapped(ui, fp, sourcefile):
            kind = item[0]
            if kind == 'file':
                # A 'file' item decides whether what follows is yielded.
                keep = bool(hg_editor._is_file_included(item[1][1]))
            assert kind != 'git', 'Filtering git hunks not supported.'
            if keep:
                yield item

    return filterhunks
97 | |
98 | |
def diff_branchrev(ui, svn, hg_editor, branch, r, parentctx):
    """Extract all 'branch' content at a given revision.

    The content is computed by fetching a unified diff from svn and applying
    it in memory on top of 'parentctx'.

    Return a tuple (files, filectxfn) where 'files' is the list of files
    touched in the branch at the given revision, and 'filectxfn' is a memctx
    compatible callable to retrieve individual file information. Raise
    BadPatchApply upon error.
    """
    def make_diff_path(branch):
        # Map a branch name to the svn path handed to get_unified_diff().
        if branch == 'trunk' or branch is None:
            return 'trunk'
        elif branch.startswith('../'):
            return branch[3:]
        return 'branches/%s' % branch
    parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
    diff_path = make_diff_path(branch)
    try:
        if br_p == branch:
            # letting patch handle binaries sounded
            # cool, but it breaks patch in sad ways
            d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                     ignore_type=False)
        else:
            # The parent lives on another branch: diff against it, and give
            # up on patching if anything differs.
            d = svn.get_unified_diff(diff_path, r.revnum,
                                     other_path=make_diff_path(br_p),
                                     other_rev=parent_rev,
                                     deleted=True, ignore_type=True)
            if d:
                raise BadPatchApply('branch creation with mods')
    except svnwrap.SubversionRepoCanNotDiff:
        raise BadPatchApply('subversion diffing code is not supported')
    except core.SubversionException, e:
        # Only a "not found" error is converted; anything else propagates.
        if (hasattr(e, 'apr_err') and e.apr_err != core.SVN_ERR_FS_NOT_FOUND):
            raise
        raise BadPatchApply('previous revision does not exist')
    if '\0' in d:
        raise BadPatchApply('binary diffs are not supported')
    # files_data maps a path to its patched text, or to None when removed.
    files_data = {}
    binary_files = {}
    touched_files = {}
    for m in binary_file_re.findall(d):
        # we have to pull each binary file by hand as a fulltext,
        # which sucks but we've got no choice
        binary_files[m] = 1
        touched_files[m] = 1
    # d2 is the diff with hunk-less entries and svn:executable property
    # sections stripped; it is what gets fed to the patcher.
    d2 = empty_file_patch_wont_make_re.sub('', d)
    d2 = property_exec_set_re.sub('', d2)
    d2 = property_exec_removed_re.sub('', d2)
    for f in any_file_re.findall(d):
        # Here we ensure that all files, including the new empty ones
        # are marked as touched. Content is loaded on demand.
        touched_files[f] = 1
    # Only run the patcher when at least one line starts with '-' or '+'.
    if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
        try:
            # Temporarily swap mercurial's patch hooks for the in-memory
            # versions; the finally clause below always restores them.
            oldpatchfile = patch.patchfile
            olditerhunks = patch.iterhunks
            patch.patchfile = mempatchproxy(parentctx, files_data)
            patch.iterhunks = filteriterhunks(hg_editor)
            try:
                # We can safely ignore the changed list since we are
                # handling non-git patches. Touched files are known
                # by our memory patcher.
                patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                           {}, strip=0)
            finally:
                patch.patchfile = oldpatchfile
                patch.iterhunks = olditerhunks
        except patch.PatchError:
            # TODO: this happens if the svn server has the wrong mime
            # type stored and doesn't know a file is binary. It would
            # be better to do one file at a time and only do a
            # full fetch on files that had problems.
            raise BadPatchApply('patching failed')
        for x in files_data.iterkeys():
            ui.note('M %s\n' % x)
        # if this patch didn't apply right, fall back to exporting the
        # entire rev.
        if patch_st == -1:
            assert False, ('This should only happen on case-insensitive'
                           ' volumes.')
        elif patch_st == 1:
            # When converting Django, I saw fuzz on .po files that was
            # causing revisions to end up failing verification. If that
            # can be fixed, maybe this won't ever be reached.
            raise BadPatchApply('patching succeeded with fuzz')
    else:
        ui.status('Not using patch for %s, diff had no hunks.\n' %
                  r.revnum)

    # exec_files maps a path to its new executable flag. Additions are
    # processed last, so they win when a path matches both patterns.
    exec_files = {}
    for m in property_exec_removed_re.findall(d):
        exec_files[m] = False
    for m in property_exec_set_re.findall(d):
        exec_files[m] = True
    for m in exec_files:
        touched_files[m] = 1
    # link_files maps a path to its new symlink flag.
    link_files = {}
    for m in property_special_set_re.findall(d):
        # TODO(augie) when a symlink is removed, patching will fail.
        # We're seeing that above - there's gotta be a better
        # workaround than just bailing like that.
        assert m in files_data
        link_files[m] = True
    for m in property_special_removed_re.findall(d):
        assert m in files_data
        link_files[m] = False

    # Record deletions listed in the revision's changed paths.
    # NOTE(review): startswith(diff_path) has no trailing '/', so a sibling
    # path sharing the prefix (e.g. 'trunk2/...') would also match - confirm
    # whether such paths can appear in r.paths.
    for p in r.paths:
        if p.startswith(diff_path) and r.paths[p].action == 'D':
            p2 = p[len(diff_path)+1:].strip('/')
            if p2 in parentctx:
                files_data[p2] = None
                continue
            # If this isn't in the parent ctx, it must've been a dir
            files_data.update([(f, None) for f in parentctx if f.startswith(p2 + '/')])

    for f in files_data:
        touched_files[f] = 1

    copies = getcopies(svn, hg_editor, branch, diff_path, r, touched_files,
                       parentctx)

    def filectxfn(repo, memctx, path):
        # memctx callback: raising IOError reports the file as removed.
        if path in files_data and files_data[path] is None:
            raise IOError()

        if path in binary_files:
            # Binary files are fetched as full texts from svn.
            data, mode = svn.get_file(diff_path + '/' + path, r.revnum)
            isexe = 'x' in mode
            islink = 'l' in mode
        else:
            # Flags default to the parent's unless the diff changed them.
            isexe = exec_files.get(path, 'x' in parentctx.flags(path))
            islink = link_files.get(path, 'l' in parentctx.flags(path))
            data = ''
            if path in files_data:
                data = files_data[path]
                if islink:
                    # Drop the 'link ' prefix carried by symlink data.
                    data = data[len('link '):]
            elif path in parentctx:
                data = parentctx[path].data()

        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexe, copied=copied)

    return list(touched_files), filectxfn
245 | |
def makecopyfinder(r, branchpath, rootdir):
    """Build a lookup function for the copies recorded in revision r.

    The returned finder(path) gives None when no copy record applies to
    "path". Otherwise it gives ((source, sourcerev), sourcepath): "source"
    is the path of the copy record that caused the copy, "sourcerev" its svn
    revision, and "sourcepath" the path "path" was copied from. For a single
    file copy "source" and "sourcepath" are equal; for a file found through
    a directory copy "source" is the copied directory.
    """
    # Only copies whose source and destination are both inside the current
    # branch are kept; cross branch copies are ignored.
    destprefix = branchpath + '/'
    sourceprefix = rootdir + destprefix
    records = []
    for path, entry in r.paths.iteritems():
        origin = entry.copyfrom_path
        if (origin and path.startswith(destprefix)
            and origin.startswith(sourceprefix)):
            record = (origin[len(sourceprefix):], entry.copyfrom_rev)
            records.append((path[len(destprefix):], record))

    # Reverse order puts a nested destination ahead of its parent directory.
    records.sort(reverse=True)
    exact = dict(records)

    def finder(path):
        record = exact.get(path)
        if record is not None:
            return record, record[0]
        # look for parent directory copy, longest first
        for dest, record in records:
            dirprefix = dest + '/'
            if path.startswith(dirprefix):
                return record, record[0] + '/' + path[len(dirprefix):]
        return None

    return finder
288 | |
def getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx):
    """Return a mapping {dest: source} for every file copied into r.

    Nothing is reported when parentctx is the null revision.
    """
    if parentctx.node() == revlog.nullid:
        return {}

    # Gather svn copy information grouped by copy record. The idea is to
    # duplicate the replay behaviour where copies are evaluated per copy
    # event (one event for all files in a directory copy, one event for a
    # single file copy). We assume that copy events match copy sources in
    # revision info.
    findcopy = makecopyfinder(r, branchpath, svn.subdir)
    events = {}
    for dest in files:
        found = findcopy(dest)
        if not found:
            continue
        record, sourcepath = found
        events.setdefault(record, []).append((dest, sourcepath))
    if not events:
        return {}

    # Changeset contexts are looked up once per source svn revision.
    ctxcache = {}
    def getctx(svnrev):
        if svnrev not in ctxcache:
            changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
            if changeid != revlog.nullid:
                ctxcache[svnrev] = hg_editor.repo.changectx(changeid)
            else:
                ctxcache[svnrev] = None
        return ctxcache[svnrev]

    # Keep an event only when its sources are the same files in the source
    # changeset and in the parent.
    hgcopies = {}
    for (_source, svnrev), pairs in events.iteritems():
        sourcectx = getctx(svnrev)
        if sourcectx is None:
            continue
        sourcepaths = [pair[1] for pair in pairs]
        if hg_editor.aresamefiles(sourcectx, parentctx, sourcepaths):
            hgcopies.update(pairs)
    return hgcopies
332 | |
def fetch_externals(svn, branchpath, r, parentctx):
    """Collect the svn:externals values of a branch at revision r.

    Returns an externalsfile instance, or None when there are no externals
    to convert and the parent has no '.hgsvnexternals' file either.
    """
    externals = svnexternals.externalsfile()
    if '.hgsvnexternals' in parentctx:
        externals.read(parentctx['.hgsvnexternals'].data())

    # Detect property additions only, changes are handled by checking
    # existing entries individually. Projects are unlikely to store
    # externals on many different root directories, so we trade code
    # duplication and complexity for a constant lookup price at every
    # revision in the common case.
    candidates = set(externals)
    if parentctx.node() == revlog.nullid:
        # No parent: inspect every directory of the branch plus its root.
        for name, kind in svn.list_files(branchpath, r.revnum):
            if kind == 'd':
                candidates.add(name)
        candidates.add('')
    else:
        branchprefix = branchpath + '/'
        for fullpath, e in r.paths.iteritems():
            if e.action == 'D':
                continue
            if fullpath != branchpath and not fullpath.startswith(branchprefix):
                continue
            if svn.checkpath(fullpath, r.revnum) != 'd':
                continue
            relpath = fullpath[len(branchprefix):]
            candidates.add(relpath)
            if e.action == 'M' or (e.action == 'A' and e.copyfrom_path):
                # Do not recurse in copied directories, changes are marked
                # as 'M', except for the copied one.
                continue
            for child, kind in svn.list_files(branchprefix + relpath, r.revnum):
                if kind == 'd':
                    candidates.add((relpath + '/' + child).strip('/'))

    # Retrieve new or updated values
    for dirname in candidates:
        target = branchpath + '/' + dirname
        try:
            props = svn.list_props(target, r.revnum)
            externals[dirname] = props.get('svn:externals', '')
        except IOError:
            externals[dirname] = ''

    if externals or '.hgsvnexternals' in parentctx:
        return externals
    # Do not create empty externals files
    return None
383 | |
384 | |
def fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx):
    """Fetch the content of 'branch' at revision r as full texts.

    Returns (files, filectxfn): 'files' lists the paths to commit, relative
    to the branch, and 'filectxfn' is a memctx compatible callable that
    fetches each file from svn.
    """
    files = []
    if parentctx.node() == revlog.nullid:
        # Initial revision, fetch all files
        files.extend([name
                      for name, kind in svn.list_files(branchpath, r.revnum)
                      if kind == 'f'])
    else:
        branchprefix = branchpath + '/'
        for fullpath, e in r.paths.iteritems():
            if not (fullpath.startswith(branchprefix)
                    and hg_editor._is_path_valid(fullpath)):
                continue
            kind = svn.checkpath(fullpath, r.revnum)
            relpath = fullpath[len(branchprefix):]
            if kind == 'f':
                files.append(relpath)
            elif kind == 'd':
                # A directory marked 'M' is skipped; any other action lists
                # the files it contains.
                if e.action != 'M':
                    for child, childkind in svn.list_files(
                            branchprefix + relpath, r.revnum):
                        if childkind == 'f':
                            files.append(relpath + '/' + child)
            elif relpath in parentctx:
                # Neither file nor directory at this revision, but a file in
                # the parent.
                files.append(relpath)
            else:
                # Assume it's a deleted directory
                dirprefix = relpath + '/'
                files += [f for f in parentctx if f.startswith(dirprefix)]

    copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx)

    def filectxfn(repo, memctx, path):
        data, mode = svn.get_file(branchpath + '/' + path, r.revnum)
        return context.memfilectx(path=path, data=data, islink='l' in mode,
                                  isexec='x' in mode, copied=copies.get(path))

    return files, filectxfn
436 | |
def svn_server_pull_rev(ui, svn, hg_editor, r):
    """Convert svn revision r into commits without using replay.

    For every branch touched by r, the branch content is obtained with
    diff_branchrev(), falling back to fetch_branchrev() when that raises
    BadPatchApply, and committed through a memctx. Branches found to be
    deleted or replaced get a closing commit on 'closed-branches'.
    """
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath, svn.list_files)
    # deleted_branches maps a branch name to the node its closing commit
    # will be based on.
    deleted_branches = {}
    # NOTE(review): brpaths is only referenced by the commented-out block
    # below.
    brpaths = branches.values()
    # bad_branch_paths maps a branch to the sub-paths (relative to that
    # branch) which belong to other known branches and must be left out.
    bad_branch_paths = {}
    for br, bp in branches.iteritems():
        bad_branch_paths[br] = []

        # This next block might be needed, but for now I'm omitting it until it can be
        # proven necessary.
        # for bad in brpaths:
        #     if bad.startswith(bp) and len(bad) > len(bp):
        #         bad_branch_paths[br].append(bad[len(bp)+1:])

        # We've got a branch that contains other branches. We have to be careful to
        # get results similar to real replay in this case.
        for existingbr in hg_editor.branches:
            bad = hg_editor._remotename(existingbr)
            if bad.startswith(bp) and len(bad) > len(bp):
                bad_branch_paths[br].append(bad[len(bp)+1:])
    # Find known branches replaced ('R') in this revision whose latest
    # earlier commit has no child on 'closed-branches' yet.
    for p in r.paths:
        if hg_editor._is_path_tag(p):
            continue
        branch = hg_editor._localname(p)
        if r.paths[p].action == 'R' and branch in hg_editor.branches:
            # revmap items for this branch before r, newest first.
            # presumably revmap is keyed by (revnum, branch) - verify
            branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
                                        hg_editor.revmap.iteritems()), reverse=True)
            is_closed = False
            if len(branchedits) > 0:
                branchtip = branchedits[0][1]
                for child in hg_editor.repo[branchtip].children():
                    if child.branch() == 'closed-branches':
                        is_closed = True
                        break
                if not is_closed:
                    deleted_branches[branch] = branchtip

    date = hg_editor.fixdate(r.date)
    check_deleted_branches = set()
    for b in branches:
        parentctx = hg_editor.repo[hg_editor.get_parent_revision(r.revnum, b)]
        if parentctx.branch() != (b or 'default'):
            # The parent changeset is on a different hg branch; re-check
            # after committing whether b needs to be closed.
            check_deleted_branches.add(b)
        kind = svn.checkpath(branches[b], r.revnum)
        if kind != 'd':
            # Branch does not exist at this revision. Get parent revision and
            # remove everything.
            deleted_branches[b] = parentctx.node()
            continue
        else:
            try:
                files_touched, filectxfn2 = diff_branchrev(
                    ui, svn, hg_editor, b, r, parentctx)
            except BadPatchApply, e:
                # Either this revision or the previous one does not exist.
                ui.status("Fetching entire revision: %s.\n" % e.args[0])
                files_touched, filectxfn2 = fetch_branchrev(
                    svn, hg_editor, b, branches[b], r, parentctx)

            externals = fetch_externals(svn, branches[b], r, parentctx)
            if externals is not None:
                files_touched.append('.hgsvnexternals')

            def filectxfn(repo, memctx, path):
                # Wraps filectxfn2: serves the externals file and hides
                # files living under nested branches. Raising IOError
                # reports a file as removed.
                if path == '.hgsvnexternals':
                    if not externals:
                        raise IOError()
                    return context.memfilectx(path=path, data=externals.write(),
                                              islink=False, isexec=False, copied=None)
                for bad in bad_branch_paths[b]:
                    if path.startswith(bad):
                        raise IOError()
                return filectxfn2(repo, memctx, path)

        extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir)
        if '' in files_touched:
            files_touched.remove('')
        # Drop files the editor is configured to exclude.
        excluded = [f for f in files_touched
                    if not hg_editor._is_file_included(f)]
        for f in excluded:
            files_touched.remove(f)
        # Skip the commit only when there is no parent and nothing to add.
        if parentctx.node() != node.nullid or files_touched:
            # TODO(augie) remove this debug code? Or maybe it's sane to have it.
            for f in files_touched:
                if f:
                    assert f[0] != '/'
            current_ctx = context.memctx(hg_editor.repo,
                                         [parentctx.node(), revlog.nullid],
                                         r.message or util.default_commit_msg,
                                         files_touched,
                                         filectxfn,
                                         hg_editor.authorforsvnauthor(r.author),
                                         date,
                                         extra)
            ha = hg_editor.repo.commitctx(current_ctx)
            branch = extra.get('branch', None)
            if not branch in hg_editor.branches:
                hg_editor.branches[branch] = None, 0, r.revnum
            hg_editor.add_to_revmap(r.revnum, b, ha)
            hg_editor._save_metadata()
            util.describe_commit(ui, ha, b)
    # These are branches which would have an 'R' status in svn log. This means they were
    # replaced by some other branch, so we need to verify they get marked as closed.
    for branch in check_deleted_branches:
        branchedits = sorted(filter(lambda x: x[0][1] == branch and x[0][0] < r.revnum,
                                    hg_editor.revmap.iteritems()), reverse=True)
        is_closed = False
        if len(branchedits) > 0:
            branchtip = branchedits[0][1]
            for child in hg_editor.repo[branchtip].children():
                if child.branch() == 'closed-branches':
                    is_closed = True
                    break
            if not is_closed:
                deleted_branches[branch] = branchtip
    # Commit one closing changeset per deleted branch. It removes every file
    # of the parent manifest and lives on the 'closed-branches' branch.
    for b, parent in deleted_branches.iteritems():
        if parent == node.nullid:
            continue
        parentctx = hg_editor.repo[parent]
        files_touched = parentctx.manifest().keys()
        def filectxfn(repo, memctx, path):
            # Every file is reported as removed.
            raise IOError()
        closed = node.nullid
        if 'closed-branches' in hg_editor.repo.branchtags():
            closed = hg_editor.repo['closed-branches'].node()
        # The second parent is the current 'closed-branches' head, if any.
        parents = (parent, closed)
        current_ctx = context.memctx(hg_editor.repo,
                                     parents,
                                     r.message or util.default_commit_msg,
                                     files_touched,
                                     filectxfn,
                                     hg_editor.authorforsvnauthor(r.author),
                                     date,
                                     {'branch': 'closed-branches'})
        ha = hg_editor.repo.commitctx(current_ctx)
        ui.status('Marked branch %s as closed.\n' % (b or 'default'))
        hg_editor._save_metadata()