hgsubversion: stupid.py @ 304:ce676eff002b (comparison with 303:f423a8780832)
First merge, totally untested.

author    Dan Villiom Podlaski Christiansen <danchr@gmail.com>
date      Fri, 01 May 2009 10:28:59 +0200
parents   fetch_command.py@79440ed81011 fetch_command.py@d978192f0d63
children  1d48d9a34c19
import cStringIO
import re

from mercurial import patch
from mercurial import node
from mercurial import context
from mercurial import revlog
from svn import core

import svnwrap
import svnexternals
import util

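# The regexes below parse pieces of `svn diff` output that plain patch
# application cannot handle. Binary files and property changes show up as
# blocks shaped roughly like the following (the exact header wording varies
# across Subversion versions, hence the (?:Added|Name) alternation):
#
#   Property changes on: trunk/example.sh
#   ___________________________________________________________________
#   Added: svn:executable
#   + *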
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:executable
\+''')

property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:executable
-''')

empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Added|Name): svn:special
\+''')

property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
(?:Deleted|Name): svn:special
\-''')


class BadPatchApply(Exception):
    pass


def print_your_svn_is_old_message(ui): #pragma: no cover
    ui.status("In light of that, I'll fall back and do diffs, but it won't do "
              "as good a job. You should really upgrade your server.\n")

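# mempatchproxy builds a patch.patchfile subclass that patches entirely in
# memory: file contents are read from the parent changectx instead of the
# working directory, and results land in the 'files' dict (a value of None
# marks a deletion). diff_branchrev temporarily installs it in place of
# patch.patchfile while applying the svn diff.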
def mempatchproxy(parentctx, files):
    # Avoid circular references patch.patchfile -> mempatch
    patchfile = patch.patchfile

    class mempatch(patchfile):
        def __init__(self, ui, fname, opener, missing=False):
            patchfile.__init__(self, ui, fname, None, False)

        def readlines(self, fname):
            if fname not in parentctx:
                raise IOError('Cannot find %r to patch' % fname)
            fctx = parentctx[fname]
            data = fctx.data()
            if 'l' in fctx.flags():
                data = 'link ' + data
            return cStringIO.StringIO(data).readlines()

        def writelines(self, fname, lines):
            files[fname] = ''.join(lines)

        def unlink(self, fname):
            files[fname] = None

    return mempatch

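# filteriterhunks wraps patch.iterhunks so that hunks for files excluded by
# the filemap (hg_editor._is_file_included) are dropped before they ever
# reach the patcher.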
def filteriterhunks(hg_editor):
    iterhunks = patch.iterhunks
    def filterhunks(ui, fp, sourcefile=None):
        applycurrent = False
        for data in iterhunks(ui, fp, sourcefile):
            if data[0] == 'file':
                if hg_editor._is_file_included(data[1][1]):
                    applycurrent = True
                else:
                    applycurrent = False
            assert data[0] != 'git', 'Filtering git hunks not supported.'
            if applycurrent:
                yield data
    return filterhunks

def diff_branchrev(ui, svn, hg_editor, branch, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information. Raise BadPatchApply upon
    error.
    """
    def make_diff_path(branch):
        if branch == 'trunk' or branch is None:
            return 'trunk'
        elif branch.startswith('../'):
            return branch[3:]
        return 'branches/%s' % branch
    parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
    diff_path = make_diff_path(branch)
    try:
        if br_p == branch:
            # letting patch handle binaries sounded
            # cool, but it breaks patch in sad ways
            d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                     ignore_type=False)
        else:
            d = svn.get_unified_diff(diff_path, r.revnum,
                                     other_path=make_diff_path(br_p),
                                     other_rev=parent_rev,
                                     deleted=True, ignore_type=True)
            if d:
                raise BadPatchApply('branch creation with mods')
    except svnwrap.SubversionRepoCanNotDiff:
        raise BadPatchApply('subversion diffing code is not supported')
    except core.SubversionException, e:
        if (hasattr(e, 'apr_err') and e.apr_err != core.SVN_ERR_FS_NOT_FOUND):
            raise
        raise BadPatchApply('previous revision does not exist')
    if '\0' in d:
        raise BadPatchApply('binary diffs are not supported')
    files_data = {}
    binary_files = {}
    touched_files = {}
    for m in binary_file_re.findall(d):
        # we have to pull each binary file by hand as a fulltext,
        # which sucks but we've got no choice
        binary_files[m] = 1
        touched_files[m] = 1
    d2 = empty_file_patch_wont_make_re.sub('', d)
    d2 = property_exec_set_re.sub('', d2)
    d2 = property_exec_removed_re.sub('', d2)
    for f in any_file_re.findall(d):
        # Here we ensure that all files, including the new empty ones
        # are marked as touched. Content is loaded on demand.
        touched_files[f] = 1
    if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
        try:
            oldpatchfile = patch.patchfile
            olditerhunks = patch.iterhunks
            patch.patchfile = mempatchproxy(parentctx, files_data)
            patch.iterhunks = filteriterhunks(hg_editor)
            try:
                # We can safely ignore the changed list since we are
                # handling non-git patches. Touched files are known
                # by our memory patcher.
                patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                           {}, strip=0)
            finally:
                patch.patchfile = oldpatchfile
                patch.iterhunks = olditerhunks
        except patch.PatchError:
            # TODO: this happens if the svn server has the wrong mime
            # type stored and doesn't know a file is binary. It would
            # be better to do one file at a time and only do a
            # full fetch on files that had problems.
            raise BadPatchApply('patching failed')
        for x in files_data.iterkeys():
            ui.note('M %s\n' % x)
        # if this patch didn't apply right, fall back to exporting the
        # entire rev.
        if patch_st == -1:
            assert False, ('This should only happen on case-insensitive'
                           ' volumes.')
        elif patch_st == 1:
            # When converting Django, I saw fuzz on .po files that was
            # causing revisions to end up failing verification. If that
            # can be fixed, maybe this won't ever be reached.
            raise BadPatchApply('patching succeeded with fuzz')
    else:
        ui.status('Not using patch for %s, diff had no hunks.\n' %
                  r.revnum)

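    # The executable and symlink bits never appear in the unified diff text
    # itself; svn reports them as svn:executable/svn:special property
    # changes, so recover them from the raw diff with the property regexes.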
    exec_files = {}
    for m in property_exec_removed_re.findall(d):
        exec_files[m] = False
    for m in property_exec_set_re.findall(d):
        exec_files[m] = True
    for m in exec_files:
        touched_files[m] = 1
    link_files = {}
    for m in property_special_set_re.findall(d):
        # TODO(augie) when a symlink is removed, patching will fail.
        # We're seeing that above - there's gotta be a better
        # workaround than just bailing like that.
        assert m in files_data
        link_files[m] = True
    for m in property_special_removed_re.findall(d):
        assert m in files_data
        link_files[m] = False

    for p in r.paths:
        if p.startswith(diff_path) and r.paths[p].action == 'D':
            p2 = p[len(diff_path)+1:].strip('/')
            if p2 in parentctx:
                files_data[p2] = None
                continue
            # If this isn't in the parent ctx, it must've been a dir
            files_data.update([(f, None) for f in parentctx if f.startswith(p2 + '/')])

    for f in files_data:
        touched_files[f] = 1

    copies = getcopies(svn, hg_editor, branch, diff_path, r, touched_files,
                       parentctx)

    def filectxfn(repo, memctx, path):
        if path in files_data and files_data[path] is None:
            raise IOError()

        if path in binary_files:
            data, mode = svn.get_file(diff_path + '/' + path, r.revnum)
            isexe = 'x' in mode
            islink = 'l' in mode
        else:
            isexe = exec_files.get(path, 'x' in parentctx.flags(path))
            islink = link_files.get(path, 'l' in parentctx.flags(path))
            data = ''
            if path in files_data:
                data = files_data[path]
                if islink:
                    data = data[len('link '):]
            elif path in parentctx:
                data = parentctx[path].data()

        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexe, copied=copied)

    return list(touched_files), filectxfn

def makecopyfinder(r, branchpath, rootdir):
    """Return a function detecting copies.

    Returned copyfinder(path) returns None if no copy information can
    be found or ((source, sourcerev), sourcepath) where "sourcepath" is the
    copy source path, "sourcerev" the source svn revision and "source" is the
    copy record path causing the copy to occur. If a single file was copied
    "sourcepath" and "source" are the same, while file copies detected from
    directory copies return the copied source directory in "source".
    """
    # filter copy information for current branch
    branchpath = branchpath + '/'
    fullbranchpath = rootdir + branchpath
    copies = []
    for path, e in r.paths.iteritems():
        if not e.copyfrom_path:
            continue
        if not path.startswith(branchpath):
            continue
        if not e.copyfrom_path.startswith(fullbranchpath):
            # ignore cross branch copies
            continue
        dest = path[len(branchpath):]
        source = e.copyfrom_path[len(fullbranchpath):]
        copies.append((dest, (source, e.copyfrom_rev)))

    copies.sort(reverse=True)
    exactcopies = dict(copies)

    def finder(path):
        if path in exactcopies:
            return exactcopies[path], exactcopies[path][0]
        # look for parent directory copy, longest first
        for dest, (source, sourcerev) in copies:
            dest = dest + '/'
            if not path.startswith(dest):
                continue
            sourcepath = source + '/' + path[len(dest):]
            return (source, sourcerev), sourcepath
        return None

    return finder

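# For example (assuming svn.subdir is empty): if r.paths records
# 'branches/b/new' as copied from 'branches/b/old' at revision 5, then with
# branchpath 'branches/b' the finder maps 'new/file.txt' to
# (('old', 5), 'old/file.txt') -- exact copies are consulted first, then
# the longest copied parent directory.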
def getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx):
    """Return a mapping {dest: source} for every file copied into r.
    """
    if parentctx.node() == revlog.nullid:
        return {}

    # Extract svn copy information, group them by copy source.
    # The idea is to duplicate the replay behaviour where copies are
    # evaluated per copy event (one event for all files in a directory copy,
    # one event for single file copy). We assume that copy events match
    # copy sources in revision info.
    svncopies = {}
    finder = makecopyfinder(r, branchpath, svn.subdir)
    for f in files:
        copy = finder(f)
        if copy:
            svncopies.setdefault(copy[0], []).append((f, copy[1]))
    if not svncopies:
        return {}

    # cache changeset contexts and map them to source svn revisions
    ctxs = {}
    def getctx(svnrev):
        if svnrev in ctxs:
            return ctxs[svnrev]
        changeid = hg_editor.get_parent_revision(svnrev + 1, branch)
        ctx = None
        if changeid != revlog.nullid:
            ctx = hg_editor.repo.changectx(changeid)
        ctxs[svnrev] = ctx
        return ctx

    # check svn copies really make sense in mercurial
    hgcopies = {}
    for (sourcepath, rev), copies in svncopies.iteritems():
        sourcectx = getctx(rev)
        if sourcectx is None:
            continue
        sources = [s[1] for s in copies]
        if not hg_editor.aresamefiles(sourcectx, parentctx, sources):
            continue
        hgcopies.update(copies)
    return hgcopies

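# svn:externals definitions are not converted into anything native; they
# are serialized into a single .hgsvnexternals file at the root of the
# converted repository, with one entry per directory carrying the
# svn:externals property.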
def fetch_externals(svn, branchpath, r, parentctx):
    """Extract svn:externals for the current revision and branch.

    Return an externalsfile instance or None if there are no externals
    to convert and never were.
    """
    externals = svnexternals.externalsfile()
    if '.hgsvnexternals' in parentctx:
        externals.read(parentctx['.hgsvnexternals'].data())
    # Detect property additions only, changes are handled by checking
    # existing entries individually. Projects are unlikely to store
    # externals on many different root directories, so we trade code
    # duplication and complexity for a constant lookup price at every
    # revision in the common case.
    dirs = set(externals)
    if parentctx.node() == revlog.nullid:
        dirs.update([p for p, k in svn.list_files(branchpath, r.revnum) if k == 'd'])
        dirs.add('')
    else:
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if e.action == 'D':
                continue
            if not path.startswith(branchprefix) and path != branchpath:
                continue
            kind = svn.checkpath(path, r.revnum)
            if kind != 'd':
                continue
            path = path[len(branchprefix):]
            dirs.add(path)
            if e.action == 'M' or (e.action == 'A' and e.copyfrom_path):
                # Do not recurse in copied directories, changes are marked
                # as 'M', except for the copied one.
                continue
            for child, k in svn.list_files(branchprefix + path, r.revnum):
                if k == 'd':
                    dirs.add((path + '/' + child).strip('/'))

    # Retrieve new or updated values
    for dir in dirs:
        try:
            values = svn.list_props(branchpath + '/' + dir, r.revnum)
            externals[dir] = values.get('svn:externals', '')
        except IOError:
            externals[dir] = ''

    if not externals and '.hgsvnexternals' not in parentctx:
        # Do not create empty externals files
        return None
    return externals

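# fetch_branchrev is the slow path taken when diff_branchrev raises
# BadPatchApply: instead of patching, it enumerates the files touched in
# the revision (or every file, for the first revision of a branch) and its
# filectxfn fetches each one from the server as a fulltext.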
def fetch_branchrev(svn, hg_editor, branch, branchpath, r, parentctx):
    """Extract all 'branch' content at a given revision.

    Return a tuple (files, filectxfn) where 'files' is the list of all files
    in the branch at the given revision, and 'filectxfn' is a memctx compatible
    callable to retrieve individual file information.
    """
    files = []
    if parentctx.node() == revlog.nullid:
        # Initial revision, fetch all files
        for path, kind in svn.list_files(branchpath, r.revnum):
            if kind == 'f':
                files.append(path)
    else:
        branchprefix = branchpath + '/'
        for path, e in r.paths.iteritems():
            if not path.startswith(branchprefix):
                continue
            if not hg_editor._is_path_valid(path):
                continue
            kind = svn.checkpath(path, r.revnum)
            path = path[len(branchprefix):]
            if kind == 'f':
                files.append(path)
            elif kind == 'd':
                if e.action == 'M':
                    continue
                dirpath = branchprefix + path
                for child, k in svn.list_files(dirpath, r.revnum):
                    if k == 'f':
                        files.append(path + '/' + child)
            else:
                if path in parentctx:
                    files.append(path)
                    continue
                # Assume it's a deleted directory
                path = path + '/'
                deleted = [f for f in parentctx if f.startswith(path)]
                files += deleted

    copies = getcopies(svn, hg_editor, branch, branchpath, r, files, parentctx)

    def filectxfn(repo, memctx, path):
        data, mode = svn.get_file(branchpath + '/' + path, r.revnum)
        isexec = 'x' in mode
        islink = 'l' in mode
        copied = copies.get(path)
        return context.memfilectx(path=path, data=data, islink=islink,
                                  isexec=isexec, copied=copied)

    return files, filectxfn

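# Entry point of the "stupid" (non-replay) mode: for servers where replay
# fails, each revision is converted branch by branch with diff+patch,
# falling back to full fetches, and committed via memctx.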
def svn_server_pull_rev(ui, svn, hg_editor, r):
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths, r.revnum, svn.checkpath,
                                           svn.list_files)
    deleted_branches = {}
    brpaths = branches.values()
    bad_branch_paths = {}
    for br, bp in branches.iteritems():
        bad_branch_paths[br] = []

        # This next block might be needed, but for now I'm omitting it until
        # it can be proven necessary.
        # for bad in brpaths:
        #     if bad.startswith(bp) and len(bad) > len(bp):
        #         bad_branch_paths[br].append(bad[len(bp)+1:])

        # We've got a branch that contains other branches. We have to be
        # careful to get results similar to real replay in this case.
        for existingbr in hg_editor.branches:
            bad = hg_editor._remotename(existingbr)
            if bad.startswith(bp) and len(bad) > len(bp):
                bad_branch_paths[br].append(bad[len(bp)+1:])
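    # Scan for replaced branches: an 'R' action in svn means the path was
    # deleted and re-added within one revision, so if the old branch tip is
    # not already closed it is queued for a closing commit below.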
    for p in r.paths:
        if hg_editor._is_path_tag(p):
            continue
        branch = hg_editor._localname(p)
        if r.paths[p].action == 'R' and branch in hg_editor.branches:
            branchedits = sorted(filter(lambda x: x[0][1] == branch and
                                        x[0][0] < r.revnum,
                                        hg_editor.revmap.iteritems()),
                                 reverse=True)
            is_closed = False
            if len(branchedits) > 0:
                branchtip = branchedits[0][1]
                for child in hg_editor.repo[branchtip].children():
                    if child.branch() == 'closed-branches':
                        is_closed = True
                        break
                if not is_closed:
                    deleted_branches[branch] = branchtip

    date = hg_editor.fixdate(r.date)
    check_deleted_branches = set()
    for b in branches:
        parentctx = hg_editor.repo[hg_editor.get_parent_revision(r.revnum, b)]
        if parentctx.branch() != (b or 'default'):
            check_deleted_branches.add(b)
        kind = svn.checkpath(branches[b], r.revnum)
        if kind != 'd':
            # Branch does not exist at this revision. Get parent revision and
            # remove everything.
            deleted_branches[b] = parentctx.node()
            continue
        else:
            try:
                files_touched, filectxfn2 = diff_branchrev(
                    ui, svn, hg_editor, b, r, parentctx)
            except BadPatchApply, e:
                # Either this revision or the previous one does not exist.
                ui.status("Fetching entire revision: %s.\n" % e.args[0])
                files_touched, filectxfn2 = fetch_branchrev(
                    svn, hg_editor, b, branches[b], r, parentctx)

        externals = fetch_externals(svn, branches[b], r, parentctx)
        if externals is not None:
            files_touched.append('.hgsvnexternals')

        def filectxfn(repo, memctx, path):
            if path == '.hgsvnexternals':
                if not externals:
                    raise IOError()
                return context.memfilectx(path=path, data=externals.write(),
                                          islink=False, isexec=False,
                                          copied=None)
            for bad in bad_branch_paths[b]:
                if path.startswith(bad):
                    raise IOError()
            return filectxfn2(repo, memctx, path)

        extra = util.build_extra(r.revnum, b, svn.uuid, svn.subdir)
        if '' in files_touched:
            files_touched.remove('')
        excluded = [f for f in files_touched
                    if not hg_editor._is_file_included(f)]
        for f in excluded:
            files_touched.remove(f)
        if parentctx.node() != node.nullid or files_touched:
            for f in files_touched:
                if f:
                    # this is a case that really shouldn't ever happen, it
                    # means something is very wrong
                    assert f[0] != '/'
            current_ctx = context.memctx(hg_editor.repo,
                                         [parentctx.node(), revlog.nullid],
                                         r.message or util.default_commit_msg,
                                         files_touched,
                                         filectxfn,
                                         hg_editor.authorforsvnauthor(r.author),
                                         date,
                                         extra)
            ha = hg_editor.repo.commitctx(current_ctx)
            branch = extra.get('branch', None)
            if not branch in hg_editor.branches:
                hg_editor.branches[branch] = None, 0, r.revnum
            hg_editor.add_to_revmap(r.revnum, b, ha)
            hg_editor._save_metadata()
            util.describe_commit(ui, ha, b)
    # These are branches which would have an 'R' status in svn log. This
    # means they were replaced by some other branch, so we need to verify
    # they get marked as closed.
    for branch in check_deleted_branches:
        branchedits = sorted(filter(lambda x: x[0][1] == branch and
                                    x[0][0] < r.revnum,
                                    hg_editor.revmap.iteritems()),
                             reverse=True)
        is_closed = False
        if len(branchedits) > 0:
            branchtip = branchedits[0][1]
            for child in hg_editor.repo[branchtip].children():
                if child.branch() == 'closed-branches':
                    is_closed = True
                    break
            if not is_closed:
                deleted_branches[branch] = branchtip
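    # A deleted branch is closed by committing a child of its old tip on
    # the 'closed-branches' named branch whose filectxfn removes every
    # file; any existing head of 'closed-branches' is taken as second
    # parent, which keeps that branch at a single head.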
    for b, parent in deleted_branches.iteritems():
        if parent == node.nullid:
            continue
        parentctx = hg_editor.repo[parent]
        files_touched = parentctx.manifest().keys()
        def filectxfn(repo, memctx, path):
            raise IOError()
        closed = node.nullid
        if 'closed-branches' in hg_editor.repo.branchtags():
            closed = hg_editor.repo['closed-branches'].node()
        parents = (parent, closed)
        current_ctx = context.memctx(hg_editor.repo,
                                     parents,
                                     r.message or util.default_commit_msg,
                                     files_touched,
                                     filectxfn,
                                     hg_editor.authorforsvnauthor(r.author),
                                     date,
                                     {'branch': 'closed-branches'})
        ha = hg_editor.repo.commitctx(current_ctx)
        ui.status('Marked branch %s as closed.\n' % (b or 'default'))
        hg_editor._save_metadata()