comparison fetch_command.py @ 108:de19a13edfa8

fetch_command: extract diff code in a function
author Patrick Mezard <pmezard@gmail.com>
date Tue, 25 Nov 2008 09:18:29 -0600
parents a8950157281a
children 460443a96497
comparison
equal deleted inserted replaced
107:a8950157281a 108:de19a13edfa8
170 _* 170 _*
171 (?:Deleted|Name): svn:special 171 (?:Deleted|Name): svn:special
172 \- \* 172 \- \*
173 ''') 173 ''')
174 174
175 def make_diff_path(b): 175 def stupid_diff_branchrev(ui, svn, hg_editor, branch, r, parentid, tempdir):
176 if b == None: 176 """Extract all 'branch' content at a given revision.
177 return 'trunk' 177
178 return 'branches/' + b 178 Return a tuple (files, filectxfn) where 'files' is the list of all files
179 in the branch at the given revision, and 'filectxfn' is a memctx compatible
180 callable to retrieve individual file information.
181 """
182 def make_diff_path(b):
183 if b == None:
184 return 'trunk'
185 return 'branches/' + b
186
187 parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, branch)
188 diff_path = make_diff_path(branch)
189 files_touched = set()
190 if br_p == branch:
191 # letting patch handle binaries sounded
192 # cool, but it breaks patch in sad ways
193 d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
194 ignore_type=False)
195 else:
196 d = svn.get_unified_diff(diff_path, r.revnum,
197 other_path=make_diff_path(br_p),
198 other_rev=parent_rev,
199 deleted=True, ignore_type=True)
200 if d:
201 ui.status('Branch creation with mods, pulling full rev.\n')
202 raise BadPatchApply()
203 opener = merc_util.opener(tempdir)
204 for m in binary_file_re.findall(d):
205 # we have to pull each binary file by hand as a fulltext,
206 # which sucks but we've got no choice
207 files_touched.add(m)
208 try:
209 f = opener(m, 'w')
210 f.write(svn.get_file(diff_path+'/'+m, r.revnum)[0])
211 f.close()
212 except IOError:
213 pass
214 d2 = empty_file_patch_wont_make_re.sub('', d)
215 d2 = property_exec_set_re.sub('', d2)
216 d2 = property_exec_removed_re.sub('', d2)
217 for f in any_file_re.findall(d):
218 if f in files_touched:
219 # this check is here because modified binary files will get
220 # created before here.
221 continue
222 files_touched.add(f)
223 data = ''
224 if f in hg_editor.repo[parentid]:
225 data = hg_editor.repo[parentid][f].data()
226 fp = opener(f, 'w')
227 fp.write(data)
228 fp.close()
229 if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
230 old_cwd = os.getcwd()
231 os.chdir(tempdir)
232 changed = {}
233 try:
234 patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
235 changed, strip=0)
236 except patch.PatchError:
237 # TODO: this happens if the svn server has the wrong mime
238 # type stored and doesn't know a file is binary. It would
239 # be better to do one file at a time and only do a
240 # full fetch on files that had problems.
241 os.chdir(old_cwd)
242 raise BadPatchApply()
243 for x in changed.iterkeys():
244 ui.status('M %s\n' % x)
245 files_touched.add(x)
246 os.chdir(old_cwd)
247 # if this patch didn't apply right, fall back to exporting the
248 # entire rev.
249 if patch_st == -1:
250 parent_ctx = hg_editor.repo[parentid]
251 parent_manifest = parent_ctx.manifest()
252 for fn in files_touched:
253 if (fn in parent_manifest and
254 'l' in parent_ctx.filectx(fn).flags()):
255 # I think this might be an underlying bug in svn -
256 # I get diffs of deleted symlinks even though I
257 # specifically said no deletes above.
258 ui.status('Pulling whole rev because of a deleted'
259 'symlink')
260 raise BadPatchApply()
261 assert False, ('This should only happen on case-insensitive'
262 ' volumes.')
263 elif patch_st == 1:
264 # When converting Django, I saw fuzz on .po files that was
265 # causing revisions to end up failing verification. If that
266 # can be fixed, maybe this won't ever be reached.
267 ui.status('There was some fuzz, not using diff after all.')
268 raise BadPatchApply()
269 else:
270 ui.status('Not using patch for %s, diff had no hunks.\n' %
271 r.revnum)
272
273 # we create the files if they don't exist here because we know
274 # that we'll never have diff info for a deleted file, so if the
275 # property is set, we should force the file to exist no matter what.
276 exec_files = {}
277 for m in property_exec_removed_re.findall(d):
278 exec_files[m] = False
279 for m in property_exec_set_re.findall(d):
280 exec_files[m] = True
281 for m in exec_files:
282 files_touched.add(m)
283 f = os.path.join(tempdir, m)
284 if not os.path.exists(f):
285 data = ''
286 if m in hg_editor.repo[parentid]:
287 data = hg_editor.repo[parentid][m].data()
288 fp = opener(m, 'w')
289 fp.write(data)
290 fp.close()
291 link_files = {}
292 for m in property_special_set_re.findall(d):
293 # TODO(augie) when a symlink is removed, patching will fail.
294 # We're seeing that above - there's gotta be a better
295 # workaround than just bailing like that.
296 path = os.path.join(tempdir, m)
297 assert os.path.exists(path)
298 link_path = open(path).read()
299 link_path = link_path[len('link '):]
300 os.remove(path)
301 link_files[m] = link_path
302 files_touched.add(m)
303
304 deleted_files = set()
305 for p in r.paths:
306 if p.startswith(diff_path) and r.paths[p].action == 'D':
307 p2 = p[len(diff_path)+1:].strip('/')
308 if p2 in hg_editor.repo[parentid]:
309 deleted_files.add(p2)
310 continue
311 # If this isn't in the parent ctx, it must've been a dir
312 deleted_files.update([f for f in hg_editor.repo[parentid]
313 if f.startswith(p2 + '/')])
314 files_touched.update(deleted_files)
315
316 copies = getcopies(svn, hg_editor, branch, diff_path, r, files_touched,
317 parentid)
318
319 def filectxfn(repo, memctx, path):
320 if path in deleted_files:
321 raise IOError()
322 if path in link_files:
323 return context.memfilectx(path=path, data=link_files[path],
324 islink=True, isexec=False,
325 copied=False)
326 data = opener(path).read()
327 exe = exec_files.get(path, None)
328 if exe is None and path in hg_editor.repo[parentid]:
329 exe = 'x' in hg_editor.repo[parentid].filectx(path).flags()
330 copied = copies.get(path)
331 return context.memfilectx(path=path, data=data, islink=False,
332 isexec=exe, copied=copied)
333
334 return list(files_touched), filectxfn
179 335
180 def makecopyfinder(r, branchpath, rootdir): 336 def makecopyfinder(r, branchpath, rootdir):
181 """Return a function detecting copies. 337 """Return a function detecting copies.
182 338
183 Returned copyfinder(path) returns None if no copy information can 339 Returned copyfinder(path) returns None if no copy information can
332 branches = hg_editor.branches_in_paths(r.paths) 488 branches = hg_editor.branches_in_paths(r.paths)
333 temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp') 489 temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp')
334 if not os.path.exists(temp_location): 490 if not os.path.exists(temp_location):
335 os.makedirs(temp_location) 491 os.makedirs(temp_location)
336 for b in branches: 492 for b in branches:
337 our_tempdir = None 493 our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location)
338 diff_path = make_diff_path(b)
339 parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, b)
340 parent_ha = hg_editor.get_parent_revision(r.revnum, b) 494 parent_ha = hg_editor.get_parent_revision(r.revnum, b)
341 files_touched = set()
342 try: 495 try:
343 if br_p == b: 496 files_touched, filectxfn = stupid_diff_branchrev(
344 # letting patch handle binaries sounded 497 ui, svn, hg_editor, b, r, parent_ha, our_tempdir)
345 # cool, but it breaks patch in sad ways
346 d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
347 ignore_type=False)
348 else:
349 d = svn.get_unified_diff(diff_path, r.revnum,
350 other_path=make_diff_path(br_p),
351 other_rev=parent_rev,
352 deleted=True, ignore_type=True)
353 if d:
354 ui.status('Branch creation with mods, pulling full rev.\n')
355 raise BadPatchApply()
356
357 our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location)
358 opener = merc_util.opener(our_tempdir)
359 for m in binary_file_re.findall(d):
360 # we have to pull each binary file by hand as a fulltext,
361 # which sucks but we've got no choice
362 files_touched.add(m)
363 try:
364 f = opener(m, 'w')
365 f.write(svn.get_file(diff_path+'/'+m, r.revnum)[0])
366 f.close()
367 except IOError:
368 pass
369 d2 = empty_file_patch_wont_make_re.sub('', d)
370 d2 = property_exec_set_re.sub('', d2)
371 d2 = property_exec_removed_re.sub('', d2)
372 for f in any_file_re.findall(d):
373 if f in files_touched:
374 # this check is here because modified binary files will get
375 # created before here.
376 continue
377 files_touched.add(f)
378 data = ''
379 if f in hg_editor.repo[parent_ha]:
380 data = hg_editor.repo[parent_ha][f].data()
381 fp = opener(f, 'w')
382 fp.write(data)
383 fp.close()
384 if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
385 old_cwd = os.getcwd()
386 os.chdir(our_tempdir)
387 changed = {}
388 try:
389 patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
390 changed, strip=0)
391 except patch.PatchError:
392 # TODO: this happens if the svn server has the wrong mime
393 # type stored and doesn't know a file is binary. It would
394 # be better to do one file at a time and only do a
395 # full fetch on files that had problems.
396 os.chdir(old_cwd)
397 raise BadPatchApply()
398 for x in changed.iterkeys():
399 ui.status('M %s\n' % x)
400 files_touched.add(x)
401 os.chdir(old_cwd)
402 # if this patch didn't apply right, fall back to exporting the
403 # entire rev.
404 if patch_st == -1:
405 parent_ctx = hg_editor.repo[parent_ha]
406 parent_manifest = parent_ctx.manifest()
407 for fn in files_touched:
408 if (fn in parent_manifest and
409 'l' in parent_ctx.filectx(fn).flags()):
410 # I think this might be an underlying bug in svn -
411 # I get diffs of deleted symlinks even though I
412 # specifically said no deletes above.
413 ui.status('Pulling whole rev because of a deleted'
414 'symlink')
415 raise BadPatchApply()
416 assert False, ('This should only happen on case-insensitive'
417 ' volumes.')
418 elif patch_st == 1:
419 # When converting Django, I saw fuzz on .po files that was
420 # causing revisions to end up failing verification. If that
421 # can be fixed, maybe this won't ever be reached.
422 ui.status('There was some fuzz, not using diff after all.')
423 raise BadPatchApply()
424 else:
425 ui.status('Not using patch for %s, diff had no hunks.\n' %
426 r.revnum)
427
428 # we create the files if they don't exist here because we know
429 # that we'll never have diff info for a deleted file, so if the
430 # property is set, we should force the file to exist no matter what.
431 exec_files = {}
432 for m in property_exec_removed_re.findall(d):
433 exec_files[m] = False
434 for m in property_exec_set_re.findall(d):
435 exec_files[m] = True
436 for m in exec_files:
437 files_touched.add(m)
438 f = os.path.join(our_tempdir, m)
439 if not os.path.exists(f):
440 data = ''
441 if m in hg_editor.repo[parent_ha]:
442 data = hg_editor.repo[parent_ha][m].data()
443 fp = opener(m, 'w')
444 fp.write(data)
445 fp.close()
446 link_files = {}
447 for m in property_special_set_re.findall(d):
448 # TODO(augie) when a symlink is removed, patching will fail.
449 # We're seeing that above - there's gotta be a better
450 # workaround than just bailing like that.
451 path = os.path.join(our_tempdir, m)
452 assert os.path.exists(path)
453 link_path = open(path).read()
454 link_path = link_path[len('link '):]
455 os.remove(path)
456 link_files[m] = link_path
457 files_touched.add(m)
458
459 deleted_files = set()
460 for p in r.paths:
461 if p.startswith(diff_path) and r.paths[p].action == 'D':
462 p2 = p[len(diff_path)+1:].strip('/')
463 if p2 in hg_editor.repo[parent_ha]:
464 deleted_files.add(p2)
465 continue
466 # If this isn't in the parent ctx, it must've been a dir
467 deleted_files.update([f for f in hg_editor.repo[parent_ha]
468 if f.startswith(p2 + '/')])
469 files_touched.update(deleted_files)
470
471 copies = getcopies(svn, hg_editor, b, branches[b], r, files_touched,
472 parent_ha)
473
474 def filectxfn(repo, memctx, path):
475 if path in deleted_files:
476 raise IOError()
477 if path in link_files:
478 return context.memfilectx(path=path, data=link_files[path],
479 islink=True, isexec=False,
480 copied=False)
481 data = opener(path).read()
482 exe = exec_files.get(path, None)
483 if exe is None and path in hg_editor.repo[parent_ha]:
484 exe = 'x' in hg_editor.repo[parent_ha].filectx(path).flags()
485 copied = copies.get(path)
486 return context.memfilectx(path=path, data=data, islink=False,
487 isexec=exe, copied=copied)
488 except (core.SubversionException, 498 except (core.SubversionException,
489 BadPatchApply, 499 BadPatchApply,
490 svnwrap.SubversionRepoCanNotDiff), e: 500 svnwrap.SubversionRepoCanNotDiff), e:
491 if (hasattr(e, 'apr_err') and e.apr_err != 160013): 501 if (hasattr(e, 'apr_err') and e.apr_err != 160013):
492 raise 502 raise