Mercurial > hgsubversion
comparison fetch_command.py @ 0:f2636cfed115
Initial import of hgsubversion into a public repository.
| author | Augie Fackler <durin42@gmail.com> |
|---|---|
| date | Tue, 30 Sep 2008 11:42:52 -0500 |
| parents | |
| children | 1a5bb173170b |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:f2636cfed115 |
|---|---|
| 1 import cStringIO | |
| 2 import re | |
| 3 import operator | |
| 4 import os | |
| 5 import shutil | |
| 6 import stat | |
| 7 import tempfile | |
| 8 | |
| 9 from mercurial import patch | |
| 10 from mercurial import node | |
| 11 from mercurial import context | |
| 12 from mercurial import revlog | |
| 13 from svn import core | |
| 14 from svn import delta | |
| 15 | |
| 16 import hg_delta_editor | |
| 17 import svnwrap | |
| 18 import util | |
| 19 | |
| 20 | |
def print_your_svn_is_old_message(ui):
    """Tell the user we are degrading to the diff-based pull path.

    Emitted once when the server (or its SWIG bindings) turn out not to
    support replay, just before falling back.
    """
    msg = ("In light of that, I'll fall back and do diffs, but it won't do "
           "as good a job. You should really upgrade your server.")
    ui.status(msg)
| 24 | |
| 25 | |
@util.register_subcommand('pull')
def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None,
                    tag_locations='tags',
                    **opts):
    """Pull new revisions from Subversion.

    ui: Mercurial ui object, used for status output.
    svn_url: URL of the Subversion repository to pull from.
    hg_repo_path: local path of the Mercurial repository being built.
    skipto_rev: svn revision to start from on a fresh conversion (may arrive
        as a string from the command line; coerced to int below).
    stupid: when truthy, never use svn replay; always use the slower
        diff-based fallback (stupid_svn_server_pull_rev).
    tag_locations: comma-separated list of tag directories in the svn layout.
    """
    skipto_rev=int(skipto_rev)
    # Replay is the fast path. It is disabled explicitly (stupid=True) or
    # implicitly just below when the SWIG bindings are too old to drive it.
    have_replay = not stupid
    if have_replay and not callable(delta.svn_txdelta_apply(None, None,
                                                            None)[0]):
        # Pre-1.5 SWIG bindings hand back a non-callable txdelta handler,
        # which means replay cannot work; fall back to diffs for good.
        ui.status('You are using old Subversion SWIG bindings. Replay will not'
                  ' work until you upgrade to 1.5.0 or newer. Falling back to'
                  ' a slower method that may be buggier. Please upgrade, or'
                  ' contribute a patch to use the ctypes bindings instead'
                  ' of SWIG.')
        have_replay = False
    initializing_repo = False
    svn = svnwrap.SubversionRepo(svn_url)
    # Converted author names get "@<repo uuid>" appended as a host part.
    author_host = "@%s" % svn.uuid
    tag_locations = tag_locations.split(',')
    hg_editor = hg_delta_editor.HgChangeReceiver(hg_repo_path,
                                                 ui_=ui,
                                                 subdir=svn.subdir,
                                                 author_host=author_host,
                                                 tag_locations=tag_locations)
    if os.path.exists(hg_editor.uuid_file):
        # Resuming an earlier conversion: verify we are still pulling from
        # the same repository, then continue from the last handled revision.
        uuid = open(hg_editor.uuid_file).read()
        assert uuid == svn.uuid
        start = int(open(hg_editor.last_revision_handled_file, 'r').read())
    else:
        # Fresh conversion: record identity and bookkeeping files.
        open(hg_editor.uuid_file, 'w').write(svn.uuid)
        open(hg_editor.svn_url_file, 'w').write(svn_url)
        open(hg_editor.last_revision_handled_file, 'w').write(str(0))
        initializing_repo = True
        start = skipto_rev

    # start converting revisions
    for r in svn.revisions(start=start):
        valid = False
        hg_editor.update_branch_tag_map_for_rev(r)
        # Only convert revisions that touch at least one path the editor
        # considers part of the tracked layout.
        # NOTE(review): 'continue' is a no-op as the last statement of the
        # loop body; 'break' was probably intended once valid is True.
        for p in r.paths:
            if hg_editor._is_path_valid(p):
                valid = True
                continue
        if initializing_repo and start > 0:
            assert False, 'This feature not ready yet.'
        if valid:
            # got a 502? Try more than once!
            tries = 0
            converted = False
            while not converted and tries < 3:
                try:
                    ui.status('converting %s\n' % r)
                    if have_replay:
                        try:
                            replay_convert_rev(hg_editor, svn, r)
                        except svnwrap.SubversionRepoCanNotReplay, e:
                            # Server refuses replay: switch to the slow path
                            # permanently for the rest of this pull.
                            ui.status('%s\n' % e.message)
                            print_your_svn_is_old_message(ui)
                            have_replay = False
                            stupid_svn_server_pull_rev(ui, svn, hg_editor, r)
                    else:
                        stupid_svn_server_pull_rev(ui, svn, hg_editor, r)
                    converted = True
                    # Persist progress so an interrupted pull can resume at
                    # the right revision next time.
                    open(hg_editor.last_revision_handled_file,
                         'w').write(str(r.revnum))
                except core.SubversionException, e:
                    # Flaky proxies can return 502 on PROPFIND; retry up to
                    # 3 times, re-raising anything else.
                    if hasattr(e, 'message') and (
                        'Server sent unexpected return value (502 Bad Gateway)'
                        ' in response to PROPFIND') in e.message:
                        tries += 1
                        ui.status('Got a 502, retrying (%s)\n' % tries)
                    else:
                        raise
| 100 | |
| 101 | |
def replay_convert_rev(hg_editor, svn, r):
    """Convert svn revision r via svn's replay interface.

    Drives the delta editor for the revision, then fetches by hand any file
    contents replay could not deliver (hg_editor.missing_plaintexts), along
    with their svn:executable / svn:special properties, before committing.
    """
    hg_editor.set_current_rev(r)
    svn.get_replay(r.revnum, hg_editor)
    if hg_editor.missing_plaintexts:
        files_to_grab = set()
        dirs_to_list = []
        props = {}
        for p in hg_editor.missing_plaintexts:
            p2 = p
            if svn.subdir:
                # Strip the subdir prefix to get a repo-relative path.
                p2 = p2[len(svn.subdir)-1:]
            # this *sometimes* raises on me, and I have
            # no idea why. TODO(augie) figure out the why.
            try:
                pl = svn.proplist(p2, r.revnum, recurse=True)
            except core.SubversionException:
                # Bug fix: `pl` used to be left unbound here (NameError on
                # the first iteration) or stale from a previous iteration.
                # Treat a failed proplist as "no properties" instead.
                pl = {}
            props.update(pl)
            if p[-1] == '/':
                dirs_to_list.append(p)
            else:
                files_to_grab.add(p)
        # Breadth-first expansion of missing directories into their files.
        while dirs_to_list:
            p = dirs_to_list.pop(0)
            l = svn.list_dir(p[:-1], r.revnum)
            for f in l:
                if l[f].kind == core.svn_node_dir:
                    dirs_to_list.append(p+f+'/')
                elif l[f].kind == core.svn_node_file:
                    files_to_grab.add(p+f)
        for p in files_to_grab:
            p2 = p
            if svn.subdir:
                p2 = p2[len(svn.subdir)-1:]
            hg_editor.current_files[p] = svn.get_file(p2, r.revnum)
            hg_editor.current_files_exec[p] = False
            if p in props:
                if 'svn:executable' in props[p]:
                    hg_editor.current_files_exec[p] = True
                if 'svn:special' in props[p]:
                    hg_editor.current_files_symlink[p] = True
        hg_editor.missing_plaintexts = set()
    hg_editor.commit_current_delta()
| 146 | |
| 147 | |
# Patterns matched against `svn diff` output in stupid_svn_server_pull_rev.
# Each captures the affected file path in group 1.
# NOTE(review): the extraction this file came through may have collapsed
# leading whitespace inside the verbose patterns (svn indents the '+ *' /
# '- *' marker lines) — confirm against svn's actual diff output.

# A file svn refuses to diff because it is marked with a binary mime type.
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Added: svn:executable
\+ \*
''')

property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Deleted: svn:executable
- \*
''')

# An Index header immediately followed by another Index header: a diff entry
# with no hunks (e.g. an empty file), which would confuse patching.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Added: svn:special
\+ \*
''')

# Bug fix: this pattern is for a *removed* svn:special property, but it was
# copy-pasted from the "set" pattern above and matched 'Added: svn:special'
# alongside a '-' marker — a combination svn never emits. It must say
# 'Deleted:', parallel to property_exec_removed_re.
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Deleted: svn:special
\- \*
''')
| 179 | |
def make_diff_path(b):
    """Map a branch name to its repository-relative svn path.

    b: branch name, or None for trunk.
    Returns 'trunk' for None, otherwise 'branches/<b>'.
    """
    # Idiom fix: identity comparison with None, not equality.
    if b is None:
        return 'trunk'
    return 'branches/' + b
| 184 | |
| 185 | |
def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
    """Convert svn revision r without replay, using unified diffs.

    For each branch the revision touches: fetch a unified diff against the
    branch's parent revision, pull binary files as fulltexts, apply the diff
    in a temp directory, pick up executable/symlink property changes, and
    commit the result as a Mercurial changeset via memctx. When the diff
    cannot be applied (signalled by BadPatchApply) or the parent revision
    does not exist, fall back to exporting the entire revision.
    """
    used_diff = True
    delete_all_files = False
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths)
    temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp')
    if not os.path.exists(temp_location):
        os.makedirs(temp_location)
    for b in branches:
        # All file content for this branch/rev is staged under our_tempdir.
        our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location)
        diff_path = make_diff_path(b)
        parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, b)
        parent_ha = hg_editor.get_parent_revision(r.revnum, b)
        files_touched = set()
        link_files = {}   # path -> symlink target
        exec_files = {}   # path -> executable bit
        try:
            if br_p == b:
                # Same branch as parent: plain diff against the previous rev.
                d = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                         # letting patch handle binaries sounded
                                         # cool, but it breaks patch in sad ways
                                         ignore_type=False)
            else:
                # Branch creation: diff against the source branch. Any
                # non-empty diff means the branch was created with
                # modifications, which the patch path can't represent.
                d = svn.get_unified_diff(diff_path, r.revnum,
                                         other_path=make_diff_path(br_p),
                                         other_rev=parent_rev,
                                         deleted=True, ignore_type=True)
                if d:
                    ui.status('Branch creation with mods, pulling full rev.\n')
                    raise BadPatchApply()
            for m in binary_file_re.findall(d):
                # we have to pull each binary file by hand as a fulltext,
                # which sucks but we've got no choice
                file_path = os.path.join(our_tempdir, m)
                files_touched.add(m)
                try:
                    try:
                        os.makedirs(os.path.dirname(file_path))
                    except OSError, e:
                        # parent dir already exists
                        pass
                    f = open(file_path, 'w')
                    f.write(svn.get_file(diff_path+'/'+m, r.revnum))
                    f.close()
                except core.SubversionException, e:
                    # A binary file mentioned in the diff may have been
                    # deleted in this very revision; ignore "not found".
                    if (e.message.endswith("' path not found")
                        or e.message.startswith("File not found: revision")):
                        pass
                    else:
                        raise
            # Strip stanzas patch(1) can't consume: hunkless Index headers
            # and property-change noise.
            d2 = empty_file_patch_wont_make_re.sub('', d)
            d2 = property_exec_set_re.sub('', d2)
            d2 = property_exec_removed_re.sub('', d2)
            old_cwd = os.getcwd()
            os.chdir(our_tempdir)
            # Pre-create every file the diff mentions, seeded from the parent
            # changeset when present, so patching has something to apply to.
            for f in any_file_re.findall(d):
                files_touched.add(f)
                # this check is here because modified binary files will get
                # created before here.
                if os.path.exists(f):
                    continue
                dn = os.path.dirname(f)
                if dn and not os.path.exists(dn):
                    os.makedirs(dn)
                if f in hg_editor.repo[parent_ha].manifest():
                    data = hg_editor.repo[parent_ha].filectx(f).data()
                    fi = open(f, 'w')
                    fi.write(data)
                    fi.close()
                else:
                    open(f, 'w').close()
                if f.startswith(our_tempdir):
                    f = f[len(our_tempdir)+1:]
            os.chdir(old_cwd)
            # Only run patch if the remaining diff actually has hunk lines.
            if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0:
                old_cwd = os.getcwd()
                os.chdir(our_tempdir)
                changed = {}
                try:
                    patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                               changed, strip=0)
                except patch.PatchError:
                    # TODO: this happens if the svn server has the wrong mime
                    # type stored and doesn't know a file is binary. It would
                    # be better to do one file at a time and only do a
                    # full fetch on files that had problems.
                    os.chdir(old_cwd)
                    raise BadPatchApply()
                for x in changed.iterkeys():
                    ui.status('M %s\n' % x)
                    files_touched.add(x)
                os.chdir(old_cwd)
                # if this patch didn't apply right, fall back to exporting the
                # entire rev.
                if patch_st == -1:
                    parent_ctx = hg_editor.repo[parent_ha]
                    parent_manifest = parent_ctx.manifest()
                    for fn in files_touched:
                        if (fn in parent_manifest and
                            'l' in parent_ctx.filectx(fn).flags()):
                            # I think this might be an underlying bug in svn -
                            # I get diffs of deleted symlinks even though I
                            # specifically said no deletes above.
                            ui.status('Pulling whole rev because of a deleted'
                                      'symlink')
                            raise BadPatchApply()
                    assert False, ('This should only happen on case-insensitive'
                                   ' volumes.')
                elif patch_st == 1:
                    # When converting Django, I saw fuzz on .po files that was
                    # causing revisions to end up failing verification. If that
                    # can be fixed, maybe this won't ever be reached.
                    ui.status('There was some fuzz, not using diff after all.')
                    raise BadPatchApply()
            else:
                ui.status('Not using patch for %s, diff had no hunks.\n' %
                          r.revnum)

            # we create the files if they don't exist here because we know
            # that we'll never have diff info for a deleted file, so if the
            # property is set, we should force the file to exist no matter what.
            # NOTE(review): `d` (the diff text) is shadowed by the dirname
            # local below; it is not used as the diff again afterwards in
            # this block, but the reuse is fragile.
            for m in property_exec_removed_re.findall(d):
                f = os.path.join(our_tempdir, m)
                if not os.path.exists(f):
                    d = os.path.dirname(f)
                    if not os.path.exists(d):
                        os.makedirs(d)
                    if not m in hg_editor.repo[parent_ha].manifest():
                        open(f, 'w').close()
                    else:
                        data = hg_editor.repo[parent_ha].filectx(m).data()
                        fp = open(f, 'w')
                        fp.write(data)
                        fp.close()
                exec_files[m] = False
                files_touched.add(m)
            for m in property_exec_set_re.findall(d):
                f = os.path.join(our_tempdir, m)
                if not os.path.exists(f):
                    d = os.path.dirname(f)
                    if not os.path.exists(d):
                        os.makedirs(d)
                    if m not in hg_editor.repo[parent_ha].manifest():
                        open(f, 'w').close()
                    else:
                        data = hg_editor.repo[parent_ha].filectx(m).data()
                        fp = open(f, 'w')
                        fp.write(data)
                        fp.close()
                exec_files[m] = True
                files_touched.add(m)
            for m in property_special_set_re.findall(d):
                # TODO(augie) when a symlink is removed, patching will fail.
                # We're seeing that above - there's gotta be a better
                # workaround than just bailing like that.
                path = os.path.join(our_tempdir, m)
                assert os.path.exists(path)
                # svn stores symlinks as files whose content is 'link <target>'.
                link_path = open(path).read()
                link_path = link_path[len('link '):]
                os.remove(path)
                link_files[m] = link_path
                files_touched.add(m)
        except core.SubversionException, e:
            if (e.apr_err == 160013 or (hasattr(e, 'message') and
                'was not found in the repository at revision ' in e.message)):
                # Either this revision or the previous one does not exist.
                try:
                    ui.status("fetching entire rev previous rev does not exist.\n")
                    used_diff = False
                    svn.fetch_all_files_to_dir(diff_path, r.revnum, our_tempdir)
                except core.SubversionException, e:
                    # The branch path itself may not exist at this revision:
                    # treat everything as deleted.
                    if e.apr_err == 170000 or (e.message.startswith("URL '")
                        and e.message.endswith("' doesn't exist")):
                        delete_all_files = True
                    else:
                        raise

        except BadPatchApply, e:
            # previous rev didn't exist, so this is most likely the first
            # revision. We'll have to pull all files by hand.
            try:
                ui.status("fetching entire rev because raised.\n")
                used_diff = False
                # Discard whatever the diff path staged and re-export cleanly.
                shutil.rmtree(our_tempdir)
                os.makedirs(our_tempdir)
                svn.fetch_all_files_to_dir(diff_path, r.revnum, our_tempdir)
            except core.SubversionException, e:
                if e.apr_err == 170000 or (e.message.startswith("URL '")
                    and e.message.endswith("' doesn't exist")):
                    delete_all_files = True
                else:
                    raise
        # Record deletions from this revision's changed-path list.
        for p in r.paths:
            if p.startswith(diff_path) and r.paths[p].action == 'D':
                p2 = p[len(diff_path)+1:]
                files_touched.add(p2)
                p3 = os.path.join(our_tempdir, p2)
                if os.path.exists(p3) and not os.path.isdir(p3):
                    os.unlink(p3)
                if p2 and p2[0] == '/':
                    p2 = p2[1:]
                # If this isn't in the parent ctx, it must've been a dir
                if not p2 in hg_editor.repo[parent_ha]:
                    d_files = [f for f in hg_editor.repo[parent_ha].manifest().iterkeys()
                               if f.startswith(p2 + '/')]
                    for d in d_files:
                        files_touched.add(d)
        if delete_all_files:
            # Branch vanished at this revision: touch (and thereby delete)
            # every file from the parent changeset.
            for p in hg_editor.repo[parent_ha].manifest().iterkeys():
                files_touched.add(p)
        if not used_diff:
            # Full export path: walk the temp tree and record every file,
            # capturing symlink targets and executable bits from disk.
            for p in reduce(operator.add, [[os.path.join(x[0], y) for y in x[2]]
                                           for x in
                                           list(os.walk(our_tempdir))]):
                p_real = p[len(our_tempdir)+1:]
                if os.path.islink(p):
                    link_files[p_real] = os.readlink(p)
                exec_files[p_real] = (os.lstat(p).st_mode & 0100 != 0)
                files_touched.add(p_real)
            for p in hg_editor.repo[parent_ha].manifest().iterkeys():
                # TODO this might not be a required step.
                files_touched.add(p)
        # svn dates look like 2008-09-30T11:42:52.000000Z; convert to hg's
        # "YYYY-MM-DD HH:MM:SS -0000" form.
        date = r.date.replace('T', ' ').replace('Z', '').split('.')[0]
        date += ' -0000'
        def filectxfn(repo, memctx, path):
            # Serve file contents to memctx from the staged temp directory,
            # falling back to the parent changeset for the exec bit.
            disk_path = os.path.join(our_tempdir, path)
            if path in link_files:
                return context.memfilectx(path=path, data=link_files[path],
                                          islink=True, isexec=False,
                                          copied=False)
            fp = open(disk_path)
            exe = exec_files.get(path, None)
            if exe is None and path in hg_editor.repo[parent_ha]:
                exe = 'x' in hg_editor.repo[parent_ha].filectx(path).flags()
            return context.memfilectx(path=path, data=fp.read(), islink=False,
                                      isexec=exe, copied=False)
        extra = {}
        if b:
            extra['branch'] = b
        # Skip commit only for an empty changeset on top of the null rev.
        if parent_ha != node.nullid or files_touched:
            # TODO(augie) remove this debug code? Or maybe it's sane to have it.
            for f in files_touched:
                if f:
                    assert f[0] != '/'
            current_ctx = context.memctx(hg_editor.repo,
                                         [parent_ha, revlog.nullid],
                                         r.message or '...',
                                         files_touched,
                                         filectxfn,
                                         '%s%s' % (r.author,
                                                   hg_editor.author_host),
                                         date,
                                         extra)
            ha = hg_editor.repo.commitctx(current_ctx)
            hg_editor.revmap[r.revnum, b] = ha
            hg_editor._save_metadata()
            ui.status('committed as %s on branch %s\n' %
                      (node.hex(ha), b or 'default'))
        shutil.rmtree(our_tempdir)
| 444 | |
| 445 | |
class BadPatchApply(Exception):
    """Raised when a unified diff cannot be applied cleanly.

    Caught in stupid_svn_server_pull_rev, where it triggers a fallback to
    fetching the entire revision instead of patching.
    """
