Mercurial > hgsubversion
comparison fetch_command.py @ 0:f2636cfed115
Initial import of hgsubversion into a public repository.
author | Augie Fackler <durin42@gmail.com> |
---|---|
date | Tue, 30 Sep 2008 11:42:52 -0500 |
parents | |
children | 1a5bb173170b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f2636cfed115 |
---|---|
1 import cStringIO | |
2 import re | |
3 import operator | |
4 import os | |
5 import shutil | |
6 import stat | |
7 import tempfile | |
8 | |
9 from mercurial import patch | |
10 from mercurial import node | |
11 from mercurial import context | |
12 from mercurial import revlog | |
13 from svn import core | |
14 from svn import delta | |
15 | |
16 import hg_delta_editor | |
17 import svnwrap | |
18 import util | |
19 | |
20 | |
def print_your_svn_is_old_message(ui):
    """Tell the user that the slower diff-based fallback will be used.

    ``ui`` only needs a ``status(msg)`` method; the message is sent through
    it.
    """
    # The other status messages in this file carry an explicit '\n'; end this
    # one the same way so whatever is printed next starts on its own line.
    ui.status("In light of that, I'll fall back and do diffs, but it won't do "
              "as good a job. You should really upgrade your server.\n")
24 | |
25 | |
26 @util.register_subcommand('pull') | |
27 def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None, | |
28 tag_locations='tags', | |
29 **opts): | |
30 """Pull new revisions from Subversion. | |
31 """ | |
32 skipto_rev=int(skipto_rev) | |
33 have_replay = not stupid | |
34 if have_replay and not callable(delta.svn_txdelta_apply(None, None, | |
35 None)[0]): | |
36 ui.status('You are using old Subversion SWIG bindings. Replay will not' | |
37 ' work until you upgrade to 1.5.0 or newer. Falling back to' | |
38 ' a slower method that may be buggier. Please upgrade, or' | |
39 ' contribute a patch to use the ctypes bindings instead' | |
40 ' of SWIG.') | |
41 have_replay = False | |
42 initializing_repo = False | |
43 svn = svnwrap.SubversionRepo(svn_url) | |
44 author_host = "@%s" % svn.uuid | |
45 tag_locations = tag_locations.split(',') | |
46 hg_editor = hg_delta_editor.HgChangeReceiver(hg_repo_path, | |
47 ui_=ui, | |
48 subdir=svn.subdir, | |
49 author_host=author_host, | |
50 tag_locations=tag_locations) | |
51 if os.path.exists(hg_editor.uuid_file): | |
52 uuid = open(hg_editor.uuid_file).read() | |
53 assert uuid == svn.uuid | |
54 start = int(open(hg_editor.last_revision_handled_file, 'r').read()) | |
55 else: | |
56 open(hg_editor.uuid_file, 'w').write(svn.uuid) | |
57 open(hg_editor.svn_url_file, 'w').write(svn_url) | |
58 open(hg_editor.last_revision_handled_file, 'w').write(str(0)) | |
59 initializing_repo = True | |
60 start = skipto_rev | |
61 | |
62 # start converting revisions | |
63 for r in svn.revisions(start=start): | |
64 valid = False | |
65 hg_editor.update_branch_tag_map_for_rev(r) | |
66 for p in r.paths: | |
67 if hg_editor._is_path_valid(p): | |
68 valid = True | |
69 continue | |
70 if initializing_repo and start > 0: | |
71 assert False, 'This feature not ready yet.' | |
72 if valid: | |
73 # got a 502? Try more than once! | |
74 tries = 0 | |
75 converted = False | |
76 while not converted and tries < 3: | |
77 try: | |
78 ui.status('converting %s\n' % r) | |
79 if have_replay: | |
80 try: | |
81 replay_convert_rev(hg_editor, svn, r) | |
82 except svnwrap.SubversionRepoCanNotReplay, e: | |
83 ui.status('%s\n' % e.message) | |
84 print_your_svn_is_old_message(ui) | |
85 have_replay = False | |
86 stupid_svn_server_pull_rev(ui, svn, hg_editor, r) | |
87 else: | |
88 stupid_svn_server_pull_rev(ui, svn, hg_editor, r) | |
89 converted = True | |
90 open(hg_editor.last_revision_handled_file, | |
91 'w').write(str(r.revnum)) | |
92 except core.SubversionException, e: | |
93 if hasattr(e, 'message') and ( | |
94 'Server sent unexpected return value (502 Bad Gateway)' | |
95 ' in response to PROPFIND') in e.message: | |
96 tries += 1 | |
97 ui.status('Got a 502, retrying (%s)\n' % tries) | |
98 else: | |
99 raise | |
100 | |
101 | |
def replay_convert_rev(hg_editor, svn, r):
    """Convert revision ``r`` by replaying it into ``hg_editor``.

    After ``svn.get_replay()`` has driven the editor, any paths the editor
    listed in ``missing_plaintexts`` are fetched in full: directories (paths
    ending in '/') are listed recursively with ``svn.list_dir()`` and every
    file found is read with ``svn.get_file()``.  The exec and symlink flags
    of those files are taken from the ``svn:executable`` / ``svn:special``
    properties returned by ``svn.proplist()``.  Finally the editor is asked
    to commit the accumulated delta.
    """
    hg_editor.set_current_rev(r)
    svn.get_replay(r.revnum, hg_editor)
    if hg_editor.missing_plaintexts:
        files_to_grab = set()
        dirs_to_list = []
        props = {}
        for p in hg_editor.missing_plaintexts:
            p2 = p
            if svn.subdir:
                p2 = p2[len(svn.subdir)-1:]
            # this *sometimes* raises on me, and I have
            # no idea why. TODO(augie) figure out the why.
            try:
                pl = svn.proplist(p2, r.revnum, recurse=True)
            except core.SubversionException:
                # No properties could be read for this path.  Use an empty
                # mapping so we neither hit an unbound name on the first path
                # nor re-apply the previous path's properties.
                pl = {}
            props.update(pl)
            if p[-1] == '/':
                dirs_to_list.append(p)
            else:
                files_to_grab.add(p)
        # Walk the directories, queueing sub-directories as they are found.
        while dirs_to_list:
            p = dirs_to_list.pop(0)
            l = svn.list_dir(p[:-1], r.revnum)
            for f in l:
                if l[f].kind == core.svn_node_dir:
                    dirs_to_list.append(p+f+'/')
                elif l[f].kind == core.svn_node_file:
                    files_to_grab.add(p+f)
        for p in files_to_grab:
            p2 = p
            if svn.subdir:
                p2 = p2[len(svn.subdir)-1:]
            hg_editor.current_files[p] = svn.get_file(p2, r.revnum)
            hg_editor.current_files_exec[p] = False
            if p in props:
                if 'svn:executable' in props[p]:
                    hg_editor.current_files_exec[p] = True
                if 'svn:special' in props[p]:
                    hg_editor.current_files_symlink[p] = True
        hg_editor.missing_plaintexts = set()
    hg_editor.commit_current_delta()
146 | |
147 | |
# Patterns used to pick apart unified-diff text.  Each one captures the path
# named on the "Index:" / "Property changes on:" header line.
#
# NOTE(review): the value lines of the property patterns ("+ *" / "- *")
# accept optional leading spaces, since the property value line may be
# indented in the diff text -- confirm against real server output.

# A file the diff refused to show because it is marked binary.
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

# svn:executable was added to a path.
property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Added: svn:executable
[ ]*\+ \*
''')

# svn:executable was removed from a path.
property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Deleted: svn:executable
[ ]*- \*
''')

# An "Index:" header immediately followed by the next "Index:" header, i.e.
# an entry that carries no hunks at all.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

# Every "Index:" header in the diff.
any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

# svn:special was added to a path.
property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Added: svn:special
[ ]*\+ \*
''')

# svn:special was removed from a path.  This must look for "Deleted:", in
# parallel with property_exec_removed_re; matching "Added:" here could never
# describe a removal.
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Deleted: svn:special
[ ]*\- \*
''')
179 | |
def make_diff_path(b):
    """Return the repository path for branch ``b``.

    ``None`` maps to ``'trunk'``; any other value is appended to
    ``'branches/'``.
    """
    # Identity test: None is a singleton, and ``==`` could be fooled by an
    # object with a custom __eq__.
    if b is None:
        return 'trunk'
    return 'branches/' + b
184 | |
185 | |
186 def stupid_svn_server_pull_rev(ui, svn, hg_editor, r): | |
187 used_diff = True | |
188 delete_all_files = False | |
189 # this server fails at replay | |
190 branches = hg_editor.branches_in_paths(r.paths) | |
191 temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp') | |
192 if not os.path.exists(temp_location): | |
193 os.makedirs(temp_location) | |
194 for b in branches: | |
195 our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location) | |
196 diff_path = make_diff_path(b) | |
197 parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, b) | |
198 parent_ha = hg_editor.get_parent_revision(r.revnum, b) | |
199 files_touched = set() | |
200 link_files = {} | |
201 exec_files = {} | |
202 try: | |
203 if br_p == b: | |
204 d = svn.get_unified_diff(diff_path, r.revnum, deleted=False, | |
205 # letting patch handle binaries sounded | |
206 # cool, but it breaks patch in sad ways | |
207 ignore_type=False) | |
208 else: | |
209 d = svn.get_unified_diff(diff_path, r.revnum, | |
210 other_path=make_diff_path(br_p), | |
211 other_rev=parent_rev, | |
212 deleted=True, ignore_type=True) | |
213 if d: | |
214 ui.status('Branch creation with mods, pulling full rev.\n') | |
215 raise BadPatchApply() | |
216 for m in binary_file_re.findall(d): | |
217 # we have to pull each binary file by hand as a fulltext, | |
218 # which sucks but we've got no choice | |
219 file_path = os.path.join(our_tempdir, m) | |
220 files_touched.add(m) | |
221 try: | |
222 try: | |
223 os.makedirs(os.path.dirname(file_path)) | |
224 except OSError, e: | |
225 pass | |
226 f = open(file_path, 'w') | |
227 f.write(svn.get_file(diff_path+'/'+m, r.revnum)) | |
228 f.close() | |
229 except core.SubversionException, e: | |
230 if (e.message.endswith("' path not found") | |
231 or e.message.startswith("File not found: revision")): | |
232 pass | |
233 else: | |
234 raise | |
235 d2 = empty_file_patch_wont_make_re.sub('', d) | |
236 d2 = property_exec_set_re.sub('', d2) | |
237 d2 = property_exec_removed_re.sub('', d2) | |
238 old_cwd = os.getcwd() | |
239 os.chdir(our_tempdir) | |
240 for f in any_file_re.findall(d): | |
241 files_touched.add(f) | |
242 # this check is here because modified binary files will get | |
243 # created before here. | |
244 if os.path.exists(f): | |
245 continue | |
246 dn = os.path.dirname(f) | |
247 if dn and not os.path.exists(dn): | |
248 os.makedirs(dn) | |
249 if f in hg_editor.repo[parent_ha].manifest(): | |
250 data = hg_editor.repo[parent_ha].filectx(f).data() | |
251 fi = open(f, 'w') | |
252 fi.write(data) | |
253 fi.close() | |
254 else: | |
255 open(f, 'w').close() | |
256 if f.startswith(our_tempdir): | |
257 f = f[len(our_tempdir)+1:] | |
258 os.chdir(old_cwd) | |
259 if d2.strip() and len(re.findall('\n[-+]', d2.strip())) > 0: | |
260 old_cwd = os.getcwd() | |
261 os.chdir(our_tempdir) | |
262 changed = {} | |
263 try: | |
264 patch_st = patch.applydiff(ui, cStringIO.StringIO(d2), | |
265 changed, strip=0) | |
266 except patch.PatchError: | |
267 # TODO: this happens if the svn server has the wrong mime | |
268 # type stored and doesn't know a file is binary. It would | |
269 # be better to do one file at a time and only do a | |
270 # full fetch on files that had problems. | |
271 os.chdir(old_cwd) | |
272 raise BadPatchApply() | |
273 for x in changed.iterkeys(): | |
274 ui.status('M %s\n' % x) | |
275 files_touched.add(x) | |
276 os.chdir(old_cwd) | |
277 # if this patch didn't apply right, fall back to exporting the | |
278 # entire rev. | |
279 if patch_st == -1: | |
280 parent_ctx = hg_editor.repo[parent_ha] | |
281 parent_manifest = parent_ctx.manifest() | |
282 for fn in files_touched: | |
283 if (fn in parent_manifest and | |
284 'l' in parent_ctx.filectx(fn).flags()): | |
285 # I think this might be an underlying bug in svn - | |
286 # I get diffs of deleted symlinks even though I | |
287 # specifically said no deletes above. | |
288 ui.status('Pulling whole rev because of a deleted' | |
289 'symlink') | |
290 raise BadPatchApply() | |
291 assert False, ('This should only happen on case-insensitive' | |
292 ' volumes.') | |
293 elif patch_st == 1: | |
294 # When converting Django, I saw fuzz on .po files that was | |
295 # causing revisions to end up failing verification. If that | |
296 # can be fixed, maybe this won't ever be reached. | |
297 ui.status('There was some fuzz, not using diff after all.') | |
298 raise BadPatchApply() | |
299 else: | |
300 ui.status('Not using patch for %s, diff had no hunks.\n' % | |
301 r.revnum) | |
302 | |
303 # we create the files if they don't exist here because we know | |
304 # that we'll never have diff info for a deleted file, so if the | |
305 # property is set, we should force the file to exist no matter what. | |
306 for m in property_exec_removed_re.findall(d): | |
307 f = os.path.join(our_tempdir, m) | |
308 if not os.path.exists(f): | |
309 d = os.path.dirname(f) | |
310 if not os.path.exists(d): | |
311 os.makedirs(d) | |
312 if not m in hg_editor.repo[parent_ha].manifest(): | |
313 open(f, 'w').close() | |
314 else: | |
315 data = hg_editor.repo[parent_ha].filectx(m).data() | |
316 fp = open(f, 'w') | |
317 fp.write(data) | |
318 fp.close() | |
319 exec_files[m] = False | |
320 files_touched.add(m) | |
321 for m in property_exec_set_re.findall(d): | |
322 f = os.path.join(our_tempdir, m) | |
323 if not os.path.exists(f): | |
324 d = os.path.dirname(f) | |
325 if not os.path.exists(d): | |
326 os.makedirs(d) | |
327 if m not in hg_editor.repo[parent_ha].manifest(): | |
328 open(f, 'w').close() | |
329 else: | |
330 data = hg_editor.repo[parent_ha].filectx(m).data() | |
331 fp = open(f, 'w') | |
332 fp.write(data) | |
333 fp.close() | |
334 exec_files[m] = True | |
335 files_touched.add(m) | |
336 for m in property_special_set_re.findall(d): | |
337 # TODO(augie) when a symlink is removed, patching will fail. | |
338 # We're seeing that above - there's gotta be a better | |
339 # workaround than just bailing like that. | |
340 path = os.path.join(our_tempdir, m) | |
341 assert os.path.exists(path) | |
342 link_path = open(path).read() | |
343 link_path = link_path[len('link '):] | |
344 os.remove(path) | |
345 link_files[m] = link_path | |
346 files_touched.add(m) | |
347 except core.SubversionException, e: | |
348 if (e.apr_err == 160013 or (hasattr(e, 'message') and | |
349 'was not found in the repository at revision ' in e.message)): | |
350 # Either this revision or the previous one does not exist. | |
351 try: | |
352 ui.status("fetching entire rev previous rev does not exist.\n") | |
353 used_diff = False | |
354 svn.fetch_all_files_to_dir(diff_path, r.revnum, our_tempdir) | |
355 except core.SubversionException, e: | |
356 if e.apr_err == 170000 or (e.message.startswith("URL '") | |
357 and e.message.endswith("' doesn't exist")): | |
358 delete_all_files = True | |
359 else: | |
360 raise | |
361 | |
362 except BadPatchApply, e: | |
363 # previous rev didn't exist, so this is most likely the first | |
364 # revision. We'll have to pull all files by hand. | |
365 try: | |
366 ui.status("fetching entire rev because raised.\n") | |
367 used_diff = False | |
368 shutil.rmtree(our_tempdir) | |
369 os.makedirs(our_tempdir) | |
370 svn.fetch_all_files_to_dir(diff_path, r.revnum, our_tempdir) | |
371 except core.SubversionException, e: | |
372 if e.apr_err == 170000 or (e.message.startswith("URL '") | |
373 and e.message.endswith("' doesn't exist")): | |
374 delete_all_files = True | |
375 else: | |
376 raise | |
377 for p in r.paths: | |
378 if p.startswith(diff_path) and r.paths[p].action == 'D': | |
379 p2 = p[len(diff_path)+1:] | |
380 files_touched.add(p2) | |
381 p3 = os.path.join(our_tempdir, p2) | |
382 if os.path.exists(p3) and not os.path.isdir(p3): | |
383 os.unlink(p3) | |
384 if p2 and p2[0] == '/': | |
385 p2 = p2[1:] | |
386 # If this isn't in the parent ctx, it must've been a dir | |
387 if not p2 in hg_editor.repo[parent_ha]: | |
388 d_files = [f for f in hg_editor.repo[parent_ha].manifest().iterkeys() | |
389 if f.startswith(p2 + '/')] | |
390 for d in d_files: | |
391 files_touched.add(d) | |
392 if delete_all_files: | |
393 for p in hg_editor.repo[parent_ha].manifest().iterkeys(): | |
394 files_touched.add(p) | |
395 if not used_diff: | |
396 for p in reduce(operator.add, [[os.path.join(x[0], y) for y in x[2]] | |
397 for x in | |
398 list(os.walk(our_tempdir))]): | |
399 p_real = p[len(our_tempdir)+1:] | |
400 if os.path.islink(p): | |
401 link_files[p_real] = os.readlink(p) | |
402 exec_files[p_real] = (os.lstat(p).st_mode & 0100 != 0) | |
403 files_touched.add(p_real) | |
404 for p in hg_editor.repo[parent_ha].manifest().iterkeys(): | |
405 # TODO this might not be a required step. | |
406 files_touched.add(p) | |
407 date = r.date.replace('T', ' ').replace('Z', '').split('.')[0] | |
408 date += ' -0000' | |
409 def filectxfn(repo, memctx, path): | |
410 disk_path = os.path.join(our_tempdir, path) | |
411 if path in link_files: | |
412 return context.memfilectx(path=path, data=link_files[path], | |
413 islink=True, isexec=False, | |
414 copied=False) | |
415 fp = open(disk_path) | |
416 exe = exec_files.get(path, None) | |
417 if exe is None and path in hg_editor.repo[parent_ha]: | |
418 exe = 'x' in hg_editor.repo[parent_ha].filectx(path).flags() | |
419 return context.memfilectx(path=path, data=fp.read(), islink=False, | |
420 isexec=exe, copied=False) | |
421 extra = {} | |
422 if b: | |
423 extra['branch'] = b | |
424 if parent_ha != node.nullid or files_touched: | |
425 # TODO(augie) remove this debug code? Or maybe it's sane to have it. | |
426 for f in files_touched: | |
427 if f: | |
428 assert f[0] != '/' | |
429 current_ctx = context.memctx(hg_editor.repo, | |
430 [parent_ha, revlog.nullid], | |
431 r.message or '...', | |
432 files_touched, | |
433 filectxfn, | |
434 '%s%s' % (r.author, | |
435 hg_editor.author_host), | |
436 date, | |
437 extra) | |
438 ha = hg_editor.repo.commitctx(current_ctx) | |
439 hg_editor.revmap[r.revnum, b] = ha | |
440 hg_editor._save_metadata() | |
441 ui.status('committed as %s on branch %s\n' % | |
442 (node.hex(ha), b or 'default')) | |
443 shutil.rmtree(our_tempdir) | |
444 | |
445 | |
class BadPatchApply(Exception):
    """Signals that the diff for a revision cannot be used.

    Raised while converting a revision from a unified diff; the handler in
    this file reacts by fetching every file of the revision instead.
    """