comparison fetch_command.py @ 0:f2636cfed115

Initial import of hgsubversion into a public repository.
author Augie Fackler <durin42@gmail.com>
date Tue, 30 Sep 2008 11:42:52 -0500
parents
children 1a5bb173170b
comparison
equal deleted inserted replaced
-1:000000000000 0:f2636cfed115
1 import cStringIO
2 import re
3 import operator
4 import os
5 import shutil
6 import stat
7 import tempfile
8
9 from mercurial import patch
10 from mercurial import node
11 from mercurial import context
12 from mercurial import revlog
13 from svn import core
14 from svn import delta
15
16 import hg_delta_editor
17 import svnwrap
18 import util
19
20
def print_your_svn_is_old_message(ui):
    """Tell the user that conversion is falling back to diff-based pulls.

    ``ui`` only needs a ``status(text)`` method. The text is terminated with
    a newline so that the next status message starts on a line of its own.
    """
    ui.status("In light of that, I'll fall back and do diffs, but it won't do "
              "as good a job. You should really upgrade your server.\n")
24
25
def _read_text(path):
    """Return the whole content of the file at ``path``."""
    with open(path, 'r') as fp:
        return fp.read()


def _write_text(path, text):
    """Replace the content of the file at ``path`` with ``text``."""
    with open(path, 'w') as fp:
        fp.write(text)


@util.register_subcommand('pull')
def fetch_revisions(ui, svn_url, hg_repo_path, skipto_rev=0, stupid=None,
                    tag_locations='tags',
                    **opts):
    """Pull new revisions from Subversion.

    ui: object used for status output.
    svn_url: URL handed to svnwrap.SubversionRepo.
    hg_repo_path: path handed to hg_delta_editor.HgChangeReceiver.
    skipto_rev: revision to start from when the repository is first
        initialised (converted with int()). A non-zero value currently
        trips the 'This feature not ready yet.' assertion.
    stupid: when true, never use replay; always use the diff-based pull.
    tag_locations: comma-separated list passed on (split) to the receiver.

    Raises core.SubversionException when a revision cannot be converted,
    including when a "502 Bad Gateway" PROPFIND failure persists for three
    attempts. Previously such a revision was skipped without any error.
    """
    max_tries = 3
    skipto_rev = int(skipto_rev)
    have_replay = not stupid
    if have_replay and not callable(delta.svn_txdelta_apply(None, None,
                                                            None)[0]):
        ui.status('You are using old Subversion SWIG bindings. Replay will not'
                  ' work until you upgrade to 1.5.0 or newer. Falling back to'
                  ' a slower method that may be buggier. Please upgrade, or'
                  ' contribute a patch to use the ctypes bindings instead'
                  ' of SWIG.\n')
        have_replay = False
    initializing_repo = False
    svn = svnwrap.SubversionRepo(svn_url)
    author_host = "@%s" % svn.uuid
    tag_locations = tag_locations.split(',')
    hg_editor = hg_delta_editor.HgChangeReceiver(hg_repo_path,
                                                 ui_=ui,
                                                 subdir=svn.subdir,
                                                 author_host=author_host,
                                                 tag_locations=tag_locations)
    if os.path.exists(hg_editor.uuid_file):
        # Resuming: the stored UUID must match the server we are talking to.
        uuid = _read_text(hg_editor.uuid_file)
        assert uuid == svn.uuid
        start = int(_read_text(hg_editor.last_revision_handled_file))
    else:
        _write_text(hg_editor.uuid_file, svn.uuid)
        _write_text(hg_editor.svn_url_file, svn_url)
        _write_text(hg_editor.last_revision_handled_file, str(0))
        initializing_repo = True
        start = skipto_rev

    # start converting revisions
    for r in svn.revisions(start=start):
        hg_editor.update_branch_tag_map_for_rev(r)
        # A revision is only converted if it touches at least one path the
        # receiver considers valid; the first hit settles it.
        valid = False
        for p in r.paths:
            if hg_editor._is_path_valid(p):
                valid = True
                break
        if initializing_repo and start > 0:
            assert False, 'This feature not ready yet.'
        if not valid:
            continue
        # got a 502? Try more than once!
        tries = 0
        converted = False
        while not converted:
            try:
                ui.status('converting %s\n' % r)
                if have_replay:
                    try:
                        replay_convert_rev(hg_editor, svn, r)
                    except svnwrap.SubversionRepoCanNotReplay as e:
                        # Remember the failure so later revisions go
                        # straight to the diff-based path.
                        ui.status('%s\n' % e.message)
                        print_your_svn_is_old_message(ui)
                        have_replay = False
                        stupid_svn_server_pull_rev(ui, svn, hg_editor, r)
                else:
                    stupid_svn_server_pull_rev(ui, svn, hg_editor, r)
                converted = True
                _write_text(hg_editor.last_revision_handled_file,
                            str(r.revnum))
            except core.SubversionException as e:
                is_502 = hasattr(e, 'message') and (
                    'Server sent unexpected return value (502 Bad Gateway)'
                    ' in response to PROPFIND') in e.message
                tries += 1
                # Give up loudly once the retries are exhausted: moving on
                # to the next revision would leave a hole in the history.
                if not is_502 or tries >= max_tries:
                    raise
                ui.status('Got a 502, retrying (%s)\n' % tries)
100
101
def replay_convert_rev(hg_editor, svn, r):
    """Convert revision ``r`` by replaying it from ``svn`` into ``hg_editor``.

    After the replay, every path listed in ``hg_editor.missing_plaintexts``
    is fetched in full: directories (paths ending in '/') are listed
    recursively via ``svn.list_dir`` and each file found is read with
    ``svn.get_file``. The 'svn:executable' and 'svn:special' properties
    returned by ``svn.proplist`` set the exec / symlink flags on the editor.
    Finally the accumulated delta is committed.

    A failing ``svn.proplist`` call is tolerated: the affected path simply
    contributes no properties.
    """
    def strip_subdir(path):
        # NOTE(review): the "- 1" keeps one character of the prefix,
        # presumably the separating '/'; confirm against the format of
        # svn.subdir.
        if svn.subdir:
            return path[len(svn.subdir)-1:]
        return path

    hg_editor.set_current_rev(r)
    svn.get_replay(r.revnum, hg_editor)
    if hg_editor.missing_plaintexts:
        files_to_grab = set()
        dirs_to_list = []
        props = {}
        for p in hg_editor.missing_plaintexts:
            # this *sometimes* raises on me, and I have
            # no idea why. TODO(augie) figure out the why.
            try:
                pl = svn.proplist(strip_subdir(p), r.revnum, recurse=True)
            except core.SubversionException:
                # Without this fallback ``pl`` would be unbound on the first
                # path, or stale from the previous one.
                pl = {}
            props.update(pl)
            if p[-1] == '/':
                dirs_to_list.append(p)
            else:
                files_to_grab.add(p)
        # Breadth-first expansion of the missing directories into files.
        while dirs_to_list:
            p = dirs_to_list.pop(0)
            entries = svn.list_dir(p[:-1], r.revnum)
            for name in entries:
                kind = entries[name].kind
                if kind == core.svn_node_dir:
                    dirs_to_list.append(p + name + '/')
                elif kind == core.svn_node_file:
                    files_to_grab.add(p + name)
        for p in files_to_grab:
            hg_editor.current_files[p] = svn.get_file(strip_subdir(p),
                                                      r.revnum)
            hg_editor.current_files_exec[p] = False
            if p in props:
                if 'svn:executable' in props[p]:
                    hg_editor.current_files_exec[p] = True
                if 'svn:special' in props[p]:
                    hg_editor.current_files_symlink[p] = True
        hg_editor.missing_plaintexts = set()
    hg_editor.commit_current_delta()
146
147
# Patterns for picking apart the text of a Subversion unified diff.
# In every pattern group 1 is the path named on the header line.
# The property value lines ("   + *" / "   - *") are indented by three
# spaces in svn's diff output, hence the leading spaces in the patterns.

# An "Index:" entry whose body says the file is binary.
binary_file_re = re.compile(r'''Index: ([^\n]*)
=*
Cannot display: file marked as a binary type.''')

# svn:executable was added to a path.
property_exec_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Added: svn:executable
   \+ \*
''')

# svn:executable was removed from a path.
property_exec_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Deleted: svn:executable
   - \*
''')

# An "Index:" header immediately followed by the next "Index:" header,
# i.e. an entry without any hunks.
empty_file_patch_wont_make_re = re.compile(r'''Index: ([^\n]*)\n=*\n(?=Index:)''')

# Any "Index:" header at the start of a line.
any_file_re = re.compile(r'''^Index: ([^\n]*)\n=*\n''', re.MULTILINE)

# svn:special (used for symlinks) was added to a path.
property_special_set_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Added: svn:special
   \+ \*
''')

# svn:special was removed from a path. This used to repeat the "Added:"
# header of the pattern above and therefore could never match a removal.
property_special_removed_re = re.compile(r'''Property changes on: ([^\n]*)
_*
Deleted: svn:special
   - \*
''')
179
def make_diff_path(b):
    """Return the Subversion path for branch name ``b``.

    ``None`` stands for the trunk; any other value is placed under
    'branches/'.
    """
    if b is None:
        return 'trunk'
    return 'branches/' + b
184
185
def _restore_file(parent_ctx, name, dest):
    """Create ``dest`` with ``name``'s content from ``parent_ctx``.

    When ``name`` is not in the parent manifest an empty file is created.
    Missing parent directories of ``dest`` are created first.
    """
    dest_dir = os.path.dirname(dest)
    if dest_dir and not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    if name in parent_ctx.manifest():
        data = parent_ctx.filectx(name).data()
    else:
        data = ''
    fp = open(dest, 'wb')
    try:
        fp.write(data)
    finally:
        fp.close()


def _fetch_whole_rev(svn, diff_path, revnum, dest):
    """Export all files of ``diff_path`` at ``revnum`` into ``dest``.

    Returns True when Subversion reports that the URL does not exist (the
    caller then marks every parent file as touched), False on success. Any
    other SubversionException propagates.
    """
    try:
        svn.fetch_all_files_to_dir(diff_path, revnum, dest)
    except core.SubversionException as e:
        # NOTE(review): 170000 appears to be SVN_ERR_RA_ILLEGAL_URL; confirm.
        if e.apr_err == 170000 or (e.message.startswith("URL '")
                                   and e.message.endswith("' doesn't exist")):
            return True
        raise
    return False


def _apply_branch_diff(ui, svn, hg_editor, r, b, parent_rev, br_p,
                       parent_ctx, our_tempdir,
                       files_touched, link_files, exec_files):
    """Rebuild the files changed on branch ``b`` in ``our_tempdir`` by
    applying the revision's unified diff.

    Results are recorded in ``files_touched`` (set), ``link_files`` (path ->
    link target) and ``exec_files`` (path -> bool). Raises BadPatchApply
    when the diff cannot be trusted and the whole revision should be
    fetched instead; SubversionExceptions from the server propagate.
    """
    diff_path = make_diff_path(b)
    if br_p == b:
        # letting patch handle binaries sounded cool, but it breaks patch
        # in sad ways, hence ignore_type=False
        diff = svn.get_unified_diff(diff_path, r.revnum, deleted=False,
                                    ignore_type=False)
    else:
        diff = svn.get_unified_diff(diff_path, r.revnum,
                                    other_path=make_diff_path(br_p),
                                    other_rev=parent_rev,
                                    deleted=True, ignore_type=True)
        if diff:
            ui.status('Branch creation with mods, pulling full rev.\n')
            raise BadPatchApply()

    for m in binary_file_re.findall(diff):
        # we have to pull each binary file by hand as a fulltext,
        # which sucks but we've got no choice
        file_path = os.path.join(our_tempdir, m)
        files_touched.add(m)
        try:
            try:
                os.makedirs(os.path.dirname(file_path))
            except OSError:
                # typically: the directory already exists
                pass
            # The file is opened before the fetch on purpose: as before, a
            # failed fetch leaves an empty file behind.
            fp = open(file_path, 'wb')
            try:
                fp.write(svn.get_file(diff_path + '/' + m, r.revnum))
            finally:
                fp.close()
        except core.SubversionException as e:
            if not (e.message.endswith("' path not found")
                    or e.message.startswith("File not found: revision")):
                raise

    d2 = empty_file_patch_wont_make_re.sub('', diff)
    d2 = property_exec_set_re.sub('', d2)
    d2 = property_exec_removed_re.sub('', d2)

    # Seed every file named in the diff with its parent content so that
    # patch has something to apply hunks to.
    for name in any_file_re.findall(diff):
        files_touched.add(name)
        dest = os.path.join(our_tempdir, name)
        # this check is here because modified binary files will get
        # created before here.
        if not os.path.exists(dest):
            _restore_file(parent_ctx, name, dest)

    stripped = d2.strip()
    if stripped and re.search('\n[-+]', stripped):
        changed = {}
        old_cwd = os.getcwd()
        # patch.applydiff is run with the temp dir as working directory.
        os.chdir(our_tempdir)
        try:
            patch_st = patch.applydiff(ui, cStringIO.StringIO(d2),
                                       changed, strip=0)
        except patch.PatchError:
            # TODO: this happens if the svn server has the wrong mime
            # type stored and doesn't know a file is binary. It would
            # be better to do one file at a time and only do a
            # full fetch on files that had problems.
            raise BadPatchApply()
        finally:
            os.chdir(old_cwd)
        for x in changed:
            ui.status('M %s\n' % x)
            files_touched.add(x)
        # if this patch didn't apply right, fall back to exporting the
        # entire rev.
        if patch_st == -1:
            parent_manifest = parent_ctx.manifest()
            for fn in files_touched:
                if (fn in parent_manifest and
                        'l' in parent_ctx.filectx(fn).flags()):
                    # I think this might be an underlying bug in svn -
                    # I get diffs of deleted symlinks even though I
                    # specifically said no deletes above.
                    ui.status('Pulling whole rev because of a deleted'
                              ' symlink\n')
                    raise BadPatchApply()
            assert False, ('This should only happen on case-insensitive'
                           ' volumes.')
        elif patch_st == 1:
            # When converting Django, I saw fuzz on .po files that was
            # causing revisions to end up failing verification. If that
            # can be fixed, maybe this won't ever be reached.
            ui.status('There was some fuzz, not using diff after all.\n')
            raise BadPatchApply()
    else:
        ui.status('Not using patch for %s, diff had no hunks.\n' %
                  r.revnum)

    # we create the files if they don't exist here because we know
    # that we'll never have diff info for a deleted file, so if the
    # property is set, we should force the file to exist no matter what.
    # Removals are processed first so that a later "set" wins.
    for is_exec, prop_re in ((False, property_exec_removed_re),
                             (True, property_exec_set_re)):
        for m in prop_re.findall(diff):
            dest = os.path.join(our_tempdir, m)
            if not os.path.exists(dest):
                _restore_file(parent_ctx, m, dest)
            exec_files[m] = is_exec
            files_touched.add(m)

    for m in property_special_set_re.findall(diff):
        # TODO(augie) when a symlink is removed, patching will fail.
        # We're seeing that above - there's gotta be a better
        # workaround than just bailing like that.
        path = os.path.join(our_tempdir, m)
        assert os.path.exists(path)
        fp = open(path, 'rb')
        try:
            link_path = fp.read()
        finally:
            fp.close()
        # the file content is 'link <target>'; keep only the target
        link_path = link_path[len('link '):]
        os.remove(path)
        link_files[m] = link_path
        files_touched.add(m)


def _stupid_pull_branch(ui, svn, hg_editor, r, b, our_tempdir):
    """Convert revision ``r`` for the single branch ``b`` and commit it.

    ``our_tempdir`` is an existing scratch directory owned by the caller.
    """
    # These flags are per branch: a fallback taken for one branch must not
    # influence how the next branch is committed.
    used_diff = True
    delete_all_files = False
    diff_path = make_diff_path(b)
    parent_rev, br_p = hg_editor.get_parent_svn_branch_and_rev(r.revnum, b)
    parent_ha = hg_editor.get_parent_revision(r.revnum, b)
    parent_ctx = hg_editor.repo[parent_ha]
    files_touched = set()
    link_files = {}
    exec_files = {}
    try:
        _apply_branch_diff(ui, svn, hg_editor, r, b, parent_rev, br_p,
                           parent_ctx, our_tempdir,
                           files_touched, link_files, exec_files)
    except core.SubversionException as e:
        # NOTE(review): 160013 appears to be SVN_ERR_FS_NOT_FOUND; confirm.
        if not (e.apr_err == 160013 or (hasattr(e, 'message') and
                'was not found in the repository at revision ' in e.message)):
            # Anything else (e.g. a transient server error) must reach the
            # caller instead of producing a partial commit.
            raise
        # Either this revision or the previous one does not exist.
        ui.status("fetching entire rev previous rev does not exist.\n")
        used_diff = False
        delete_all_files = _fetch_whole_rev(svn, diff_path, r.revnum,
                                            our_tempdir)
    except BadPatchApply:
        # The diff could not be used; start over from a clean directory
        # and pull all files by hand.
        ui.status("fetching entire rev because raised.\n")
        used_diff = False
        shutil.rmtree(our_tempdir)
        os.makedirs(our_tempdir)
        delete_all_files = _fetch_whole_rev(svn, diff_path, r.revnum,
                                            our_tempdir)

    for p in r.paths:
        if p.startswith(diff_path) and r.paths[p].action == 'D':
            p2 = p[len(diff_path)+1:]
            files_touched.add(p2)
            p3 = os.path.join(our_tempdir, p2)
            if os.path.exists(p3) and not os.path.isdir(p3):
                os.unlink(p3)
            if p2 and p2[0] == '/':
                p2 = p2[1:]
            # If this isn't in the parent ctx, it must've been a dir
            if p2 not in parent_ctx:
                prefix = p2 + '/'
                files_touched.update(f for f in parent_ctx.manifest()
                                     if f.startswith(prefix))
    if delete_all_files:
        files_touched.update(parent_ctx.manifest())
    if not used_diff:
        for dirpath, dirnames, filenames in os.walk(our_tempdir):
            for name in filenames:
                p = os.path.join(dirpath, name)
                p_real = p[len(our_tempdir)+1:]
                if os.path.islink(p):
                    link_files[p_real] = os.readlink(p)
                exec_files[p_real] = (os.lstat(p).st_mode
                                      & stat.S_IXUSR != 0)
                files_touched.add(p_real)
        # TODO this might not be a required step.
        files_touched.update(parent_ctx.manifest())

    # e.g. '2008-09-30T16:42:52.123456Z' -> '2008-09-30 16:42:52 -0000'
    date = r.date.replace('T', ' ').replace('Z', '').split('.')[0]
    date += ' -0000'

    def filectxfn(repo, memctx, path):
        if path in link_files:
            return context.memfilectx(path=path, data=link_files[path],
                                      islink=True, isexec=False,
                                      copied=False)
        # NOTE(review): an IOError for a path missing from the temp dir is
        # allowed to propagate; presumably memctx treats that as "file
        # removed" - confirm against the Mercurial version in use.
        fp = open(os.path.join(our_tempdir, path), 'rb')
        try:
            data = fp.read()
        finally:
            fp.close()
        exe = exec_files.get(path, None)
        if exe is None and path in parent_ctx:
            exe = 'x' in parent_ctx.filectx(path).flags()
        return context.memfilectx(path=path, data=data, islink=False,
                                  isexec=exe, copied=False)

    extra = {}
    if b:
        extra['branch'] = b
    if parent_ha != node.nullid or files_touched:
        # TODO(augie) remove this debug code? Or maybe it's sane to have it.
        for f in files_touched:
            if f:
                assert f[0] != '/'
        current_ctx = context.memctx(hg_editor.repo,
                                     [parent_ha, revlog.nullid],
                                     r.message or '...',
                                     files_touched,
                                     filectxfn,
                                     '%s%s' % (r.author,
                                               hg_editor.author_host),
                                     date,
                                     extra)
        ha = hg_editor.repo.commitctx(current_ctx)
        hg_editor.revmap[r.revnum, b] = ha
        hg_editor._save_metadata()
        ui.status('committed as %s on branch %s\n' %
                  (node.hex(ha), b or 'default'))


def stupid_svn_server_pull_rev(ui, svn, hg_editor, r):
    """Convert revision ``r`` without replay, one commit per touched branch.

    For every branch returned by ``hg_editor.branches_in_paths(r.paths)``
    the revision is rebuilt in a scratch directory under
    ``<hg_editor.path>/.hg/svn/temp``, preferably by applying the unified
    diff from the server and otherwise by fetching every file, and is then
    committed through ``hg_editor.repo``. The scratch directory is removed
    again even when the conversion fails.

    Raises core.SubversionException for server errors that are not one of
    the recognised "does not exist" conditions.
    """
    # this server fails at replay
    branches = hg_editor.branches_in_paths(r.paths)
    temp_location = os.path.join(hg_editor.path, '.hg', 'svn', 'temp')
    if not os.path.exists(temp_location):
        os.makedirs(temp_location)
    for b in branches:
        our_tempdir = tempfile.mkdtemp('svn_fetch_temp', dir=temp_location)
        try:
            _stupid_pull_branch(ui, svn, hg_editor, r, b, our_tempdir)
        finally:
            shutil.rmtree(our_tempdir, ignore_errors=True)
444
445
class BadPatchApply(Exception):
    """Signals that a diff could not be used and the revision must be
    fetched in full instead."""