comparison hg_delta_editor.py @ 0:f2636cfed115

Initial import of hgsubversion into a public repository.
author Augie Fackler <durin42@gmail.com>
date Tue, 30 Sep 2008 11:42:52 -0500
parents
children 1a5bb173170b
comparison
equal deleted inserted replaced
-1:000000000000 0:f2636cfed115
1 import cStringIO
2 import cPickle as pickle
3 import os
4 import sys
5 import tempfile
6 import traceback
7
8 from mercurial import context
9 from mercurial import hg
10 from mercurial import ui
11 from mercurial import revlog
12 from mercurial import node
13 from svn import delta
14 from svn import core
15
def pickle_atomic(data, file_path, dir=None):
    """Pickle some data to a path atomically.

    This is present because I kept corrupting my revmap by managing to hit ^C
    during the pickle of that file.

    data: any picklable object.
    file_path: final destination path; only replaced once the pickle has
               been completely written.
    dir: directory in which to create the temporary file; should live on
         the same filesystem as file_path so os.rename stays atomic.
    """
    fd, temp_path = tempfile.mkstemp(prefix='pickling', dir=dir)
    try:
        # 'wb' because pickles are binary data; also ensures the handle is
        # closed (flushing the OS buffer) before we rename into place.
        f = os.fdopen(fd, 'wb')
        try:
            pickle.dump(data, f)
        finally:
            f.close()
    except:
        # Don't leave half-written temp files behind on failure.
        os.remove(temp_path)
        raise
    os.rename(temp_path, file_path)
31
def stash_exception_on_self(fn):
    """Decorator for editor methods: record the first raised exception on self.

    The SWIG bindings turn every exception raised inside an editor callback
    into a generic Subversion exception, losing the original.  By stashing
    sys.exc_info() on the editor object here, commit_current_delta can later
    detect that the replay actually failed and report the real traceback.
    """
    def _stashing_wrapper(self, *args, **kwargs):
        try:
            return fn(self, *args, **kwargs)
        except:
            # Only the first failure is interesting; later ones are usually
            # cascades from it, so never overwrite an existing stash.
            if not hasattr(self, '_exception_info'):
                self._exception_info = sys.exc_info()
            raise
    return _stashing_wrapper
48
49
class HgChangeReceiver(delta.Editor):
    # Subversion delta editor that replays a stream of svn edits into a
    # Mercurial repository.  It keeps three persistent pickled maps under
    # .hg/svn: revmap ((svn revnum, branch) -> hg changeset hash), branches
    # (branch -> (parent branch, parent branch rev, created rev)) and tags
    # (tag -> (branch, svn rev)).
    def __init__(self, path, ui_=None, subdir='', author_host='',
                 tag_locations=['tags']):
        """path is the path to the target hg repo.

        subdir is the subdirectory of the edits *on the svn server*.
        It is needed for stripping paths off in certain cases.
        """
        # NOTE(review): tag_locations is a mutable default argument; every
        # call relying on the default shares the same list object.
        if not ui_:
            ui_ = ui.ui()
        self.ui = ui_
        self.path = path
        self.__setup_repo(path)
        self.subdir = subdir
        # Normalize subdir so path comparisons never see a leading slash.
        if self.subdir and self.subdir[0] == '/':
            self.subdir = self.subdir[1:]
        # (svn revnum, branch name or None for trunk) -> hg changeset hash
        self.revmap = {}
        if os.path.exists(self.revmap_file):
            f = open(self.revmap_file)
            self.revmap = pickle.load(f)
            f.close()
        # branch name -> (parent branch, parent branch rev, created rev)
        self.branches = {}
        if os.path.exists(self.branch_info_file):
            f = open(self.branch_info_file)
            self.branches = pickle.load(f)
            f.close()
        # tag name -> (branch, svn revision)
        self.tags = {}
        if os.path.exists(self.tag_info_file):
            f = open(self.tag_info_file)
            self.tags = pickle.load(f)
            f.close()
        # The set of tag locations is fixed on first run and persisted so
        # later runs keep scanning the same svn directories for tags.
        if os.path.exists(self.tag_locations_file):
            f = open(self.tag_locations_file)
            self.tag_locations = pickle.load(f)
        else:
            self.tag_locations = tag_locations
        pickle_atomic(self.tag_locations, self.tag_locations_file,
                      self.meta_data_dir)

        self.clear_current_info()
        self.author_host = author_host

    def __setup_repo(self, repo_path):
        '''Verify the repo is going to work out for us.

        This method will fail an assertion if the repo exists but doesn't have
        the Subversion metadata.
        '''
        if os.path.isdir(repo_path) and len(os.listdir(repo_path)):
            # Existing, non-empty repo: it must already carry our metadata
            # files, or we would corrupt an unrelated repository.
            self.repo = hg.repository(self.ui, repo_path)
            assert os.path.isfile(self.revmap_file)
            assert os.path.isfile(self.svn_url_file)
            assert os.path.isfile(self.uuid_file)
            assert os.path.isfile(self.last_revision_handled_file)
        else:
            # Fresh conversion: create the repo and the .hg/svn metadata dir.
            self.repo = hg.repository(self.ui, repo_path, create=True)
            os.makedirs(os.path.dirname(self.uuid_file))

    def clear_current_info(self):
        '''Clear the info relevant to a replayed revision so that the next
        revision can be replayed.
        '''
        self.current_files = {}          # svn path -> new file contents
        self.deleted_files = {}          # svn path -> True for deletions
        self.current_rev = None
        self.current_files_exec = {}     # svn path -> executable bit
        self.current_files_symlink = {}  # svn path -> symlink flag
        self.missing_plaintexts = set()  # paths we lack plaintext for
        self.commit_branches_empty = {}  # branch -> needs empty/delete commit
        self.base_revision = None

    def _save_metadata(self):
        '''Save the Subversion metadata. This should really be called after
        every revision is created.
        '''
        pickle_atomic(self.revmap, self.revmap_file, self.meta_data_dir)
        pickle_atomic(self.branches, self.branch_info_file, self.meta_data_dir)
        pickle_atomic(self.tags, self.tag_info_file, self.meta_data_dir)

    def branches_in_paths(self, paths):
        '''Given a list of paths, return the set of branches that are touched.
        '''
        branches = set([])
        for p in paths:
            if self._is_path_valid(p):
                junk, branch = self._path_and_branch_for_path(p)
                branches.add(branch)
        return branches

    def _path_and_branch_for_path(self, path):
        '''Figure out which branch inside our repo this path represents, and
        also figure out which path inside that branch it is.

        Returns (path-in-branch, branch) where branch is None for trunk.
        Raises an exception if it can't perform its job.
        '''
        path = self._normalize_path(path)
        if path.startswith('trunk'):
            p = path[len('trunk'):]
            if p and p[0] == '/':
                p = p[1:]
            # None is the branch name used for trunk throughout this class.
            return p, None
        elif path.startswith('branches/'):
            p = path[len('branches/'):]
            br = p.split('/')[0]
            p = p[len(br)+1:]
            if p and p[0] == '/':
                p = p[1:]
            return p, br
        raise Exception,'Things went boom: ' + path

    def set_current_rev(self, rev):
        '''Set the revision we're currently converting.
        '''
        self.current_rev = rev

    def _normalize_path(self, path):
        '''Normalize a path to strip of leading slashes and our subdir if we
        have one.
        '''
        if path and path[0] == '/':
            path = path[1:]
        if path and path.startswith(self.subdir):
            path = path[len(self.subdir):]
        if path and path[0] == '/':
            path = path[1:]
        return path

    def _is_path_valid(self, path):
        # A path is "valid" when it falls under trunk or under a named
        # branch; everything else (tags, unrelated dirs) is handled
        # separately or ignored.
        path = self._normalize_path(path)
        if path.startswith('trunk'):
            return True
        elif path.startswith('branches/'):
            br = path.split('/')[1]
            return len(br) > 0
        return False

    def _is_path_tag(self, path):
        """If path represents the path to a tag, returns the tag name.

        Otherwise, returns False.
        """
        path = self._normalize_path(path)
        for tags_path in self.tag_locations:
            if path and (path.startswith(tags_path) and
                         len(path) > len('%s/' % tags_path)):
                # First path component after the tags dir is the tag name.
                return path[len(tags_path)+1:].split('/')[0]
        return False

    def get_parent_svn_branch_and_rev(self, number, branch):
        '''Find the (svn revision, branch) pair already present in revmap
        that should act as the parent of svn revision `number` on `branch`.

        Falls back through the branch's recorded parent branch when the
        branch has no mapped revision of its own yet.  Returns (None, None)
        when no suitable parent exists.
        '''
        number -= 1
        if (number, branch) in self.revmap:
            return number, branch
        real_num = 0
        # Otherwise use the newest mapped revision on this branch that is
        # still <= number.
        for num, br in self.revmap.iterkeys():
            if br != branch:
                continue
            if num <= number and num > real_num:
                real_num = num
        if real_num == 0:
            # Nothing on this branch yet: chase the parent branch recorded
            # when the branch was created.
            if branch in self.branches:
                parent_branch = self.branches[branch][0]
                parent_branch_rev = self.branches[branch][1]
                branch_created_rev = self.branches[branch][2]
                if parent_branch == 'trunk':
                    # trunk is represented as None everywhere else.
                    parent_branch = None
                if branch_created_rev <= number+1 and branch != parent_branch:
                    return self.get_parent_svn_branch_and_rev(
                        parent_branch_rev+1,
                        parent_branch)
        if real_num != 0:
            return real_num, branch
        return None, None

    def get_parent_revision(self, number, branch):
        '''Get the parent revision hash for a commit on a specific branch.
        '''
        r, br = self.get_parent_svn_branch_and_rev(number, branch)
        if r is not None:
            return self.revmap[r, br]
        # No parent known: the commit will be a root (nullid parent).
        return revlog.nullid

    def update_branch_tag_map_for_rev(self, revision):
        '''Scan revision.paths and update self.branches and self.tags with
        any branch or tag creations/deletions this revision performs.
        '''
        paths = revision.paths
        added_branches = {}
        added_tags = {}
        tags_to_delete = set()
        for p in paths:
            if self._is_path_valid(p):
                fi, br = self._path_and_branch_for_path(p)
                # fi == '' means the changed path IS the branch root, i.e.
                # this revision may be creating the branch.
                if fi == '' and br not in self.branches:
                    # TODO handle creating a branch from a tag
                    src_p = paths[p].copyfrom_path
                    src_rev = paths[p].copyfrom_rev
                    src_tag = self._is_path_tag(src_p)

                    if not src_p or not (self._is_path_valid(src_p) or src_tag):
                        # we'll imply you're a branch off of trunk
                        # if you have no path, but if you do, it must be valid
                        # or else we assume trunk as well
                        src_branch = None
                        src_rev = revision.revnum
                    elif src_tag:
                        # this is a branch created from a tag. Note that this
                        # really does happen (see Django)
                        src_branch, src_rev = self.tags[src_tag]
                        added_branches[br] = (src_branch, src_rev,
                                              revision.revnum)
                    else:
                        # Not from a tag, and from a valid repo path
                        (src_p,
                         src_branch) = self._path_and_branch_for_path(src_p)
                    # NOTE(review): this assignment also runs after the
                    # src_tag branch above, re-storing the same tuple.
                    added_branches[br] = src_branch, src_rev, revision.revnum
                elif br in added_branches:
                    # Several copied paths for one new branch: keep the
                    # highest copyfrom_rev seen.
                    if paths[p].copyfrom_rev > added_branches[br][1]:
                        x,y,z = added_branches[br]
                        added_branches[br] = x, paths[p].copyfrom_rev, z
            else:
                t_name = self._is_path_tag(p)
                if t_name == False:
                    continue
                src_p, src_rev = paths[p].copyfrom_path, paths[p].copyfrom_rev
                # if you commit to a tag, I'm calling you stupid and ignoring
                # you.
                if src_p is not None and src_rev is not None:
                    if self._is_path_valid(src_p):
                        file, branch = self._path_and_branch_for_path(src_p)
                    else:
                        # some crazy people make tags from other tags
                        file = ''
                        from_tag = self._is_path_tag(src_p)
                        if not from_tag:
                            continue
                        branch, src_rev = self.tags[from_tag]
                    if t_name not in added_tags:
                        added_tags[t_name] = branch, src_rev
                    elif file and src_rev > added_tags[t_name][1]:
                        added_tags[t_name] = branch, src_rev
                elif (paths[p].action == 'D' and p.endswith(t_name)
                      and t_name in self.tags):
                    # Deleting the tag directory removes the tag.
                    tags_to_delete.add(t_name)
        for t in tags_to_delete:
            del self.tags[t]
        self.tags.update(added_tags)
        self.branches.update(added_branches)

    def commit_current_delta(self):
        '''Commit the accumulated per-revision state (current_files and
        friends) to the hg repo, one changeset per touched branch, then
        reset the per-revision state.

        Raises ReplayException if a replay callback stashed an exception,
        and MissingPlainTextError if any needed plaintext was unavailable.
        '''
        if hasattr(self, '_exception_info'):
            # An editor callback failed earlier; surface the real traceback.
            traceback.print_exception(*self._exception_info)
            raise ReplayException()
        if self.missing_plaintexts:
            raise MissingPlainTextError()
        files_to_commit = self.current_files.keys()
        files_to_commit.extend(self.current_files_symlink.keys())
        files_to_commit.extend(self.current_files_exec.keys())
        files_to_commit = sorted(list(set(files_to_commit)))
        branch_batches = {}
        rev = self.current_rev
        # svn dates look like 2008-09-30T11:42:52.000000Z; convert to the
        # 'YYYY-MM-DD HH:MM:SS +ZZZZ' form hg expects, pinned to -0000.
        date = rev.date.replace('T', ' ').replace('Z', '').split('.')[0]
        date += ' -0000'

        # build up the branches that have files on them
        for f in files_to_commit:
            if not self._is_path_valid(f):
                continue
            p, b = self._path_and_branch_for_path(f)
            if b not in branch_batches:
                branch_batches[b] = []
            branch_batches[b].append((p, f))

        for branch, files in branch_batches.iteritems():
            # A branch getting real files no longer needs an empty commit.
            if branch in self.commit_branches_empty and files:
                del self.commit_branches_empty[branch]
            extra = {}
            # files maps path-within-branch -> full svn path.
            files = dict(files)

            parents = (self.get_parent_revision(rev.revnum, branch),
                       revlog.nullid)
            if branch is not None:
                if branch not in self.branches:
                    # Unknown branch: skip rather than commit orphans.
                    continue
                if parents == (revlog.nullid, revlog.nullid):
                    assert False, ('a non-trunk branch should probably have'
                                   ' parents figured out by this point')
                extra['branch'] = branch
            parent_ctx = self.repo.changectx(parents[0])
            def filectxfn(repo, memctx, path):
                # Build the memfilectx for one file, merging flags from the
                # parent changeset with this revision's recorded changes.
                is_link = False
                is_exec = False
                copied = None
                current_file = files[path]
                if current_file in self.deleted_files:
                    # Raising IOError is how memctx is told "file deleted".
                    raise IOError()
                # TODO(augie) tag copies from files
                if path in parent_ctx:
                    is_exec = 'x' in parent_ctx.flags(path)
                    is_link = 'l' in parent_ctx.flags(path)
                if current_file in self.current_files_exec:
                    is_exec = self.current_files_exec[current_file]
                if current_file in self.current_files_symlink:
                    is_link = self.current_files_symlink[current_file]
                if current_file in self.current_files:
                    data = self.current_files[current_file]
                    if is_link:
                        # svn stores symlinks as 'link TARGET'; hg wants
                        # just the target.
                        assert data.startswith('link ')
                        data = data[len('link '):]
                else:
                    # Flag-only change: reuse the parent's file data.
                    data = parent_ctx.filectx(path).data()
                return context.memfilectx(path=path,
                                          data=data,
                                          islink=is_link, isexec=is_exec,
                                          copied=copied)
            current_ctx = context.memctx(self.repo,
                                         parents,
                                         rev.message or '...',
                                         files.keys(),
                                         filectxfn,
                                         '%s%s' %(rev.author, self.author_host),
                                         date,
                                         extra)
            new_hash = self.repo.commitctx(current_ctx)
            self.ui.status('committed as %s on branch %s\n' %
                           (node.hex(new_hash), (branch or 'default')))
            if (rev.revnum, branch) not in self.revmap:
                self.revmap[rev.revnum, branch] = new_hash
                # Persist immediately so an interrupt can't lose the mapping.
                self._save_metadata()
        # now we handle branches that need to be committed without any files
        for branch in self.commit_branches_empty:
            ha = self.get_parent_revision(rev.revnum, branch)
            if ha == node.nullid:
                continue
            parent_ctx = self.repo.changectx(ha)
            def del_all_files(*args):
                # IOError from the filectx callback deletes the file.
                raise IOError
            extra = {}
            if branch:
                extra['branch'] = branch
            # True here means nuke all files
            files = []
            if self.commit_branches_empty[branch]:
                files = parent_ctx.manifest().keys()
            current_ctx = context.memctx(self.repo,
                                         (ha, node.nullid),
                                         rev.message or ' ',
                                         files,
                                         del_all_files,
                                         '%s%s' % (rev.author,
                                                   self.author_host),
                                         date,
                                         extra)
            new_hash = self.repo.commitctx(current_ctx)
            self.ui.status('committed as %s on branch %s\n' %
                           (node.hex(new_hash), (branch or 'default')))
            if (rev.revnum, branch) not in self.revmap:
                self.revmap[rev.revnum, branch] = new_hash
                self._save_metadata()
        self.clear_current_info()

    # --- paths of the persistent metadata files under .hg/svn -------------

    @property
    def meta_data_dir(self):
        return os.path.join(self.path, '.hg', 'svn')

    def meta_file_named(self, name):
        # Path of a named metadata file inside meta_data_dir.
        return os.path.join(self.meta_data_dir, name)

    @property
    def revmap_file(self):
        return self.meta_file_named('rev_map')

    @property
    def svn_url_file(self):
        return self.meta_file_named('url')

    @property
    def uuid_file(self):
        return self.meta_file_named('uuid')

    @property
    def last_revision_handled_file(self):
        return self.meta_file_named('last_rev')

    @property
    def branch_info_file(self):
        return self.meta_file_named('branch_info')

    @property
    def tag_info_file(self):
        return self.meta_file_named('tag_info')

    @property
    def tag_locations_file(self):
        return self.meta_file_named('tag_locations')

    @property
    def url(self):
        # The svn URL this repo was converted from, as stored on disk.
        return open(self.svn_url_file).read()

    # --- svn delta editor callbacks ---------------------------------------

    @stash_exception_on_self
    def delete_entry(self, path, revision_bogus, parent_baton, pool=None):
        if self._is_path_valid(path):
            br_path, branch = self._path_and_branch_for_path(path)
            ha = self.get_parent_revision(self.current_rev.revnum, branch)
            if ha == revlog.nullid:
                return
            ctx = self.repo.changectx(ha)
            if br_path not in ctx:
                br_path2 = ''
                if br_path != '':
                    br_path2 = br_path + '/'
                # assuming it is a directory
                for f in ctx:
                    if f.startswith(br_path2):
                        f_p = '%s/%s' % (path, f[len(br_path2):])
                        self.deleted_files[f_p] = True
                        self.current_files[f_p] = ''
                        self.ui.status('D %s\n' % f_p)
            self.deleted_files[path] = True
            self.current_files[path] = ''
            self.ui.status('D %s\n' % path)

    @stash_exception_on_self
    def open_file(self, path, parent_baton, base_revision, p=None):
        # 'foobaz' is a deliberately-invalid placeholder so that later
        # callbacks on an uninteresting path harmlessly no-op.
        self.current_file = 'foobaz'
        if self._is_path_valid(path):
            self.current_file = path
            self.ui.status('M %s\n' % path)
            if base_revision != -1:
                self.base_revision = base_revision
            else:
                self.base_revision = None
        self.should_edit_most_recent_plaintext = True

    @stash_exception_on_self
    def add_file(self, path, parent_baton, copyfrom_path,
                 copyfrom_revision, file_pool=None):
        # See open_file for the 'foobaz' placeholder convention.
        self.current_file = 'foobaz'
        self.base_revision = None
        # An add supersedes any earlier delete of the same path in this rev.
        if path in self.deleted_files:
            del self.deleted_files[path]
        if self._is_path_valid(path):
            self.current_file = path
            self.should_edit_most_recent_plaintext = False
            if copyfrom_path:
                self.ui.status('A+ %s\n' % path)
                # TODO(augie) handle this better, actually mark a copy
                (from_file,
                 from_branch) = self._path_and_branch_for_path(copyfrom_path)
                ha = self.get_parent_revision(copyfrom_revision + 1,
                                              from_branch)
                ctx = self.repo.changectx(ha)
                if from_file in ctx:
                    # Seed contents and flags from the copy source.
                    fctx = ctx.filectx(from_file)
                    cur_file = self.current_file
                    self.current_files[cur_file] = fctx.data()
                    self.current_files_symlink[cur_file] = 'l' in fctx.flags()
                    self.current_files_exec[cur_file] = 'x' in fctx.flags()
            else:
                self.ui.status('A %s\n' % path)


    @stash_exception_on_self
    def add_directory(self, path, parent_baton, copyfrom_path,
                      copyfrom_revision, dir_pool=None):
        if self._is_path_valid(path):
            junk, branch = self._path_and_branch_for_path(path)
            if not copyfrom_path and not junk:
                # New branch root with no copy source: may need an empty
                # commit to materialize the branch.
                self.commit_branches_empty[branch] = True
            else:
                self.commit_branches_empty[branch] = False
        if not (self._is_path_valid(path) and copyfrom_path and
                self._is_path_valid(copyfrom_path)):
            return

        # Directory copy within the repo: replicate every file under the
        # source directory into the new location.
        cp_f, br_from = self._path_and_branch_for_path(copyfrom_path)
        new_hash = self.get_parent_revision(copyfrom_revision + 1, br_from)
        if new_hash == node.nullid:
            self.missing_plaintexts.add('%s/' % path)
            return
        cp_f_ctx = self.repo.changectx(new_hash)
        if cp_f != '/' and cp_f != '':
            cp_f = '%s/' % cp_f
        else:
            cp_f = ''
        for f in cp_f_ctx:
            if f.startswith(cp_f):
                f2 = f[len(cp_f):]
                fctx = cp_f_ctx.filectx(f)
                fp_c = path + '/' + f2
                self.current_files[fp_c] = fctx.data()
                self.current_files_exec[fp_c] = 'x' in fctx.flags()
                self.current_files_symlink[fp_c] = 'l' in fctx.flags()
        # TODO(augie) tag copies from files

    @stash_exception_on_self
    def change_file_prop(self, file_baton, name, value, pool=None):
        # Only the two svn properties that map onto hg file flags matter.
        if name == 'svn:executable':
            self.current_files_exec[self.current_file] = bool(value)
        elif name == 'svn:special':
            self.current_files_symlink[self.current_file] = bool(value)

    @stash_exception_on_self
    def open_directory(self, path, parent_baton, base_revision, dir_pool=None):
        if self._is_path_valid(path):
            p_, branch = self._path_and_branch_for_path(path)
            if p_ == '':
                # The branch root itself was opened, so the branch exists;
                # no empty placeholder commit needed.
                self.commit_branches_empty[branch] = False

    @stash_exception_on_self
    def apply_textdelta(self, file_baton, base_checksum, pool=None):
        '''Return a window handler that applies svn text deltas against the
        correct base text for self.current_file.
        '''
        base = ''
        if not self._is_path_valid(self.current_file):
            return lambda x: None
        if (self.current_file in self.current_files
            and not self.should_edit_most_recent_plaintext):
            # Base is whatever this revision has already produced (e.g. from
            # an add-with-copyfrom).
            base = self.current_files[self.current_file]
        elif (base_checksum is not None or
              self.should_edit_most_recent_plaintext):
            # Base must come from the parent changeset's plaintext.
            p_, br = self._path_and_branch_for_path(self.current_file)
            par_rev = self.current_rev.revnum
            if self.base_revision:
                par_rev = self.base_revision + 1
            ha = self.get_parent_revision(par_rev, br)
            if ha != revlog.nullid:
                ctx = self.repo.changectx(ha)
                if not p_ in ctx:
                    self.missing_plaintexts.add(self.current_file)
                    # short circuit exit since we can't do anything anyway
                    return lambda x: None
                base = ctx.filectx(p_).data()
        source = cStringIO.StringIO(base)
        target = cStringIO.StringIO()
        self.stream = target

        handler, baton = delta.svn_txdelta_apply(source, target, None)
        if not callable(handler):
            # TODO(augie) Raise a real exception, don't just fail an assertion.
            assert False, 'handler not callable, bindings are broken'
        def txdelt_window(window):
            try:
                if not self._is_path_valid(self.current_file):
                    return
                handler(window, baton)
                # window being None means commit this file
                if not window:
                    self.current_files[self.current_file] = target.getvalue()
            except core.SubversionException, e:
                if e.message == 'Delta source ended unexpectedly':
                    # The base text we supplied was wrong/missing; record it
                    # so the caller can fetch the plaintext instead.
                    self.missing_plaintexts.add(self.current_file)
                else:
                    self._exception_info = sys.exc_info()
                    raise
            except:
                print len(base), self.current_file
                self._exception_info = sys.exc_info()
                raise
        return txdelt_window
605
class MissingPlainTextError(Exception):
    """Raised at commit time when the repo lacks a source plaintext that a
    txdelta replay needed.
    """
610
class ReplayException(Exception):
    """Raised at commit time when an exception occurred earlier during the
    replay process.
    """