summary refs log tree commit diff
diff options
context:
space:
mode:
-rwxr-xr-x hg-fast-export.py 114
1 files changed, 32 insertions, 82 deletions
diff --git a/hg-fast-export.py b/hg-fast-export.py
index cdb838b..d87c18d 100755
--- a/hg-fast-export.py
+++ b/hg-fast-export.py
@@ -36,11 +36,10 @@ def checkpoint(count):
wr()
return count
-def get_parent_mark(parent,marks):
- """Get the mark for some parent.
- If we saw it in the current session, return :%d syntax and
- otherwise the SHA1 from the cache."""
- return marks.get(str(parent),':%d' % (parent+1))
+def revnum_to_revref(rev, old_marks):
+ """Convert an hg revnum to a git-fast-import rev reference (an SHA1
+ or a mark)"""
+ return old_marks.get(rev) or ':%d' % (rev+1)
def file_mismatch(f1,f2):
"""See if two revisions of a file are not equal."""
@@ -131,13 +130,6 @@ def export_file_contents(ctx,manifest,files):
if max>cfg_export_boundary:
sys.stderr.write('Exported %d/%d files\n' % (count,max))
-def is_merge(parents):
- c=0
- for parent in parents:
- if parent>=0:
- c+=1
- return c>1
-
def sanitize_name(name,what="branch"):
"""Sanitize input roughly according to git-check-ref-format(1)"""
@@ -157,7 +149,7 @@ def sanitize_name(name,what="branch"):
sys.stderr.write('Warning: sanitized %s [%s] to [%s]\n' % (what,name,n))
return n
-def export_commit(ui,repo,revision,marks,mapping,heads,last,max,count,authors,sob,brmap):
+def export_commit(ui,repo,revision,old_marks,max,count,authors,sob,brmap):
def get_branchname(name):
if brmap.has_key(name):
return brmap[name]
@@ -166,7 +158,6 @@ def export_commit(ui,repo,revision,marks,mapping,heads,last,max,count,authors,so
return n
(revnode,_,user,(time,timezone),files,desc,branch,_)=get_changeset(ui,repo,revision,authors)
- parents=repo.changelog.parentrevs(revision)
branch=get_branchname(branch)
@@ -179,75 +170,38 @@ def export_commit(ui,repo,revision,marks,mapping,heads,last,max,count,authors,so
wr(desc)
wr()
- pidx1, pidx2 = 0, 1
- if parents[1] > 0:
- if parents[0] <= 0 or \
- repo.changelog.node(parents[0]) < repo.changelog.node(parents[1]):
- pidx1, pidx2 = 1, 0
-
- full_rev=False
- if revision==0: full_rev=True
-
- src=heads.get(branch,'')
- link=''
- if src!='':
- # if we have a cached head, this is an incremental import: initialize it
- # and kill reference so we won't init it again
- wr('from %s' % src)
- heads[branch]=''
- sys.stderr.write('%s: Initializing to parent [%s]\n' %
- (branch,src))
- link=src # avoid making a merge commit for incremental import
- elif link=='' and not heads.has_key(branch) and revision>0:
- if parents[0]>=0:
- # newly created branch with parent: connect to parent
- tmp=get_parent_mark(parents[0],marks)
- wr('from %s' % tmp)
- sys.stderr.write('%s: Link new branch to parent [%s]\n' %
- (branch,tmp))
- link=tmp # avoid making a merge commit for branch fork
- else:
- # newly created branch without parent: feed full revision
- full_rev=True
- elif last.get(branch,revision) != parents[pidx1] and parents[pidx1] > 0 and revision > 0:
- pm=get_parent_mark(parents[pidx1],marks)
- sys.stderr.write('%s: Placing commit [r%d] in branch [%s] on top of [r%d]\n' %
- (branch,revision,branch,parents[pidx1]));
- wr('from %s' % pm)
-
- if parents[pidx2] > 0:
- pm=get_parent_mark(parents[pidx2],marks)
- sys.stderr.write('%s: Merging with parent [%s] from [r%d]\n' %
- (branch,pm,parents[pidx2]))
- wr('merge %s' % pm)
-
- last[branch]=revision
- heads[branch]=''
- # we need this later to write out tags
- marks[str(revision)]=':%d'%(revision+1)
+ parents = [p for p in repo.changelog.parentrevs(revision) if p >= 0]
+
+ # Sort the parents based on revision ids so that we always get the
+ # same resulting git repo, no matter how the revisions were
+ # numbered.
+ parents.sort(key=repo.changelog.node, reverse=True)
ctx=repo.changectx(str(revision))
man=ctx.manifest()
added,changed,removed,type=[],[],[],''
- if full_rev:
+ if len(parents) == 0:
# first revision: feed in full manifest
added=man.keys()
added.sort()
type='full'
- elif is_merge(parents):
- # later merge revision: feed in changed manifest
- # for many files comparing checksums is expensive so only do it for
- # merges where we really need it due to hg's revlog logic
- added,changed,removed=get_filechanges(repo,revision,parents,man)
- type='thorough delta'
else:
- # later non-merge revision: feed in changed manifest
- # if we have exactly one parent, just take the changes from the
- # manifest without expensively comparing checksums
- f=repo.status(repo.lookup(parents[0]),revnode)[:3]
- added,changed,removed=f[1],f[0],f[2]
- type='simple delta'
+ wr('from %s' % revnum_to_revref(parents[0], old_marks))
+ if len(parents) == 1:
+ # later non-merge revision: feed in changed manifest
+ # if we have exactly one parent, just take the changes from the
+ # manifest without expensively comparing checksums
+ f=repo.status(repo.lookup(parents[0]),revnode)[:3]
+ added,changed,removed=f[1],f[0],f[2]
+ type='simple delta'
+ else: # a merge with two parents
+ wr('merge %s' % revnum_to_revref(parents[1], old_marks))
+ # later merge revision: feed in changed manifest
+ # for many files comparing checksums is expensive so only do it for
+ # merges where we really need it due to hg's revlog logic
+ added,changed,removed=get_filechanges(repo,revision,parents,man)
+ type='thorough delta'
sys.stderr.write('%s: Exporting %s revision %d/%d with %d/%d/%d added/changed/removed files\n' %
(branch,type,revision+1,max,len(added),len(changed),len(removed)))
@@ -259,7 +213,7 @@ def export_commit(ui,repo,revision,marks,mapping,heads,last,max,count,authors,so
return checkpoint(count)
-def export_tags(ui,repo,marks_cache,mapping_cache,count,authors):
+def export_tags(ui,repo,old_marks,mapping_cache,count,authors):
l=repo.tagslist()
for tag,node in l:
tag=sanitize_name(tag,"tag")
@@ -272,7 +226,7 @@ def export_tags(ui,repo,marks_cache,mapping_cache,count,authors):
rev=int(mapping_cache[node.encode('hex_codec')])
- ref=marks_cache.get(str(rev),':%d' % (rev))
+ ref=revnum_to_revref(rev, old_marks)
if ref==None:
sys.stderr.write('Failed to find reference for creating tag'
' %s at r%d\n' % (tag,rev))
@@ -332,13 +286,10 @@ def verify_heads(ui,repo,cache,force):
return True
-def mangle_mark(mark):
- return str(int(mark)-1)
-
def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=False,force=False):
_max=int(m)
- marks_cache=load_cache(marksfile,mangle_mark)
+ old_marks=load_cache(marksfile,lambda s: int(s)-1)
mapping_cache=load_cache(mappingfile)
heads_cache=load_cache(headsfile)
state_cache=load_cache(tipfile)
@@ -364,17 +315,16 @@ def hg2git(repourl,m,marksfile,mappingfile,headsfile,tipfile,authors={},sob=Fals
c=0
- last={}
brmap={}
for rev in range(min,max):
- c=export_commit(ui,repo,rev,marks_cache,mapping_cache,heads_cache,last,max,c,authors,sob,brmap)
+ c=export_commit(ui,repo,rev,old_marks,max,c,authors,sob,brmap)
state_cache['tip']=max
state_cache['repo']=repourl
save_cache(tipfile,state_cache)
save_cache(mappingfile,mapping_cache)
- c=export_tags(ui,repo,marks_cache,mapping_cache,c,authors)
+ c=export_tags(ui,repo,old_marks,mapping_cache,c,authors)
sys.stderr.write('Issued %d commands\n' % c)