summaryrefslogtreecommitdiff
path: root/contrib
diff options
context:
space:
mode:
authorLuke Diamand <luke@diamand.org>2009-07-30 00:13:46 +0100
committerJunio C Hamano <gitster@pobox.com>2009-08-10 14:25:20 -0700
commitb932705b457ad191481a871292d71088bb23d244 (patch)
tree702b73d2df4c82df9ad72b6abafdb1b67c0af162 /contrib
parentc14417c4f5f8d7765e232f1e908b85d2ae14ec34 (diff)
downloadgit-b932705b457ad191481a871292d71088bb23d244.tar.gz
git-p4: stream from perforce to speed up clones
Change commit() to stream data from Perforce and into fast-import rather than reading into memory first, and then writing out. This hugely reduces the memory requirements when cloning non-incrementally. Signed-off-by: Luke Diamand <luke@diamand.org> Signed-off-by: Pete Wyckoff <pw@padd.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 'contrib')
-rwxr-xr-xcontrib/fast-import/git-p4169
1 files changed, 102 insertions, 67 deletions
diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 342529db30..38438f3c4a 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -201,7 +201,7 @@ def isModeExec(mode):
def isModeExecChanged(src_mode, dst_mode):
return isModeExec(src_mode) != isModeExec(dst_mode)
-def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None):
cmd = p4_build_cmd("-G %s" % (cmd))
if verbose:
sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -224,7 +224,10 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
try:
while True:
entry = marshal.load(p4.stdout)
- result.append(entry)
+ if cb is not None:
+ cb(entry)
+ else:
+ result.append(entry)
except EOFError:
pass
exitCode = p4.wait()
@@ -950,10 +953,84 @@ class P4Sync(Command):
return branches
- ## Should move this out, doesn't use SELF.
- def readP4Files(self, files):
+ # output one file from the P4 stream
+ # - helper for streamP4Files
+
+ def streamOneP4File(self, file, contents):
+ if file["type"] == "apple":
+ print "\nfile %s is a strange apple file that forks. Ignoring" % \
+ file['depotFile']
+ return
+
+ relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
+ if verbose:
+ sys.stderr.write("%s\n" % relPath)
+
+ mode = "644"
+ if isP4Exec(file["type"]):
+ mode = "755"
+ elif file["type"] == "symlink":
+ mode = "120000"
+ # p4 print on a symlink contains "target\n", so strip it off
+ last = contents.pop()
+ last = last[:-1]
+ contents.append(last)
+
+ if self.isWindows and file["type"].endswith("text"):
+ mangled = []
+ for data in contents:
+ data = data.replace("\r\n", "\n")
+ mangled.append(data)
+ contents = mangled
+
+ if file['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
+ contents = map(lambda text: re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text), contents)
+ elif file['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
+ contents = map(lambda text: re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text), contents)
+
+ self.gitStream.write("M %s inline %s\n" % (mode, relPath))
+
+ # total length...
+ length = 0
+ for d in contents:
+ length = length + len(d)
+
+ self.gitStream.write("data %d\n" % length)
+ for d in contents:
+ self.gitStream.write(d)
+ self.gitStream.write("\n")
+
+ def streamOneP4Deletion(self, file):
+ relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
+ if verbose:
+ sys.stderr.write("delete %s\n" % relPath)
+ self.gitStream.write("D %s\n" % relPath)
+
+ # handle another chunk of streaming data
+ def streamP4FilesCb(self, marshalled):
+
+ if marshalled.has_key('depotFile') and self.stream_have_file_info:
+ # start of a new file - output the old one first
+ self.streamOneP4File(self.stream_file, self.stream_contents)
+ self.stream_file = {}
+ self.stream_contents = []
+ self.stream_have_file_info = False
+
+ # pick up the new file information... for the
+ # 'data' field we need to append to our array
+ for k in marshalled.keys():
+ if k == 'data':
+ self.stream_contents.append(marshalled['data'])
+ else:
+ self.stream_file[k] = marshalled[k]
+
+ self.stream_have_file_info = True
+
+ # Stream directly from "p4 files" into "git fast-import"
+ def streamP4Files(self, files):
filesForCommit = []
filesToRead = []
+ filesToDelete = []
for f in files:
includeFile = True
@@ -967,50 +1044,35 @@ class P4Sync(Command):
filesForCommit.append(f)
if f['action'] not in ('delete', 'purge'):
filesToRead.append(f)
+ else:
+ filesToDelete.append(f)
- filedata = []
- if len(filesToRead) > 0:
- filedata = p4CmdList('-x - print',
- stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
- for f in filesToRead]),
- stdin_mode='w+')
-
- if "p4ExitCode" in filedata[0]:
- die("Problems executing p4. Error: [%d]."
- % (filedata[0]['p4ExitCode']));
-
- j = 0;
- contents = {}
- while j < len(filedata):
- stat = filedata[j]
- j += 1
- text = ''
- while j < len(filedata) and filedata[j]['code'] in ('text', 'unicode', 'binary'):
- text += filedata[j]['data']
- del filedata[j]['data']
- j += 1
-
- if not stat.has_key('depotFile'):
- sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
- continue
+ # deleted files...
+ for f in filesToDelete:
+ self.streamOneP4Deletion(f)
- if stat['type'] in ('text+ko', 'unicode+ko', 'binary+ko'):
- text = re.sub(r'(?i)\$(Id|Header):[^$]*\$',r'$\1$', text)
- elif stat['type'] in ('text+k', 'ktext', 'kxtext', 'unicode+k', 'binary+k'):
- text = re.sub(r'\$(Id|Header|Author|Date|DateTime|Change|File|Revision):[^$\n]*\$',r'$\1$', text)
+ if len(filesToRead) > 0:
+ self.stream_file = {}
+ self.stream_contents = []
+ self.stream_have_file_info = False
- contents[stat['depotFile']] = text
+ # curry self argument
+ def streamP4FilesCbSelf(entry):
+ self.streamP4FilesCb(entry)
- for f in filesForCommit:
- path = f['path']
- if contents.has_key(path):
- f['data'] = contents[path]
+ p4CmdList("-x - print",
+ '\n'.join(['%s#%s' % (f['path'], f['rev'])
+ for f in filesToRead]),
+ cb=streamP4FilesCbSelf)
- return filesForCommit
+ # do the last chunk
+ if self.stream_file.has_key('depotFile'):
+ self.streamOneP4File(self.stream_file, self.stream_contents)
def commit(self, details, files, branch, branchPrefixes, parent = ""):
epoch = details["time"]
author = details["user"]
+ self.branchPrefixes = branchPrefixes
if self.verbose:
print "commit into %s" % branch
@@ -1023,7 +1085,6 @@ class P4Sync(Command):
new_files.append (f)
else:
sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
- files = self.readP4Files(new_files)
self.gitStream.write("commit %s\n" % branch)
# gitStream.write("mark :%s\n" % details["change"])
@@ -1051,33 +1112,7 @@ class P4Sync(Command):
print "parent %s" % parent
self.gitStream.write("from %s\n" % parent)
- for file in files:
- if file["type"] == "apple":
- print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
- continue
-
- relPath = self.stripRepoPath(file['path'], branchPrefixes)
- if file["action"] in ("delete", "purge"):
- self.gitStream.write("D %s\n" % relPath)
- else:
- data = file['data']
-
- mode = "644"
- if isP4Exec(file["type"]):
- mode = "755"
- elif file["type"] == "symlink":
- mode = "120000"
- # p4 print on a symlink contains "target\n", so strip it off
- data = data[:-1]
-
- if self.isWindows and file["type"].endswith("text"):
- data = data.replace("\r\n", "\n")
-
- self.gitStream.write("M %s inline %s\n" % (mode, relPath))
- self.gitStream.write("data %s\n" % len(data))
- self.gitStream.write(data)
- self.gitStream.write("\n")
-
+ self.streamP4Files(new_files)
self.gitStream.write("\n")
change = int(details["change"])