From 692e17964dd4f8da4b3994c5c7055ec9a8c22a75 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:54:58 +0200 Subject: git-p4: add optional type specifier to gitConfig reader The functions "gitConfig" and "gitConfigBool" are almost identical. Make "gitConfig" more generic by adding an optional type specifier. Use the type specifier "--bool" with "gitConfig" to implement "gitConfigBool. This prepares the implementation of other type specifiers such as "--int". Signed-off-by: Lars Schneider Acked-by: Luke Diamand Signed-off-by: Junio C Hamano --- git-p4.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 0093fa3d83..206a45f613 100755 --- a/git-p4.py +++ b/git-p4.py @@ -604,9 +604,12 @@ def gitBranchExists(branch): _gitConfig = {} -def gitConfig(key): +def gitConfig(key, typeSpecifier=None): if not _gitConfig.has_key(key): - cmd = [ "git", "config", key ] + cmd = [ "git", "config" ] + if typeSpecifier: + cmd += [ typeSpecifier ] + cmd += [ key ] s = read_pipe(cmd, ignore_error=True) _gitConfig[key] = s.strip() return _gitConfig[key] @@ -617,10 +620,7 @@ def gitConfigBool(key): in the config.""" if not _gitConfig.has_key(key): - cmd = [ "git", "config", "--bool", key ] - s = read_pipe(cmd, ignore_error=True) - v = s.strip() - _gitConfig[key] = v == "true" + _gitConfig[key] = gitConfig(key, '--bool') == "true" return _gitConfig[key] def gitConfigList(key): -- cgit v1.2.1 From cb1dafdfdadca4d1bbe4147d2f9c72b5ef0f4ab7 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:54:59 +0200 Subject: git-p4: add gitConfigInt reader Add a git config reader for integer variables. Please note that the git config implementation automatically supports k, m, and g suffixes. Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 206a45f613..c99b077884 100755 --- a/git-p4.py +++ b/git-p4.py @@ -623,6 +623,17 @@ def gitConfigBool(key): _gitConfig[key] = gitConfig(key, '--bool') == "true" return _gitConfig[key] +def gitConfigInt(key): + if not _gitConfig.has_key(key): + cmd = [ "git", "config", "--int", key ] + s = read_pipe(cmd, ignore_error=True) + v = s.strip() + try: + _gitConfig[key] = int(gitConfig(key, '--int')) + except ValueError: + _gitConfig[key] = None + return _gitConfig[key] + def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) -- cgit v1.2.1 From 7960e70710a97b4d650531614b2839a907f79d95 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:00 +0200 Subject: git-p4: return an empty list if a list config has no values Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 2 ++ 1 file changed, 2 insertions(+) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index c99b077884..ae1e9ea52e 100755 --- a/git-p4.py +++ b/git-p4.py @@ -638,6 +638,8 @@ def gitConfigList(key): if not _gitConfig.has_key(key): s = read_pipe(["git", "config", "--get-all", key], ignore_error=True) _gitConfig[key] = s.strip().split(os.linesep) + if _gitConfig[key] == ['']: + _gitConfig[key] = [] return _gitConfig[key] def p4BranchesInGit(branchesAreInRemotes=True): -- cgit v1.2.1 From d2176a5060c73a04846e270ab07b90aad4d8b8f0 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:01 +0200 Subject: git-p4: add file streaming progress in verbose mode If a file is streamed from P4 to Git then the verbose mode prints continuously the progress as percentage like this: //depot/file.bin 20% (10 MB) Upon completion the progress is overwritten with depot source, local file and size like this: //depot/file.bin --> local/file.bin (10 MB) Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index ae1e9ea52e..81b0fbc648 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2171,7 +2171,9 @@ class P4Sync(Command, P4UserMap): def streamOneP4File(self, file, contents): relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) if verbose: - sys.stderr.write("%s\n" % relPath) + size = int(self.stream_file['fileSize']) + sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024)) + sys.stdout.flush() (type_base, type_mods) = split_p4_type(file["type"]) @@ -2248,7 +2250,8 @@ class P4Sync(Command, P4UserMap): def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) if verbose: - sys.stderr.write("delete %s\n" % relPath) + sys.stdout.write("delete %s\n" % relPath) + sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) # handle another chunk of streaming data @@ -2288,10 +2291,23 @@ class P4Sync(Command, P4UserMap): # 'data' field we need to append to our array for k in marshalled.keys(): if k == 'data': + if 'streamContentSize' not in self.stream_file: + self.stream_file['streamContentSize'] = 0 + self.stream_file['streamContentSize'] += len(marshalled['data']) self.stream_contents.append(marshalled['data']) else: self.stream_file[k] = marshalled[k] + if (verbose and + 'streamContentSize' in self.stream_file and + 'fileSize' in self.stream_file and + 'depotFile' in self.stream_file): + size = int(self.stream_file["fileSize"]) + if size > 0: + progress = 100*self.stream_file['streamContentSize']/size + sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024))) + sys.stdout.flush() + self.stream_have_file_info = True # Stream directly from "p4 files" into "git fast-import" -- cgit v1.2.1 From 4d25dc4475639395b4c567d947218613de359f09 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:02 +0200 Subject: git-p4: check free space during streaming git-p4 will just halt if there is not enough disk space while streaming content from P4 to Git. Add a check to ensure at least 4 times (arbitrarily chosen) the size of a streamed file is available. Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 81b0fbc648..a60c086b18 100755 --- a/git-p4.py +++ b/git-p4.py @@ -104,6 +104,16 @@ def chdir(path, is_client_path=False): path = os.getcwd() os.environ['PWD'] = path +def calcDiskFree(): + """Return free space in bytes on the disk of the given dirname.""" + if platform.system() == 'Windows': + free_bytes = ctypes.c_ulonglong(0) + ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(os.getcwd()), None, None, ctypes.pointer(free_bytes)) + return free_bytes.value + else: + st = os.statvfs(os.getcwd()) + return st.f_bavail * st.f_frsize + def die(msg): if verbose: raise Exception(msg) @@ -2263,6 +2273,14 @@ class P4Sync(Command, P4UserMap): if marshalled["code"] == "error": if "data" in marshalled: err = marshalled["data"].rstrip() + + if not err and 'fileSize' in self.stream_file: + required_bytes = int((4 * int(self.stream_file["fileSize"])) - calcDiskFree()) + if required_bytes > 0: + err = 'Not enough space left on %s! Free at least %i MB.' % ( + os.getcwd(), required_bytes/1024/1024 + ) + if err: f = None if self.stream_have_file_info: -- cgit v1.2.1 From a5db4b127bf86b9dbeb3cb079f4a9530ba313941 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:03 +0200 Subject: git-p4: add support for large file systems Perforce repositories can contain large (binary) files. Migrating these repositories to Git generates very large local clones. External storage systems such as Git LFS [1], Git Fat [2], Git Media [3], git-annex [4] try to address this problem. Add a generic mechanism to detect large files based on extension, uncompressed size, and/or compressed size. [1] https://git-lfs.github.com/ [2] https://github.com/jedbrown/git-fat [3] https://github.com/alebedev/git-media [4] https://git-annex.branchable.com/ Signed-off-by: Lars Schneider Conflicts: Documentation/git-p4.txt git-p4.py Signed-off-by: Junio C Hamano --- git-p4.py | 139 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 129 insertions(+), 10 deletions(-) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index a60c086b18..02b5e9e362 100755 --- a/git-p4.py +++ b/git-p4.py @@ -22,6 +22,8 @@ import platform import re import shutil import stat +import zipfile +import zlib try: from subprocess import CalledProcessError @@ -932,6 +934,110 @@ def wildcard_present(path): m = re.search("[*#@%]", path) return m is not None +class LargeFileSystem(object): + """Base class for large file system support.""" + + def __init__(self, writeToGitStream): + self.largeFiles = set() + self.writeToGitStream = writeToGitStream + + def generatePointer(self, cloneDestination, contentFile): + """Return the content of a pointer file that is stored in Git instead of + the actual content.""" + assert False, "Method 'generatePointer' required in " + self.__class__.__name__ + + def pushFile(self, localLargeFile): + """Push the actual content which is not stored in the Git repository to + a server.""" + assert False, "Method 'pushFile' required in " + self.__class__.__name__ + + def hasLargeFileExtension(self, relPath): + return reduce( + lambda a, b: a or b, + [relPath.endswith('.' + e) for e in gitConfigList('git-p4.largeFileExtensions')], + False + ) + + def generateTempFile(self, contents): + contentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + for d in contents: + contentFile.write(d) + contentFile.close() + return contentFile.name + + def exceedsLargeFileThreshold(self, relPath, contents): + if gitConfigInt('git-p4.largeFileThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize > gitConfigInt('git-p4.largeFileThreshold'): + return True + if gitConfigInt('git-p4.largeFileCompressedThreshold'): + contentsSize = sum(len(d) for d in contents) + if contentsSize <= gitConfigInt('git-p4.largeFileCompressedThreshold'): + return False + contentTempFile = self.generateTempFile(contents) + compressedContentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False) + zf = zipfile.ZipFile(compressedContentFile.name, mode='w') + zf.write(contentTempFile, compress_type=zipfile.ZIP_DEFLATED) + zf.close() + compressedContentsSize = zf.infolist()[0].compress_size + os.remove(contentTempFile) + os.remove(compressedContentFile.name) + if compressedContentsSize > gitConfigInt('git-p4.largeFileCompressedThreshold'): + return True + return False + + def addLargeFile(self, relPath): + self.largeFiles.add(relPath) + + def removeLargeFile(self, relPath): + self.largeFiles.remove(relPath) + + def isLargeFile(self, relPath): + return relPath in self.largeFiles + + def processContent(self, git_mode, relPath, contents): + """Processes the content of git fast import. This method decides if a + file is stored in the large file system and handles all necessary + steps.""" + if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath): + contentTempFile = self.generateTempFile(contents) + (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile) + + # Move temp file to final location in large file system + largeFileDir = os.path.dirname(localLargeFile) + if not os.path.isdir(largeFileDir): + os.makedirs(largeFileDir) + shutil.move(contentTempFile, localLargeFile) + self.addLargeFile(relPath) + if gitConfigBool('git-p4.largeFilePush'): + self.pushFile(localLargeFile) + if verbose: + sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile)) + return (git_mode, contents) + +class MockLFS(LargeFileSystem): + """Mock large file system for testing.""" + + def generatePointer(self, contentFile): + """The pointer content is the original content prefixed with "pointer-". + The local filename of the large file storage is derived from the file content. + """ + with open(contentFile, 'r') as f: + content = next(f) + gitMode = '100644' + pointerContents = 'pointer-' + content + localLargeFile = os.path.join(os.getcwd(), '.git', 'mock-storage', 'local', content[:-1]) + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + """The remote filename of the large file storage is the same as the local + one but in a different directory. + """ + remotePath = os.path.join(os.path.dirname(localLargeFile), '..', 'remote') + if not os.path.exists(remotePath): + os.makedirs(remotePath) + shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) + class Command: def __init__(self): self.usage = "usage: %prog [options]" @@ -1105,6 +1211,9 @@ class P4Submit(Command, P4UserMap): self.p4HasMoveCommand = p4_has_move_command() self.branch = None + if gitConfig('git-p4.largeFileSystem'): + die("Large file system not supported for git-p4 submit command. Please remove it from config.") + def check(self): if len(p4CmdList("opened ...")) > 0: die("You have files opened with perforce! Close them before starting the sync.") @@ -2055,6 +2164,13 @@ class P4Sync(Command, P4UserMap): self.clientSpecDirs = None self.tempBranches = [] self.tempBranchLocation = "git-p4-tmp" + self.largeFileSystem = None + + if gitConfig('git-p4.largeFileSystem'): + largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')] + self.largeFileSystem = largeFileSystemConstructor( + lambda git_mode, relPath, contents: self.writeToGitStream(git_mode, relPath, contents) + ) if gitConfig("git-p4.syncFromOrigin") == "false": self.syncWithOrigin = False @@ -2175,6 +2291,13 @@ class P4Sync(Command, P4UserMap): return branches + def writeToGitStream(self, gitMode, relPath, contents): + self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) + for d in contents: + self.gitStream.write(d) + self.gitStream.write('\n') + # output one file from the P4 stream # - helper for streamP4Files @@ -2245,17 +2368,10 @@ class P4Sync(Command, P4UserMap): text = regexp.sub(r'$\1$', text) contents = [ text ] - self.gitStream.write("M %s inline %s\n" % (git_mode, relPath)) + if self.largeFileSystem: + (git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents) - # total length... - length = 0 - for d in contents: - length = length + len(d) - - self.gitStream.write("data %d\n" % length) - for d in contents: - self.gitStream.write(d) - self.gitStream.write("\n") + self.writeToGitStream(git_mode, relPath, contents) def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) @@ -2264,6 +2380,9 @@ class P4Sync(Command, P4UserMap): sys.stdout.flush() self.gitStream.write("D %s\n" % relPath) + if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): + self.largeFileSystem.removeLargeFile(relPath) + # handle another chunk of streaming data def streamP4FilesCb(self, marshalled): -- cgit v1.2.1 From b47d807d20374d8dc929ba4c0ca3bd7e8a4ce1f7 Mon Sep 17 00:00:00 2001 From: Lars Schneider Date: Sat, 26 Sep 2015 09:55:04 +0200 Subject: git-p4: add Git LFS backend for large file system Add example implementation including test cases for the large file system using Git LFS. Pushing files to the Git LFS server is not tested. Signed-off-by: Lars Schneider Signed-off-by: Junio C Hamano --- git-p4.py | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'git-p4.py') diff --git a/git-p4.py b/git-p4.py index 02b5e9e362..765ea5f064 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1038,6 +1038,78 @@ class MockLFS(LargeFileSystem): os.makedirs(remotePath) shutil.copyfile(localLargeFile, os.path.join(remotePath, os.path.basename(localLargeFile))) +class GitLFS(LargeFileSystem): + """Git LFS as backend for the git-p4 large file system. + See https://git-lfs.github.com/ for details.""" + + def __init__(self, *args): + LargeFileSystem.__init__(self, *args) + self.baseGitAttributes = [] + + def generatePointer(self, contentFile): + """Generate a Git LFS pointer for the content. Return LFS Pointer file + mode and content which is stored in the Git repository instead of + the actual content. Return also the new location of the actual + content. + """ + pointerProcess = subprocess.Popen( + ['git', 'lfs', 'pointer', '--file=' + contentFile], + stdout=subprocess.PIPE + ) + pointerFile = pointerProcess.stdout.read() + if pointerProcess.wait(): + os.remove(contentFile) + die('git-lfs pointer command failed. Did you install the extension?') + pointerContents = [i+'\n' for i in pointerFile.split('\n')[2:][:-1]] + oid = pointerContents[1].split(' ')[1].split(':')[1][:-1] + localLargeFile = os.path.join( + os.getcwd(), + '.git', 'lfs', 'objects', oid[:2], oid[2:4], + oid, + ) + # LFS Spec states that pointer files should not have the executable bit set. + gitMode = '100644' + return (gitMode, pointerContents, localLargeFile) + + def pushFile(self, localLargeFile): + uploadProcess = subprocess.Popen( + ['git', 'lfs', 'push', '--object-id', 'origin', os.path.basename(localLargeFile)] + ) + if uploadProcess.wait(): + die('git-lfs push command failed. Did you define a remote?') + + def generateGitAttributes(self): + return ( + self.baseGitAttributes + + [ + '\n', + '#\n', + '# Git LFS (see https://git-lfs.github.com/)\n', + '#\n', + ] + + ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(gitConfigList('git-p4.largeFileExtensions')) + ] + + ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n' + for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f) + ] + ) + + def addLargeFile(self, relPath): + LargeFileSystem.addLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def removeLargeFile(self, relPath): + LargeFileSystem.removeLargeFile(self, relPath) + self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes()) + + def processContent(self, git_mode, relPath, contents): + if relPath == '.gitattributes': + self.baseGitAttributes = contents + return (git_mode, self.generateGitAttributes()) + else: + return LargeFileSystem.processContent(self, git_mode, relPath, contents) + class Command: def __init__(self): self.usage = "usage: %prog [options]" -- cgit v1.2.1