summaryrefslogtreecommitdiff
path: root/lorry.zip-importer
blob: da70a08dd95124116ee773feb2704226fb3b96fc (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#!/usr/bin/env python3

# Copyright 2005-2015 Linus Torvalds and others
# Copyright 2015, 2019, 2021 Codethink Limited
#
# Based on git's contrib/fast-import/import-zips.py; modified for Lorry.

# zip archive frontend for git-fast-import
#
# For example:
#
#  mkdir project; cd project; git init
#  python3 lorry.zip-importer *.zip
#  git log --stat master

import calendar
import os.path
import struct
import subprocess
import sys
from zipfile import ZipFile

branch_name = "master"
branch_ref = "refs/heads/%s" % branch_name
committer_name = "Lorry Zip Importer"
committer_email = "lorry-zip-importer@lorry"


# File header 'extra' field tags
EXT_TAG_UNIX0 = 0x000D  # PKWARE Unix, aka Unix type 0
EXT_TAG_UNIX1 = 0x5855  # Info-ZIP Unix type 1
EXT_TAG_TIME = 0x5455  # Extended Timestamp
# Bit in the first byte of an Extended Timestamp field:
# mtime present (and first)
EXT_TIME_FLAG_MTIME = 0x01


# Iterate over fields within a file header 'extra' block
def zip_extra_fields(extra):
    """Yield a ``(tag, payload)`` pair for each field in an 'extra' block.

    Each field begins with a 4-byte little-endian header made of a
    16-bit tag and a 16-bit payload size, followed by that many payload
    bytes.  Iteration ends quietly at the first field whose header or
    payload does not fit in what remains of ``extra``.
    """
    header = struct.Struct("<HH")
    total = len(extra)
    offset = 0
    while offset + header.size <= total:
        tag, size = header.unpack_from(extra, offset)
        start = offset + header.size
        end = start + size
        if end > total:
            # Declared payload runs past the end of the block: stop.
            return
        yield tag, extra[start:end]
        offset = end


# Make our best guess at the mtime of a zip file entry
def zip_info_mtime(info):
    """Return the best available modification time for a zip entry.

    The entry's 'extra' block is scanned for a field carrying a 32-bit
    mtime: PKWARE Unix / Info-ZIP Unix type 1 (mtime follows a 4-byte
    atime) or Extended Timestamp (mtime follows the flags byte, when the
    mtime flag is set).  The first usable field wins.  If none is found,
    the entry's ``date_time`` from the main header is converted instead.
    """
    for tag, data in zip_extra_fields(info.extra):
        if tag == EXT_TAG_TIME:
            # First byte indicates which timestamps follow
            has_mtime = len(data) >= 1 and data[0] & EXT_TIME_FLAG_MTIME
            layout = "<xL" if has_mtime else None  # Flags, ModTime
        elif tag in (EXT_TAG_UNIX0, EXT_TAG_UNIX1):
            layout = "<4xL"  # AcTime, ModTime
        else:
            layout = None

        if layout is None:
            continue
        needed = struct.calcsize(layout)
        if len(data) < needed:
            # Field too short to hold the mtime; try the next one.
            continue
        (mtime,) = struct.unpack(layout, data[:needed])
        return mtime

    # Timestamps in the main header are in local time, but the time
    # zone offset is unspecified.  We choose to interpret them as UTC.
    return calendar.timegm(info.date_time + (0, 0, 0))


def export(zipfile, fast_import):
    """Write one zip archive to a git fast-import stream.

    Every non-directory entry of the archive is emitted as a blob, then
    a single commit on ``branch_ref`` replaces the whole tree with those
    blobs (all with mode 100644), and finally a tag named after the
    archive (basename without extension) is created on that commit.

    The directory prefix shared by all entries is stripped from the
    paths, and the commit/tag timestamp is the newest entry mtime as
    reported by ``zip_info_mtime`` (0 if the archive has no files).

    zipfile: path of the zip archive to read.
    fast_import: writable binary stream connected to git fast-import.
    """

    def printlines(lines):
        # fast-import commands are newline-terminated UTF-8 text.
        for line in lines:
            fast_import.write(line.encode("utf-8") + b"\n")

    commit_time = 0
    next_mark = 1
    common_prefix = None
    mark = {}

    # The context manager releases the archive handle even if writing to
    # fast-import fails part-way through.
    with ZipFile(zipfile, "r") as archive:
        for name in archive.namelist():
            if name.endswith("/"):
                # Directory entry: git only tracks files.
                continue
            info = archive.getinfo(name)

            commit_time = max(commit_time, zip_info_mtime(info))
            if common_prefix is None:
                # First file: start from its directory (may be empty).
                common_prefix = name[: name.rfind("/") + 1]
            else:
                # Drop trailing path components until the prefix also
                # matches this entry; the empty prefix always matches.
                while not name.startswith(common_prefix):
                    last_slash = common_prefix[:-1].rfind("/") + 1
                    common_prefix = common_prefix[:last_slash]

            mark[name] = ":" + str(next_mark)
            next_mark += 1

            # Announce exactly as many bytes as are written, so the
            # stream stays in sync with what fast-import expects.
            payload = archive.read(name)
            printlines(("blob", "mark " + mark[name], "data %d" % len(payload)))
            fast_import.write(payload + b"\n")

    committer = committer_name + " <" + committer_email + "> %d +0000" % commit_time

    zipfile_basename = os.path.basename(zipfile)
    printlines(
        (
            "commit " + branch_ref,
            "committer " + committer,
            "data <<EOM",
            "Imported from " + zipfile_basename + ".",
            "EOM",
            "",
            "deleteall",
        )
    )

    # common_prefix is only None when no file was seen, in which case
    # there is nothing to iterate over here.
    for name, blob_mark in mark.items():
        printlines(("M 100644 " + blob_mark + " " + name[len(common_prefix) :],))

    zipname, _ = os.path.splitext(zipfile_basename)

    printlines(
        (
            "",
            "tag " + zipname,
            "from " + branch_ref,
            "tagger " + committer,
            "data <<EOM",
            "Package " + zipfile,
            "EOM",
            "",
        )
    )


def main():
    """Import each zip archive named on the command line into git.

    Starts one ``git fast-import --quiet`` process in the current
    directory and feeds every archive to it in argument order via
    ``export``.  Exits with status 1 if no archive is given or if
    fast-import reports failure.
    """
    if len(sys.argv) < 2:
        # Usage errors belong on stderr, not in the normal output.
        print("usage:", sys.argv[0], "<zipfile>...", file=sys.stderr)
        sys.exit(1)

    # The command is fixed, so pass it as an argument list instead of
    # routing it through a shell.
    with subprocess.Popen(
        ["git", "fast-import", "--quiet"], stdin=subprocess.PIPE
    ) as import_proc:
        for zipfile in sys.argv[1:]:
            export(zipfile, import_proc.stdin)
        # Closing stdin signals end of stream so fast-import can finish.
        import_proc.stdin.close()
        if import_proc.wait() != 0:
            sys.exit(1)


# Run the importer only when executed as a script, so that importing
# this file has no side effects.
if __name__ == "__main__":
    main()