summaryrefslogtreecommitdiff
path: root/lorry.raw-file-importer
blob: 791e735ec07fcf67103db3079493d43243524c18 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/usr/bin/env python3

# Copyright 2021 Codethink Limited

# raw file archive using git-lfs frontend for git-fast-import

import hashlib
import os
import shutil
import subprocess
import sys
import time

# Branch that every generated commit is written to, and its full ref name.
branch_name = "master"
branch_ref = "refs/heads/{}".format(branch_name)

# Identity placed on the "committer" line of every generated commit.
committer_name = "Lorry Raw File Importer"
committer_email = "lorry-raw-file-importer@lorry"


def commit_lfs_gitattributes(fast_import):
    """Write a fast-import commit that adds the LFS ``.gitattributes`` file.

    The commit targets ``branch_ref`` and carries an inline ``.gitattributes``
    blob that assigns the ``lfs`` filter/diff/merge attributes to every path,
    followed by a separate attribute line for ``.gitattributes`` itself.

    Args:
        fast_import: writable binary stream; the command is written to it
            encoded as UTF-8.
    """
    timestamp = int(time.time())

    # One entry per line of the fast-import stream; the trailing empty entry
    # plus the final newline yields the blank line that ends the command.
    stream_lines = [
        "commit {}".format(branch_ref),
        "committer {} <{}> {} +0000".format(
            committer_name, committer_email, timestamp
        ),
        "data <<EOM",
        "Ensure LFS is configured",
        "EOM",
        "M 100644 inline .gitattributes",
        "data <<EOM",
        "* filter=lfs diff=lfs merge=lfs -text",
        ".gitattributes filter diff merge text=auto",
        "EOM",
        "",
    ]
    payload = "\n".join(stream_lines) + "\n"
    fast_import.write(payload.encode("utf-8"))


def commit_lfs_file(raw_file, relative_path, last_commit, fast_import):
    """Store raw_file as an LFS object and emit a commit adding its pointer.

    The file is copied to ``lfs/objects/<aa>/<bb>/<sha256>`` (relative to the
    current working directory) and a fast-import ``commit`` command is written
    to fast_import that adds the output of ``git lfs pointer`` at
    ``<relative_path>/<basename of raw_file>`` on ``branch_ref``.

    Args:
        raw_file: path of the file to import.
        relative_path: directory, relative to the repository root, under
            which the pointer file is committed.
        last_commit: commit to name on the ``from`` line; when falsy no
            ``from`` line is emitted.
        fast_import: writable binary stream; the command is written to it
            encoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: if ``git lfs pointer`` exits non-zero.
        SystemExit: with status 1 if the target path would climb above the
            current directory.
    """
    basename = os.path.basename(raw_file)

    # Ensure we don't allow directory traversal.  This is checked before any
    # other work so that a rejected path never leaves a stray object behind
    # in the LFS store.
    path = os.path.relpath(os.path.join(relative_path, basename))
    if path == os.pardir or path.startswith(os.pardir + os.sep):
        print("error: unsafe path: {}".format(path), file=sys.stderr)
        sys.exit(1)

    # git-lfs-pointer of the file
    ret = subprocess.run(
        ["git", "lfs", "pointer", "--file", raw_file],
        stdout=subprocess.PIPE,
        check=True,
    )
    pointer_digest = ret.stdout
    # The "data <count>" form below needs the pointer's length in bytes.
    datasize = len(pointer_digest)

    # sha256sum of the file
    # slightly wasteful because git-lfs-pointer also generates a sha256sum
    # Hash in fixed-size chunks so large files are never held in memory whole.
    hasher = hashlib.sha256()
    with open(raw_file, "rb") as f:
        for chunk in iter(lambda: f.read(1024 * 1024), b""):
            hasher.update(chunk)
    shasum = hasher.hexdigest()

    # Add the file to the repo
    out_dir = os.path.join("lfs", "objects", shasum[0:2], shasum[2:4])
    os.makedirs(out_dir, exist_ok=True)
    shutil.copyfile(raw_file, os.path.join(out_dir, shasum))

    # Commit the data to master
    commit_time = int(time.time())
    fromline = "from {}\n".format(last_commit) if last_commit else ""

    commit = (
        "commit {ref}\n"
        "committer {committer_name} <{committer_email}> {commit_time} +0000\n"
        "data <<EOM\n"
        "import {basename}\n"
        "EOM\n"
        "{fromline}"
        "M 100644 inline {path}\n"
        "data {datasize}\n"
        "{data}\n"
        "\n"
    ).format(
        ref=branch_ref,
        committer_name=committer_name,
        committer_email=committer_email,
        commit_time=commit_time,
        basename=basename,
        fromline=fromline,
        path=path,
        datasize=datasize,
        data=pointer_digest.decode("utf-8"),
    )
    fast_import.write(commit.encode("utf-8"))


def get_last_commit():
    """Return the full hash that ``branch_ref`` resolves to, or None.

    None is returned when ``git rev-parse`` exits non-zero, or when its
    output is just the ref name echoed back.
    """
    result = subprocess.run(
        ["git", "rev-parse", branch_ref],
        universal_newlines=True,
        stdout=subprocess.PIPE,
    )
    if result.returncode:
        return None

    resolved = result.stdout.strip()
    # rev-parse prints its argument unchanged when no commits exist yet
    if resolved == branch_ref:
        return None
    return resolved


def main():
    """Import one raw file into the current repository via git fast-import.

    Expects exactly two command-line arguments: the file to import and the
    repository-relative directory to place it under.  When the import branch
    has no commit yet, a commit adding the LFS ``.gitattributes`` is emitted
    before the file commit.

    Exits with status 1 on a usage error or when ``git fast-import`` returns
    a non-zero status.
    """
    if len(sys.argv) != 3:
        # A usage error is an error: report it on stderr, as the unsafe-path
        # error elsewhere in this script does.
        print(
            "usage:", sys.argv[0], "<file>", "<relative path>", file=sys.stderr
        )
        sys.exit(1)

    raw_file = sys.argv[1]
    relpath = sys.argv[2]

    last_commit = get_last_commit()
    # Run git directly from an argument list; no shell is needed for a fixed
    # command line.
    with subprocess.Popen(
        ["git", "fast-import", "--quiet"], stdin=subprocess.PIPE
    ) as import_proc:
        if not last_commit:
            commit_lfs_gitattributes(import_proc.stdin)

        commit_lfs_file(raw_file, relpath, last_commit, import_proc.stdin)
        # Closing stdin signals end of stream so fast-import can finish.
        import_proc.stdin.close()
        if import_proc.wait() != 0:
            sys.exit(1)


# Run the import only when executed as a script, not when imported.
if __name__ == "__main__":
    main()