summaryrefslogtreecommitdiff
path: root/t/t0021-conversion.sh
diff options
context:
space:
mode:
authorLars Schneider <larsxschneider@gmail.com>2016-10-16 16:20:37 -0700
committerJunio C Hamano <gitster@pobox.com>2016-10-17 11:45:52 -0700
commitedcc85814c87ebd7f3b1b7d3979fac3dfb84d308 (patch)
tree92fcb94be20c944c989b91d6cc8b4a0f54af2a01 /t/t0021-conversion.sh
parent234fa07e066d6bc0728c3d9b92686c69a811e5ac (diff)
downloadgit-edcc85814c87ebd7f3b1b7d3979fac3dfb84d308.tar.gz
convert: add filter.<driver>.process option
Git's clean/smudge mechanism invokes an external filter process for every single blob that is affected by a filter. If Git filters a lot of blobs then the startup time of the external filter processes can become a significant part of the overall Git execution time. In a preliminary performance test this developer used a clean/smudge filter written in golang to filter 12,000 files. This process took 364s with the existing filter mechanism and 5s with the new mechanism. See details here: https://github.com/github/git-lfs/pull/1382 This patch adds the `filter.<driver>.process` string option which, if used, keeps the external filter process running and processes all blobs with the packet format (pkt-line) based protocol over standard input and standard output. The full protocol is explained in detail in `Documentation/gitattributes.txt`. A few key decisions: * The long running filter process is referred to as filter protocol version 2 because the existing single shot filter invocation is considered version 1. * Git sends a welcome message and expects a response right after the external filter process has started. This ensures that Git will not hang if a version 1 filter is incorrectly used with the filter.<driver>.process option for version 2 filters. In addition, Git can detect this kind of error and warn the user. * The status of a filter operation (e.g. "success" or "error) is set before the actual response and (if necessary!) re-set after the response. The advantage of this two step status response is that if the filter detects an error early, then the filter can communicate this and Git does not even need to create structures to read the response. * All status responses are pkt-line lists terminated with a flush packet. This allows us to send other status fields with the same protocol in the future. Helped-by: Martin-Louis Bright <mlbright@gmail.com> Reviewed-by: Jakub Narebski <jnareb@gmail.com> Signed-off-by: Lars Schneider <larsxschneider@gmail.com> Signed-off-by: Junio C Hamano <gitster@pobox.com>
Diffstat (limited to 't/t0021-conversion.sh')
-rwxr-xr-xt/t0021-conversion.sh444
1 files changed, 441 insertions, 3 deletions
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index dc509381c9..a20b9f58e3 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -4,13 +4,72 @@ test_description='blob conversion via gitattributes'
. ./test-lib.sh
-cat <<EOF >rot13.sh
+TEST_ROOT="$(pwd)"
+
+cat <<EOF >"$TEST_ROOT/rot13.sh"
#!$SHELL_PATH
tr \
'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' \
'nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM'
EOF
-chmod +x rot13.sh
+chmod +x "$TEST_ROOT/rot13.sh"
+
+generate_random_characters () {
+ LEN=$1
+ NAME=$2
+ test-genrandom some-seed $LEN |
+ perl -pe "s/./chr((ord($&) % 26) + ord('a'))/sge" >"$TEST_ROOT/$NAME"
+}
+
+file_size () {
+ cat "$1" | wc -c | sed "s/^[ ]*//"
+}
+
+filter_git () {
+ rm -f rot13-filter.log &&
+ git "$@" 2>git-stderr.log &&
+ rm -f git-stderr.log
+}
+
+# Compare two files and ensure that `clean` and `smudge` respectively are
+# called at least once if specified in the `expect` file. The actual
+# invocation count is not relevant because their number can vary.
+# c.f. http://public-inbox.org/git/xmqqshv18i8i.fsf@gitster.mtv.corp.google.com/
+test_cmp_count () {
+ expect=$1
+ actual=$2
+ for FILE in "$expect" "$actual"
+ do
+ sort "$FILE" | uniq -c | sed "s/^[ ]*//" |
+ sed "s/^\([0-9]\) IN: clean/x IN: clean/" |
+ sed "s/^\([0-9]\) IN: smudge/x IN: smudge/" >"$FILE.tmp" &&
+ mv "$FILE.tmp" "$FILE"
+ done &&
+ test_cmp "$expect" "$actual"
+}
+
+# Compare two files but exclude all `clean` invocations because Git can
+# call `clean` zero or more times.
+# c.f. http://public-inbox.org/git/xmqqshv18i8i.fsf@gitster.mtv.corp.google.com/
+test_cmp_exclude_clean () {
+ expect=$1
+ actual=$2
+ for FILE in "$expect" "$actual"
+ do
+ grep -v "IN: clean" "$FILE" >"$FILE.tmp" &&
+ mv "$FILE.tmp" "$FILE"
+ done &&
+ test_cmp "$expect" "$actual"
+}
+
+# Check that the contents of two files are equal and that their rot13 version
+# is equal to the committed content.
+test_cmp_committed_rot13 () {
+ test_cmp "$1" "$2" &&
+ "$TEST_ROOT/rot13.sh" <"$1" >expected &&
+ git cat-file blob :"$2" >actual &&
+ test_cmp expected actual
+}
test_expect_success setup '
git config filter.rot13.smudge ./rot13.sh &&
@@ -31,7 +90,10 @@ test_expect_success setup '
cat test >test.i &&
git add test test.t test.i &&
rm -f test test.t test.i &&
- git checkout -- test test.t test.i
+ git checkout -- test test.t test.i &&
+
+ echo "content-test2" >test2.o &&
+ echo "content-test3 - filename with special characters" >"test3 '\''sq'\'',\$x.o"
'
script='s/^\$Id: \([0-9a-f]*\) \$/\1/p'
@@ -279,4 +341,380 @@ test_expect_success 'diff does not reuse worktree files that need cleaning' '
test_line_count = 0 count
'
+test_expect_success PERL 'required process filter should filter data' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
+ test_config_global filter.protocol.required true &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "git-stderr.log" >.gitignore &&
+ echo "*.r filter=protocol" >.gitattributes &&
+ git add . &&
+ git commit . -m "test commit 1" &&
+ git branch empty-branch &&
+
+ cp "$TEST_ROOT/test.o" test.r &&
+ cp "$TEST_ROOT/test2.o" test2.r &&
+ mkdir testsubdir &&
+ cp "$TEST_ROOT/test3 '\''sq'\'',\$x.o" "testsubdir/test3 '\''sq'\'',\$x.r" &&
+ >test4-empty.r &&
+
+ S=$(file_size test.r) &&
+ S2=$(file_size test2.r) &&
+ S3=$(file_size "testsubdir/test3 '\''sq'\'',\$x.r") &&
+
+ filter_git add . &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: clean test.r $S [OK] -- OUT: $S . [OK]
+ IN: clean test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ IN: clean test4-empty.r 0 [OK] -- OUT: 0 [OK]
+ IN: clean testsubdir/test3 '\''sq'\'',\$x.r $S3 [OK] -- OUT: $S3 . [OK]
+ STOP
+ EOF
+ test_cmp_count expected.log rot13-filter.log &&
+
+ filter_git commit . -m "test commit 2" &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: clean test.r $S [OK] -- OUT: $S . [OK]
+ IN: clean test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ IN: clean test4-empty.r 0 [OK] -- OUT: 0 [OK]
+ IN: clean testsubdir/test3 '\''sq'\'',\$x.r $S3 [OK] -- OUT: $S3 . [OK]
+ IN: clean test.r $S [OK] -- OUT: $S . [OK]
+ IN: clean test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ IN: clean test4-empty.r 0 [OK] -- OUT: 0 [OK]
+ IN: clean testsubdir/test3 '\''sq'\'',\$x.r $S3 [OK] -- OUT: $S3 . [OK]
+ STOP
+ EOF
+ test_cmp_count expected.log rot13-filter.log &&
+
+ rm -f test2.r "testsubdir/test3 '\''sq'\'',\$x.r" &&
+
+ filter_git checkout --quiet --no-progress . &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: smudge test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ IN: smudge testsubdir/test3 '\''sq'\'',\$x.r $S3 [OK] -- OUT: $S3 . [OK]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ filter_git checkout --quiet --no-progress empty-branch &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: clean test.r $S [OK] -- OUT: $S . [OK]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ filter_git checkout --quiet --no-progress master &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: smudge test.r $S [OK] -- OUT: $S . [OK]
+ IN: smudge test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ IN: smudge test4-empty.r 0 [OK] -- OUT: 0 [OK]
+ IN: smudge testsubdir/test3 '\''sq'\'',\$x.r $S3 [OK] -- OUT: $S3 . [OK]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ test_cmp_committed_rot13 "$TEST_ROOT/test.o" test.r &&
+ test_cmp_committed_rot13 "$TEST_ROOT/test2.o" test2.r &&
+ test_cmp_committed_rot13 "$TEST_ROOT/test3 '\''sq'\'',\$x.o" "testsubdir/test3 '\''sq'\'',\$x.r"
+ )
+'
+
+test_expect_success PERL 'required process filter takes precedence' '
+ test_config_global filter.protocol.clean false &&
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean" &&
+ test_config_global filter.protocol.required true &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+ cp "$TEST_ROOT/test.o" test.r &&
+ S=$(file_size test.r) &&
+
+ # Check that the process filter is invoked here
+ filter_git add . &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: clean test.r $S [OK] -- OUT: $S . [OK]
+ STOP
+ EOF
+ test_cmp_count expected.log rot13-filter.log
+ )
+'
+
+test_expect_success PERL 'required process filter should be used only for "clean" operation only' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean" &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+ cp "$TEST_ROOT/test.o" test.r &&
+ S=$(file_size test.r) &&
+
+ filter_git add . &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: clean test.r $S [OK] -- OUT: $S . [OK]
+ STOP
+ EOF
+ test_cmp_count expected.log rot13-filter.log &&
+
+ rm test.r &&
+
+ filter_git checkout --quiet --no-progress . &&
+ # If the filter would be used for "smudge", too, we would see
+ # "IN: smudge test.r 57 [OK] -- OUT: 57 . [OK]" here
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log
+ )
+'
+
+test_expect_success PERL 'required process filter should process multiple packets' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
+ test_config_global filter.protocol.required true &&
+
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ # Generate data requiring 1, 2, 3 packets
+ S=65516 && # PKTLINE_DATA_MAXLEN -> Maximal size of a packet
+ generate_random_characters $(($S )) 1pkt_1__.file &&
+ generate_random_characters $(($S +1)) 2pkt_1+1.file &&
+ generate_random_characters $(($S*2-1)) 2pkt_2-1.file &&
+ generate_random_characters $(($S*2 )) 2pkt_2__.file &&
+ generate_random_characters $(($S*2+1)) 3pkt_2+1.file &&
+
+ for FILE in "$TEST_ROOT"/*.file
+ do
+ cp "$FILE" . &&
+ "$TEST_ROOT/rot13.sh" <"$FILE" >"$FILE.rot13"
+ done &&
+
+ echo "*.file filter=protocol" >.gitattributes &&
+ filter_git add *.file .gitattributes &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: clean 1pkt_1__.file $(($S )) [OK] -- OUT: $(($S )) . [OK]
+ IN: clean 2pkt_1+1.file $(($S +1)) [OK] -- OUT: $(($S +1)) .. [OK]
+ IN: clean 2pkt_2-1.file $(($S*2-1)) [OK] -- OUT: $(($S*2-1)) .. [OK]
+ IN: clean 2pkt_2__.file $(($S*2 )) [OK] -- OUT: $(($S*2 )) .. [OK]
+ IN: clean 3pkt_2+1.file $(($S*2+1)) [OK] -- OUT: $(($S*2+1)) ... [OK]
+ STOP
+ EOF
+ test_cmp_count expected.log rot13-filter.log &&
+
+ rm -f *.file &&
+
+ filter_git checkout --quiet --no-progress -- *.file &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: smudge 1pkt_1__.file $(($S )) [OK] -- OUT: $(($S )) . [OK]
+ IN: smudge 2pkt_1+1.file $(($S +1)) [OK] -- OUT: $(($S +1)) .. [OK]
+ IN: smudge 2pkt_2-1.file $(($S*2-1)) [OK] -- OUT: $(($S*2-1)) .. [OK]
+ IN: smudge 2pkt_2__.file $(($S*2 )) [OK] -- OUT: $(($S*2 )) .. [OK]
+ IN: smudge 3pkt_2+1.file $(($S*2+1)) [OK] -- OUT: $(($S*2+1)) ... [OK]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ for FILE in *.file
+ do
+ test_cmp_committed_rot13 "$TEST_ROOT/$FILE" $FILE
+ done
+ )
+'
+
+test_expect_success PERL 'required process filter with clean error should fail' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
+ test_config_global filter.protocol.required true &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+
+ cp "$TEST_ROOT/test.o" test.r &&
+ echo "this is going to fail" >clean-write-fail.r &&
+ echo "content-test3-subdir" >test3.r &&
+
+ test_must_fail git add .
+ )
+'
+
+test_expect_success PERL 'process filter should restart after unexpected write failure' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+
+ cp "$TEST_ROOT/test.o" test.r &&
+ cp "$TEST_ROOT/test2.o" test2.r &&
+ echo "this is going to fail" >smudge-write-fail.o &&
+ cp smudge-write-fail.o smudge-write-fail.r &&
+
+ S=$(file_size test.r) &&
+ S2=$(file_size test2.r) &&
+ SF=$(file_size smudge-write-fail.r) &&
+
+ git add . &&
+ rm -f *.r &&
+
+ rm -f rot13-filter.log &&
+ git checkout --quiet --no-progress . 2>git-stderr.log &&
+
+ grep "smudge write error at" git-stderr.log &&
+ grep "error: external filter" git-stderr.log &&
+
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: smudge smudge-write-fail.r $SF [OK] -- OUT: $SF [WRITE FAIL]
+ START
+ init handshake complete
+ IN: smudge test.r $S [OK] -- OUT: $S . [OK]
+ IN: smudge test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ test_cmp_committed_rot13 "$TEST_ROOT/test.o" test.r &&
+ test_cmp_committed_rot13 "$TEST_ROOT/test2.o" test2.r &&
+
+ # Smudge failed
+ ! test_cmp smudge-write-fail.o smudge-write-fail.r &&
+ "$TEST_ROOT/rot13.sh" <smudge-write-fail.o >expected &&
+ git cat-file blob :smudge-write-fail.r >actual &&
+ test_cmp expected actual
+ )
+'
+
+test_expect_success PERL 'process filter should not be restarted if it signals an error' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+
+ cp "$TEST_ROOT/test.o" test.r &&
+ cp "$TEST_ROOT/test2.o" test2.r &&
+ echo "this will cause an error" >error.o &&
+ cp error.o error.r &&
+
+ S=$(file_size test.r) &&
+ S2=$(file_size test2.r) &&
+ SE=$(file_size error.r) &&
+
+ git add . &&
+ rm -f *.r &&
+
+ filter_git checkout --quiet --no-progress . &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: smudge error.r $SE [OK] -- OUT: 0 [ERROR]
+ IN: smudge test.r $S [OK] -- OUT: $S . [OK]
+ IN: smudge test2.r $S2 [OK] -- OUT: $S2 . [OK]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ test_cmp_committed_rot13 "$TEST_ROOT/test.o" test.r &&
+ test_cmp_committed_rot13 "$TEST_ROOT/test2.o" test2.r &&
+ test_cmp error.o error.r
+ )
+'
+
+test_expect_success PERL 'process filter abort stops processing of all further files' '
+ test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+
+ cp "$TEST_ROOT/test.o" test.r &&
+ cp "$TEST_ROOT/test2.o" test2.r &&
+ echo "error this blob and all future blobs" >abort.o &&
+ cp abort.o abort.r &&
+
+ SA=$(file_size abort.r) &&
+
+ git add . &&
+ rm -f *.r &&
+
+ # Note: This test assumes that Git filters files in alphabetical
+ # order ("abort.r" before "test.r").
+ filter_git checkout --quiet --no-progress . &&
+ cat >expected.log <<-EOF &&
+ START
+ init handshake complete
+ IN: smudge abort.r $SA [OK] -- OUT: 0 [ABORT]
+ STOP
+ EOF
+ test_cmp_exclude_clean expected.log rot13-filter.log &&
+
+ test_cmp "$TEST_ROOT/test.o" test.r &&
+ test_cmp "$TEST_ROOT/test2.o" test2.r &&
+ test_cmp abort.o abort.r
+ )
+'
+
+test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
+ test_config_global filter.protocol.process cat &&
+ test_config_global filter.protocol.required true &&
+ rm -rf repo &&
+ mkdir repo &&
+ (
+ cd repo &&
+ git init &&
+
+ echo "*.r filter=protocol" >.gitattributes &&
+
+ cp "$TEST_ROOT/test.o" test.r &&
+ test_must_fail git add . 2>git-stderr.log &&
+ grep "does not support filter protocol version" git-stderr.log
+ )
+'
+
test_done