summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPedro Alvarez Piedehierro <palvarez89@gmail.com>2018-05-20 18:21:47 +0100
committerPedro Alvarez <pedro.alvarez@codethink.co.uk>2018-05-21 10:59:29 +0100
commit6f10fecc49e05d8ad3f2997ac0f788d3fdff51a6 (patch)
tree0d1865dc126ea3ff9eedf14d7231f5a49983ffa4
parent4f932750dd0c72d47295795bb8d405fc90be5ab0 (diff)
downloadlorry-pedro/fix-gcc-tar-import.tar.gz
lorry.tar-importer: Improve support for PAX extended headerspedro/fix-gcc-tar-import
Sometimes tar files contain PAX extended headers to deal with cases where the information needed doesn't fit in the initial header. One of these cases is when the path is longer than 100 characters. An extended header will appear before the block containing the contents of this file, carrying information about the entire path. The PAX extended headers contain one or more records constructed as follows: "%d %s=%s\n", <length>, <keyword>, <value>. This commit makes sure that we always read the extended header blocks and, when we find one, parse its records looking for 'path' information. If this information is found, it is stored for the next iteration. As a side effect, this commit also fixes a bug where we weren't ignoring the extended header contents because the truncated path ended in '/' and the script skipped any action, thinking that it was a folder (and it wasn't).
-rwxr-xr-xlorry.tar-importer30
1 files changed, 27 insertions, 3 deletions
diff --git a/lorry.tar-importer b/lorry.tar-importer
index d4d27f5..6cdad1d 100755
--- a/lorry.tar-importer
+++ b/lorry.tar-importer
@@ -70,6 +70,8 @@ foreach my $tar_file (@ARGV)
my $have_top_dir = 1;
my ($top_dir, %files);
+ my $next_path = '';
+
while (read(I, $_, 512) == 512) {
my ($name, $mode, $uid, $gid, $size, $mtime,
$chksum, $typeflag, $linkname, $magic,
@@ -77,6 +79,13 @@ foreach my $tar_file (@ARGV)
$prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12
Z8 Z1 Z100 Z6
Z2 Z32 Z32 Z8 Z8 Z*', $_;
+
+ unless ($next_path eq '') {
+ # Recover name from previous extended header
+ $name = $next_path;
+ $next_path = '';
+ }
+
last unless length($name);
if ($name eq '././@LongLink') {
# GNU tar extension
@@ -97,7 +106,6 @@ foreach my $tar_file (@ARGV)
Z8 Z1 Z100 Z6
Z2 Z32 Z32 Z8 Z8 Z*', $_;
}
- next if $name =~ m{/\z};
$mode = oct $mode;
$size = oct $size;
$mtime = oct $mtime;
@@ -108,8 +116,24 @@ foreach my $tar_file (@ARGV)
print FI "data ", length($linkname), "\n", $linkname;
$mode = 0120000;
} elsif ($typeflag eq 'x') { # extended header
- # skip header contents
- $size -= 512 while ($size > 0 && read(I, $_, 512) == 512);
+ # If extended header, check for path
+ my $pax_header = '';
+ while ($size > 0 && read(I, $_, 512) == 512) {
+ $pax_header = $pax_header . substr($_, 0, $size);
+ $size -= 512;
+ }
+
+ my @lines = split /\n/, $pax_header;
+ foreach my $line (@lines) {
+ my ($len, $entry) = split / /, $line;
+ my ($key, $value) = split /=/, $entry;
+ if ($key eq 'path') {
+ $next_path = $value;
+ }
+ }
+ next;
+ } elsif ($name =~ m{/\z}) {
+ # If it's a folder, ignore
next;
} else {
print FI "blob\n", "mark :$next_mark\n";