diff options
author | Pedro Alvarez Piedehierro <palvarez89@gmail.com> | 2018-05-20 18:21:47 +0100 |
---|---|---|
committer | Pedro Alvarez <pedro.alvarez@codethink.co.uk> | 2018-05-21 10:59:29 +0100 |
commit | 6f10fecc49e05d8ad3f2997ac0f788d3fdff51a6 (patch) | |
tree | 0d1865dc126ea3ff9eedf14d7231f5a49983ffa4 /lorry.tar-importer | |
parent | 4f932750dd0c72d47295795bb8d405fc90be5ab0 (diff) | |
download | lorry-pedro/fix-gcc-tar-import.tar.gz |
lorry.tar-importer: Improve support for PAX extended headers (branch: pedro/fix-gcc-tar-import)
Sometimes the tar files will contain PAX extended headers to deal with
cases where the information needed doesn't fit in the initial header.
One of these cases is when the path is longer than 100 characters. An
extended header will appear before the block holding the contents of
this file, and it carries information about the entire path.
The PAX extended headers contain one or multiple records constructed as
follows:
"%d %s=%s\n", <length>, <keyword>, <value>
This commit makes sure that we always read the extended header blocks,
and in the case of finding one, we parse its records looking for 'path'
information. If this information is found, it is stored for the next
iteration.
As a side effect, this commit also fixes a bug where we weren't ignoring
the extended header contents because the truncated path was ending in '/'
and the script was skipping any actions given that it thought that it
was a folder (and it wasn't).
Diffstat (limited to 'lorry.tar-importer')
-rwxr-xr-x | lorry.tar-importer | 30 |
1 files changed, 27 insertions, 3 deletions
diff --git a/lorry.tar-importer b/lorry.tar-importer index d4d27f5..6cdad1d 100755 --- a/lorry.tar-importer +++ b/lorry.tar-importer @@ -70,6 +70,8 @@ foreach my $tar_file (@ARGV) my $have_top_dir = 1; my ($top_dir, %files); + my $next_path = ''; + while (read(I, $_, 512) == 512) { my ($name, $mode, $uid, $gid, $size, $mtime, $chksum, $typeflag, $linkname, $magic, @@ -77,6 +79,13 @@ foreach my $tar_file (@ARGV) $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12 Z8 Z1 Z100 Z6 Z2 Z32 Z32 Z8 Z8 Z*', $_; + + unless ($next_path eq '') { + # Recover name from previous extended header + $name = $next_path; + $next_path = ''; + } + last unless length($name); if ($name eq '././@LongLink') { # GNU tar extension @@ -97,7 +106,6 @@ foreach my $tar_file (@ARGV) Z8 Z1 Z100 Z6 Z2 Z32 Z32 Z8 Z8 Z*', $_; } - next if $name =~ m{/\z}; $mode = oct $mode; $size = oct $size; $mtime = oct $mtime; @@ -108,8 +116,24 @@ foreach my $tar_file (@ARGV) print FI "data ", length($linkname), "\n", $linkname; $mode = 0120000; } elsif ($typeflag eq 'x') { # extended header - # skip header contents - $size -= 512 while ($size > 0 && read(I, $_, 512) == 512); + # If extended header, check for path + my $pax_header = ''; + while ($size > 0 && read(I, $_, 512) == 512) { + $pax_header = $pax_header . substr($_, 0, $size); + $size -= 512; + } + + my @lines = split /\n/, $pax_header; + foreach my $line (@lines) { + my ($len, $entry) = split / /, $line; + my ($key, $value) = split /=/, $entry; + if ($key eq 'path') { + $next_path = $value; + } + } + next; + } elsif ($name =~ m{/\z}) { + # If it's a folder, ignore next; } else { print FI "blob\n", "mark :$next_mark\n"; |