From 6f10fecc49e05d8ad3f2997ac0f788d3fdff51a6 Mon Sep 17 00:00:00 2001 From: Pedro Alvarez Piedehierro Date: Sun, 20 May 2018 18:21:47 +0100 Subject: lorry.tar-importer: Improve support for PAX extended headers Sometimes the tar files will contain PAX extended headers to deal with cases where the information needed doesn't fit in the initial header. One of these cases is when the path is longer than 100 characters. An extended header will appear before the block including the contents of this file, including information about the entire path. The PAX extended headers contain one or multiple records constructed as follows: "%d %s=%s\n", <length>, <keyword>, <value> This commit makes sure that we always read the extended header blocks, and in the case of finding one, we parse its records looking for 'path' information. If this information is found, it is stored for the next iteration. As a side effect, this commit also fixes a bug where we weren't ignoring the extended header contents because the truncated path ended in '/' and the script was skipping any actions given that it thought that it was a folder (and it wasn't). 
--- lorry.tar-importer | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/lorry.tar-importer b/lorry.tar-importer index d4d27f5..6cdad1d 100755 --- a/lorry.tar-importer +++ b/lorry.tar-importer @@ -70,6 +70,8 @@ foreach my $tar_file (@ARGV) my $have_top_dir = 1; my ($top_dir, %files); + my $next_path = ''; + while (read(I, $_, 512) == 512) { my ($name, $mode, $uid, $gid, $size, $mtime, $chksum, $typeflag, $linkname, $magic, @@ -77,6 +79,13 @@ foreach my $tar_file (@ARGV) $prefix) = unpack 'Z100 Z8 Z8 Z8 Z12 Z12 Z8 Z1 Z100 Z6 Z2 Z32 Z32 Z8 Z8 Z*', $_; + + unless ($next_path eq '') { + # Recover name from previous extended header + $name = $next_path; + $next_path = ''; + } + last unless length($name); if ($name eq '././@LongLink') { # GNU tar extension @@ -97,7 +106,6 @@ foreach my $tar_file (@ARGV) Z8 Z1 Z100 Z6 Z2 Z32 Z32 Z8 Z8 Z*', $_; } - next if $name =~ m{/\z}; $mode = oct $mode; $size = oct $size; $mtime = oct $mtime; @@ -108,8 +116,24 @@ foreach my $tar_file (@ARGV) print FI "data ", length($linkname), "\n", $linkname; $mode = 0120000; } elsif ($typeflag eq 'x') { # extended header - # skip header contents - $size -= 512 while ($size > 0 && read(I, $_, 512) == 512); + # If extended header, check for path + my $pax_header = ''; + while ($size > 0 && read(I, $_, 512) == 512) { + $pax_header = $pax_header . substr($_, 0, $size); + $size -= 512; + } + + my @lines = split /\n/, $pax_header; + foreach my $line (@lines) { + my ($len, $entry) = split / /, $line; + my ($key, $value) = split /=/, $entry; + if ($key eq 'path') { + $next_path = $value; + } + } + next; + } elsif ($name =~ m{/\z}) { + # If it's a folder, ignore next; } else { print FI "blob\n", "mark :$next_mark\n"; -- cgit v1.2.1