summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Dag-Erling Smørgrav <des@des.no> 2022-11-22 02:52:43 +0000
committer: Martin Matuška <martin@matuska.de> 2023-01-09 15:23:53 +0100
commit: a6b488c528b70be136cd5e60928a834081e2ec4d (patch)
tree: c91ec64a470913e532b6115f3ea4d67b84429bef
parent: 9de87afec611783439b260bc20f1fc088ad4bbc8 (diff)
download: libarchive-a6b488c528b70be136cd5e60928a834081e2ec4d.tar.gz
Support producing multi-frame zstd archives.
When the `zstd:frame-per-file` option is specified, the zstd filter will start a new frame when flushed, i.e. for each file in the archive. The `zstd:min-frame-size=N` option modifies the `zstd:frame-per-file` option so that a flush will not start a new frame unless the compressed size of the current frame exceeds `N` bytes. When the `zstd:max-frame-size=N` option is specified, the zstd filter will start a new frame any time the uncompressed input consumed by the current frame reaches `N` bytes. These options decrease compression efficiency by a varying amount (depending on the exact composition of the archive's contents) but render the tarball seekable, to a certain extent.
-rw-r--r-- libarchive/archive_write_add_filter_zstd.c 151
1 file changed, 113 insertions, 38 deletions
diff --git a/libarchive/archive_write_add_filter_zstd.c b/libarchive/archive_write_add_filter_zstd.c
index 1d194b19..37c5e741 100644
--- a/libarchive/archive_write_add_filter_zstd.c
+++ b/libarchive/archive_write_add_filter_zstd.c
@@ -55,8 +55,19 @@ struct private_data {
int compression_level;
int threads;
#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
+ enum {
+ running,
+ finishing,
+ resetting,
+ } state;
+ int frame_per_file;
+ size_t min_frame_size;
+ size_t max_frame_size;
+ size_t cur_frame;
+ size_t cur_frame_in;
+ size_t cur_frame_out;
+ size_t total_in;
ZSTD_CStream *cstream;
- int64_t total_in;
ZSTD_outBuffer out;
#else
struct archive_write_program_data *pdata;
@@ -78,6 +89,7 @@ static int archive_compressor_zstd_options(struct archive_write_filter *,
static int archive_compressor_zstd_open(struct archive_write_filter *);
static int archive_compressor_zstd_write(struct archive_write_filter *,
const void *, size_t);
+static int archive_compressor_zstd_flush(struct archive_write_filter *);
static int archive_compressor_zstd_close(struct archive_write_filter *);
static int archive_compressor_zstd_free(struct archive_write_filter *);
#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
@@ -106,6 +118,7 @@ archive_write_add_filter_zstd(struct archive *_a)
f->data = data;
f->open = &archive_compressor_zstd_open;
f->options = &archive_compressor_zstd_options;
+ f->flush = &archive_compressor_zstd_flush;
f->close = &archive_compressor_zstd_close;
f->free = &archive_compressor_zstd_free;
f->code = ARCHIVE_FILTER_ZSTD;
@@ -113,6 +126,11 @@ archive_write_add_filter_zstd(struct archive *_a)
data->compression_level = CLEVEL_DEFAULT;
data->threads = 0;
#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
+ data->frame_per_file = 0;
+ data->min_frame_size = 0;
+ data->max_frame_size = SIZE_MAX;
+ data->cur_frame_in = 0;
+ data->cur_frame_out = 0;
data->cstream = ZSTD_createCStream();
if (data->cstream == NULL) {
free(data);
@@ -154,6 +172,8 @@ static int string_to_number(const char *string, intmax_t *numberp)
{
char *end;
+ if (string == NULL || *string == '\0')
+ return (ARCHIVE_WARN);
*numberp = strtoimax(string, &end, 10);
if (end == string || *end != '\0' || errno == EOVERFLOW) {
*numberp = 0;
@@ -206,6 +226,31 @@ archive_compressor_zstd_options(struct archive_write_filter *f, const char *key,
}
data->threads = threads;
return (ARCHIVE_OK);
+#if HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR
+ } else if (strcmp(key, "frame-per-file") == 0) {
+ data->frame_per_file = 1;
+ return (ARCHIVE_OK);
+ } else if (strcmp(key, "min-frame-size") == 0) {
+ intmax_t min_frame_size;
+ if (string_to_number(value, &min_frame_size) != ARCHIVE_OK) {
+ return (ARCHIVE_WARN);
+ }
+ if (min_frame_size < 0) {
+ return (ARCHIVE_WARN);
+ }
+ data->min_frame_size = min_frame_size;
+ return (ARCHIVE_OK);
+ } else if (strcmp(key, "max-frame-size") == 0) {
+ intmax_t max_frame_size;
+ if (string_to_number(value, &max_frame_size) != ARCHIVE_OK) {
+ return (ARCHIVE_WARN);
+ }
+ if (max_frame_size < 1024) {
+ return (ARCHIVE_WARN);
+ }
+ data->max_frame_size = max_frame_size;
+ return (ARCHIVE_OK);
+#endif
}
/* Note: The "warn" return is just to inform the options
@@ -267,15 +312,22 @@ archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
size_t length)
{
struct private_data *data = (struct private_data *)f->data;
- int ret;
- /* Update statistics */
- data->total_in += length;
+ return (drive_compressor(f, data, 0, buff, length));
+}
- if ((ret = drive_compressor(f, data, 0, buff, length)) != ARCHIVE_OK)
- return (ret);
+/*
+ * Flush the compressed stream.
+ */
+static int
+archive_compressor_zstd_flush(struct archive_write_filter *f)
+{
+ struct private_data *data = (struct private_data *)f->data;
- return (ARCHIVE_OK);
+ if (data->frame_per_file && data->state == running &&
+ data->cur_frame_out > data->min_frame_size)
+ data->state = finishing;
+ return (drive_compressor(f, data, 1, NULL, 0));
}
/*
@@ -286,56 +338,72 @@ archive_compressor_zstd_close(struct archive_write_filter *f)
{
struct private_data *data = (struct private_data *)f->data;
- /* Finish zstd frame */
- return drive_compressor(f, data, 1, NULL, 0);
+ if (data->state == running)
+ data->state = finishing;
+ return (drive_compressor(f, data, 1, NULL, 0));
}
/*
* Utility function to push input data through compressor,
* writing full output blocks as necessary.
- *
- * Note that this handles both the regular write case (finishing ==
- * false) and the end-of-archive case (finishing == true).
*/
static int
drive_compressor(struct archive_write_filter *f,
- struct private_data *data, int finishing, const void *src, size_t length)
+ struct private_data *data, int flush, const void *src, size_t length)
{
ZSTD_inBuffer in = { .src = src, .size = length, .pos = 0 };
- size_t zstdret;
+ size_t ipos, opos, zstdret = 0;
int ret;
for (;;) {
- if (data->out.pos == data->out.size) {
- ret = __archive_write_filter(f->next_filter,
- data->out.dst, data->out.pos);
- if (ret != ARCHIVE_OK)
- return (ARCHIVE_FATAL);
- data->out.pos = 0;
+ ipos = in.pos;
+ opos = data->out.pos;
+ switch (data->state) {
+ case running:
+ if (in.pos == in.size)
+ return (ARCHIVE_OK);
+ zstdret = ZSTD_compressStream(data->cstream,
+ &data->out, &in);
+ if (ZSTD_isError(zstdret))
+ goto zstd_fatal;
+ break;
+ case finishing:
+ zstdret = ZSTD_endStream(data->cstream, &data->out);
+ if (ZSTD_isError(zstdret))
+ goto zstd_fatal;
+ if (zstdret == 0)
+ data->state = resetting;
+ break;
+ case resetting:
+ ZSTD_CCtx_reset(data->cstream, ZSTD_reset_session_only);
+ data->cur_frame++;
+ data->cur_frame_in = 0;
+ data->cur_frame_out = 0;
+ data->state = running;
+ break;
}
-
- /* If there's nothing to do, we're done. */
- if (!finishing && in.pos == in.size)
- return (ARCHIVE_OK);
-
- zstdret = !finishing ?
- ZSTD_compressStream(data->cstream, &data->out, &in) :
- ZSTD_endStream(data->cstream, &data->out);
-
- if (ZSTD_isError(zstdret)) {
- archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
- "Zstd compression failed: %s",
- ZSTD_getErrorName(zstdret));
- return (ARCHIVE_FATAL);
+ data->total_in += in.pos - ipos;
+ data->cur_frame_in += in.pos - ipos;
+ data->cur_frame_out += data->out.pos - opos;
+ if (data->state == running &&
+ data->cur_frame_in >= data->max_frame_size) {
+ data->state = finishing;
}
-
- /* If we're finishing, 0 means nothing left to flush */
- if (finishing && zstdret == 0) {
+ if (data->out.pos == data->out.size ||
+ (flush && data->out.pos > 0)) {
ret = __archive_write_filter(f->next_filter,
data->out.dst, data->out.pos);
- return (ret);
+ if (ret != ARCHIVE_OK)
+ goto fatal;
+ data->out.pos = 0;
}
}
+zstd_fatal:
+ archive_set_error(f->archive, ARCHIVE_ERRNO_MISC,
+ "Zstd compression failed: %s",
+ ZSTD_getErrorName(zstdret));
+fatal:
+ return (ARCHIVE_FATAL);
}
#else /* HAVE_ZSTD_H && HAVE_LIBZSTD_COMPRESSOR */
@@ -381,6 +449,13 @@ archive_compressor_zstd_write(struct archive_write_filter *f, const void *buff,
}
static int
+archive_compressor_zstd_flush(struct archive_write_filter *f)
+{
+
+ return (ARCHIVE_OK);
+}
+
+static int
archive_compressor_zstd_close(struct archive_write_filter *f)
{
struct private_data *data = (struct private_data *)f->data;