From dbcced8e32b50c068ac297106f0502ee200a1ebd Mon Sep 17 00:00:00 2001 From: Patrick Monnerat Date: Sun, 5 Nov 2017 15:09:48 +0100 Subject: HTTP: support multiple Content-Encodings This is implemented as an output streaming stack of unencoders, the last calling the client write procedure. New test 230 checks this feature. Bug: https://github.com/curl/curl/pull/2002 Reported-By: Daniel Bankhead --- lib/content_encoding.c | 574 ++++++++++++++++++++++++++++++++++++++++--------- lib/content_encoding.h | 45 ++-- lib/http.c | 68 +----- lib/http_chunks.c | 42 +--- lib/transfer.c | 37 +--- lib/url.c | 14 +- lib/urldata.h | 24 +-- 7 files changed, 532 insertions(+), 272 deletions(-) (limited to 'lib') diff --git a/lib/content_encoding.c b/lib/content_encoding.c index 110226034..76a9e6866 100644 --- a/lib/content_encoding.c +++ b/lib/content_encoding.c @@ -22,16 +22,23 @@ #include "curl_setup.h" -#ifdef HAVE_LIBZ - #include "urldata.h" #include +#include #include "sendf.h" +#include "http.h" #include "content_encoding.h" #include "strdup.h" +#include "strcase.h" #include "curl_memory.h" #include "memdebug.h" +#define CONTENT_ENCODING_DEFAULT "identity" + +#ifndef CURL_DISABLE_HTTP + +#ifdef HAVE_LIBZ + /* Comment this out if zlib is always going to be at least ver. 1.2.0.4 (doing so will reduce code size slightly). */ #define OLD_ZLIB_SUPPORT 1 @@ -49,6 +56,21 @@ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define RESERVED 0xE0 /* bits 5..7: reserved */ +typedef enum { + ZLIB_UNINIT, /* uninitialized */ + ZLIB_INIT, /* initialized */ + ZLIB_GZIP_HEADER, /* reading gzip header */ + ZLIB_GZIP_INFLATING, /* inflating gzip stream */ + ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ +} zlibInitState; + +/* Writer parameters. */ +typedef struct { + zlibInitState zlib_init; /* zlib init state */ + z_stream z; /* State structure for zlib. */ +} zlib_params; + + static voidpf zalloc_cb(voidpf opaque, unsigned int items, unsigned int size) { @@ -79,19 +101,27 @@ process_zlib_error(struct connectdata *conn, z_stream *z) } static CURLcode -exit_zlib(z_stream *z, zlibInitState *zlib_init, CURLcode result) +exit_zlib(struct connectdata *conn, + z_stream *z, zlibInitState *zlib_init, CURLcode result) { - inflateEnd(z); - *zlib_init = ZLIB_UNINIT; + if(*zlib_init == ZLIB_GZIP_HEADER) + Curl_safefree(z->next_in); + + if(*zlib_init != ZLIB_UNINIT) { + if(inflateEnd(z) != Z_OK && result == CURLE_OK) + result = process_zlib_error(conn, z); + *zlib_init = ZLIB_UNINIT; + } + return result; } static CURLcode -inflate_stream(struct connectdata *conn, - struct SingleRequest *k) +inflate_stream(struct connectdata *conn, contenc_writer *writer) { + zlib_params *zp = (zlib_params *) &writer->params; int allow_restart = 1; - z_stream *z = &k->z; /* zlib state structure */ + z_stream *z = &zp->z; /* zlib state structure */ uInt nread = z->avail_in; Bytef *orig_in = z->next_in; int status; /* zlib status */ @@ -102,35 +132,31 @@ inflate_stream(struct connectdata *conn, large to hold on the stack */ decomp = malloc(DSIZ); if(decomp == NULL) { - return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } /* because the buffer size is fixed, iteratively decompress and transfer to the client via client_write. */ for(;;) { /* (re)set buffer for decompressed output for every iteration */ - z->next_out = (Bytef *)decomp; + z->next_out = (Bytef *) decomp; z->avail_out = DSIZ; status = inflate(z, Z_SYNC_FLUSH); if(status == Z_OK || status == Z_STREAM_END) { allow_restart = 0; - if((DSIZ - z->avail_out) && (!k->ignorebody)) { - result = Curl_client_write(conn, CLIENTWRITE_BODY, decomp, + result = Curl_unencode_write(conn, writer->downstream, decomp, DSIZ - z->avail_out); - /* if !CURLE_OK, clean up, return */ - if(result) { - free(decomp); - return exit_zlib(z, &k->zlib_init, result); - } + /* if !CURLE_OK, clean up, return */ + if(result) { + free(decomp); + return exit_zlib(conn, z, &zp->zlib_init, result); } /* Done? clean up, return */ if(status == Z_STREAM_END) { free(decomp); - if(inflateEnd(z) == Z_OK) - return exit_zlib(z, &k->zlib_init, result); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, result); } /* Done with these bytes, exit */ @@ -148,7 +174,8 @@ inflate_stream(struct connectdata *conn, (void) inflateEnd(z); /* don't care about the return code */ if(inflateInit2(z, -MAX_WBITS) != Z_OK) { free(decomp); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + zp->zlib_init = ZLIB_UNINIT; /* inflateEnd() already called. */ + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } z->next_in = orig_in; z->avail_in = nread; @@ -157,36 +184,97 @@ inflate_stream(struct connectdata *conn, } else { /* Error; exit loop, handle below */ free(decomp); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } } - /* Will never get here */ + /* UNREACHED */ } -CURLcode -Curl_unencode_deflate_write(struct connectdata *conn, - struct SingleRequest *k, - ssize_t nread) + +/* Deflate handler. */ +static CURLcode deflate_init_writer(struct connectdata *conn, + contenc_writer *writer) { - z_stream *z = &k->z; /* zlib state structure */ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ - /* Initialize zlib? */ - if(k->zlib_init == ZLIB_UNINIT) { - memset(z, 0, sizeof(z_stream)); - z->zalloc = (alloc_func)zalloc_cb; - z->zfree = (free_func)zfree_cb; + if(!writer->downstream) + return CURLE_WRITE_ERROR; - if(inflateInit(z) != Z_OK) - return process_zlib_error(conn, z); - k->zlib_init = ZLIB_INIT; - } + /* Initialize zlib */ + z->zalloc = (alloc_func) zalloc_cb; + z->zfree = (free_func) zfree_cb; + + if(inflateInit(z) != Z_OK) + return process_zlib_error(conn, z); + zp->zlib_init = ZLIB_INIT; + return CURLE_OK; +} + +static CURLcode deflate_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ /* Set the compressed input when this function is called */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; + z->next_in = (Bytef *) buf; + z->avail_in = (uInt) nbytes; /* Now uncompress the data */ - return inflate_stream(conn, k); + return inflate_stream(conn, writer); +} + +static void deflate_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ + + exit_zlib(conn, z, &zp->zlib_init, CURLE_OK); +} + +static const content_encoding deflate_encoding = { + "deflate", + NULL, + deflate_init_writer, + deflate_unencode_write, + deflate_close_writer, + sizeof(zlib_params) +}; + + +/* Gzip handler. */ +static CURLcode gzip_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ + + if(!writer->downstream) + return CURLE_WRITE_ERROR; + + /* Initialize zlib */ + z->zalloc = (alloc_func) zalloc_cb; + z->zfree = (free_func) zfree_cb; + + if(strcmp(zlibVersion(), "1.2.0.4") >= 0) { + /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */ + if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) { + return process_zlib_error(conn, z); + } + zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ + } + else { + /* we must parse the gzip header ourselves */ + if(inflateInit2(z, -MAX_WBITS) != Z_OK) { + return process_zlib_error(conn, z); + } + zp->zlib_init = ZLIB_INIT; /* Initial call state */ + } + + return CURLE_OK; } #ifdef OLD_ZLIB_SUPPORT @@ -273,47 +361,25 @@ static enum { } #endif -CURLcode -Curl_unencode_gzip_write(struct connectdata *conn, - struct SingleRequest *k, - ssize_t nread) +static CURLcode gzip_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) { - z_stream *z = &k->z; /* zlib state structure */ - - /* Initialize zlib? */ - if(k->zlib_init == ZLIB_UNINIT) { - memset(z, 0, sizeof(z_stream)); - z->zalloc = (alloc_func)zalloc_cb; - z->zfree = (free_func)zfree_cb; - - if(strcmp(zlibVersion(), "1.2.0.4") >= 0) { - /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */ - if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) { - return process_zlib_error(conn, z); - } - k->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ - } - else { - /* we must parse the gzip header ourselves */ - if(inflateInit2(z, -MAX_WBITS) != Z_OK) { - return process_zlib_error(conn, z); - } - k->zlib_init = ZLIB_INIT; /* Initial call state */ - } - } + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ - if(k->zlib_init == ZLIB_INIT_GZIP) { + if(zp->zlib_init == ZLIB_INIT_GZIP) { /* Let zlib handle the gzip decompression entirely */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; + z->next_in = (Bytef *) buf; + z->avail_in = (uInt) nbytes; /* Now uncompress the data */ - return inflate_stream(conn, k); + return inflate_stream(conn, writer); } #ifndef OLD_ZLIB_SUPPORT /* Support for old zlib versions is compiled away and we are running with an old version, so return an error. */ - return exit_zlib(z, &k->zlib_init, CURLE_WRITE_ERROR); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_WRITE_ERROR); #else /* This next mess is to get around the potential case where there isn't @@ -326,18 +392,18 @@ Curl_unencode_gzip_write(struct connectdata *conn, * can handle the gzip header themselves. */ - switch(k->zlib_init) { + switch(zp->zlib_init) { /* Skip over gzip header? */ case ZLIB_INIT: { /* Initial call state */ ssize_t hlen; - switch(check_gzip_header((unsigned char *)k->str, nread, &hlen)) { + switch(check_gzip_header((unsigned char *) buf, nbytes, &hlen)) { case GZIP_OK: - z->next_in = (Bytef *)k->str + hlen; - z->avail_in = (uInt)(nread - hlen); - k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ + z->next_in = (Bytef *) buf + hlen; + z->avail_in = (uInt) (nbytes - hlen); + zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: @@ -348,19 +414,19 @@ Curl_unencode_gzip_write(struct connectdata *conn, * the first place, and it's even more unlikely for a transfer to fail * immediately afterwards, it should seldom be a problem. */ - z->avail_in = (uInt)nread; + z->avail_in = (uInt) nbytes; z->next_in = malloc(z->avail_in); if(z->next_in == NULL) { - return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } - memcpy(z->next_in, k->str, z->avail_in); - k->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ + memcpy(z->next_in, buf, z->avail_in); + zp->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ /* We don't have any data to inflate yet */ return CURLE_OK; case GZIP_BAD: default: - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } } @@ -370,22 +436,22 @@ Curl_unencode_gzip_write(struct connectdata *conn, { /* Need more gzip header data state */ ssize_t hlen; - z->avail_in += (uInt)nread; + z->avail_in += (uInt) nbytes; z->next_in = Curl_saferealloc(z->next_in, z->avail_in); if(z->next_in == NULL) { - return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } /* Append the new block of data to the previous one */ - memcpy(z->next_in + z->avail_in - nread, k->str, nread); + memcpy(z->next_in + z->avail_in - nbytes, buf, nbytes); switch(check_gzip_header(z->next_in, z->avail_in, &hlen)) { case GZIP_OK: /* This is the zlib stream data */ free(z->next_in); /* Don't point into the malloced block since we just freed it */ - z->next_in = (Bytef *)k->str + hlen + nread - z->avail_in; - z->avail_in = (uInt)(z->avail_in - hlen); - k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ + z->next_in = (Bytef *) buf + hlen + nbytes - z->avail_in; + z->avail_in = (uInt) (z->avail_in - hlen); + zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: @@ -394,8 +460,7 @@ Curl_unencode_gzip_write(struct connectdata *conn, case GZIP_BAD: default: - free(z->next_in); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } } @@ -404,8 +469,8 @@ Curl_unencode_gzip_write(struct connectdata *conn, case ZLIB_GZIP_INFLATING: default: /* Inflating stream state */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; + z->next_in = (Bytef *) buf; + z->avail_in = (uInt) nbytes; break; } @@ -415,17 +480,332 @@ Curl_unencode_gzip_write(struct connectdata *conn, } /* We've parsed the header, now uncompress the data */ - return inflate_stream(conn, k); + return inflate_stream(conn, writer); #endif } +static void gzip_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ + + exit_zlib(conn, z, &zp->zlib_init, CURLE_OK); +} + +static const content_encoding gzip_encoding = { + "gzip", + "x-gzip", + gzip_init_writer, + gzip_unencode_write, + gzip_close_writer, + sizeof(zlib_params) +}; + +#endif /* HAVE_LIBZ */ + + +/* Identity handler. */ +static CURLcode identity_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR; +} + +static CURLcode identity_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + return Curl_unencode_write(conn, writer->downstream, buf, nbytes); +} + +static void identity_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + (void) writer; +} + +static const content_encoding identity_encoding = { + "identity", + NULL, + identity_init_writer, + identity_unencode_write, + identity_close_writer, + 0 +}; + + +/* supported content encodings table. */ +static const content_encoding * const encodings[] = { + &identity_encoding, +#ifdef HAVE_LIBZ + &deflate_encoding, + &gzip_encoding, +#endif + NULL +}; + + +/* Return a list of comma-separated names of supported encodings. */ +char *Curl_all_content_encodings(void) +{ + size_t len = 0; + const content_encoding * const *cep; + const content_encoding *ce; + char *ace; + char *p; + + for(cep = encodings; *cep; cep++) { + ce = *cep; + if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) + len += strlen(ce->name) + 2; + } + + if(!len) + return strdup(CONTENT_ENCODING_DEFAULT); + + ace = malloc(len); + if(ace) { + p = ace; + for(cep = encodings; *cep; cep++) { + ce = *cep; + if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) { + strcpy(p, ce->name); + p += strlen(p); + *p++ = ','; + *p++ = ' '; + } + } + p[-2] = '\0'; + } + + return ace; +} + + +/* Real client writer: no downstream. */ +static CURLcode client_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + return writer->downstream? CURLE_WRITE_ERROR: CURLE_OK; +} + +static CURLcode client_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + struct Curl_easy *data = conn->data; + struct SingleRequest *k = &data->req; + + (void) writer; + + if(!nbytes || k->ignorebody) + return CURLE_OK; + + return Curl_client_write(conn, CLIENTWRITE_BODY, (char *) buf, nbytes); +} + +static void client_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + (void) writer; +} + +static const content_encoding client_encoding = { + NULL, + NULL, + client_init_writer, + client_unencode_write, + client_close_writer, + 0 +}; + + +/* Deferred error dummy writer. */ +static CURLcode error_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR; +} + +static CURLcode error_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + char *all = Curl_all_content_encodings(); + + (void) writer; + (void) buf; + (void) nbytes; + + if(!all) + return CURLE_OUT_OF_MEMORY; + failf(conn->data, "Unrecognized content encoding type. " + "libcurl understands %s content encodings.", all); + free(all); + return CURLE_BAD_CONTENT_ENCODING; +} + +static void error_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + (void) writer; +} + +static const content_encoding error_encoding = { + NULL, + NULL, + error_init_writer, + error_unencode_write, + error_close_writer, + 0 +}; + +/* Create an unencoding writer stage using the given handler. */ +static contenc_writer *new_unencoding_writer(struct connectdata *conn, + const content_encoding *handler, + contenc_writer *downstream) +{ + size_t sz = offsetof(contenc_writer, params) + handler->paramsize; + contenc_writer *writer = (contenc_writer *) malloc(sz); + + if(writer) { + memset(writer, 0, sz); + writer->handler = handler; + writer->downstream = downstream; + if(handler->init_writer(conn, writer)) { + free(writer); + writer = NULL; + } + } + + return writer; +} + +/* Write data using an unencoding writer stack. */ +CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes) +{ + return writer->handler->unencode_write(conn, writer, buf, nbytes); +} + +/* Close and clean-up the connection's writer stack. */ void Curl_unencode_cleanup(struct connectdata *conn) { struct Curl_easy *data = conn->data; struct SingleRequest *k = &data->req; - z_stream *z = &k->z; - if(k->zlib_init != ZLIB_UNINIT) - (void) exit_zlib(z, &k->zlib_init, CURLE_OK); + contenc_writer *writer = k->writer_stack; + + while(writer) { + k->writer_stack = writer->downstream; + writer->handler->close_writer(conn, writer); + free(writer); + writer = k->writer_stack; + } } -#endif /* HAVE_LIBZ */ +/* Find the content encoding by name. */ +static const content_encoding *find_encoding(const char *name, size_t len) +{ + const content_encoding * const *cep; + const content_encoding *ce; + + for(cep = encodings; *cep; cep++) { + ce = *cep; + if((strncasecompare(name, ce->name, len) && !ce->name[len]) || + (ce->alias && strncasecompare(name, ce->alias, len) && !ce->alias[len])) + return ce; + } + return NULL; +} + +/* Set-up the unencoding stack from the Content-Encoding header value. + * See RFC 7231 section 3.1.2.2. */ +CURLcode Curl_build_unencoding_stack(struct connectdata *conn, + const char *enclist, int maybechunked) +{ + struct Curl_easy *data = conn->data; + struct SingleRequest *k = &data->req; + + do { + const char *name; + size_t namelen; + + /* Parse a single encoding name. */ + while(ISSPACE(*enclist) || *enclist == ',') + enclist++; + + name = enclist; + + for(namelen = 0; *enclist && *enclist != ','; enclist++) + if(!ISSPACE(*enclist)) + namelen = enclist - name + 1; + + /* Special case: chunked encoding is handled at the reader level. */ + if(maybechunked && namelen == 7 && strncasecompare(name, "chunked", 7)) { + k->chunk = TRUE; /* chunks coming our way. */ + Curl_httpchunk_init(conn); /* init our chunky engine. */ + } + else if(namelen) { + const content_encoding *encoding = find_encoding(name, namelen); + contenc_writer *writer; + + if(!k->writer_stack) { + k->writer_stack = new_unencoding_writer(conn, &client_encoding, NULL); + + if(!k->writer_stack) + return CURLE_OUT_OF_MEMORY; + } + + if(!encoding) + encoding = &error_encoding; /* Defer error at stack use. */ + + /* Stack the unencoding stage. */ + writer = new_unencoding_writer(conn, encoding, k->writer_stack); + if(!writer) + return CURLE_OUT_OF_MEMORY; + k->writer_stack = writer; + } + } while(*enclist); + + return CURLE_OK; +} + +#else +/* Stubs for builds without HTTP. */ +CURLcode Curl_build_unencoding_stack(struct connectdata *conn, + const char *enclist, int maybechunked) +{ + (void) conn; + (void) enclist; + (void) maybechunked; + return CURLE_NOT_BUILT_IN; +} + +CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes) +{ + (void) conn; + (void) writer; + (void) buf; + (void) nbytes; + return CURLE_NOT_BUILT_IN; +} + +void Curl_unencode_cleanup(struct connectdata *conn) +{ + (void) conn; +} + +char *Curl_all_content_encodings(void) +{ + return strdup(CONTENT_ENCODING_DEFAULT); /* Satisfy caller. */ +} + +#endif /* CURL_DISABLE_HTTP */ diff --git a/lib/content_encoding.h b/lib/content_encoding.h index 3fadd2899..4cd52be62 100644 --- a/lib/content_encoding.h +++ b/lib/content_encoding.h @@ -7,7 +7,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2011, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -23,26 +23,33 @@ ***************************************************************************/ #include "curl_setup.h" -/* - * Comma-separated list all supported Content-Encodings ('identity' is implied) - */ -#ifdef HAVE_LIBZ -#define ALL_CONTENT_ENCODINGS "deflate, gzip" -/* force a cleanup */ -void Curl_unencode_cleanup(struct connectdata *conn); -#else -#define ALL_CONTENT_ENCODINGS "identity" -#define Curl_unencode_cleanup(x) Curl_nop_stmt -#endif +/* Decoding writer. */ +typedef struct contenc_writer_s contenc_writer; +typedef struct content_encoding_s content_encoding; + +struct contenc_writer_s { + const content_encoding *handler; /* Encoding handler. */ + contenc_writer *downstream; /* Downstream writer. */ + void *params; /* Encoding-specific storage (variable length). */ +}; -CURLcode Curl_unencode_deflate_write(struct connectdata *conn, - struct SingleRequest *req, - ssize_t nread); +/* Content encoding writer. */ +struct content_encoding_s { + const char *name; /* Encoding name. */ + const char *alias; /* Encoding name alias. */ + CURLcode (*init_writer)(struct connectdata *conn, contenc_writer *writer); + CURLcode (*unencode_write)(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes); + void (*close_writer)(struct connectdata *conn, contenc_writer *writer); + size_t paramsize; +}; -CURLcode -Curl_unencode_gzip_write(struct connectdata *conn, - struct SingleRequest *k, - ssize_t nread); +CURLcode Curl_build_unencoding_stack(struct connectdata *conn, + const char *enclist, int maybechunked); +CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes); +void Curl_unencode_cleanup(struct connectdata *conn); +char *Curl_all_content_encodings(void); #endif /* HEADER_CURL_CONTENT_ENCODING_H */ diff --git a/lib/http.c b/lib/http.c index 0716f8eea..def51abc3 100644 --- a/lib/http.c +++ b/lib/http.c @@ -3103,7 +3103,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, !(conn->handler->protocol & CURLPROTO_RTSP) && data->set.httpreq != HTTPREQ_HEAD) { /* On HTTP 1.1, when connection is not to get closed, but no - Content-Length nor Content-Encoding chunked have been + Content-Length nor Transfer-Encoding chunked have been received, according to RFC2616 section 4.4 point 5, we assume that the server will close the connection to signal the end of the document. */ @@ -3613,51 +3613,9 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, * of chunks, and a chunk-data set to zero signals the * end-of-chunks. */ - char *start; - - /* Find the first non-space letter */ - start = k->p + 18; - - for(;;) { - /* skip whitespaces and commas */ - while(*start && (ISSPACE(*start) || (*start == ','))) - start++; - - if(checkprefix("chunked", start)) { - k->chunk = TRUE; /* chunks coming our way */ - - /* init our chunky engine */ - Curl_httpchunk_init(conn); - - start += 7; - } - - if(k->auto_decoding) - /* TODO: we only support the first mentioned compression for now */ - break; - - if(checkprefix("identity", start)) { - k->auto_decoding = IDENTITY; - start += 8; - } - else if(checkprefix("deflate", start)) { - k->auto_decoding = DEFLATE; - start += 7; - } - else if(checkprefix("gzip", start)) { - k->auto_decoding = GZIP; - start += 4; - } - else if(checkprefix("x-gzip", start)) { - k->auto_decoding = GZIP; - start += 6; - } - else - /* unknown! */ - break; - - } - + result = Curl_build_unencoding_stack(conn, k->p + 18, TRUE); + if(result) + return result; } else if(checkprefix("Content-Encoding:", k->p) && data->set.str[STRING_ENCODING]) { @@ -3668,21 +3626,9 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, * 2616). zlib cannot handle compress. However, errors are * handled further down when the response body is processed */ - char *start; - - /* Find the first non-space letter */ - start = k->p + 17; - while(*start && ISSPACE(*start)) - start++; - - /* Record the content-encoding for later use */ - if(checkprefix("identity", start)) - k->auto_decoding = IDENTITY; - else if(checkprefix("deflate", start)) - k->auto_decoding = DEFLATE; - else if(checkprefix("gzip", start) - || checkprefix("x-gzip", start)) - k->auto_decoding = GZIP; + result = Curl_build_unencoding_stack(conn, k->p + 17, FALSE); + if(result) + return result; } else if(checkprefix("Content-Range:", k->p)) { /* Content-Range: bytes [num]- diff --git a/lib/http_chunks.c b/lib/http_chunks.c index 92d773112..161642969 100644 --- a/lib/http_chunks.c +++ b/lib/http_chunks.c @@ -187,49 +187,17 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize); /* Write the data portion available */ -#ifdef HAVE_LIBZ - switch(conn->data->set.http_ce_skip? - IDENTITY : data->req.auto_decoding) { - case IDENTITY: -#endif - if(!k->ignorebody) { - if(!data->set.http_te_skip) - result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, - piece); - else - result = CURLE_OK; - } -#ifdef HAVE_LIBZ - break; - - case DEFLATE: - /* update data->req.keep.str to point to the chunk data. */ - data->req.str = datap; - result = Curl_unencode_deflate_write(conn, &data->req, - (ssize_t)piece); - break; - - case GZIP: - /* update data->req.keep.str to point to the chunk data. */ - data->req.str = datap; - result = Curl_unencode_gzip_write(conn, &data->req, - (ssize_t)piece); - break; - - default: - failf(conn->data, - "Unrecognized content encoding type. " - "libcurl understands `identity', `deflate' and `gzip' " - "content encodings."); - return CHUNKE_BAD_ENCODING; + if(conn->data->set.http_ce_skip || !k->writer_stack) { + if(!k->ignorebody) + result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, piece); } -#endif + else + result = Curl_unencode_write(conn, k->writer_stack, datap, piece); if(result) return CHUNKE_WRITE_ERROR; *wrote += piece; - ch->datasize -= piece; /* decrease amount left to expect */ datap += piece; /* move read pointer forward */ length -= piece; /* decrease space left in this round */ diff --git a/lib/transfer.c b/lib/transfer.c index 937477670..8f15b1a15 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -779,48 +779,19 @@ static CURLcode readwrite_data(struct Curl_easy *data, in http_chunks.c. Make sure that ALL_CONTENT_ENCODINGS contains all the encodings handled here. */ -#ifdef HAVE_LIBZ - switch(conn->data->set.http_ce_skip ? - IDENTITY : k->auto_decoding) { - case IDENTITY: -#endif - /* This is the default when the server sends no - Content-Encoding header. See Curl_readwrite_init; the - memset() call initializes k->auto_decoding to zero. */ + if(conn->data->set.http_ce_skip || !k->writer_stack) { if(!k->ignorebody) { - #ifndef CURL_DISABLE_POP3 - if(conn->handler->protocol&PROTO_FAMILY_POP3) + if(conn->handler->protocol & PROTO_FAMILY_POP3) result = Curl_pop3_write(conn, k->str, nread); else #endif /* CURL_DISABLE_POP3 */ - result = Curl_client_write(conn, CLIENTWRITE_BODY, k->str, nread); } -#ifdef HAVE_LIBZ - break; - - case DEFLATE: - /* Assume CLIENTWRITE_BODY; headers are not encoded. */ - if(!k->ignorebody) - result = Curl_unencode_deflate_write(conn, k, nread); - break; - - case GZIP: - /* Assume CLIENTWRITE_BODY; headers are not encoded. */ - if(!k->ignorebody) - result = Curl_unencode_gzip_write(conn, k, nread); - break; - - default: - failf(data, "Unrecognized content encoding type. " - "libcurl understands `identity', `deflate' and `gzip' " - "content encodings."); - result = CURLE_BAD_CONTENT_ENCODING; - break; } -#endif + else + result = Curl_unencode_write(conn, k->writer_stack, k->str, nread); } k->badheader = HEADER_NORMAL; /* taken care of now */ diff --git a/lib/url.c b/lib/url.c index 374ac6cfa..aeb0c9027 100644 --- a/lib/url.c +++ b/lib/url.c @@ -1011,9 +1011,17 @@ CURLcode Curl_setopt(struct Curl_easy *data, CURLoption option, * */ argptr = va_arg(param, char *); - result = setstropt(&data->set.str[STRING_ENCODING], - (argptr && !*argptr)? - ALL_CONTENT_ENCODINGS: argptr); + if(argptr && !*argptr) { + argptr = Curl_all_content_encodings(); + if(!argptr) + result = CURLE_OUT_OF_MEMORY; + else { + result = setstropt(&data->set.str[STRING_ENCODING], argptr); + free(argptr); + } + } + else + result = setstropt(&data->set.str[STRING_ENCODING], argptr); break; case CURLOPT_TRANSFER_ENCODING: diff --git a/lib/urldata.h b/lib/urldata.h index e5aae1430..cfdd8d028 100644 --- a/lib/urldata.h +++ b/lib/urldata.h @@ -464,16 +464,6 @@ struct hostname { #define KEEP_SENDBITS (KEEP_SEND | KEEP_SEND_HOLD | KEEP_SEND_PAUSE) -#ifdef HAVE_LIBZ -typedef enum { - ZLIB_UNINIT, /* uninitialized */ - ZLIB_INIT, /* initialized */ - ZLIB_GZIP_HEADER, /* reading gzip header */ - ZLIB_GZIP_INFLATING, /* inflating gzip stream */ - ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ -} zlibInitState; -#endif - #ifdef CURLRES_ASYNCH struct Curl_async { char *hostname; @@ -561,18 +551,8 @@ struct SingleRequest { enum expect100 exp100; /* expect 100 continue state */ enum upgrade101 upgr101; /* 101 upgrade state */ - int auto_decoding; /* What content encoding. sec 3.5, RFC2616. */ - -#define IDENTITY 0 /* No encoding */ -#define DEFLATE 1 /* zlib deflate [RFC 1950 & 1951] */ -#define GZIP 2 /* gzip algorithm [RFC 1952] */ - -#ifdef HAVE_LIBZ - zlibInitState zlib_init; /* possible zlib init state; - undefined if Content-Encoding header. */ - z_stream z; /* State structure for zlib. */ -#endif - + struct contenc_writer_s *writer_stack; /* Content unencoding stack. */ + /* See sec 3.5, RFC2616. */ time_t timeofdoc; long bodywrites; -- cgit v1.2.1