diff options
author | Stefan Eissing <stefan@eissing.org> | 2023-03-30 13:00:51 +0200 |
---|---|---|
committer | Daniel Stenberg <daniel@haxx.se> | 2023-03-31 15:44:57 +0200 |
commit | 544abeea83f8fc088425ea835998eaeb1bcca5ba (patch) | |
tree | 47c0aadfa57ef208dddebd6d411a40653da0a501 | |
parent | a094ec1a85fd82e86a0e2cc40d9cf699ee2003d1 (diff) | |
download | curl-544abeea83f8fc088425ea835998eaeb1bcca5ba.tar.gz |
http3: improvements across backends
- ngtcp2: using bufq for recv stream data
- internal stream_ctx instead of `struct HTTP` members
for quiche, ngtcp2 and msh3
- no more QUIC related members in `struct HTTP`
- experimental use of recvmmsg(), disabled by default
- testing on my old Debian box shows no throughput improvement.
- leaving it in, but disabled, for a future revisit
- vquic: common UDP receive code for ngtcp2 and quiche
- vquic: common UDP send code for ngtcp2 and quiche
- added pytest skips for known msh3 failures
- fix unit2601 to survive torture testing
- quiche: using latest `master` from quiche and enabling large download
tests, now that key change is supported
- fixing test_07_21 where retry handling of starting a stream
was faulty
- msh3: use bufq for recv buffering headers and data
- msh3: replace fprintf debug logging with LOG_CF where possible
- msh3: force QUIC expire timers on recv/send so that more than
1 request per second is served
Closes #10772
-rw-r--r-- | lib/bufq.c | 86 | ||||
-rw-r--r-- | lib/bufq.h | 22 | ||||
-rw-r--r-- | lib/http.h | 53 | ||||
-rw-r--r-- | lib/vquic/curl_msh3.c | 583 | ||||
-rw-r--r-- | lib/vquic/curl_ngtcp2.c | 1073 | ||||
-rw-r--r-- | lib/vquic/curl_quiche.c | 1140 | ||||
-rw-r--r-- | lib/vquic/vquic.c | 323 | ||||
-rw-r--r-- | lib/vquic/vquic_int.h | 68 | ||||
-rw-r--r-- | tests/http/test_02_download.py | 4 | ||||
-rw-r--r-- | tests/http/test_03_goaway.py | 2 | ||||
-rw-r--r-- | tests/http/test_05_errors.py | 6 | ||||
-rw-r--r-- | tests/http/test_07_upload.py | 22 | ||||
-rw-r--r-- | tests/http/test_08_caddy.py | 36 | ||||
-rw-r--r-- | tests/unit/unit2601.c | 1 |
14 files changed, 1989 insertions, 1430 deletions
diff --git a/lib/bufq.c b/lib/bufq.c index f0ab6bb75..535f87acd 100644 --- a/lib/bufq.c +++ b/lib/bufq.c @@ -84,12 +84,12 @@ static size_t chunk_read(struct buf_chunk *chunk, return n; } -static ssize_t chunk_slurp(struct buf_chunk *chunk, - Curl_bufq_reader *reader, - void *reader_ctx, CURLcode *err) +static ssize_t chunk_slurpn(struct buf_chunk *chunk, size_t max_len, + Curl_bufq_reader *reader, + void *reader_ctx, CURLcode *err) { unsigned char *p = &chunk->x.data[chunk->w_offset]; - size_t n = chunk->dlen - chunk->w_offset; + size_t n = chunk->dlen - chunk->w_offset; /* free amount */ ssize_t nread; DEBUGASSERT(chunk->dlen >= chunk->w_offset); @@ -97,6 +97,8 @@ static ssize_t chunk_slurp(struct buf_chunk *chunk, *err = CURLE_AGAIN; return -1; } + if(max_len && n > max_len) + n = max_len; nread = reader(reader_ctx, p, n, err); if(nread > 0) { DEBUGASSERT((size_t)nread <= n); @@ -374,6 +376,7 @@ ssize_t Curl_bufq_write(struct bufq *q, ssize_t nwritten = 0; size_t n; + DEBUGASSERT(q->max_chunks > 0); while(len) { tail = get_non_full_tail(q); if(!tail) { @@ -536,48 +539,75 @@ out: return nwritten; } -ssize_t Curl_bufq_slurp(struct bufq *q, Curl_bufq_reader *reader, - void *reader_ctx, CURLcode *err) +ssize_t Curl_bufq_sipn(struct bufq *q, size_t max_len, + Curl_bufq_reader *reader, void *reader_ctx, + CURLcode *err) { struct buf_chunk *tail = NULL; - ssize_t nread = 0, chunk_nread; + ssize_t nread; *err = CURLE_AGAIN; - while(1) { - tail = get_non_full_tail(q); - if(!tail) { - if(q->chunk_count < q->max_chunks) { - *err = CURLE_OUT_OF_MEMORY; - return -1; - } - else if(nread) { - /* full, return what we read */ - return nread; - } - else { - /* full, blocked */ - *err = CURLE_AGAIN; - return -1; - } + tail = get_non_full_tail(q); + if(!tail) { + if(q->chunk_count < q->max_chunks) { + *err = CURLE_OUT_OF_MEMORY; + return -1; } + /* full, blocked */ + *err = CURLE_AGAIN; + return -1; + } - chunk_nread = chunk_slurp(tail, reader, reader_ctx, err); - if(chunk_nread < 
0) { + nread = chunk_slurpn(tail, max_len, reader, reader_ctx, err); + if(nread < 0) { + return -1; + } + else if(nread == 0) { + /* eof */ + *err = CURLE_OK; + } + return nread; +} + +ssize_t Curl_bufq_slurpn(struct bufq *q, size_t max_len, + Curl_bufq_reader *reader, void *reader_ctx, + CURLcode *err) +{ + ssize_t nread = 0, n; + + *err = CURLE_AGAIN; + while(1) { + + n = Curl_bufq_sipn(q, max_len, reader, reader_ctx, err); + if(n < 0) { if(!nread || *err != CURLE_AGAIN) { /* blocked on first read or real error, fail */ nread = -1; } break; } - else if(chunk_nread == 0) { + else if(n == 0) { /* eof */ *err = CURLE_OK; break; } - nread += chunk_nread; + nread += (size_t)n; + if(max_len) { + DEBUGASSERT((size_t)n <= max_len); + max_len -= (size_t)n; + if(!max_len) + break; + } /* give up slurping when we get less bytes than we asked for */ - if(!chunk_is_full(tail)) + if(q->tail && !chunk_is_full(q->tail)) break; } return nread; } + +ssize_t Curl_bufq_slurp(struct bufq *q, Curl_bufq_reader *reader, + void *reader_ctx, CURLcode *err) +{ + return Curl_bufq_slurpn(q, 0, reader, reader_ctx, err); +} + diff --git a/lib/bufq.h b/lib/bufq.h index 09af226a9..b220f01ec 100644 --- a/lib/bufq.h +++ b/lib/bufq.h @@ -245,6 +245,28 @@ typedef ssize_t Curl_bufq_reader(void *reader_ctx, ssize_t Curl_bufq_slurp(struct bufq *q, Curl_bufq_reader *reader, void *reader_ctx, CURLcode *err); +/** + * Read up to `max_len` bytes and append it to the end of the buffer queue. + * if `max_len` is 0, no limit is imposed and the call behaves exactly + * the same as `Curl_bufq_slurp()`. + * Returns the total amount of buf read (may be 0) or -1 on other + * reader errors. + * Note that even in case of a -1 chunks may have been read and + * the buffer queue will have different length than before. + */ +ssize_t Curl_bufq_slurpn(struct bufq *q, size_t max_len, + Curl_bufq_reader *reader, void *reader_ctx, + CURLcode *err); + +/** + * Read *once* up to `max_len` bytes and append it to the buffer. 
+ * if `max_len` is 0, no limit is imposed besides the chunk space. + * Returns the total amount of buf read (may be 0) or -1 on other + * reader errors. + */ +ssize_t Curl_bufq_sipn(struct bufq *q, size_t max_len, + Curl_bufq_reader *reader, void *reader_ctx, + CURLcode *err); /** * Write buf to the end of the buffer queue. diff --git a/lib/http.h b/lib/http.h index b9a2e6149..0c67a774f 100644 --- a/lib/http.h +++ b/lib/http.h @@ -185,10 +185,6 @@ CURLcode Curl_http_auth_act(struct Curl_easy *data); #endif /* CURL_DISABLE_HTTP */ -#ifdef USE_NGHTTP3 -struct h3out; /* see ngtcp2 */ -#endif - /**************************************************************************** * HTTP unique setup ***************************************************************************/ @@ -216,6 +212,8 @@ struct HTTP { HTTPSEND_BODY /* sending body */ } sending; + void *impl_ctx; /* context for actual HTTP implementation */ + #ifdef USE_WEBSOCKETS struct websocket ws; #endif @@ -240,15 +238,11 @@ struct HTTP { size_t push_headers_used; /* number of entries filled in */ size_t push_headers_alloc; /* number of entries allocated */ uint32_t error; /* HTTP/2 stream error code */ -#endif -#if defined(USE_NGHTTP2) || defined(USE_NGHTTP3) bool bodystarted; int status_code; /* HTTP status code */ char *mem; /* points to a buffer in memory to store received data */ size_t len; /* size of the buffer 'mem' points to */ size_t memlen; /* size of data copied to mem */ -#endif -#if defined(USE_NGHTTP2) || defined(ENABLE_QUIC) /* fields used by both HTTP/2 and HTTP/3 */ const uint8_t *upload_mem; /* points to a buffer to read from */ size_t upload_len; /* size of the buffer 'upload_mem' points to */ @@ -256,49 +250,6 @@ struct HTTP { bool closed; /* TRUE on stream close */ bool reset; /* TRUE on stream reset */ #endif - -#ifdef ENABLE_QUIC -#ifndef USE_MSH3 - /*********** for HTTP/3 we store stream-local data here *************/ - int64_t stream3_id; /* stream we are interested in */ - uint64_t error3; 
/* HTTP/3 stream error code */ - bool firstheader; /* FALSE until headers arrive */ - bool firstbody; /* FALSE until body arrives */ - bool h3req; /* FALSE until request is issued */ -#endif /* !USE_MSH3 */ - bool upload_done; -#endif /* ENABLE_QUIC */ -#ifdef USE_NGHTTP3 - size_t recv_buf_nonflow; /* buffered bytes, not counting for flow control */ - struct h3out *h3out; /* per-stream buffers for upload */ - struct dynbuf overflow; /* excess data received during a single Curl_read */ -#endif /* USE_NGHTTP3 */ -#ifdef USE_MSH3 - struct MSH3_REQUEST *req; -#ifdef _WIN32 - CRITICAL_SECTION recv_lock; -#else /* !_WIN32 */ - pthread_mutex_t recv_lock; -#endif /* _WIN32 */ - /* Receive Buffer (Headers and Data) */ - uint8_t* recv_buf; - size_t recv_buf_alloc; - size_t recv_buf_max; - /* Receive Headers */ - size_t recv_header_len; - bool recv_header_complete; - /* Receive Data */ - size_t recv_data_len; - bool recv_data_complete; - /* General Receive Error */ - CURLcode recv_error; -#endif /* USE_MSH3 */ -#ifdef USE_QUICHE - bool h3_got_header; /* TRUE when h3 stream has recvd some HEADER */ - bool h3_recving_data; /* TRUE when h3 stream is reading DATA */ - bool h3_body_pending; /* TRUE when h3 stream may have more body DATA */ - struct h3_event_node *pending; -#endif /* USE_QUICHE */ }; CURLcode Curl_http_size(struct Curl_easy *data); diff --git a/lib/vquic/curl_msh3.c b/lib/vquic/curl_msh3.c index 530899977..ce1e63473 100644 --- a/lib/vquic/curl_msh3.c +++ b/lib/vquic/curl_msh3.c @@ -45,16 +45,10 @@ #include "curl_memory.h" #include "memdebug.h" -#define DEBUG_CF 1 - -#if DEBUG_CF && defined(DEBUGBUILD) -#define CF_DEBUGF(x) x -#else -#define CF_DEBUGF(x) do { } while(0) -#endif - -#define MSH3_REQ_INIT_BUF_LEN 16384 -#define MSH3_REQ_MAX_BUF_LEN 0x100000 +#define H3_STREAM_WINDOW_SIZE (128 * 1024) +#define H3_STREAM_CHUNK_SIZE (16 * 1024) +#define H3_STREAM_RECV_CHUNKS \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE) #ifdef _WIN32 #define msh3_lock 
CRITICAL_SECTION @@ -116,6 +110,7 @@ struct cf_msh3_ctx { curl_socket_t sock[2]; /* fake socket pair until we get support in msh3 */ char l_ip[MAX_IPADR_LEN]; /* local IP as string */ int l_port; /* local port number */ + struct cf_call_data call_data; struct curltime connect_started; /* time the current attempt started */ struct curltime handshake_at; /* time connect handshake finished */ /* Flags written by msh3/msquic thread */ @@ -127,6 +122,83 @@ struct cf_msh3_ctx { BIT(active); }; +/* How to access `call_data` from a cf_msh3 filter */ +#define CF_CTX_CALL_DATA(cf) \ + ((struct cf_msh3_ctx *)(cf)->ctx)->call_data + +/** + * All about the H3 internals of a stream + */ +struct stream_ctx { + struct MSH3_REQUEST *req; + struct bufq recvbuf; /* h3 response */ +#ifdef _WIN32 + CRITICAL_SECTION recv_lock; +#else /* !_WIN32 */ + pthread_mutex_t recv_lock; +#endif /* _WIN32 */ + uint64_t error3; /* HTTP/3 stream error code */ + int status_code; /* HTTP status code */ + CURLcode recv_error; + bool closed; + bool reset; + bool upload_done; + bool firstheader; /* FALSE until headers arrive */ + bool recv_header_complete; +}; + +#define H3_STREAM_CTX(d) ((struct stream_ctx *)(((d) && (d)->req.p.http)? \ + ((struct HTTP *)(d)->req.p.http)->impl_ctx \ + : NULL)) +#define H3_STREAM_LCTX(d) ((struct HTTP *)(d)->req.p.http)->impl_ctx +#define H3_STREAM_ID(d) (H3_STREAM_CTX(d)? 
\ + H3_STREAM_CTX(d)->id : -2) + + +static CURLcode h3_data_setup(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + + if(stream) + return CURLE_OK; + + stream = calloc(1, sizeof(*stream)); + if(!stream) + return CURLE_OUT_OF_MEMORY; + + H3_STREAM_LCTX(data) = stream; + stream->req = ZERO_NULL; + msh3_lock_initialize(&stream->recv_lock); + Curl_bufq_init2(&stream->recvbuf, H3_STREAM_CHUNK_SIZE, + H3_STREAM_RECV_CHUNKS, BUFQ_OPT_SOFT_LIMIT); + DEBUGF(LOG_CF(data, cf, "data setup (easy %p)", (void *)data)); + return CURLE_OK; +} + +static void h3_data_done(struct Curl_cfilter *cf, struct Curl_easy *data) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + + (void)cf; + if(stream) { + DEBUGF(LOG_CF(data, cf, "easy handle is done")); + Curl_bufq_free(&stream->recvbuf); + free(stream); + H3_STREAM_LCTX(data) = NULL; + } +} + +static void notify_drain(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + (void)cf; + if(!data->state.drain) { + data->state.drain = 1; + Curl_expire(data, 0, EXPIRE_RUN_NOW); + } +} + static const MSH3_CONNECTION_IF msh3_conn_if = { msh3_conn_connected, msh3_conn_shutdown_complete, @@ -136,10 +208,12 @@ static const MSH3_CONNECTION_IF msh3_conn_if = { static void MSH3_CALL msh3_conn_connected(MSH3_CONNECTION *Connection, void *IfContext) { - struct cf_msh3_ctx *ctx = IfContext; + struct Curl_cfilter *cf = IfContext; + struct cf_msh3_ctx *ctx = cf->ctx; + struct Curl_easy *data = CF_DATA_CURRENT(cf); (void)Connection; - if(ctx->verbose) - CF_DEBUGF(fprintf(stderr, "* [MSH3] evt: connected\n")); + + DEBUGF(LOG_CF(data, cf, "[MSH3] connected")); ctx->handshake_succeeded = true; ctx->connected = true; ctx->handshake_complete = true; @@ -148,10 +222,12 @@ static void MSH3_CALL msh3_conn_connected(MSH3_CONNECTION *Connection, static void MSH3_CALL msh3_conn_shutdown_complete(MSH3_CONNECTION *Connection, void *IfContext) { - struct cf_msh3_ctx *ctx = IfContext; + struct Curl_cfilter *cf = 
IfContext; + struct cf_msh3_ctx *ctx = cf->ctx; + struct Curl_easy *data = CF_DATA_CURRENT(cf); + (void)Connection; - if(ctx->verbose) - CF_DEBUGF(fprintf(stderr, "* [MSH3] evt: shutdown complete\n")); + DEBUGF(LOG_CF(data, cf, "[MSH3] shutdown complete")); ctx->connected = false; ctx->handshake_complete = true; } @@ -173,173 +249,159 @@ static const MSH3_REQUEST_IF msh3_request_if = { msh3_data_sent }; -static CURLcode msh3_data_setup(struct Curl_cfilter *cf, - struct Curl_easy *data) +/* Decode HTTP status code. Returns -1 if no valid status code was + decoded. (duplicate from http2.c) */ +static int decode_status_code(const char *value, size_t len) { - struct HTTP *stream = data->req.p.http; - (void)cf; + int i; + int res; - DEBUGASSERT(stream); - if(!stream->recv_buf) { - DEBUGF(LOG_CF(data, cf, "req: setup")); - stream->recv_buf = malloc(MSH3_REQ_INIT_BUF_LEN); - if(!stream->recv_buf) { - return CURLE_OUT_OF_MEMORY; + if(len != 3) { + return -1; + } + + res = 0; + + for(i = 0; i < 3; ++i) { + char c = value[i]; + + if(c < '0' || c > '9') { + return -1; } - stream->req = ZERO_NULL; - msh3_lock_initialize(&stream->recv_lock); - stream->recv_buf_alloc = MSH3_REQ_INIT_BUF_LEN; - stream->recv_buf_max = MSH3_REQ_MAX_BUF_LEN; - stream->recv_header_len = 0; - stream->recv_header_complete = false; - stream->recv_data_len = 0; - stream->recv_data_complete = false; - stream->recv_error = CURLE_OK; + + res *= 10; + res += c - '0'; } - return CURLE_OK; + + return res; } -/* Requires stream->recv_lock to be held */ -static bool msh3request_ensure_room(struct HTTP *stream, size_t len) +/* + * write_resp_raw() copies response data in raw format to the `data`'s + * receive buffer. If not enough space is available, it appends to the + * `data`'s overflow buffer. 
+ */ +static CURLcode write_resp_raw(struct Curl_easy *data, + const void *mem, size_t memlen) { - uint8_t *new_recv_buf; - const size_t cur_recv_len = stream->recv_header_len + stream->recv_data_len; - - if(cur_recv_len + len > stream->recv_buf_alloc) { - size_t new_recv_buf_alloc_len = stream->recv_buf_alloc; - do { - new_recv_buf_alloc_len <<= 1; /* TODO - handle overflow */ - } while(cur_recv_len + len > new_recv_buf_alloc_len); - CF_DEBUGF(fprintf(stderr, "* enlarging buffer to %zu\n", - new_recv_buf_alloc_len)); - new_recv_buf = malloc(new_recv_buf_alloc_len); - if(!new_recv_buf) { - CF_DEBUGF(fprintf(stderr, "* FAILED: enlarging buffer to %zu\n", - new_recv_buf_alloc_len)); - return false; - } - if(cur_recv_len) { - memcpy(new_recv_buf, stream->recv_buf, cur_recv_len); - } - stream->recv_buf_alloc = new_recv_buf_alloc_len; - free(stream->recv_buf); - stream->recv_buf = new_recv_buf; + struct stream_ctx *stream = H3_STREAM_CTX(data); + CURLcode result = CURLE_OK; + ssize_t nwritten; + + nwritten = Curl_bufq_write(&stream->recvbuf, mem, memlen, &result); + if(nwritten < 0) { + return result; + } + + if((size_t)nwritten < memlen) { + /* This MUST not happen. Our recbuf is dimensioned to hold the + * full max_stream_window and then some for this very reason. 
*/ + DEBUGASSERT(0); + return CURLE_RECV_ERROR; } - return true; + return result; } static void MSH3_CALL msh3_header_received(MSH3_REQUEST *Request, - void *IfContext, - const MSH3_HEADER *Header) + void *userp, + const MSH3_HEADER *hd) { - struct Curl_easy *data = IfContext; - struct HTTP *stream = data->req.p.http; - size_t total_len; + struct Curl_easy *data = userp; + struct stream_ctx *stream = H3_STREAM_CTX(data); + CURLcode result; (void)Request; if(stream->recv_header_complete) { - CF_DEBUGF(fprintf(stderr, "* ignoring header after data\n")); return; } msh3_lock_acquire(&stream->recv_lock); - if((Header->NameLength == 7) && - !strncmp(H2H3_PSEUDO_STATUS, (char *)Header->Name, 7)) { - total_len = 10 + Header->ValueLength; - if(!msh3request_ensure_room(stream, total_len)) { - CF_DEBUGF(fprintf(stderr, "* ERROR: unable to buffer: %.*s\n", - (int)Header->NameLength, Header->Name)); - stream->recv_error = CURLE_OUT_OF_MEMORY; - goto release_lock; - } - msnprintf((char *)stream->recv_buf + stream->recv_header_len, - stream->recv_buf_alloc - stream->recv_header_len, - "HTTP/3 %.*s \r\n", (int)Header->ValueLength, Header->Value); + if((hd->NameLength == 7) && + !strncmp(H2H3_PSEUDO_STATUS, (char *)hd->Name, 7)) { + char line[14]; /* status line is always 13 characters long */ + size_t ncopy; + + DEBUGASSERT(!stream->firstheader); + stream->status_code = decode_status_code(hd->Value, hd->ValueLength); + DEBUGASSERT(stream->status_code != -1); + ncopy = msnprintf(line, sizeof(line), "HTTP/3 %03d \r\n", + stream->status_code); + result = write_resp_raw(data, line, ncopy); + if(result) + stream->recv_error = result; + stream->firstheader = TRUE; } else { - total_len = 4 + Header->NameLength + Header->ValueLength; - if(!msh3request_ensure_room(stream, total_len)) { - CF_DEBUGF(fprintf(stderr, "* ERROR: unable to buffer: %.*s\n", - (int)Header->NameLength, Header->Name)); - stream->recv_error = CURLE_OUT_OF_MEMORY; - goto release_lock; + /* store as an HTTP1-style 
header */ + DEBUGASSERT(stream->firstheader); + result = write_resp_raw(data, hd->Name, hd->NameLength); + if(!result) + result = write_resp_raw(data, ": ", 2); + if(!result) + result = write_resp_raw(data, hd->Value, hd->ValueLength); + if(!result) + result = write_resp_raw(data, "\r\n", 2); + if(result) { + stream->recv_error = result; } - msnprintf((char *)stream->recv_buf + stream->recv_header_len, - stream->recv_buf_alloc - stream->recv_header_len, - "%.*s: %.*s\r\n", - (int)Header->NameLength, Header->Name, - (int)Header->ValueLength, Header->Value); } - stream->recv_header_len += total_len; data->state.drain = 1; - -release_lock: msh3_lock_release(&stream->recv_lock); } static bool MSH3_CALL msh3_data_received(MSH3_REQUEST *Request, - void *IfContext, uint32_t *Length, - const uint8_t *Data) + void *IfContext, uint32_t *buflen, + const uint8_t *buf) { struct Curl_easy *data = IfContext; - struct HTTP *stream = data->req.p.http; - size_t cur_recv_len = stream->recv_header_len + stream->recv_data_len; + struct stream_ctx *stream = H3_STREAM_CTX(data); + CURLcode result; + bool rv = FALSE; + /* TODO: we would like to limit the amount of data we are buffer here. + * There seems to be no mechanism in msh3 to adjust flow control and + * it is undocumented what happens if we return FALSE here or less + * length (buflen is an inout parameter). + */ (void)Request; - if(data && data->set.verbose) - CF_DEBUGF(fprintf(stderr, "* [MSH3] req: evt: received %u. %zu buffered, " - "%zu allocated\n", - *Length, cur_recv_len, stream->recv_buf_alloc)); - /* TODO - Update this code to limit data bufferring by `stream->recv_buf_max` - and return `false` when we reach that limit. Then, when curl drains some - of the buffer, making room, call MsH3RequestSetReceiveEnabled to enable - receive callbacks again. 
*/ msh3_lock_acquire(&stream->recv_lock); if(!stream->recv_header_complete) { - if(data && data->set.verbose) - CF_DEBUGF(fprintf(stderr, "* [MSH3] req: Headers complete!\n")); - if(!msh3request_ensure_room(stream, 2)) { - stream->recv_error = CURLE_OUT_OF_MEMORY; - goto release_lock; + result = write_resp_raw(data, "\r\n", 2); + if(result) { + stream->recv_error = result; + goto out; } - stream->recv_buf[stream->recv_header_len++] = '\r'; - stream->recv_buf[stream->recv_header_len++] = '\n'; stream->recv_header_complete = true; - cur_recv_len += 2; } - if(!msh3request_ensure_room(stream, *Length)) { - stream->recv_error = CURLE_OUT_OF_MEMORY; - goto release_lock; + + result = write_resp_raw(data, buf, *buflen); + if(result) { + stream->recv_error = result; } - memcpy(stream->recv_buf + cur_recv_len, Data, *Length); - stream->recv_data_len += (size_t)*Length; - data->state.drain = 1; + rv = TRUE; -release_lock: +out: msh3_lock_release(&stream->recv_lock); - return true; + return rv; } static void MSH3_CALL msh3_complete(MSH3_REQUEST *Request, void *IfContext, - bool Aborted, uint64_t AbortError) + bool aborted, uint64_t error) { struct Curl_easy *data = IfContext; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); (void)Request; - (void)AbortError; - if(data && data->set.verbose) - CF_DEBUGF(fprintf(stderr, "* [MSH3] req: evt: complete, aborted=%s\n", - Aborted ? "true" : "false")); msh3_lock_acquire(&stream->recv_lock); - if(Aborted) { - stream->recv_error = CURLE_HTTP3; /* TODO - how do we pass AbortError? 
*/ - } + stream->closed = TRUE; stream->recv_header_complete = true; - stream->recv_data_complete = true; + if(error) + stream->error3 = error; + if(aborted) + stream->reset = TRUE; msh3_lock_release(&stream->recv_lock); } @@ -347,7 +409,7 @@ static void MSH3_CALL msh3_shutdown_complete(MSH3_REQUEST *Request, void *IfContext) { struct Curl_easy *data = IfContext; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); (void)Request; (void)stream; } @@ -356,82 +418,121 @@ static void MSH3_CALL msh3_data_sent(MSH3_REQUEST *Request, void *IfContext, void *SendContext) { struct Curl_easy *data = IfContext; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); (void)Request; (void)stream; (void)SendContext; } +static ssize_t recv_closed_stream(struct Curl_cfilter *cf, + struct Curl_easy *data, + CURLcode *err) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + ssize_t nread = -1; + + (void)cf; + if(stream->reset) { + failf(data, "HTTP/3 stream reset by server"); + *err = CURLE_PARTIAL_FILE; + DEBUGF(LOG_CF(data, cf, "cf_recv, was reset -> %d", *err)); + goto out; + } + else if(stream->error3) { + failf(data, "HTTP/3 stream was not closed cleanly: (error %zd)", + (ssize_t)stream->error3); + *err = CURLE_HTTP3; + DEBUGF(LOG_CF(data, cf, "cf_recv, closed uncleanly -> %d", *err)); + goto out; + } + else { + DEBUGF(LOG_CF(data, cf, "cf_recv, closed ok -> %d", *err)); + } + *err = CURLE_OK; + nread = 0; + +out: + data->state.drain = 0; + return nread; +} + +static void set_quic_expire(struct Curl_cfilter *cf, struct Curl_easy *data) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + + /* we have no indication from msh3 when it would be a good time + * to juggle the connection again. So, we compromise by calling + * us again every some milliseconds. 
*/ + (void)cf; + if(stream && stream->req && !stream->closed) { + Curl_expire(data, 10, EXPIRE_QUIC); + } + else { + Curl_expire(data, 50, EXPIRE_QUIC); + } +} + static ssize_t cf_msh3_recv(struct Curl_cfilter *cf, struct Curl_easy *data, char *buf, size_t len, CURLcode *err) { - struct HTTP *stream = data->req.p.http; - size_t outsize = 0; + struct stream_ctx *stream = H3_STREAM_CTX(data); + ssize_t nread = -1; + struct cf_call_data save; (void)cf; + CF_DATA_SAVE(save, cf, data); DEBUGF(LOG_CF(data, cf, "req: recv with %zu byte buffer", len)); + msh3_lock_acquire(&stream->recv_lock); + if(stream->recv_error) { failf(data, "request aborted"); data->state.drain = 0; *err = stream->recv_error; - return -1; + goto out; } *err = CURLE_OK; - msh3_lock_acquire(&stream->recv_lock); - if(stream->recv_header_len) { - outsize = len; - if(stream->recv_header_len < outsize) { - outsize = stream->recv_header_len; - } - memcpy(buf, stream->recv_buf, outsize); - if(outsize < stream->recv_header_len + stream->recv_data_len) { - memmove(stream->recv_buf, stream->recv_buf + outsize, - stream->recv_header_len + stream->recv_data_len - outsize); - } - stream->recv_header_len -= outsize; - DEBUGF(LOG_CF(data, cf, "req: returned %zu bytes of header", outsize)); - } - else if(stream->recv_data_len) { - outsize = len; - if(stream->recv_data_len < outsize) { - outsize = stream->recv_data_len; - } - memcpy(buf, stream->recv_buf, outsize); - if(outsize < stream->recv_data_len) { - memmove(stream->recv_buf, stream->recv_buf + outsize, - stream->recv_data_len - outsize); + if(!Curl_bufq_is_empty(&stream->recvbuf)) { + nread = Curl_bufq_read(&stream->recvbuf, + (unsigned char *)buf, len, err); + DEBUGF(LOG_CF(data, cf, "read recvbuf(len=%zu) -> %zd, %d", + len, nread, *err)); + if(nread < 0) + goto out; + if(!Curl_bufq_is_empty(&stream->recvbuf) || + stream->closed) { + notify_drain(cf, data); } - stream->recv_data_len -= outsize; - DEBUGF(LOG_CF(data, cf, "req: returned %zu bytes of data", 
outsize)); - if(stream->recv_data_len == 0 && stream->recv_data_complete) - data->state.drain = 1; } - else if(stream->recv_data_complete) { - DEBUGF(LOG_CF(data, cf, "req: receive complete")); - data->state.drain = 0; + else if(stream->closed) { + nread = recv_closed_stream(cf, data, err); + goto out; } else { DEBUGF(LOG_CF(data, cf, "req: nothing here, call again")); *err = CURLE_AGAIN; - outsize = -1; } +out: msh3_lock_release(&stream->recv_lock); - - return (ssize_t)outsize; + set_quic_expire(cf, data); + CF_DATA_RESTORE(cf, save); + return nread; } static ssize_t cf_msh3_send(struct Curl_cfilter *cf, struct Curl_easy *data, const void *buf, size_t len, CURLcode *err) { struct cf_msh3_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); struct h2h3req *hreq; size_t hdrlen = 0; - size_t sentlen = 0; + ssize_t nwritten = -1; + struct cf_call_data save; + + CF_DATA_SAVE(save, cf, data); /* Sizes must match for cast below to work" */ DEBUGASSERT(sizeof(MSH3_HEADER) == sizeof(struct h2h3pseudo)); @@ -442,16 +543,11 @@ static ssize_t cf_msh3_send(struct Curl_cfilter *cf, struct Curl_easy *data, data. Parse out the headers and create the request, then if there is any data left over go ahead and send it too. 
*/ - *err = msh3_data_setup(cf, data); - if(*err) { - failf(data, "could not setup data"); - return -1; - } - *err = Curl_pseudo_headers(data, buf, len, &hdrlen, &hreq); if(*err) { failf(data, "Curl_pseudo_headers failed"); - return -1; + *err = CURLE_SEND_ERROR; + goto out; } DEBUGF(LOG_CF(data, cf, "req: send %zu headers", hreq->entries)); @@ -463,31 +559,35 @@ static ssize_t cf_msh3_send(struct Curl_cfilter *cf, struct Curl_easy *data, if(!stream->req) { failf(data, "request open failed"); *err = CURLE_SEND_ERROR; - return -1; + goto out; } *err = CURLE_OK; - return len; + nwritten = len; + goto out; } + else { + /* request is open */ + DEBUGF(LOG_CF(data, cf, "req: send %zd body bytes", len)); + if(len > 0xFFFFFFFF) { + len = 0xFFFFFFFF; + } - DEBUGF(LOG_CF(data, cf, "req: send %zd body bytes", len)); - if(len > 0xFFFFFFFF) { - /* msh3 doesn't support size_t sends currently. */ - *err = CURLE_SEND_ERROR; - return -1; - } + if(!MsH3RequestSend(stream->req, MSH3_REQUEST_FLAG_NONE, buf, + (uint32_t)len, stream)) { + *err = CURLE_SEND_ERROR; + goto out; + } - /* TODO - Need an explicit signal to know when to FIN. */ - if(!MsH3RequestSend(stream->req, MSH3_REQUEST_FLAG_FIN, buf, (uint32_t)len, - stream)) { - *err = CURLE_SEND_ERROR; - return -1; + /* TODO - msh3/msquic will hold onto this memory until the send complete + event. How do we make sure curl doesn't free it until then? */ + *err = CURLE_OK; + nwritten = len; } - /* TODO - msh3/msquic will hold onto this memory until the send complete - event. How do we make sure curl doesn't free it until then? 
*/ - sentlen += len; - *err = CURLE_OK; - return sentlen; +out: + set_quic_expire(cf, data); + CF_DATA_RESTORE(cf, save); + return nwritten; } static int cf_msh3_get_select_socks(struct Curl_cfilter *cf, @@ -495,36 +595,49 @@ static int cf_msh3_get_select_socks(struct Curl_cfilter *cf, curl_socket_t *socks) { struct cf_msh3_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); int bitmap = GETSOCK_BLANK; + struct cf_call_data save; + CF_DATA_SAVE(save, cf, data); if(stream && ctx->sock[SP_LOCAL] != CURL_SOCKET_BAD) { socks[0] = ctx->sock[SP_LOCAL]; if(stream->recv_error) { bitmap |= GETSOCK_READSOCK(0); - data->state.drain = 1; + notify_drain(cf, data); } - else if(stream->recv_header_len || stream->recv_data_len) { + else if(stream->req) { bitmap |= GETSOCK_READSOCK(0); - data->state.drain = 1; + notify_drain(cf, data); } } DEBUGF(LOG_CF(data, cf, "select_sock %u -> %d", (uint32_t)data->state.drain, bitmap)); - + CF_DATA_RESTORE(cf, save); return bitmap; } static bool cf_msh3_data_pending(struct Curl_cfilter *cf, const struct Curl_easy *data) { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); + struct cf_call_data save; + bool pending = FALSE; + + CF_DATA_SAVE(save, cf, data); (void)cf; - DEBUGF(LOG_CF((struct Curl_easy *)data, cf, "data pending = %hhu", - (bool)(stream->recv_header_len || stream->recv_data_len))); - return stream->recv_header_len || stream->recv_data_len; + if(stream->req) { + msh3_lock_acquire(&stream->recv_lock); + DEBUGF(LOG_CF((struct Curl_easy *)data, cf, "data pending = %zu", + Curl_bufq_len(&stream->recvbuf))); + pending = !Curl_bufq_is_empty(&stream->recvbuf); + msh3_lock_release(&stream->recv_lock); + } + + CF_DATA_RESTORE(cf, save); + return pending; } static void cf_msh3_active(struct Curl_cfilter *cf, struct Curl_easy *data) @@ -548,31 +661,30 @@ static CURLcode cf_msh3_data_event(struct Curl_cfilter *cf, struct Curl_easy *data, int 
event, int arg1, void *arg2) { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); + struct cf_call_data save; CURLcode result = CURLE_OK; + CF_DATA_SAVE(save, cf, data); + (void)arg1; (void)arg2; switch(event) { case CF_CTRL_DATA_SETUP: - result = msh3_data_setup(cf, data); + result = h3_data_setup(cf, data); break; case CF_CTRL_DATA_DONE: - DEBUGF(LOG_CF(data, cf, "req: done")); - if(stream) { - if(stream->recv_buf) { - Curl_safefree(stream->recv_buf); - msh3_lock_uninitialize(&stream->recv_lock); - } - if(stream->req) { - MsH3RequestClose(stream->req); - stream->req = ZERO_NULL; - } - } + h3_data_done(cf, data); break; case CF_CTRL_DATA_DONE_SEND: DEBUGF(LOG_CF(data, cf, "req: send done")); stream->upload_done = TRUE; + if(stream && stream->req) { + char buf[1]; + if(!MsH3RequestSend(stream->req, MSH3_REQUEST_FLAG_FIN, buf, 0, data)) { + result = CURLE_SEND_ERROR; + } + } break; case CF_CTRL_CONN_INFO_UPDATE: DEBUGF(LOG_CF(data, cf, "req: update info")); @@ -581,6 +693,8 @@ static CURLcode cf_msh3_data_event(struct Curl_cfilter *cf, default: break; } + + CF_DATA_RESTORE(cf, save); return result; } @@ -590,9 +704,10 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, struct cf_msh3_ctx *ctx = cf->ctx; bool verify = !!cf->conn->ssl_config.verifypeer; MSH3_ADDR addr = {0}; + CURLcode result; + memcpy(&addr, &ctx->addr.sa_addr, ctx->addr.addrlen); MSH3_SET_PORT(&addr, (uint16_t)cf->conn->remote_port); - ctx->verbose = (data && data->set.verbose); if(verify && (cf->conn->ssl_config.CAfile || cf->conn->ssl_config.CApath)) { /* TODO: need a way to provide trust anchors to MSH3 */ @@ -618,7 +733,7 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, ctx->qconn = MsH3ConnectionOpen(ctx->api, &msh3_conn_if, - ctx, + cf, cf->conn->host.name, &addr, !verify); @@ -631,6 +746,10 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, return CURLE_FAILED_INIT; } + result = h3_data_setup(cf, data); + if(result) + 
return result; + return CURLE_OK; } @@ -639,6 +758,7 @@ static CURLcode cf_msh3_connect(struct Curl_cfilter *cf, bool blocking, bool *done) { struct cf_msh3_ctx *ctx = cf->ctx; + struct cf_call_data save; CURLcode result = CURLE_OK; (void)blocking; @@ -647,6 +767,8 @@ static CURLcode cf_msh3_connect(struct Curl_cfilter *cf, return CURLE_OK; } + CF_DATA_SAVE(save, cf, data); + if(ctx->sock[SP_LOCAL] == CURL_SOCKET_BAD) { if(Curl_socketpair(AF_UNIX, SOCK_STREAM, 0, &ctx->sock[0]) < 0) { ctx->sock[SP_LOCAL] = CURL_SOCKET_BAD; @@ -666,6 +788,7 @@ static CURLcode cf_msh3_connect(struct Curl_cfilter *cf, if(ctx->handshake_complete) { ctx->handshake_at = Curl_now(); if(ctx->handshake_succeeded) { + DEBUGF(LOG_CF(data, cf, "handshake succeeded")); cf->conn->bits.multiplex = TRUE; /* at least potentially multiplexed */ cf->conn->httpversion = 30; cf->conn->bundle->multiuse = BUNDLE_MULTIPLEX; @@ -682,26 +805,35 @@ static CURLcode cf_msh3_connect(struct Curl_cfilter *cf, } out: + CF_DATA_RESTORE(cf, save); return result; } static void cf_msh3_close(struct Curl_cfilter *cf, struct Curl_easy *data) { struct cf_msh3_ctx *ctx = cf->ctx; + struct cf_call_data save; (void)data; + CF_DATA_SAVE(save, cf, data); + if(ctx) { DEBUGF(LOG_CF(data, cf, "destroying")); - if(ctx->qconn) + if(ctx->qconn) { MsH3ConnectionClose(ctx->qconn); - if(ctx->api) + ctx->qconn = NULL; + } + if(ctx->api) { MsH3ApiClose(ctx->api); + ctx->api = NULL; + } if(ctx->active) { /* We share our socket at cf->conn->sock[cf->sockindex] when active. * If it is no longer there, someone has stolen (and hopefully * closed it) and we just forget about it. 
*/ + ctx->active = FALSE; if(ctx->sock[SP_LOCAL] == cf->conn->sock[cf->sockindex]) { DEBUGF(LOG_CF(data, cf, "cf_msh3_close(%d) active", (int)ctx->sock[SP_LOCAL])); @@ -721,17 +853,22 @@ static void cf_msh3_close(struct Curl_cfilter *cf, struct Curl_easy *data) if(ctx->sock[SP_REMOTE] != CURL_SOCKET_BAD) { sclose(ctx->sock[SP_REMOTE]); } - memset(ctx, 0, sizeof(*ctx)); ctx->sock[SP_LOCAL] = CURL_SOCKET_BAD; ctx->sock[SP_REMOTE] = CURL_SOCKET_BAD; } + CF_DATA_RESTORE(cf, save); } static void cf_msh3_destroy(struct Curl_cfilter *cf, struct Curl_easy *data) { + struct cf_call_data save; + + CF_DATA_SAVE(save, cf, data); cf_msh3_close(cf, data); free(cf->ctx); cf->ctx = NULL; + /* no CF_DATA_RESTORE(cf, save); its gone */ + } static CURLcode cf_msh3_query(struct Curl_cfilter *cf, diff --git a/lib/vquic/curl_ngtcp2.c b/lib/vquic/curl_ngtcp2.c index 18c741c59..854edb84f 100644 --- a/lib/vquic/curl_ngtcp2.c +++ b/lib/vquic/curl_ngtcp2.c @@ -75,25 +75,32 @@ #define H3_ALPN_H3_29 "\x5h3-29" #define H3_ALPN_H3 "\x2h3" -/* - * This holds outgoing HTTP/3 stream data that is used by nghttp3 until acked. - * It is used as a circular buffer. Add new bytes at the end until it reaches - * the far end, then start over at index 0 again. - */ - -#define H3_SEND_SIZE (256*1024) -struct h3out { - uint8_t buf[H3_SEND_SIZE]; - size_t used; /* number of bytes used in the buffer */ - size_t windex; /* index in the buffer where to start writing the next - data block */ -}; - #define QUIC_MAX_STREAMS (256*1024) #define QUIC_MAX_DATA (1*1024*1024) #define QUIC_IDLE_TIMEOUT (60*NGTCP2_SECONDS) #define QUIC_HANDSHAKE_TIMEOUT (10*NGTCP2_SECONDS) +/* A stream window is the maximum amount we need to buffer for + * each active transfer. We use HTTP/3 flow control and only ACK + * when we take things out of the buffer. 
+ * Chunk size is large enough to take a full DATA frame */ +#define H3_STREAM_WINDOW_SIZE (128 * 1024) +#define H3_STREAM_CHUNK_SIZE (16 * 1024) +/* The pool keeps spares around and half of a full stream windows + * seems good. More does not seem to improve performance. + * The benefit of the pool is that stream buffer to not keep + * spares. So memory consumption goes down when streams run empty, + * have a large upload done, etc. */ +#define H3_STREAM_POOL_SPARES \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE ) / 2 +/* Receive and Send max number of chunks just follows from the + * chunk size and window size */ +#define H3_STREAM_RECV_CHUNKS \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE) +#define H3_STREAM_SEND_CHUNKS \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE) + + #ifdef USE_OPENSSL #define QUIC_CIPHERS \ "TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_" \ @@ -147,11 +154,13 @@ struct cf_ngtcp2_ctx { struct cf_call_data call_data; nghttp3_conn *h3conn; nghttp3_settings h3settings; - int qlogfd; struct curltime started_at; /* time the current attempt started */ struct curltime handshake_at; /* time connect handshake finished */ struct curltime first_byte_at; /* when first byte was recvd */ - struct curltime reconnect_at; /* time the next attempt should start */ + struct curltime reconnect_at; /* time the next attempt should start */ + struct bufc_pool stream_bufcp; /* chunk pool for streams */ + size_t max_stream_window; /* max flow window for one stream */ + int qlogfd; BIT(got_first_byte); /* if first byte was received */ }; @@ -159,6 +168,73 @@ struct cf_ngtcp2_ctx { #define CF_CTX_CALL_DATA(cf) \ ((struct cf_ngtcp2_ctx *)(cf)->ctx)->call_data +/** + * All about the H3 internals of a stream + */ +struct stream_ctx { + int64_t id; /* HTTP/3 protocol identifier */ + struct bufq sendbuf; /* h3 request body */ + struct bufq recvbuf; /* h3 response body */ + size_t sendbuf_len_in_flight; /* sendbuf amount "in flight" */ + size_t 
recv_buf_nonflow; /* buffered bytes, not counting for flow control */ + uint64_t error3; /* HTTP/3 stream error code */ + int status_code; /* HTTP status code */ + bool resp_hds_complete; /* we have a complete, final response */ + bool closed; /* TRUE on stream close */ + bool reset; /* TRUE on stream reset */ + bool upload_done; /* stream is local closed */ +}; + +#define H3_STREAM_CTX(d) ((struct stream_ctx *)(((d) && (d)->req.p.http)? \ + ((struct HTTP *)(d)->req.p.http)->impl_ctx \ + : NULL)) +#define H3_STREAM_LCTX(d) ((struct HTTP *)(d)->req.p.http)->impl_ctx +#define H3_STREAM_ID(d) (H3_STREAM_CTX(d)? \ + H3_STREAM_CTX(d)->id : -2) + +static CURLcode h3_data_setup(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct cf_ngtcp2_ctx *ctx = cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(data); + + if(stream) + return CURLE_OK; + + stream = calloc(1, sizeof(*stream)); + if(!stream) + return CURLE_OUT_OF_MEMORY; + + stream->id = -1; + /* on send, we control how much we put into the buffer */ + Curl_bufq_initp(&stream->sendbuf, &ctx->stream_bufcp, + H3_STREAM_SEND_CHUNKS, BUFQ_OPT_NONE); + stream->sendbuf_len_in_flight = 0; + /* on recv, we need a flexible buffer limit since we also write + * headers to it that are not counted against the nghttp3 flow limits. */ + Curl_bufq_initp(&stream->recvbuf, &ctx->stream_bufcp, + H3_STREAM_RECV_CHUNKS, BUFQ_OPT_SOFT_LIMIT); + stream->recv_buf_nonflow = 0; + + H3_STREAM_LCTX(data) = stream; + DEBUGF(LOG_CF(data, cf, "data setup (easy %p)", (void *)data)); + return CURLE_OK; +} + +static void h3_data_done(struct Curl_cfilter *cf, struct Curl_easy *data) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + + (void)cf; + if(stream) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] easy handle is done", + stream->id)); + Curl_bufq_free(&stream->sendbuf); + Curl_bufq_free(&stream->recvbuf); + free(stream); + H3_STREAM_LCTX(data) = NULL; + } +} /* ngtcp2 default congestion controller does not perform pacing. 
Limit the maximum packet burst to MAX_PKT_BURST packets. */ @@ -168,7 +244,7 @@ static CURLcode cf_process_ingress(struct Curl_cfilter *cf, struct Curl_easy *data); static CURLcode cf_flush_egress(struct Curl_cfilter *cf, struct Curl_easy *data); -static int cb_h3_acked_stream_data(nghttp3_conn *conn, int64_t stream_id, +static int cb_h3_acked_req_body(nghttp3_conn *conn, int64_t stream_id, uint64_t datalen, void *user_data, void *stream_user_data); @@ -222,7 +298,6 @@ static void quic_settings(struct cf_ngtcp2_ctx *ctx, { ngtcp2_settings *s = &ctx->settings; ngtcp2_transport_params *t = &ctx->transport_params; - size_t stream_win_size = CURL_MAX_READ_SIZE; ngtcp2_settings_default(s); ngtcp2_transport_params_default(t); @@ -235,13 +310,13 @@ static void quic_settings(struct cf_ngtcp2_ctx *ctx, (void)data; s->initial_ts = timestamp(); s->handshake_timeout = QUIC_HANDSHAKE_TIMEOUT; - s->max_window = 100 * stream_win_size; - s->max_stream_window = stream_win_size; + s->max_window = 100 * ctx->max_stream_window; + s->max_stream_window = ctx->max_stream_window; - t->initial_max_data = 10 * stream_win_size; - t->initial_max_stream_data_bidi_local = stream_win_size; - t->initial_max_stream_data_bidi_remote = stream_win_size; - t->initial_max_stream_data_uni = stream_win_size; + t->initial_max_data = 10 * ctx->max_stream_window; + t->initial_max_stream_data_bidi_local = ctx->max_stream_window; + t->initial_max_stream_data_bidi_remote = ctx->max_stream_window; + t->initial_max_stream_data_uni = ctx->max_stream_window; t->initial_max_streams_bidi = QUIC_MAX_STREAMS; t->initial_max_streams_uni = QUIC_MAX_STREAMS; t->max_idle_timeout = QUIC_IDLE_TIMEOUT; @@ -605,7 +680,7 @@ static void report_consumed_data(struct Curl_cfilter *cf, struct Curl_easy *data, size_t consumed) { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); struct cf_ngtcp2_ctx *ctx = cf->ctx; /* the HTTP/1.1 response headers are written to the buffer, but @@ -622,14 
+697,13 @@ static void report_consumed_data(struct Curl_cfilter *cf, } if(consumed > 0) { DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] consumed %zu DATA bytes", - stream->stream3_id, consumed)); - ngtcp2_conn_extend_max_stream_offset(ctx->qconn, stream->stream3_id, + stream->id, consumed)); + ngtcp2_conn_extend_max_stream_offset(ctx->qconn, stream->id, consumed); ngtcp2_conn_extend_max_offset(ctx->qconn, consumed); } - if(!stream->closed && data->state.drain - && !stream->memlen - && !Curl_dyn_len(&stream->overflow)) { + if(!stream->closed && data->state.drain && + Curl_bufq_is_empty(&stream->recvbuf)) { /* nothing buffered any more */ data->state.drain = 0; } @@ -892,22 +966,20 @@ static int cf_ngtcp2_get_select_socks(struct Curl_cfilter *cf, struct cf_ngtcp2_ctx *ctx = cf->ctx; struct SingleRequest *k = &data->req; int rv = GETSOCK_BLANK; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); struct cf_call_data save; CF_DATA_SAVE(save, cf, data); socks[0] = ctx->q.sockfd; - /* in an HTTP/3 connection we can basically always get a frame so we should - always be ready for one */ + /* in HTTP/3 we can always get a frame, so check read */ rv |= GETSOCK_READSOCK(0); /* we're still uploading or the HTTP/2 layer wants to send data */ if((k->keepon & KEEP_SENDBITS) == KEEP_SEND && - (!stream->h3out || stream->h3out->used < H3_SEND_SIZE) && ngtcp2_conn_get_cwnd_left(ctx->qconn) && ngtcp2_conn_get_max_data_left(ctx->qconn) && - nghttp3_conn_is_stream_writable(ctx->h3conn, stream->stream3_id)) + nghttp3_conn_is_stream_writable(ctx->h3conn, stream->id)) rv |= GETSOCK_WRITESOCK(0); DEBUGF(LOG_CF(data, cf, "get_select_socks -> %x (sock=%d)", @@ -926,26 +998,23 @@ static void notify_drain(struct Curl_cfilter *cf, } } - static int cb_h3_stream_close(nghttp3_conn *conn, int64_t stream_id, uint64_t app_error_code, void *user_data, void *stream_user_data) { struct Curl_cfilter *cf = user_data; struct Curl_easy *data = stream_user_data; - 
struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); (void)conn; (void)stream_id; (void)app_error_code; (void)cf; - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] h3 close(err=%" PRIx64 ")", + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] h3 close(err=%" PRId64 ")", stream_id, app_error_code)); stream->closed = TRUE; stream->error3 = app_error_code; if(app_error_code == NGHTTP3_H3_INTERNAL_ERROR) { - /* TODO: we do not get a specific error when the remote end closed - * the response before it was complete. */ stream->reset = TRUE; } notify_drain(cf, data); @@ -962,34 +1031,27 @@ static CURLcode write_resp_raw(struct Curl_cfilter *cf, const void *mem, size_t memlen, bool flow) { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); CURLcode result = CURLE_OK; - const char *buf = mem; - size_t ncopy = memlen; - /* copy as much as possible to the receive buffer */ - if(stream->len) { - size_t len = CURLMIN(ncopy, stream->len); - memcpy(stream->mem + stream->memlen, buf, len); - stream->len -= len; - stream->memlen += len; - buf += len; - ncopy -= len; - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] resp_raw: added %zu bytes" - " to data buffer", stream->stream3_id, len)); - } - /* copy the rest to the overflow buffer */ - if(ncopy) { - result = Curl_dyn_addn(&stream->overflow, buf, ncopy); - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] resp_raw: added %zu bytes" - " to overflow buffer -> %d", - stream->stream3_id, ncopy, result)); - notify_drain(cf, data); + ssize_t nwritten; + + (void)cf; + nwritten = Curl_bufq_write(&stream->recvbuf, mem, memlen, &result); + /* DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] add recvbuf(len=%zu) " + "-> %zd, %d", stream->id, memlen, nwritten, result)); + */ + if(nwritten < 0) { + return result; } if(!flow) - stream->recv_buf_nonflow += memlen; - if(CF_DATA_CURRENT(cf) != data) { - notify_drain(cf, data); + stream->recv_buf_nonflow += (size_t)nwritten; + + 
if((size_t)nwritten < memlen) { + /* This MUST not happen. Our recbuf is dimensioned to hold the + * full max_stream_window and then some for this very reason. */ + DEBUGASSERT(0); + return CURLE_RECV_ERROR; } return result; } @@ -1006,6 +1068,9 @@ static int cb_h3_recv_data(nghttp3_conn *conn, int64_t stream3_id, (void)stream3_id; result = write_resp_raw(cf, data, buf, buflen, TRUE); + if(CF_DATA_CURRENT(cf) != data) { + notify_drain(cf, data); + } return result? -1 : 0; } @@ -1057,7 +1122,7 @@ static int cb_h3_end_headers(nghttp3_conn *conn, int64_t stream_id, { struct Curl_cfilter *cf = user_data; struct Curl_easy *data = stream_user_data; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); CURLcode result = CURLE_OK; (void)conn; (void)stream_id; @@ -1065,17 +1130,18 @@ static int cb_h3_end_headers(nghttp3_conn *conn, int64_t stream_id, (void)cf; /* add a CRLF only if we've received some headers */ - if(stream->firstheader) { - result = write_resp_raw(cf, data, "\r\n", 2, FALSE); - if(result) { - return -1; - } + result = write_resp_raw(cf, data, "\r\n", 2, FALSE); + if(result) { + return -1; } DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] end_headers(status_code=%d", stream_id, stream->status_code)); if(stream->status_code / 100 != 1) { - stream->bodystarted = TRUE; + stream->resp_hds_complete = TRUE; + } + if(CF_DATA_CURRENT(cf) != data) { + notify_drain(cf, data); } return 0; } @@ -1089,7 +1155,7 @@ static int cb_h3_recv_header(nghttp3_conn *conn, int64_t stream_id, nghttp3_vec h3name = nghttp3_rcbuf_get_buf(name); nghttp3_vec h3val = nghttp3_rcbuf_get_buf(value); struct Curl_easy *data = stream_user_data; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); CURLcode result = CURLE_OK; (void)conn; (void)stream_id; @@ -1101,7 +1167,6 @@ static int cb_h3_recv_header(nghttp3_conn *conn, int64_t stream_id, char line[14]; /* status line is always 13 characters long */ size_t ncopy; - 
DEBUGASSERT(!stream->firstheader); stream->status_code = decode_status_code(h3val.base, h3val.len); DEBUGASSERT(stream->status_code != -1); ncopy = msnprintf(line, sizeof(line), "HTTP/3 %03d \r\n", @@ -1112,11 +1177,9 @@ static int cb_h3_recv_header(nghttp3_conn *conn, int64_t stream_id, if(result) { return -1; } - stream->firstheader = TRUE; } else { /* store as an HTTP1-style header */ - DEBUGASSERT(stream->firstheader); DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] header: %.*s: %.*s", stream_id, (int)h3name.len, h3name.base, (int)h3val.len, h3val.base)); @@ -1179,7 +1242,7 @@ static int cb_h3_reset_stream(nghttp3_conn *conn, int64_t stream_id, } static nghttp3_callbacks ngh3_callbacks = { - cb_h3_acked_stream_data, /* acked_stream_data */ + cb_h3_acked_req_body, /* acked_stream_data */ cb_h3_stream_close, cb_h3_recv_data, cb_h3_deferred_consume, @@ -1255,69 +1318,46 @@ static int init_ngh3_conn(struct Curl_cfilter *cf) return result; } -static void drain_overflow_buffer(struct Curl_cfilter *cf, - struct Curl_easy *data) -{ - struct HTTP *stream = data->req.p.http; - size_t overlen = Curl_dyn_len(&stream->overflow); - size_t ncopy = CURLMIN(overlen, stream->len); - - (void)cf; - if(ncopy > 0) { - memcpy(stream->mem + stream->memlen, - Curl_dyn_ptr(&stream->overflow), ncopy); - stream->len -= ncopy; - stream->memlen += ncopy; - if(ncopy != overlen) - /* make the buffer only keep the tail */ - (void)Curl_dyn_tail(&stream->overflow, overlen - ncopy); - else { - Curl_dyn_reset(&stream->overflow); - } - } -} - static ssize_t recv_closed_stream(struct Curl_cfilter *cf, struct Curl_easy *data, CURLcode *err) { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); ssize_t nread = -1; (void)cf; if(stream->reset) { failf(data, - "HTTP/3 stream %" PRId64 " reset by server", stream->stream3_id); + "HTTP/3 stream %" PRId64 " reset by server", stream->id); *err = CURLE_PARTIAL_FILE; DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, 
was reset -> %d", - stream->stream3_id, *err)); + stream->id, *err)); goto out; } else if(stream->error3 != NGHTTP3_H3_NO_ERROR) { failf(data, - "HTTP/3 stream %" PRId64 " was not closed cleanly: (err 0x%" PRIx64 - ")", - stream->stream3_id, stream->error3); + "HTTP/3 stream %" PRId64 " was not closed cleanly: " + "(err %"PRId64")", stream->id, stream->error3); *err = CURLE_HTTP3; DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, closed uncleanly" - " -> %d", stream->stream3_id, *err)); + " -> %d", stream->id, *err)); goto out; } - if(!stream->bodystarted) { + if(!stream->resp_hds_complete) { failf(data, "HTTP/3 stream %" PRId64 " was closed cleanly, but before getting" " all response header fields, treated as error", - stream->stream3_id); + stream->id); *err = CURLE_HTTP3; DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, closed incomplete" - " -> %d", stream->stream3_id, *err)); + " -> %d", stream->id, *err)); goto out; } else { DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, closed ok" - " -> %d", stream->stream3_id, *err)); + " -> %d", stream->id, *err)); } *err = CURLE_OK; nread = 0; @@ -1332,7 +1372,7 @@ static ssize_t cf_ngtcp2_recv(struct Curl_cfilter *cf, struct Curl_easy *data, char *buf, size_t len, CURLcode *err) { struct cf_ngtcp2_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); ssize_t nread = -1; struct cf_call_data save; @@ -1345,171 +1385,151 @@ static ssize_t cf_ngtcp2_recv(struct Curl_cfilter *cf, struct Curl_easy *data, DEBUGASSERT(ctx->h3conn); *err = CURLE_OK; - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv(len=%zu) start", - stream->stream3_id, len)); - /* TODO: this implementation of response DATA buffering is fragile. 
- * It makes the following assumptions: - * - the `buf` passed here has the same lifetime as the easy handle - * - data returned in `buf` from this call is immediately used and `buf` - * can be overwritten during any handling of other transfers at - * this connection. - */ - if(!stream->memlen) { - /* `buf` was not known before or is currently not used by stream, - * assign it (again). */ - stream->mem = buf; - stream->len = len; + if(!Curl_bufq_is_empty(&stream->recvbuf)) { + nread = Curl_bufq_read(&stream->recvbuf, + (unsigned char *)buf, len, err); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] read recvbuf(len=%zu) " + "-> %zd, %d", stream->id, len, nread, *err)); + if(nread < 0) + goto out; + report_consumed_data(cf, data, nread); } - /* if there's data in the overflow buffer, move as much - as possible to the receive buffer now */ - drain_overflow_buffer(cf, data); - if(cf_process_ingress(cf, data)) { *err = CURLE_RECV_ERROR; nread = -1; goto out; } - if(stream->memlen) { - nread = stream->memlen; - /* reset to allow more data to come */ - /* TODO: very brittle buffer use design: - * - stream->mem has now `nread` bytes of response data - * - we assume that the caller will use those immediately and - * we can overwrite that with new data on our next invocation from - * anywhere. - */ - stream->mem = buf; - stream->memlen = 0; - stream->len = len; - /* extend the stream window with the data we're consuming and send out - any additional packets to tell the server that we can receive more */ - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv -> %zd bytes", - stream->stream3_id, nread)); + /* recvbuf had nothing before, maybe after progressing ingress? 
*/ + if(nread < 0 && !Curl_bufq_is_empty(&stream->recvbuf)) { + nread = Curl_bufq_read(&stream->recvbuf, + (unsigned char *)buf, len, err); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] read recvbuf(len=%zu) " + "-> %zd, %d", stream->id, len, nread, *err)); + if(nread < 0) + goto out; report_consumed_data(cf, data, nread); - if(cf_flush_egress(cf, data)) { - *err = CURLE_SEND_ERROR; - nread = -1; - } - goto out; } - if(stream->closed) { - nread = recv_closed_stream(cf, data, err); - goto out; + if(nread > 0) { + if(1 || !Curl_bufq_is_empty(&stream->recvbuf)) { + notify_drain(cf, data); + } + } + else { + if(stream->closed) { + nread = recv_closed_stream(cf, data, err); + goto out; + } + data->state.drain = FALSE; + *err = CURLE_AGAIN; + nread = -1; } - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv -> EAGAIN", - stream->stream3_id)); - *err = CURLE_AGAIN; - nread = -1; out: if(cf_flush_egress(cf, data)) { *err = CURLE_SEND_ERROR; nread = -1; goto out; } - + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv(len=%zu) -> %zd, %d", + stream->id, len, nread, *err)); CF_DATA_RESTORE(cf, save); return nread; } -/* this amount of data has now been acked on this stream */ -static int cb_h3_acked_stream_data(nghttp3_conn *conn, int64_t stream_id, - uint64_t datalen, void *user_data, - void *stream_user_data) +static int cb_h3_acked_req_body(nghttp3_conn *conn, int64_t stream_id, + uint64_t datalen, void *user_data, + void *stream_user_data) { struct Curl_cfilter *cf = user_data; struct Curl_easy *data = stream_user_data; - struct HTTP *stream = data->req.p.http; - (void)user_data; + struct stream_ctx *stream = H3_STREAM_CTX(data); (void)cf; - if(!data->set.postfields) { - stream->h3out->used -= datalen; - DEBUGF(LOG_CF(data, cf, "cb_h3_acked_stream_data, %"PRIu64" bytes, " - "%zd left unacked", datalen, stream->h3out->used)); - DEBUGASSERT(stream->h3out->used < H3_SEND_SIZE); - - if(stream->h3out->used == 0) { - int rv = nghttp3_conn_resume_stream(conn, stream_id); 
- if(rv) { - return NGTCP2_ERR_CALLBACK_FAILURE; - } + /* The server ackknowledged `datalen` of bytes from our request body. + * This is a delta. We have kept this data in `sendbuf` for + * re-transmissions and can free it now. */ + Curl_bufq_skip(&stream->sendbuf, datalen); + DEBUGASSERT(stream->sendbuf_len_in_flight >= datalen); + stream->sendbuf_len_in_flight -= datalen; + + /* `sendbuf` *might* now have more room. If so, resume this + * possibly paused stream. And also tell our transfer engine that + * it may continue KEEP_SEND if told to PAUSE. */ + if(!Curl_bufq_is_full(&stream->sendbuf)) { + int rv = nghttp3_conn_resume_stream(conn, stream_id); + if(rv) { + return NGTCP2_ERR_CALLBACK_FAILURE; + } + if((data->req.keepon & KEEP_SEND_HOLD) && + (data->req.keepon & KEEP_SEND)) { + data->req.keepon &= ~KEEP_SEND_HOLD; + notify_drain(cf, data); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] unpausing acks", + stream_id)); } } return 0; } -static nghttp3_ssize cb_h3_readfunction(nghttp3_conn *conn, int64_t stream_id, - nghttp3_vec *vec, size_t veccnt, - uint32_t *pflags, void *user_data, - void *stream_user_data) +static nghttp3_ssize +cb_h3_read_req_body(nghttp3_conn *conn, int64_t stream_id, + nghttp3_vec *vec, size_t veccnt, + uint32_t *pflags, void *user_data, + void *stream_user_data) { struct Curl_cfilter *cf = user_data; struct Curl_easy *data = stream_user_data; - size_t nread; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); + ssize_t nwritten = 0; + size_t nvecs = 0; (void)cf; (void)conn; (void)stream_id; (void)user_data; (void)veccnt; - if(data->set.postfields) { - vec[0].base = data->set.postfields; - vec[0].len = data->state.infilesize; - *pflags = NGHTTP3_DATA_FLAG_EOF; - return 1; - } - - if(stream->upload_len && H3_SEND_SIZE <= stream->h3out->used) { - return NGHTTP3_ERR_WOULDBLOCK; - } - - nread = CURLMIN(stream->upload_len, H3_SEND_SIZE - stream->h3out->used); - if(nread > 0) { - /* nghttp3 wants us to 
hold on to the data until it tells us it is okay to - delete it. Append the data at the end of the h3out buffer. Since we can - only return consecutive data, copy the amount that fits and the next - part comes in next invoke. */ - struct h3out *out = stream->h3out; - if(nread + out->windex > H3_SEND_SIZE) - nread = H3_SEND_SIZE - out->windex; - - memcpy(&out->buf[out->windex], stream->upload_mem, nread); - - /* that's the chunk we return to nghttp3 */ - vec[0].base = &out->buf[out->windex]; - vec[0].len = nread; - - out->windex += nread; - out->used += nread; - - if(out->windex == H3_SEND_SIZE) - out->windex = 0; /* wrap */ - stream->upload_mem += nread; - stream->upload_len -= nread; - if(data->state.infilesize != -1) { - stream->upload_left -= nread; - if(!stream->upload_left) - *pflags = NGHTTP3_DATA_FLAG_EOF; + /* nghttp3 keeps references to the sendbuf data until it is ACKed + * by the server (see `cb_h3_acked_req_body()` for updates). + * `sendbuf_len_in_flight` is the amount of bytes in `sendbuf` + * that we have already passed to nghttp3, but which have not been + * ACKed yet. + * Any amount beyond `sendbuf_len_in_flight` we need still to pass + * to nghttp3. Do that now, if we can. */ + if(stream->sendbuf_len_in_flight < Curl_bufq_len(&stream->sendbuf)) { + nvecs = 0; + while(nvecs < veccnt && + Curl_bufq_peek_at(&stream->sendbuf, + stream->sendbuf_len_in_flight, + (const unsigned char **)&vec[nvecs].base, + &vec[nvecs].len)) { + stream->sendbuf_len_in_flight += vec[nvecs].len; + nwritten += vec[nvecs].len; + ++nvecs; } - DEBUGF(LOG_CF(data, cf, "cb_h3_readfunction %zd bytes%s (at %zd unacked)", - nread, *pflags == NGHTTP3_DATA_FLAG_EOF?" 
EOF":"", - out->used)); + DEBUGASSERT(nvecs > 0); /* we SHOULD have been be able to peek */ } - if(stream->upload_done && !stream->upload_len && - (stream->upload_left <= 0)) { - DEBUGF(LOG_CF(data, cf, "cb_h3_readfunction sets EOF")); + + /* When we stopped sending and everything in `sendbuf` is "in flight", + * we are at the end of the request body. */ + if(stream->upload_done && + stream->sendbuf_len_in_flight == Curl_bufq_len(&stream->sendbuf)) { *pflags = NGHTTP3_DATA_FLAG_EOF; - return nread ? 1 : 0; } - else if(!nread) { + else if(!nwritten) { + /* Not EOF, and nothing to give, we signal WOULDBLOCK. */ + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] read req body -> AGAIN", + stream->id)); return NGHTTP3_ERR_WOULDBLOCK; } - return 1; + + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] read req body -> " + "%d vecs%s with %zu/%zu", stream->id, + (int)nvecs, *pflags == NGHTTP3_DATA_FLAG_EOF?" EOF":"", + nwritten, Curl_bufq_len(&stream->sendbuf))); + return (nghttp3_ssize)nvecs; } /* Index where :authority header field will appear in request header @@ -1522,104 +1542,78 @@ static CURLcode h3_stream_open(struct Curl_cfilter *cf, size_t len) { struct cf_ngtcp2_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); size_t nheader; CURLcode result = CURLE_OK; nghttp3_nv *nva = NULL; - int64_t stream3_id; int rc = 0; - struct h3out *h3out = NULL; + unsigned int i; struct h2h3req *hreq = NULL; + nghttp3_data_reader reader; + nghttp3_data_reader *preader = NULL; - rc = ngtcp2_conn_open_bidi_stream(ctx->qconn, &stream3_id, NULL); + rc = ngtcp2_conn_open_bidi_stream(ctx->qconn, &stream->id, NULL); if(rc) { failf(data, "can get bidi streams"); - goto fail; + goto out; } - stream->stream3_id = stream3_id; - stream->h3req = TRUE; - Curl_dyn_init(&stream->overflow, CURL_MAX_READ_SIZE); - stream->recv_buf_nonflow = 0; - result = Curl_pseudo_headers(data, mem, len, NULL, &hreq); if(result) - goto fail; + goto out; nheader 
= hreq->entries; nva = malloc(sizeof(nghttp3_nv) * nheader); if(!nva) { result = CURLE_OUT_OF_MEMORY; - goto fail; + goto out; } - else { - unsigned int i; - for(i = 0; i < nheader; i++) { - nva[i].name = (unsigned char *)hreq->header[i].name; - nva[i].namelen = hreq->header[i].namelen; - nva[i].value = (unsigned char *)hreq->header[i].value; - nva[i].valuelen = hreq->header[i].valuelen; - nva[i].flags = NGHTTP3_NV_FLAG_NONE; - } + + for(i = 0; i < nheader; i++) { + nva[i].name = (unsigned char *)hreq->header[i].name; + nva[i].namelen = hreq->header[i].namelen; + nva[i].value = (unsigned char *)hreq->header[i].value; + nva[i].valuelen = hreq->header[i].valuelen; + nva[i].flags = NGHTTP3_NV_FLAG_NONE; } switch(data->state.httpreq) { case HTTPREQ_POST: case HTTPREQ_POST_FORM: case HTTPREQ_POST_MIME: - case HTTPREQ_PUT: { - nghttp3_data_reader data_reader; - if(data->state.infilesize != -1) - stream->upload_left = data->state.infilesize; - else - /* data sending without specifying the data amount up front */ - stream->upload_left = -1; /* unknown, but not zero */ - - data_reader.read_data = cb_h3_readfunction; - - h3out = calloc(sizeof(struct h3out), 1); - if(!h3out) { - result = CURLE_OUT_OF_MEMORY; - goto fail; - } - stream->h3out = h3out; - - rc = nghttp3_conn_submit_request(ctx->h3conn, stream->stream3_id, - nva, nheader, &data_reader, data); - if(rc) - goto fail; + case HTTPREQ_PUT: + /* known request body size or -1 */ + reader.read_data = cb_h3_read_req_body; + preader = &reader; break; - } default: - stream->upload_left = 0; /* nothing left to send */ - rc = nghttp3_conn_submit_request(ctx->h3conn, stream->stream3_id, - nva, nheader, NULL, data); - if(rc) - goto fail; + /* there is not request body */ + stream->upload_done = TRUE; + preader = NULL; break; } - Curl_safefree(nva); + rc = nghttp3_conn_submit_request(ctx->h3conn, stream->id, + nva, nheader, preader, data); + if(rc) + goto out; infof(data, "Using HTTP/3 Stream ID: %" PRId64 " (easy handle %p)", - 
stream3_id, (void *)data); + stream->id, (void *)data); DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] opened for %s", - stream3_id, data->state.url)); + stream->id, data->state.url)); - Curl_pseudo_free(hreq); - return CURLE_OK; - -fail: - if(rc) { +out: + if(!result && rc) { switch(rc) { case NGHTTP3_ERR_CONN_CLOSING: DEBUGF(LOG_CF(data, cf, "h3sid[%"PRId64"] failed to send, " - "connection is closing", stream->stream3_id)); + "connection is closing", stream->id)); result = CURLE_RECV_ERROR; break; default: DEBUGF(LOG_CF(data, cf, "h3sid[%"PRId64"] failed to send -> %d (%s)", - stream->stream3_id, rc, ngtcp2_strerror(rc))); + stream->id, rc, ngtcp2_strerror(rc))); result = CURLE_SEND_ERROR; break; } @@ -1633,8 +1627,8 @@ static ssize_t cf_ngtcp2_send(struct Curl_cfilter *cf, struct Curl_easy *data, const void *buf, size_t len, CURLcode *err) { struct cf_ngtcp2_ctx *ctx = cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(data); ssize_t sent = 0; - struct HTTP *stream = data->req.p.http; struct cf_call_data save; CF_DATA_SAVE(save, cf, data); @@ -1649,7 +1643,7 @@ static ssize_t cf_ngtcp2_send(struct Curl_cfilter *cf, struct Curl_easy *data, goto out; } - if(!stream->h3req) { + if(stream->id < 0) { CURLcode result = h3_stream_open(cf, data, buf, len); if(result) { DEBUGF(LOG_CF(data, cf, "failed to open stream -> %d", result)); @@ -1662,18 +1656,22 @@ static ssize_t cf_ngtcp2_send(struct Curl_cfilter *cf, struct Curl_easy *data, sent = len; } else { - DEBUGF(LOG_CF(data, cf, "ngh3_stream_send() wants to send %zd bytes", - len)); - if(!stream->upload_len) { - stream->upload_mem = buf; - stream->upload_len = len; - (void)nghttp3_conn_resume_stream(ctx->h3conn, stream->stream3_id); - } - else { - *err = CURLE_AGAIN; - sent = -1; + sent = Curl_bufq_write(&stream->sendbuf, buf, len, err); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_send, add to " + "sendbuf(len=%zu) -> %zd, %d", + stream->id, len, sent, *err)); + if(sent < 0) { + if(*err == CURLE_AGAIN) { + /* 
Can't add more to the send buf, needs to drain first. + * Pause the sending to avoid a busy loop. */ + data->req.keepon |= KEEP_SEND_HOLD; + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] pause send", + stream->id)); + } goto out; } + + (void)nghttp3_conn_resume_stream(ctx->h3conn, stream->id); } if(cf_flush_egress(cf, data)) { @@ -1682,24 +1680,6 @@ static ssize_t cf_ngtcp2_send(struct Curl_cfilter *cf, struct Curl_easy *data, goto out; } - /* Reset post upload buffer after resumed. */ - if(stream->upload_mem) { - if(data->set.postfields) { - sent = len; - } - else { - sent = len - stream->upload_len; - } - - stream->upload_mem = NULL; - stream->upload_len = 0; - - if(sent == 0) { - *err = CURLE_AGAIN; - sent = -1; - goto out; - } - } out: CF_DATA_RESTORE(cf, save); return sent; @@ -1757,281 +1737,311 @@ static CURLcode qng_verify_peer(struct Curl_cfilter *cf, return result; } -static CURLcode cf_process_ingress(struct Curl_cfilter *cf, - struct Curl_easy *data) +struct recv_ctx { + struct Curl_cfilter *cf; + struct Curl_easy *data; + ngtcp2_tstamp ts; + size_t pkt_count; +}; + +static CURLcode recv_pkt(const unsigned char *pkt, size_t pktlen, + struct sockaddr_storage *remote_addr, + socklen_t remote_addrlen, int ecn, + void *userp) { - struct cf_ngtcp2_ctx *ctx = cf->ctx; - ssize_t recvd; - int rv; - uint8_t buf[65536]; - int bufsize = (int)sizeof(buf); - size_t pktcount = 0, total_recvd = 0; - struct sockaddr_storage remote_addr; - socklen_t remote_addrlen; + struct recv_ctx *r = userp; + struct cf_ngtcp2_ctx *ctx = r->cf->ctx; + ngtcp2_pkt_info pi; ngtcp2_path path; - ngtcp2_tstamp ts = timestamp(); - ngtcp2_pkt_info pi = { 0 }; + int rv; - for(;;) { - remote_addrlen = sizeof(remote_addr); - while((recvd = recvfrom(ctx->q.sockfd, (char *)buf, bufsize, 0, - (struct sockaddr *)&remote_addr, - &remote_addrlen)) == -1 && - SOCKERRNO == EINTR) - ; - if(recvd == -1) { - if(SOCKERRNO == EAGAIN || SOCKERRNO == EWOULDBLOCK) { - DEBUGF(LOG_CF(data, cf, "ingress, recvfrom 
-> EAGAIN")); - goto out; + ++r->pkt_count; + ngtcp2_addr_init(&path.local, (struct sockaddr *)&ctx->q.local_addr, + ctx->q.local_addrlen); + ngtcp2_addr_init(&path.remote, (struct sockaddr *)remote_addr, + remote_addrlen); + pi.ecn = (uint32_t)ecn; + + rv = ngtcp2_conn_read_pkt(ctx->qconn, &path, &pi, pkt, pktlen, r->ts); + if(rv) { + DEBUGF(LOG_CF(r->data, r->cf, "ingress, read_pkt -> %s", + ngtcp2_strerror(rv))); + if(!ctx->last_error.error_code) { + if(rv == NGTCP2_ERR_CRYPTO) { + ngtcp2_connection_close_error_set_transport_error_tls_alert( + &ctx->last_error, + ngtcp2_conn_get_tls_alert(ctx->qconn), NULL, 0); } - if(!cf->connected && SOCKERRNO == ECONNREFUSED) { - const char *r_ip; - int r_port; - Curl_cf_socket_peek(cf->next, data, NULL, NULL, - &r_ip, &r_port, NULL, NULL); - failf(data, "ngtcp2: connection to %s port %u refused", - r_ip, r_port); - return CURLE_COULDNT_CONNECT; + else { + ngtcp2_connection_close_error_set_transport_error_liberr( + &ctx->last_error, rv, NULL, 0); } - failf(data, "ngtcp2: recvfrom() unexpectedly returned %zd (errno=%d)", - recvd, SOCKERRNO); - return CURLE_RECV_ERROR; } - if(recvd > 0 && !ctx->got_first_byte) { - ctx->first_byte_at = Curl_now(); - ctx->got_first_byte = TRUE; - } - - ++pktcount; - total_recvd += recvd; - - ngtcp2_addr_init(&path.local, (struct sockaddr *)&ctx->q.local_addr, - ctx->q.local_addrlen); - ngtcp2_addr_init(&path.remote, (struct sockaddr *)&remote_addr, - remote_addrlen); - - rv = ngtcp2_conn_read_pkt(ctx->qconn, &path, &pi, buf, recvd, ts); - if(rv) { - DEBUGF(LOG_CF(data, cf, "ingress, read_pkt -> %s", - ngtcp2_strerror(rv))); - if(!ctx->last_error.error_code) { - if(rv == NGTCP2_ERR_CRYPTO) { - ngtcp2_connection_close_error_set_transport_error_tls_alert( - &ctx->last_error, - ngtcp2_conn_get_tls_alert(ctx->qconn), NULL, 0); - } - else { - ngtcp2_connection_close_error_set_transport_error_liberr( - &ctx->last_error, rv, NULL, 0); - } - } - - if(rv == NGTCP2_ERR_CRYPTO) - /* this is a "TLS problem", 
but a failed certificate verification - is a common reason for this */ - return CURLE_PEER_FAILED_VERIFICATION; - return CURLE_RECV_ERROR; - } + if(rv == NGTCP2_ERR_CRYPTO) + /* this is a "TLS problem", but a failed certificate verification + is a common reason for this */ + return CURLE_PEER_FAILED_VERIFICATION; + return CURLE_RECV_ERROR; } -out: - (void)pktcount; - (void)total_recvd; - DEBUGF(LOG_CF(data, cf, "ingress, recvd %zu packets with %zd bytes", - pktcount, total_recvd)); return CURLE_OK; } -static CURLcode cf_flush_egress(struct Curl_cfilter *cf, - struct Curl_easy *data) +static CURLcode cf_process_ingress(struct Curl_cfilter *cf, + struct Curl_easy *data) { struct cf_ngtcp2_ctx *ctx = cf->ctx; - int rv; - size_t sent; - ngtcp2_ssize outlen; - uint8_t *outpos = ctx->q.pktbuf; - size_t max_udp_payload_size = - ngtcp2_conn_get_max_tx_udp_payload_size(ctx->qconn); - size_t path_max_udp_payload_size = - ngtcp2_conn_get_path_max_tx_udp_payload_size(ctx->qconn); - size_t max_pktcnt = - CURLMIN(MAX_PKT_BURST, ctx->q.pktbuflen / max_udp_payload_size); - size_t pktcnt = 0; - size_t gsolen = 0; /* this disables gso until we have a clue */ - ngtcp2_path_storage ps; - ngtcp2_tstamp ts = timestamp(); - ngtcp2_tstamp expiry; - ngtcp2_duration timeout; - int64_t stream_id; - nghttp3_ssize veccnt; - int fin; - nghttp3_vec vec[16]; - ngtcp2_ssize ndatalen; - uint32_t flags; - CURLcode curlcode; + struct recv_ctx rctx; + size_t pkts_chunk = 128, i; + size_t pkts_max = 10 * pkts_chunk; + CURLcode result; - rv = ngtcp2_conn_handle_expiry(ctx->qconn, ts); - if(rv) { - failf(data, "ngtcp2_conn_handle_expiry returned error: %s", - ngtcp2_strerror(rv)); - ngtcp2_connection_close_error_set_transport_error_liberr(&ctx->last_error, - rv, NULL, 0); - return CURLE_SEND_ERROR; - } + rctx.cf = cf; + rctx.data = data; + rctx.ts = timestamp(); + rctx.pkt_count = 0; - if(ctx->q.num_blocked_pkt) { - curlcode = vquic_send_blocked_pkt(cf, data, &ctx->q); - if(curlcode) { - if(curlcode == 
CURLE_AGAIN) { - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; - } - return curlcode; - } + for(i = 0; i < pkts_max; i += pkts_chunk) { + rctx.pkt_count = 0; + result = vquic_recv_packets(cf, data, &ctx->q, pkts_chunk, + recv_pkt, &rctx); + if(result) /* error */ + break; + if(rctx.pkt_count < pkts_chunk) /* got less than we could */ + break; + /* give egress a chance before we receive more */ + result = cf_flush_egress(cf, data); } + return result; +} - ngtcp2_path_storage_zero(&ps); +struct read_ctx { + struct Curl_cfilter *cf; + struct Curl_easy *data; + ngtcp2_tstamp ts; + ngtcp2_path_storage *ps; +}; +/** + * Read a network packet to send from ngtcp2 into `buf`. + * Return number of bytes written or -1 with *err set. + */ +static ssize_t read_pkt_to_send(void *userp, + unsigned char *buf, size_t buflen, + CURLcode *err) +{ + struct read_ctx *x = userp; + struct cf_ngtcp2_ctx *ctx = x->cf->ctx; + nghttp3_vec vec[16]; + nghttp3_ssize veccnt; + ngtcp2_ssize ndatalen; + uint32_t flags; + int64_t stream_id; + int fin; + ssize_t nwritten, n; + veccnt = 0; + stream_id = -1; + fin = 0; + + /* ngtcp2 may want to put several frames from different streams into + * this packet. `NGTCP2_WRITE_STREAM_FLAG_MORE` tells it to do so. + * When `NGTCP2_ERR_WRITE_MORE` is returned, we *need* to make + * another iteration. + * When ngtcp2 is happy (because it has no other frame that would fit + * or it has nothing more to send), it returns the total length + * of the assembled packet. This may be 0 if there was nothing to send. 
*/ + nwritten = 0; + *err = CURLE_OK; for(;;) { - veccnt = 0; - stream_id = -1; - fin = 0; if(ctx->h3conn && ngtcp2_conn_get_max_data_left(ctx->qconn)) { veccnt = nghttp3_conn_writev_stream(ctx->h3conn, &stream_id, &fin, vec, sizeof(vec) / sizeof(vec[0])); if(veccnt < 0) { - failf(data, "nghttp3_conn_writev_stream returned error: %s", + failf(x->data, "nghttp3_conn_writev_stream returned error: %s", nghttp3_strerror((int)veccnt)); ngtcp2_connection_close_error_set_application_error( &ctx->last_error, nghttp3_err_infer_quic_app_error_code((int)veccnt), NULL, 0); - return CURLE_SEND_ERROR; + *err = CURLE_SEND_ERROR; + return -1; } } flags = NGTCP2_WRITE_STREAM_FLAG_MORE | (fin ? NGTCP2_WRITE_STREAM_FLAG_FIN : 0); - outlen = ngtcp2_conn_writev_stream(ctx->qconn, &ps.path, NULL, outpos, - max_udp_payload_size, - &ndatalen, flags, stream_id, - (const ngtcp2_vec *)vec, veccnt, ts); - if(outlen == 0) { - /* ngtcp2 does not want to send more packets, if the buffer is - * not empty, send that now */ - if(outpos != ctx->q.pktbuf) { - curlcode = vquic_send_packet(cf, data, &ctx->q, ctx->q.pktbuf, - outpos - ctx->q.pktbuf, gsolen, &sent); - if(curlcode) { - if(curlcode == CURLE_AGAIN) { - vquic_push_blocked_pkt(cf, &ctx->q, ctx->q.pktbuf + sent, - outpos - ctx->q.pktbuf - sent, - gsolen); - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; - } - return curlcode; - } - } - /* done for now */ + n = ngtcp2_conn_writev_stream(ctx->qconn, x->ps? 
&x->ps->path : NULL, + NULL, buf, buflen, + &ndatalen, flags, stream_id, + (const ngtcp2_vec *)vec, veccnt, x->ts); + if(n == 0) { + /* nothing to send */ + *err = CURLE_AGAIN; + nwritten = -1; goto out; } - if(outlen < 0) { - switch(outlen) { + else if(n < 0) { + switch(n) { case NGTCP2_ERR_STREAM_DATA_BLOCKED: - assert(ndatalen == -1); + DEBUGASSERT(ndatalen == -1); nghttp3_conn_block_stream(ctx->h3conn, stream_id); - continue; + n = 0; + break; case NGTCP2_ERR_STREAM_SHUT_WR: - assert(ndatalen == -1); + DEBUGASSERT(ndatalen == -1); nghttp3_conn_shutdown_stream_write(ctx->h3conn, stream_id); - continue; + n = 0; + break; case NGTCP2_ERR_WRITE_MORE: /* ngtcp2 wants to send more. update the flow of the stream whose data * is in the buffer and continue */ - assert(ndatalen >= 0); - rv = nghttp3_conn_add_write_offset(ctx->h3conn, stream_id, ndatalen); - if(rv) { - failf(data, "nghttp3_conn_add_write_offset returned error: %s\n", - nghttp3_strerror(rv)); - return CURLE_SEND_ERROR; - } - continue; + DEBUGASSERT(ndatalen >= 0); + n = 0; + break; default: - assert(ndatalen == -1); - failf(data, "ngtcp2_conn_writev_stream returned error: %s", - ngtcp2_strerror((int)outlen)); + DEBUGASSERT(ndatalen == -1); + failf(x->data, "ngtcp2_conn_writev_stream returned error: %s", + ngtcp2_strerror((int)n)); ngtcp2_connection_close_error_set_transport_error_liberr( - &ctx->last_error, (int)outlen, NULL, 0); - return CURLE_SEND_ERROR; + &ctx->last_error, (int)n, NULL, 0); + *err = CURLE_SEND_ERROR; + nwritten = -1; + goto out; } } - else if(ndatalen >= 0) { - /* ngtcp2 thinks it has added all it wants. 
Update the stream */ - rv = nghttp3_conn_add_write_offset(ctx->h3conn, stream_id, ndatalen); + + if(ndatalen >= 0) { + /* we add the amount of data bytes to the flow windows */ + int rv = nghttp3_conn_add_write_offset(ctx->h3conn, stream_id, ndatalen); if(rv) { - failf(data, "nghttp3_conn_add_write_offset returned error: %s\n", + failf(x->data, "nghttp3_conn_add_write_offset returned error: %s\n", nghttp3_strerror(rv)); return CURLE_SEND_ERROR; } } - /* advance to the end of the buffered packet data */ - outpos += outlen; + if(n > 0) { + /* packet assembled, leave */ + nwritten = n; + goto out; + } + } +out: + return nwritten; +} - if(pktcnt == 0) { - /* first packet buffer chunk. use this as gsolen. It's how ngtcp2 - * indicates the intended segment size. */ - gsolen = outlen; +static CURLcode cf_flush_egress(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct cf_ngtcp2_ctx *ctx = cf->ctx; + int rv; + ssize_t nread; + size_t max_payload_size, path_max_payload_size, max_pktcnt; + size_t pktcnt = 0; + size_t gsolen = 0; /* this disables gso until we have a clue */ + ngtcp2_path_storage ps; + ngtcp2_tstamp ts = timestamp(); + ngtcp2_tstamp expiry; + ngtcp2_duration timeout; + CURLcode curlcode; + struct read_ctx readx; + + rv = ngtcp2_conn_handle_expiry(ctx->qconn, ts); + if(rv) { + failf(data, "ngtcp2_conn_handle_expiry returned error: %s", + ngtcp2_strerror(rv)); + ngtcp2_connection_close_error_set_transport_error_liberr(&ctx->last_error, + rv, NULL, 0); + return CURLE_SEND_ERROR; + } + + curlcode = vquic_flush(cf, data, &ctx->q); + if(curlcode) { + if(curlcode == CURLE_AGAIN) { + Curl_expire(data, 1, EXPIRE_QUIC); + return CURLE_OK; } - else if((size_t)outlen > gsolen || - (gsolen > path_max_udp_payload_size && (size_t)outlen != gsolen)) { - /* Packet larger than path_max_udp_payload_size is PMTUD probe - packet and it might not be sent because of EMSGSIZE. Send - them separately to minimize the loss. 
*/ - /* send the pktbuf *before* the last addition */ - curlcode = vquic_send_packet(cf, data, &ctx->q, ctx->q.pktbuf, - outpos - outlen - ctx->q.pktbuf, gsolen, &sent); + return curlcode; + } + + ngtcp2_path_storage_zero(&ps); + + /* In UDP, there is a maximum theoretical packet paload length and + * a minimum payload length that is "guarantueed" to work. + * To detect if this minimum payload can be increased, ngtcp2 sends + * now and then a packet payload larger than the minimum. It that + * is ACKed by the peer, both parties know that it works and + * the subsequent packets can use a larger one. + * This is called PMTUD (Path Maximum Transmission Unit Discovery). + * Since a PMTUD might be rejected right on send, we do not want it + * be followed by other packets of lesser size. Because those would + * also fail then. So, if we detect a PMTUD while buffering, we flush. + */ + max_payload_size = ngtcp2_conn_get_max_tx_udp_payload_size(ctx->qconn); + path_max_payload_size = + ngtcp2_conn_get_path_max_tx_udp_payload_size(ctx->qconn); + /* maximum number of packets buffered before we flush to the socket */ + max_pktcnt = CURLMIN(MAX_PKT_BURST, + ctx->q.sendbuf.chunk_size / max_payload_size); + + readx.cf = cf; + readx.data = data; + readx.ts = ts; + readx.ps = &ps; + + for(;;) { + /* add the next packet to send, if any, to our buffer */ + nread = Curl_bufq_sipn(&ctx->q.sendbuf, max_payload_size, + read_pkt_to_send, &readx, &curlcode); + /* DEBUGF(LOG_CF(data, cf, "sip packet(maxlen=%zu) -> %zd, %d", + max_payload_size, nread, curlcode)); */ + if(nread < 0) { + if(curlcode != CURLE_AGAIN) + return curlcode; + /* Nothing more to add, flush and leave */ + curlcode = vquic_send(cf, data, &ctx->q, gsolen); if(curlcode) { if(curlcode == CURLE_AGAIN) { - /* blocked, add the pktbuf *before* and *at* the last addition - * separately to the blocked packages */ - vquic_push_blocked_pkt(cf, &ctx->q, ctx->q.pktbuf + sent, - outpos - outlen - ctx->q.pktbuf - sent, gsolen); - 
vquic_push_blocked_pkt(cf, &ctx->q, outpos - outlen, outlen, outlen); Curl_expire(data, 1, EXPIRE_QUIC); return CURLE_OK; } return curlcode; } - /* send the pktbuf *at* the last addition */ - curlcode = vquic_send_packet(cf, data, &ctx->q, outpos - outlen, outlen, - outlen, &sent); + goto out; + } + + DEBUGASSERT(nread > 0); + if(pktcnt == 0) { + /* first packet in buffer. This is either of a known, "good" + * payload size or it is a PMTUD. We'll see. */ + gsolen = (size_t)nread; + } + else if((size_t)nread > gsolen || + (gsolen > path_max_payload_size && (size_t)nread != gsolen)) { + /* The just added packet is a PMTUD *or* the one(s) before the + * just added were PMTUD and the last one is smaller. + * Flush the buffer before the last add. */ + curlcode = vquic_send_tail_split(cf, data, &ctx->q, + gsolen, nread, nread); if(curlcode) { if(curlcode == CURLE_AGAIN) { - assert(0 == sent); - vquic_push_blocked_pkt(cf, &ctx->q, outpos - outlen, outlen, outlen); Curl_expire(data, 1, EXPIRE_QUIC); return CURLE_OK; } return curlcode; } - /* pktbuf has been completely sent */ pktcnt = 0; - outpos = ctx->q.pktbuf; continue; } - if(++pktcnt >= max_pktcnt || (size_t)outlen < gsolen) { - /* enough packets or last one is shorter than the intended - * segment size, indicating that it is time to send. 
*/ - curlcode = vquic_send_packet(cf, data, &ctx->q, ctx->q.pktbuf, - outpos - ctx->q.pktbuf, gsolen, &sent); + if(++pktcnt >= max_pktcnt || (size_t)nread < gsolen) { + /* Reached MAX_PKT_BURST *or* + * the capacity of our buffer *or* + * last add was shorter than the previous ones, flush */ + curlcode = vquic_send(cf, data, &ctx->q, gsolen); if(curlcode) { if(curlcode == CURLE_AGAIN) { - vquic_push_blocked_pkt(cf, &ctx->q, ctx->q.pktbuf + sent, - outpos - ctx->q.pktbuf - sent, gsolen); Curl_expire(data, 1, EXPIRE_QUIC); return CURLE_OK; } @@ -2039,7 +2049,6 @@ static CURLcode cf_flush_egress(struct Curl_cfilter *cf, } /* pktbuf has been completely sent */ pktcnt = 0; - outpos = ctx->q.pktbuf; } } @@ -2069,13 +2078,9 @@ out: static bool cf_ngtcp2_data_pending(struct Curl_cfilter *cf, const struct Curl_easy *data) { - /* We may have received more data than we're able to hold in the receive - buffer and allocated an overflow buffer. Since it's possible that - there's no more data coming on the socket, we need to keep reading - until the overflow buffer is empty. 
*/ - const struct HTTP *stream = data->req.p.http; + const struct stream_ctx *stream = H3_STREAM_CTX(data); (void)cf; - return Curl_dyn_len(&stream->overflow) > 0; + return !Curl_bufq_is_empty(&stream->recvbuf); } static CURLcode cf_ngtcp2_data_event(struct Curl_cfilter *cf, @@ -2090,16 +2095,18 @@ static CURLcode cf_ngtcp2_data_event(struct Curl_cfilter *cf, (void)arg1; (void)arg2; switch(event) { + case CF_CTRL_DATA_SETUP: { + result = h3_data_setup(cf, data); + break; + } case CF_CTRL_DATA_DONE: { - struct HTTP *stream = data->req.p.http; - Curl_dyn_free(&stream->overflow); - free(stream->h3out); + h3_data_done(cf, data); break; } case CF_CTRL_DATA_DONE_SEND: { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); stream->upload_done = TRUE; - (void)nghttp3_conn_resume_stream(ctx->h3conn, stream->stream3_id); + (void)nghttp3_conn_resume_stream(ctx->h3conn, stream->id); break; } case CF_CTRL_DATA_IDLE: @@ -2147,6 +2154,7 @@ static void cf_ngtcp2_ctx_clear(struct cf_ngtcp2_ctx *ctx) nghttp3_conn_del(ctx->h3conn); if(ctx->qconn) ngtcp2_conn_del(ctx->qconn); + Curl_bufcp_free(&ctx->stream_bufcp); memset(ctx, 0, sizeof(*ctx)); ctx->qlogfd = -1; @@ -2212,6 +2220,10 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, int qfd; ctx->version = NGTCP2_PROTO_VER_MAX; + ctx->max_stream_window = H3_STREAM_WINDOW_SIZE; + Curl_bufcp_init(&ctx->stream_bufcp, H3_STREAM_CHUNK_SIZE, + H3_STREAM_POOL_SPARES); + #ifdef USE_OPENSSL result = quic_ssl_ctx(&ctx->sslctx, cf, data); if(result) @@ -2244,8 +2256,11 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, ctx->qlogfd = qfd; /* -1 if failure above */ quic_settings(ctx, data); - result = vquic_ctx_init(&ctx->q, - NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE * MAX_PKT_BURST); + result = vquic_ctx_init(&ctx->q); + if(result) + return result; + + result = h3_data_setup(cf, data); if(result) return result; diff --git a/lib/vquic/curl_quiche.c b/lib/vquic/curl_quiche.c index 
87a221cc1..01a0d2c7e 100644 --- a/lib/vquic/curl_quiche.c +++ b/lib/vquic/curl_quiche.c @@ -28,6 +28,7 @@ #include <quiche.h> #include <openssl/err.h> #include <openssl/ssl.h> +#include "bufq.h" #include "urldata.h" #include "cfilters.h" #include "cf-socket.h" @@ -52,14 +53,26 @@ #include "curl_memory.h" #include "memdebug.h" - -#define QUIC_MAX_STREAMS (256*1024) -#define QUIC_MAX_DATA (1*1024*1024) -#define QUIC_IDLE_TIMEOUT (60 * 1000) /* milliseconds */ - -/* how many UDP packets to send max in one call */ -#define MAX_PKT_BURST 10 -#define MAX_UDP_PAYLOAD_SIZE 1452 +/* #define DEBUG_QUICHE */ + +#define QUIC_MAX_STREAMS (100) +#define QUIC_IDLE_TIMEOUT (5 * 1000) /* milliseconds */ + +#define H3_STREAM_WINDOW_SIZE (128 * 1024) +#define H3_STREAM_CHUNK_SIZE (16 * 1024) +/* The pool keeps spares around and half of a full stream windows + * seems good. More does not seem to improve performance. + * The benefit of the pool is that stream buffer to not keep + * spares. So memory consumption goes down when streams run empty, + * have a large upload done, etc. */ +#define H3_STREAM_POOL_SPARES \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE ) / 2 +/* Receive and Send max number of chunks just follows from the + * chunk size and window size */ +#define H3_STREAM_RECV_CHUNKS \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE) +#define H3_STREAM_SEND_CHUNKS \ + (H3_STREAM_WINDOW_SIZE / H3_STREAM_CHUNK_SIZE) /* * Store quiche version info in this buffer. 
@@ -123,18 +136,6 @@ static SSL_CTX *quic_ssl_ctx(struct Curl_easy *data) return ssl_ctx; } -struct quic_handshake { - char *buf; /* pointer to the buffer */ - size_t alloclen; /* size of allocation */ - size_t len; /* size of content in buffer */ - size_t nread; /* how many bytes have been read */ -}; - -struct h3_event_node { - struct h3_event_node *next; - quiche_h3_event *ev; -}; - struct cf_quiche_ctx { struct cf_quic_ctx q; quiche_conn *qconn; @@ -148,11 +149,13 @@ struct cf_quiche_ctx { struct curltime handshake_at; /* time connect handshake finished */ struct curltime first_byte_at; /* when first byte was recvd */ struct curltime reconnect_at; /* time the next attempt should start */ + struct bufc_pool stream_bufcp; /* chunk pool for streams */ + curl_off_t data_recvd; + size_t sends_on_hold; /* # of streams with SEND_HOLD set */ BIT(goaway); /* got GOAWAY from server */ BIT(got_first_byte); /* if first byte was received */ }; - #ifdef DEBUG_QUICHE static void quiche_debug_log(const char *line, void *argp) { @@ -161,21 +164,6 @@ static void quiche_debug_log(const char *line, void *argp) } #endif -static void h3_clear_pending(struct Curl_easy *data) -{ - struct HTTP *stream = data->req.p.http; - - if(stream->pending) { - struct h3_event_node *node, *next; - for(node = stream->pending; node; node = next) { - next = node->next; - quiche_h3_event_free(node->ev); - free(node); - } - stream->pending = NULL; - } -} - static void cf_quiche_ctx_clear(struct cf_quiche_ctx *ctx) { if(ctx) { @@ -188,129 +176,282 @@ static void cf_quiche_ctx_clear(struct cf_quiche_ctx *ctx) quiche_h3_conn_free(ctx->h3c); if(ctx->cfg) quiche_config_free(ctx->cfg); + Curl_bufcp_free(&ctx->stream_bufcp); memset(ctx, 0, sizeof(*ctx)); } } -static void notify_drain(struct Curl_cfilter *cf, - struct Curl_easy *data) +/** + * All about the H3 internals of a stream + */ +struct stream_ctx { + int64_t id; /* HTTP/3 protocol stream identifier */ + struct bufq recvbuf; /* h3 response */ + size_t 
req_hds_len; /* how many bytes in the first send are headers */ + uint64_t error3; /* HTTP/3 stream error code */ + bool closed; /* TRUE on stream close */ + bool reset; /* TRUE on stream reset */ + bool upload_done; /* stream is locally closed */ + bool resp_hds_complete; /* complete, final response has been received */ + bool resp_got_header; /* TRUE when h3 stream has recvd some HEADER */ +}; + +#define H3_STREAM_CTX(d) ((struct stream_ctx *)(((d) && (d)->req.p.http)? \ + ((struct HTTP *)(d)->req.p.http)->impl_ctx \ + : NULL)) +#define H3_STREAM_LCTX(d) ((struct HTTP *)(d)->req.p.http)->impl_ctx +#define H3_STREAM_ID(d) (H3_STREAM_CTX(d)? \ + H3_STREAM_CTX(d)->id : -2) + +static bool stream_send_is_suspended(struct Curl_easy *data) { - (void)cf; - data->state.drain = 1; - Curl_expire(data, 0, EXPIRE_RUN_NOW); + return (data->req.keepon & KEEP_SEND_HOLD); } -static CURLcode h3_add_event(struct Curl_cfilter *cf, - struct Curl_easy *data, - int64_t stream3_id, quiche_h3_event *ev) +static void stream_send_suspend(struct Curl_cfilter *cf, + struct Curl_easy *data) { - struct Curl_easy *mdata; - struct h3_event_node *node, **pnext; + struct cf_quiche_ctx *ctx = cf->ctx; - DEBUGASSERT(data->multi); - for(mdata = data->multi->easyp; mdata; mdata = mdata->next) { - if(mdata->req.p.http && mdata->req.p.http->stream3_id == stream3_id) { - break; + if((data->req.keepon & KEEP_SENDBITS) == KEEP_SEND) { + data->req.keepon |= KEEP_SEND_HOLD; + ++ctx->sends_on_hold; + if(H3_STREAM_ID(data) >= 0) + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] suspend sending", + H3_STREAM_ID(data))); + else + DEBUGF(LOG_CF(data, cf, "[%s] suspend sending", + data->state.url)); + } +} + +static void stream_send_resume(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct cf_quiche_ctx *ctx = cf->ctx; + + if(stream_send_is_suspended(data)) { + data->req.keepon &= ~KEEP_SEND_HOLD; + --ctx->sends_on_hold; + if(H3_STREAM_ID(data) >= 0) + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] resume 
sending", + H3_STREAM_ID(data))); + else + DEBUGF(LOG_CF(data, cf, "[%s] resume sending", + data->state.url)); + Curl_expire(data, 0, EXPIRE_RUN_NOW); + } +} + +static void check_resumes(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct cf_quiche_ctx *ctx = cf->ctx; + struct Curl_easy *sdata; + + if(ctx->sends_on_hold) { + DEBUGASSERT(data->multi); + for(sdata = data->multi->easyp; + sdata && ctx->sends_on_hold; sdata = sdata->next) { + if(stream_send_is_suspended(sdata)) { + stream_send_resume(cf, sdata); + } } } +} - if(!mdata) { - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] event discarded, easy handle " - "not found", stream3_id)); - quiche_h3_event_free(ev); +static CURLcode h3_data_setup(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct cf_quiche_ctx *ctx = cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(data); + + if(stream) return CURLE_OK; - } - node = calloc(sizeof(*node), 1); - if(!node) { - quiche_h3_event_free(ev); + stream = calloc(1, sizeof(*stream)); + if(!stream) return CURLE_OUT_OF_MEMORY; + + H3_STREAM_LCTX(data) = stream; + stream->id = -1; + Curl_bufq_initp(&stream->recvbuf, &ctx->stream_bufcp, + H3_STREAM_RECV_CHUNKS, BUFQ_OPT_SOFT_LIMIT); + DEBUGF(LOG_CF(data, cf, "data setup (easy %p)", (void *)data)); + return CURLE_OK; +} + +static void h3_data_done(struct Curl_cfilter *cf, struct Curl_easy *data) +{ + struct cf_quiche_ctx *ctx = cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(data); + + (void)cf; + if(stream) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] easy handle is done", + stream->id)); + if(stream_send_is_suspended(data)) { + data->req.keepon &= ~KEEP_SEND_HOLD; + --ctx->sends_on_hold; + } + Curl_bufq_free(&stream->recvbuf); + free(stream); + H3_STREAM_LCTX(data) = NULL; + } +} + +static void notify_drain(struct Curl_cfilter *cf, struct Curl_easy *data) +{ + (void)cf; + if(!data->state.drain) { + data->state.drain = 1; + Curl_expire(data, 0, EXPIRE_RUN_NOW); } - node->ev = ev; - /* append to 
process them in order of arrival */ - pnext = &mdata->req.p.http->pending; - while(*pnext) { - pnext = &((*pnext)->next); +} + +static struct Curl_easy *get_stream_easy(struct Curl_cfilter *cf, + struct Curl_easy *data, + int64_t stream3_id) +{ + struct Curl_easy *sdata; + + (void)cf; + if(H3_STREAM_ID(data) == stream3_id) { + return data; } - *pnext = node; - notify_drain(cf, mdata); - return CURLE_OK; + else { + DEBUGASSERT(data->multi); + for(sdata = data->multi->easyp; sdata; sdata = sdata->next) { + if(H3_STREAM_ID(sdata) == stream3_id) { + return sdata; + } + } + } + return NULL; +} + +/* + * write_resp_raw() copies response data in raw format to the `data`'s + * receive buffer. If not enough space is available, it appends to the + * `data`'s overflow buffer. + */ +static CURLcode write_resp_raw(struct Curl_cfilter *cf, + struct Curl_easy *data, + const void *mem, size_t memlen) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + CURLcode result = CURLE_OK; + ssize_t nwritten; + + (void)cf; + nwritten = Curl_bufq_write(&stream->recvbuf, mem, memlen, &result); + if(nwritten < 0) + return result; + + if((size_t)nwritten < memlen) { + /* This MUST not happen. Our recbuf is dimensioned to hold the + * full max_stream_window and then some for this very reason. 
*/ + DEBUGASSERT(0); + return CURLE_RECV_ERROR; + } + return result; } -struct h3h1header { - char *dest; - size_t destlen; /* left to use */ - size_t nlen; /* used */ +struct cb_ctx { + struct Curl_cfilter *cf; + struct Curl_easy *data; }; static int cb_each_header(uint8_t *name, size_t name_len, uint8_t *value, size_t value_len, void *argp) { - struct h3h1header *headers = (struct h3h1header *)argp; - size_t olen = 0; + struct cb_ctx *x = argp; + struct stream_ctx *stream = H3_STREAM_CTX(x->data); + CURLcode result; + (void)stream; if((name_len == 7) && !strncmp(H2H3_PSEUDO_STATUS, (char *)name, 7)) { - msnprintf(headers->dest, - headers->destlen, "HTTP/3 %.*s \r\n", - (int) value_len, value); - } - else if(!headers->nlen) { - return CURLE_HTTP3; + result = write_resp_raw(x->cf, x->data, "HTTP/3 ", sizeof("HTTP/3 ") - 1); + if(!result) + result = write_resp_raw(x->cf, x->data, value, value_len); + if(!result) + result = write_resp_raw(x->cf, x->data, " \r\n", 3); } else { - msnprintf(headers->dest, - headers->destlen, "%.*s: %.*s\r\n", - (int)name_len, name, (int) value_len, value); - } - olen = strlen(headers->dest); - headers->destlen -= olen; - headers->nlen += olen; - headers->dest += olen; - return 0; + result = write_resp_raw(x->cf, x->data, name, name_len); + if(!result) + result = write_resp_raw(x->cf, x->data, ": ", 2); + if(!result) + result = write_resp_raw(x->cf, x->data, value, value_len); + if(!result) + result = write_resp_raw(x->cf, x->data, "\r\n", 2); + } + if(result) { + DEBUGF(LOG_CF(x->data, x->cf, + "[h3sid=%"PRId64"][HEADERS][%.*s: %.*s] error %d", + stream->id, (int)name_len, name, + (int)value_len, value, result)); + } + return result; } -static ssize_t cf_recv_body(struct Curl_cfilter *cf, - struct Curl_easy *data, - char *buf, size_t len, +static ssize_t stream_resp_read(void *reader_ctx, + unsigned char *buf, size_t len, CURLcode *err) { - struct cf_quiche_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct cb_ctx *x 
= reader_ctx; + struct cf_quiche_ctx *ctx = x->cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(x->data); ssize_t nread; - size_t offset = 0; - if(!stream->firstbody) { - /* add a header-body separator CRLF */ - offset = 2; - } - nread = quiche_h3_recv_body(ctx->h3c, ctx->qconn, stream->stream3_id, - (unsigned char *)buf + offset, len - offset); + nread = quiche_h3_recv_body(ctx->h3c, ctx->qconn, stream->id, + buf, len); if(nread >= 0) { - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"][DATA] len=%zd", - stream->stream3_id, nread)); - if(!stream->firstbody) { - stream->firstbody = TRUE; - buf[0] = '\r'; - buf[1] = '\n'; - nread += offset; - } + *err = CURLE_OK; + return nread; } - else if(nread == -1) { + else if(nread < 0) { *err = CURLE_AGAIN; - stream->h3_recving_data = FALSE; + return -1; } else { + *err = stream->resp_got_header? CURLE_PARTIAL_FILE : CURLE_RECV_ERROR; + return -1; + } +} + +static CURLcode cf_recv_body(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct stream_ctx *stream = H3_STREAM_CTX(data); + ssize_t nwritten; + struct cb_ctx cb_ctx; + CURLcode result = CURLE_OK; + + if(!stream->resp_hds_complete) { + result = write_resp_raw(cf, data, "\r\n", 2); + if(result) + return result; + stream->resp_hds_complete = TRUE; + } + + cb_ctx.cf = cf; + cb_ctx.data = data; + nwritten = Curl_bufq_slurp(&stream->recvbuf, + stream_resp_read, &cb_ctx, &result); + + if(nwritten < 0 && result != CURLE_AGAIN) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] recv_body error %zd", + stream->id, nwritten)); failf(data, "Error %zd in HTTP/3 response body for stream[%"PRId64"]", - nread, stream->stream3_id); + nwritten, stream->id); stream->closed = TRUE; stream->reset = TRUE; streamclose(cf->conn, "Reset of stream"); - stream->h3_recving_data = FALSE; - nread = -1; - *err = stream->h3_got_header? 
CURLE_PARTIAL_FILE : CURLE_RECV_ERROR; + return result; } - return nread; + return CURLE_OK; } #ifdef DEBUGBUILD @@ -335,64 +476,48 @@ static const char *cf_ev_name(quiche_h3_event *ev) #define cf_ev_name(x) "" #endif -static ssize_t h3_process_event(struct Curl_cfilter *cf, - struct Curl_easy *data, - char *buf, size_t len, - int64_t stream3_id, - quiche_h3_event *ev, - CURLcode *err) +static CURLcode h3_process_event(struct Curl_cfilter *cf, + struct Curl_easy *data, + int64_t stream3_id, + quiche_h3_event *ev) { - struct HTTP *stream = data->req.p.http; - ssize_t recvd = 0; + struct stream_ctx *stream = H3_STREAM_CTX(data); + struct cb_ctx cb_ctx; + CURLcode result = CURLE_OK; int rc; - struct h3h1header headers; - - DEBUGASSERT(stream3_id == stream->stream3_id); - *err = CURLE_OK; + DEBUGASSERT(stream3_id == stream->id); switch(quiche_h3_event_type(ev)) { case QUICHE_H3_EVENT_HEADERS: - stream->h3_got_header = TRUE; - headers.dest = buf; - headers.destlen = len; - headers.nlen = 0; - rc = quiche_h3_event_for_each_header(ev, cb_each_header, &headers); + stream->resp_got_header = TRUE; + cb_ctx.cf = cf; + cb_ctx.data = data; + rc = quiche_h3_event_for_each_header(ev, cb_each_header, &cb_ctx); if(rc) { failf(data, "Error %d in HTTP/3 response header for stream[%"PRId64"]", rc, stream3_id); - *err = CURLE_RECV_ERROR; - recvd = -1; - break; + return CURLE_RECV_ERROR; } - recvd = headers.nlen; - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"][HEADERS] len=%zd", - stream3_id, recvd)); + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"][HEADERS]", stream3_id)); break; case QUICHE_H3_EVENT_DATA: - DEBUGASSERT(!stream->closed); - stream->h3_recving_data = TRUE; - recvd = cf_recv_body(cf, data, buf, len, err); - if(recvd < 0) { - if(*err != CURLE_AGAIN) - return -1; - recvd = 0; + if(!stream->closed) { + result = cf_recv_body(cf, data); } break; case QUICHE_H3_EVENT_RESET: - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"][RESET]", stream3_id)); + DEBUGF(LOG_CF(data, cf, 
"[h3sid=%"PRId64"][RESET]", stream3_id)); stream->closed = TRUE; stream->reset = TRUE; - /* streamclose(cf->conn, "Reset of stream");*/ - stream->h3_recving_data = FALSE; + streamclose(cf->conn, "Reset of stream"); break; case QUICHE_H3_EVENT_FINISHED: DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"][FINISHED]", stream3_id)); stream->closed = TRUE; - /* streamclose(cf->conn, "End of stream");*/ - stream->h3_recving_data = FALSE; + streamclose(cf->conn, "End of stream"); break; case QUICHE_H3_EVENT_GOAWAY: @@ -404,126 +529,161 @@ static ssize_t h3_process_event(struct Curl_cfilter *cf, stream3_id, quiche_h3_event_type(ev))); break; } - return recvd; + return result; } -static ssize_t h3_process_pending(struct Curl_cfilter *cf, - struct Curl_easy *data, - char *buf, size_t len, - CURLcode *err) +static CURLcode cf_poll_events(struct Curl_cfilter *cf, + struct Curl_easy *data) { - struct HTTP *stream = data->req.p.http; - struct h3_event_node *node = stream->pending, **pnext = &stream->pending; - ssize_t recvd = 0, erecvd; + struct cf_quiche_ctx *ctx = cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(data); + struct Curl_easy *sdata; + quiche_h3_event *ev; + CURLcode result; - *err = CURLE_OK; - DEBUGASSERT(stream); - while(node && len) { - erecvd = h3_process_event(cf, data, buf, len, - stream->stream3_id, node->ev, err); - quiche_h3_event_free(node->ev); - *pnext = node->next; - free(node); - node = *pnext; - if(erecvd < 0) { - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] process event -> %d", - stream->stream3_id, *err)); - return erecvd; + /* Take in the events and distribute them to the transfers. */ + while(ctx->h3c) { + int64_t stream3_id = quiche_h3_conn_poll(ctx->h3c, ctx->qconn, &ev); + if(stream3_id == QUICHE_H3_ERR_DONE) { + break; + } + else if(stream3_id < 0) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] error poll: %"PRId64, + stream? 
stream->id : -1, stream3_id)); + return CURLE_HTTP3; + } + + sdata = get_stream_easy(cf, data, stream3_id); + if(!sdata) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] discard event %s for " + "unknown [h3sid=%"PRId64"]", + stream? stream->id : -1, cf_ev_name(ev), + stream3_id)); + } + else { + result = h3_process_event(cf, sdata, stream3_id, ev); + if(sdata != data) { + notify_drain(cf, sdata); + } + if(result) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] error processing event %s " + "for [h3sid=%"PRId64"] -> %d", + stream? stream->id : -1, cf_ev_name(ev), + stream3_id, result)); + quiche_h3_event_free(ev); + return result; + } + quiche_h3_event_free(ev); } - recvd += erecvd; - *err = CURLE_OK; - buf += erecvd; - len -= erecvd; } - return recvd; + return CURLE_OK; } -static CURLcode cf_process_ingress(struct Curl_cfilter *cf, - struct Curl_easy *data) +struct recv_ctx { + struct Curl_cfilter *cf; + struct Curl_easy *data; + int pkts; +}; + +static CURLcode recv_pkt(const unsigned char *pkt, size_t pktlen, + struct sockaddr_storage *remote_addr, + socklen_t remote_addrlen, int ecn, + void *userp) { - struct cf_quiche_ctx *ctx = cf->ctx; - int64_t stream3_id = data->req.p.http? 
data->req.p.http->stream3_id : -1; - uint8_t buf[65536]; - int bufsize = (int)sizeof(buf); - struct sockaddr_storage remote_addr; - socklen_t remote_addrlen; + struct recv_ctx *r = userp; + struct cf_quiche_ctx *ctx = r->cf->ctx; quiche_recv_info recv_info; - ssize_t recvd, nread; - ssize_t total = 0, pkts = 0; + ssize_t nread; - DEBUGASSERT(ctx->qconn); + (void)ecn; + ++r->pkts; - /* in case the timeout expired */ - quiche_conn_on_timeout(ctx->qconn); - - do { - remote_addrlen = sizeof(remote_addr); - while((recvd = recvfrom(ctx->q.sockfd, (char *)buf, bufsize, 0, - (struct sockaddr *)&remote_addr, - &remote_addrlen)) == -1 && - SOCKERRNO == EINTR) - ; - if(recvd < 0) { - if((SOCKERRNO == EAGAIN) || (SOCKERRNO == EWOULDBLOCK)) { - break; - } - if(SOCKERRNO == ECONNREFUSED) { - const char *r_ip; - int r_port; - Curl_cf_socket_peek(cf->next, data, NULL, NULL, - &r_ip, &r_port, NULL, NULL); - failf(data, "quiche: connection to %s:%u refused", - r_ip, r_port); - return CURLE_COULDNT_CONNECT; - } - failf(data, "quiche: recvfrom() unexpectedly returned %zd " - "(errno: %d, socket %d)", recvd, SOCKERRNO, ctx->q.sockfd); - return CURLE_RECV_ERROR; - } + recv_info.to = (struct sockaddr *)&ctx->q.local_addr; + recv_info.to_len = ctx->q.local_addrlen; + recv_info.from = (struct sockaddr *)remote_addr; + recv_info.from_len = remote_addrlen; - total += recvd; - ++pkts; - if(recvd > 0 && !ctx->got_first_byte) { - ctx->first_byte_at = Curl_now(); - ctx->got_first_byte = TRUE; + nread = quiche_conn_recv(ctx->qconn, (unsigned char *)pkt, pktlen, + &recv_info); + if(nread < 0) { + if(QUICHE_ERR_DONE == nread) { + DEBUGF(LOG_CF(r->data, r->cf, "ingress, quiche is DONE")); + return CURLE_OK; } - recv_info.from = (struct sockaddr *) &remote_addr; - recv_info.from_len = remote_addrlen; - recv_info.to = (struct sockaddr *) &ctx->q.local_addr; - recv_info.to_len = ctx->q.local_addrlen; - - nread = quiche_conn_recv(ctx->qconn, buf, recvd, &recv_info); - if(nread < 0) { - 
if(QUICHE_ERR_DONE == nread) { - DEBUGF(LOG_CF(data, cf, "ingress, quiche is DONE")); - return CURLE_OK; - } - else if(QUICHE_ERR_TLS_FAIL == nread) { - long verify_ok = SSL_get_verify_result(ctx->ssl); - if(verify_ok != X509_V_OK) { - failf(data, "SSL certificate problem: %s", - X509_verify_cert_error_string(verify_ok)); - return CURLE_PEER_FAILED_VERIFICATION; - } - } - else { - failf(data, "quiche_conn_recv() == %zd", nread); - return CURLE_RECV_ERROR; + else if(QUICHE_ERR_TLS_FAIL == nread) { + long verify_ok = SSL_get_verify_result(ctx->ssl); + if(verify_ok != X509_V_OK) { + failf(r->data, "SSL certificate problem: %s", + X509_verify_cert_error_string(verify_ok)); + return CURLE_PEER_FAILED_VERIFICATION; } } - else if(nread < recvd) { - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] ingress, quiche only " - "accepted %zd/%zd bytes", - stream3_id, nread, recvd)); + else { + failf(r->data, "quiche_conn_recv() == %zd", nread); + return CURLE_RECV_ERROR; } + } + else if((size_t)nread < pktlen) { + DEBUGF(LOG_CF(r->data, r->cf, "ingress, quiche only read %zd/%zd bytes", + nread, pktlen)); + } - } while(pkts < 1000); /* arbitrary */ - - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] ingress, recvd %zd bytes " - "in %zd packets", stream3_id, total, pkts)); return CURLE_OK; } +static CURLcode cf_process_ingress(struct Curl_cfilter *cf, + struct Curl_easy *data) +{ + struct cf_quiche_ctx *ctx = cf->ctx; + struct recv_ctx rctx; + CURLcode result; + + DEBUGASSERT(ctx->qconn); + rctx.cf = cf; + rctx.data = data; + rctx.pkts = 0; + + result = vquic_recv_packets(cf, data, &ctx->q, 1000, recv_pkt, &rctx); + if(result) + return result; + + if(rctx.pkts > 0) { + /* quiche digested ingress packets. It might have opened flow control + * windows again. 
*/ + check_resumes(cf, data); + } + return cf_poll_events(cf, data); +} + +struct read_ctx { + struct Curl_cfilter *cf; + struct Curl_easy *data; + quiche_send_info send_info; +}; + +static ssize_t read_pkt_to_send(void *userp, + unsigned char *buf, size_t buflen, + CURLcode *err) +{ + struct read_ctx *x = userp; + struct cf_quiche_ctx *ctx = x->cf->ctx; + ssize_t nwritten; + + nwritten = quiche_conn_send(ctx->qconn, buf, buflen, &x->send_info); + if(nwritten == QUICHE_ERR_DONE) { + *err = CURLE_AGAIN; + return -1; + } + + if(nwritten < 0) { + failf(x->data, "quiche_conn_send returned %zd", nwritten); + *err = CURLE_SEND_ERROR; + return -1; + } + *err = CURLE_OK; + return nwritten; +} + /* * flush_egress drains the buffers and sends off data. * Calls failf() on errors. @@ -532,60 +692,59 @@ static CURLcode cf_flush_egress(struct Curl_cfilter *cf, struct Curl_easy *data) { struct cf_quiche_ctx *ctx = cf->ctx; - int64_t stream3_id = data->req.p.http? data->req.p.http->stream3_id : -1; - quiche_send_info send_info; - ssize_t outlen, total_len = 0; - size_t max_udp_payload_size = - quiche_conn_max_send_udp_payload_size(ctx->qconn); - size_t gsolen = max_udp_payload_size; - size_t sent, pktcnt = 0; + ssize_t nread; CURLcode result; int64_t timeout_ns; + struct read_ctx readx; + size_t pkt_count, gsolen; - ctx->q.no_gso = TRUE; - if(ctx->q.num_blocked_pkt) { - result = vquic_send_blocked_pkt(cf, data, &ctx->q); - if(result) { - if(result == CURLE_AGAIN) { - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] egress, still not " - "able to send blocked packet", stream3_id)); - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; - } - goto out; + result = vquic_flush(cf, data, &ctx->q); + if(result) { + if(result == CURLE_AGAIN) { + Curl_expire(data, 1, EXPIRE_QUIC); + return CURLE_OK; } + return result; } + readx.cf = cf; + readx.data = data; + memset(&readx.send_info, 0, sizeof(readx.send_info)); + pkt_count = 0; + gsolen = quiche_conn_max_send_udp_payload_size(ctx->qconn); 
for(;;) { - outlen = quiche_conn_send(ctx->qconn, ctx->q.pktbuf, max_udp_payload_size, - &send_info); - if(outlen == QUICHE_ERR_DONE) { - result = CURLE_OK; - goto out; - } + /* add the next packet to send, if any, to our buffer */ + nread = Curl_bufq_sipn(&ctx->q.sendbuf, 0, + read_pkt_to_send, &readx, &result); + /* DEBUGF(LOG_CF(data, cf, "sip packet(maxlen=%zu) -> %zd, %d", + (size_t)0, nread, result)); */ - if(outlen < 0) { - failf(data, "quiche_conn_send returned %zd", outlen); - result = CURLE_SEND_ERROR; + if(nread < 0) { + if(result != CURLE_AGAIN) + return result; + /* Nothing more to add, flush and leave */ + result = vquic_send(cf, data, &ctx->q, gsolen); + if(result) { + if(result == CURLE_AGAIN) { + Curl_expire(data, 1, EXPIRE_QUIC); + return CURLE_OK; + } + return result; + } goto out; } - /* send the pktbuf *before* the last addition */ - result = vquic_send_packet(cf, data, &ctx->q, ctx->q.pktbuf, - outlen, gsolen, &sent); - ++pktcnt; - total_len += outlen; - if(result) { - if(result == CURLE_AGAIN) { - /* blocked, add the pktbuf *before* and *at* the last addition - * separately to the blocked packages */ - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] egress, pushing blocked " - "packet with %zd bytes", stream3_id, outlen)); - vquic_push_blocked_pkt(cf, &ctx->q, ctx->q.pktbuf, outlen, gsolen); - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; + ++pkt_count; + if((size_t)nread < gsolen || pkt_count >= MAX_PKT_BURST) { + result = vquic_send(cf, data, &ctx->q, gsolen); + if(result) { + if(result == CURLE_AGAIN) { + Curl_expire(data, 1, EXPIRE_QUIC); + return CURLE_OK; + } + goto out; } - goto out; + pkt_count = 0; } } @@ -595,9 +754,6 @@ out: timeout_ns += 1000000; /* expire resolution is milliseconds */ Curl_expire(data, (timeout_ns / 1000000), EXPIRE_QUIC); - if(pktcnt) - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] egress, sent %zd packets " - "with %zd bytes", stream3_id, pktcnt, total_len)); return result; } @@ -605,195 +761,131 @@ static 
ssize_t recv_closed_stream(struct Curl_cfilter *cf, struct Curl_easy *data, CURLcode *err) { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); ssize_t nread = -1; if(stream->reset) { failf(data, - "HTTP/3 stream %" PRId64 " reset by server", stream->stream3_id); - *err = stream->h3_got_header? CURLE_PARTIAL_FILE : CURLE_RECV_ERROR; + "HTTP/3 stream %" PRId64 " reset by server", stream->id); + *err = stream->resp_got_header? CURLE_PARTIAL_FILE : CURLE_RECV_ERROR; DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, was reset -> %d", - stream->stream3_id, *err)); - goto out; + stream->id, *err)); } - - if(!stream->h3_got_header) { + else if(!stream->resp_got_header) { failf(data, "HTTP/3 stream %" PRId64 " was closed cleanly, but before getting" " all response header fields, treated as error", - stream->stream3_id); + stream->id); /* *err = CURLE_PARTIAL_FILE; */ *err = CURLE_RECV_ERROR; DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, closed incomplete" - " -> %d", stream->stream3_id, *err)); - goto out; + " -> %d", stream->id, *err)); } else { + *err = CURLE_OK; + nread = 0; DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_recv, closed ok" - " -> %d", stream->stream3_id, *err)); + " -> %d", stream->id, *err)); } - *err = CURLE_OK; - nread = 0; - -out: return nread; } -static CURLcode cf_poll_events(struct Curl_cfilter *cf, - struct Curl_easy *data) -{ - struct cf_quiche_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; - quiche_h3_event *ev; - - /* Take in the events and distribute them to the transfers. */ - while(1) { - int64_t stream3_id = quiche_h3_conn_poll(ctx->h3c, ctx->qconn, &ev); - if(stream3_id < 0) { - /* nothing more to do */ - break; - } - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] recv, queue event %s " - "for [h3sid=%"PRId64"]", - stream? 
stream->stream3_id : -1, cf_ev_name(ev), - stream3_id)); - if(h3_add_event(cf, data, stream3_id, ev) != CURLE_OK) { - return CURLE_OUT_OF_MEMORY; - } - } - return CURLE_OK; -} - -static ssize_t cf_recv_transfer_data(struct Curl_cfilter *cf, - struct Curl_easy *data, - char *buf, size_t len, - CURLcode *err) -{ - struct HTTP *stream = data->req.p.http; - ssize_t recvd = -1; - size_t offset = 0; - - if(stream->h3_recving_data) { - /* try receiving body first */ - recvd = cf_recv_body(cf, data, buf, len, err); - if(recvd < 0) { - if(*err != CURLE_AGAIN) - return -1; - recvd = 0; - } - if(recvd > 0) { - offset = recvd; - } - } - - if(offset < len && stream->pending) { - /* process any pending events for `data` first. if there are, - * return so the transfer can handle those. We do not want to - * progress ingress while events are pending here. */ - recvd = h3_process_pending(cf, data, buf + offset, len - offset, err); - if(recvd < 0) { - if(*err != CURLE_AGAIN) - return -1; - recvd = 0; - } - if(recvd > 0) { - offset += recvd; - } - } - - if(offset) { - *err = CURLE_OK; - return offset; - } - *err = CURLE_AGAIN; - return 0; -} - static ssize_t cf_quiche_recv(struct Curl_cfilter *cf, struct Curl_easy *data, char *buf, size_t len, CURLcode *err) { - struct HTTP *stream = data->req.p.http; - ssize_t recvd = -1; - - *err = CURLE_AGAIN; + struct cf_quiche_ctx *ctx = cf->ctx; + struct stream_ctx *stream = H3_STREAM_CTX(data); + ssize_t nread = -1; + CURLcode result; - recvd = cf_recv_transfer_data(cf, data, buf, len, err); - if(recvd) - goto out; - if(stream->closed) { - recvd = recv_closed_stream(cf, data, err); - goto out; + if(!Curl_bufq_is_empty(&stream->recvbuf)) { + nread = Curl_bufq_read(&stream->recvbuf, + (unsigned char *)buf, len, err); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] read recvbuf(len=%zu) " + "-> %zd, %d", stream->id, len, nread, *err)); + if(nread < 0) + goto out; } - /* we did get nothing from the quiche buffers or pending events. 
- * Take in more data from the connection, any error is fatal */ if(cf_process_ingress(cf, data)) { - DEBUGF(LOG_CF(data, cf, "h3_stream_recv returns on ingress")); + DEBUGF(LOG_CF(data, cf, "cf_recv, error on ingress")); *err = CURLE_RECV_ERROR; - recvd = -1; + nread = -1; goto out; } - /* poll quiche and distribute the events to the transfers */ - *err = cf_poll_events(cf, data); - if(*err) { - recvd = -1; - goto out; + + /* recvbuf had nothing before, maybe after progressing ingress? */ + if(nread < 0 && !Curl_bufq_is_empty(&stream->recvbuf)) { + nread = Curl_bufq_read(&stream->recvbuf, + (unsigned char *)buf, len, err); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] read recvbuf(len=%zu) " + "-> %zd, %d", stream->id, len, nread, *err)); + if(nread < 0) + goto out; } - /* try to receive again for this transfer */ - recvd = cf_recv_transfer_data(cf, data, buf, len, err); - if(recvd) - goto out; - if(stream->closed) { - recvd = recv_closed_stream(cf, data, err); - goto out; + if(nread > 0) { + data->state.drain = (!Curl_bufq_is_empty(&stream->recvbuf) || + stream->closed); + } + else { + data->state.drain = FALSE; + if(stream->closed) { + nread = recv_closed_stream(cf, data, err); + goto out; + } + *err = CURLE_AGAIN; + nread = -1; } - recvd = -1; - *err = CURLE_AGAIN; - data->state.drain = 0; out: - if(cf_flush_egress(cf, data)) { + result = cf_flush_egress(cf, data); + if(result) { DEBUGF(LOG_CF(data, cf, "cf_recv, flush egress failed")); - *err = CURLE_SEND_ERROR; - return -1; + *err = result; + nread = -1; } - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] cf_recv -> %zd, err=%d", - stream->stream3_id, recvd, *err)); - if(recvd > 0) - notify_drain(cf, data); - return recvd; + if(nread > 0) + ctx->data_recvd += nread; + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] cf_recv(total=%zd) -> %zd, %d", + stream->id, ctx->data_recvd, nread, *err)); + return nread; } /* Index where :authority header field will appear in request header field list. 
*/ #define AUTHORITY_DST_IDX 3 -static CURLcode cf_http_request(struct Curl_cfilter *cf, - struct Curl_easy *data, - const void *mem, - size_t len) +static ssize_t h3_open_stream(struct Curl_cfilter *cf, + struct Curl_easy *data, + const void *mem, size_t len, + CURLcode *err) { struct cf_quiche_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); size_t nheader; int64_t stream3_id; quiche_h3_header *nva = NULL; - CURLcode result = CURLE_OK; struct h2h3req *hreq = NULL; - stream->h3req = TRUE; /* send off! */ - stream->closed = FALSE; - stream->reset = FALSE; + if(!stream->req_hds_len) { + stream->req_hds_len = len; /* fist call */ + } + else { + /* subsequent attempt, we should get at least as many bytes as + * in the first call as headers are either completely sent or not + * at all. */ + DEBUGASSERT(stream->req_hds_len <= len); + } - result = Curl_pseudo_headers(data, mem, len, NULL, &hreq); - if(result) + *err = Curl_pseudo_headers(data, mem, stream->req_hds_len, NULL, &hreq); + if(*err) goto fail; nheader = hreq->entries; nva = malloc(sizeof(quiche_h3_header) * nheader); if(!nva) { - result = CURLE_OUT_OF_MEMORY; + *err = CURLE_OUT_OF_MEMORY; goto fail; } else { @@ -812,107 +904,122 @@ static CURLcode cf_http_request(struct Curl_cfilter *cf, case HTTPREQ_POST_MIME: case HTTPREQ_PUT: if(data->state.infilesize != -1) - stream->upload_left = data->state.infilesize; + stream->upload_done = !data->state.infilesize; else /* data sending without specifying the data amount up front */ - stream->upload_left = -1; /* unknown, but not zero */ - - stream->upload_done = !stream->upload_left; - stream3_id = quiche_h3_send_request(ctx->h3c, ctx->qconn, nva, nheader, - stream->upload_done); + stream->upload_done = FALSE; break; default: - stream->upload_left = 0; stream->upload_done = TRUE; - stream3_id = quiche_h3_send_request(ctx->h3c, ctx->qconn, nva, nheader, - TRUE); break; } - Curl_safefree(nva); - + stream3_id 
= quiche_h3_send_request(ctx->h3c, ctx->qconn, nva, nheader, + stream->upload_done); if(stream3_id < 0) { if(QUICHE_H3_ERR_STREAM_BLOCKED == stream3_id) { - DEBUGF(LOG_CF(data, cf, "send_request(%s, body_len=%ld) rejected " - "with H3_ERR_STREAM_BLOCKED", - data->state.url, (long)stream->upload_left)); - result = CURLE_AGAIN; + /* quiche seems to report this error if the connection window is + * exhausted. Which happens frequently and intermittent. */ + DEBUGF(LOG_CF(data, cf, "send_request(%s) rejected with BLOCKED", + data->state.url)); + stream_send_suspend(cf, data); + *err = CURLE_AGAIN; goto fail; } else { - DEBUGF(LOG_CF(data, cf, "send_request(%s, body_len=%ld) -> %" PRId64, - data->state.url, (long)stream->upload_left, stream3_id)); + DEBUGF(LOG_CF(data, cf, "send_request(%s) -> %" PRId64, + data->state.url, stream3_id)); } - result = CURLE_SEND_ERROR; + *err = CURLE_SEND_ERROR; goto fail; } - stream->stream3_id = stream3_id; + DEBUGASSERT(stream->id == -1); + stream->id = stream3_id; + stream->closed = FALSE; + stream->reset = FALSE; + infof(data, "Using HTTP/3 Stream ID: %" PRId64 " (easy handle %p)", stream3_id, (void *)data); DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] opened for %s", stream3_id, data->state.url)); Curl_pseudo_free(hreq); - return CURLE_OK; + free(nva); + *err = CURLE_OK; + return stream->req_hds_len; fail: free(nva); Curl_pseudo_free(hreq); - return result; + return -1; } static ssize_t cf_quiche_send(struct Curl_cfilter *cf, struct Curl_easy *data, const void *buf, size_t len, CURLcode *err) { struct cf_quiche_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); + CURLcode result; ssize_t nwritten; - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_send(len=%zu) start", - stream->h3req? 
stream->stream3_id : -1, len)); *err = cf_process_ingress(cf, data); - if(*err) - return -1; + if(*err) { + nwritten = -1; + goto out; + } - if(!stream->h3req) { - CURLcode result = cf_http_request(cf, data, buf, len); - if(result) { - *err = result; - return -1; - } - nwritten = len; + if(stream->id < 0) { + nwritten = h3_open_stream(cf, data, buf, len, err); + if(nwritten < 0) + goto out; } else { - nwritten = quiche_h3_send_body(ctx->h3c, ctx->qconn, stream->stream3_id, - (uint8_t *)buf, len, FALSE); - DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] send body(len=%zu) -> %zd", - stream->stream3_id, len, nwritten)); - if(nwritten == QUICHE_H3_ERR_DONE) { - /* no error, nothing to do (flow control?) */ + nwritten = quiche_h3_send_body(ctx->h3c, ctx->qconn, stream->id, + (uint8_t *)buf, len, stream->upload_done); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] send body(len=%zu, eof=%d) " + "-> %zd", stream->id, len, stream->upload_done, + nwritten)); + if(nwritten == QUICHE_H3_ERR_DONE || (nwritten == 0 && len > 0)) { + /* TODO: we seem to be blocked on flow control and should HOLD + * sending. But when do we open again? 
*/ + if(!quiche_conn_stream_writable(ctx->qconn, stream->id, len)) { + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] send_body(len=%zu) " + "-> window exhausted", stream->id, len)); + stream_send_suspend(cf, data); + } *err = CURLE_AGAIN; nwritten = -1; + goto out; } else if(nwritten == QUICHE_H3_TRANSPORT_ERR_FINAL_SIZE) { - DEBUGF(LOG_CF(data, cf, "send_body(len=%zu) -> exceeds size", len)); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] send_body(len=%zu) " + "-> exceeds size", stream->id, len)); *err = CURLE_SEND_ERROR; nwritten = -1; + goto out; } else if(nwritten < 0) { - DEBUGF(LOG_CF(data, cf, "send_body(len=%zu) -> SEND_ERROR", len)); + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] send_body(len=%zu) " + "-> quiche err %zd", stream->id, len, nwritten)); *err = CURLE_SEND_ERROR; nwritten = -1; + goto out; } else { + /* quiche accepted all or at least a part of the buf */ *err = CURLE_OK; } } - if(cf_flush_egress(cf, data)) { - *err = CURLE_SEND_ERROR; - return -1; +out: + result = cf_flush_egress(cf, data); + if(result) { + *err = result; + nwritten = -1; } - + DEBUGF(LOG_CF(data, cf, "[h3sid=%" PRId64 "] cf_send(len=%zu) -> %zd, %d", + stream->id, len, nwritten, *err)); return nwritten; } @@ -920,19 +1027,22 @@ static bool stream_is_writeable(struct Curl_cfilter *cf, struct Curl_easy *data) { struct cf_quiche_ctx *ctx = cf->ctx; - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); + bool is_writable = FALSE; /* surely, there must be a better way */ quiche_stream_iter *qiter = quiche_conn_writable(ctx->qconn); if(qiter) { uint64_t stream_id; while(quiche_stream_iter_next(qiter, &stream_id)) { - if(stream_id == (uint64_t)stream->stream3_id) - return TRUE; + if(stream_id == (uint64_t)stream->id) { + is_writable = TRUE; + break; + } } quiche_stream_iter_free(qiter); } - return FALSE; + return is_writable; } static int cf_quiche_get_select_socks(struct Curl_cfilter *cf, @@ -964,57 +1074,43 @@ static int 
cf_quiche_get_select_socks(struct Curl_cfilter *cf, static bool cf_quiche_data_pending(struct Curl_cfilter *cf, const struct Curl_easy *data) { - struct HTTP *stream = data->req.p.http; - - if(stream->pending) { - DEBUGF(LOG_CF((struct Curl_easy *)data, cf, - "[h3sid=%"PRId64"] has event pending", stream->stream3_id)); - return TRUE; - } - if(stream->h3_recving_data) { - DEBUGF(LOG_CF((struct Curl_easy *)data, cf, - "[h3sid=%"PRId64"] is receiving DATA", stream->stream3_id)); - return TRUE; - } - if(data->state.drain) { - DEBUGF(LOG_CF((struct Curl_easy *)data, cf, - "[h3sid=%"PRId64"] is draining", stream->stream3_id)); - return TRUE; - } - return FALSE; + const struct stream_ctx *stream = H3_STREAM_CTX(data); + (void)cf; + return !Curl_bufq_is_empty(&stream->recvbuf); } static CURLcode cf_quiche_data_event(struct Curl_cfilter *cf, struct Curl_easy *data, int event, int arg1, void *arg2) { - struct cf_quiche_ctx *ctx = cf->ctx; CURLcode result = CURLE_OK; (void)arg1; (void)arg2; switch(event) { + case CF_CTRL_DATA_SETUP: { + result = h3_data_setup(cf, data); + break; + } case CF_CTRL_DATA_DONE: { - struct HTTP *stream = data->req.p.http; - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] easy handle is %s", - stream->stream3_id, arg1? "cancelled" : "done")); - h3_clear_pending(data); + h3_data_done(cf, data); break; } case CF_CTRL_DATA_DONE_SEND: { - struct HTTP *stream = data->req.p.http; + struct stream_ctx *stream = H3_STREAM_CTX(data); + unsigned char body[1]; ssize_t sent; stream->upload_done = TRUE; - sent = quiche_h3_send_body(ctx->h3c, ctx->qconn, stream->stream3_id, - NULL, 0, TRUE); - DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] send_body FINISHED", - stream->stream3_id)); - if(sent < 0) - return CURLE_SEND_ERROR; + + body[0] = 'X'; + sent = cf_quiche_send(cf, data, body, 0, &result); + DEBUGF(LOG_CF(data, cf, "[h3sid=%"PRId64"] DONE_SEND -> %zd, %d", + stream->id, sent, result)); break; } case CF_CTRL_DATA_IDLE: - /* anything to do? 
*/ + /* result = cf_flush_egress(cf, data); + DEBUGF(LOG_CF(data, cf, "data idle, flush egress -> %d", result)); */ break; default: break; @@ -1095,8 +1191,11 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, debug_log_init = 1; } #endif + Curl_bufcp_init(&ctx->stream_bufcp, H3_STREAM_CHUNK_SIZE, + H3_STREAM_POOL_SPARES); + ctx->data_recvd = 0; - result = vquic_ctx_init(&ctx->q, MAX_UDP_PAYLOAD_SIZE * MAX_PKT_BURST); + result = vquic_ctx_init(&ctx->q); if(result) return result; @@ -1106,14 +1205,21 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, return CURLE_FAILED_INIT; } quiche_config_set_max_idle_timeout(ctx->cfg, QUIC_IDLE_TIMEOUT); - quiche_config_set_initial_max_data(ctx->cfg, QUIC_MAX_DATA); - quiche_config_set_initial_max_stream_data_bidi_local( - ctx->cfg, QUIC_MAX_DATA); - quiche_config_set_initial_max_stream_data_bidi_remote( - ctx->cfg, QUIC_MAX_DATA); - quiche_config_set_initial_max_stream_data_uni(ctx->cfg, QUIC_MAX_DATA); + quiche_config_set_initial_max_data(ctx->cfg, (1 * 1024 * 1024) + /* (QUIC_MAX_STREAMS/2) * H3_STREAM_WINDOW_SIZE */); quiche_config_set_initial_max_streams_bidi(ctx->cfg, QUIC_MAX_STREAMS); quiche_config_set_initial_max_streams_uni(ctx->cfg, QUIC_MAX_STREAMS); + quiche_config_set_initial_max_stream_data_bidi_local(ctx->cfg, + H3_STREAM_WINDOW_SIZE); + quiche_config_set_initial_max_stream_data_bidi_remote(ctx->cfg, + H3_STREAM_WINDOW_SIZE); + quiche_config_set_initial_max_stream_data_uni(ctx->cfg, + H3_STREAM_WINDOW_SIZE); + quiche_config_set_disable_active_migration(ctx->cfg, TRUE); + + quiche_config_set_max_connection_window(ctx->cfg, + 10 * QUIC_MAX_STREAMS * H3_STREAM_WINDOW_SIZE); + quiche_config_set_max_stream_window(ctx->cfg, 10 * H3_STREAM_WINDOW_SIZE); quiche_config_set_application_protos(ctx->cfg, (uint8_t *) QUICHE_H3_APPLICATION_PROTOCOL, @@ -1166,6 +1272,11 @@ static CURLcode cf_connect_start(struct Curl_cfilter *cf, } #endif + /* we do not get a setup event for the initial transfer */ + result 
= h3_data_setup(cf, data); + if(result) + return result; + result = cf_flush_egress(cf, data); if(result) return result; @@ -1293,7 +1404,6 @@ static void cf_quiche_close(struct Curl_cfilter *cf, struct Curl_easy *data) { struct cf_quiche_ctx *ctx = cf->ctx; - (void)data; if(ctx) { if(ctx->qconn) { (void)quiche_conn_close(ctx->qconn, TRUE, 0, NULL, 0); diff --git a/lib/vquic/vquic.c b/lib/vquic/vquic.c index 87a45255e..87dd1a75d 100644 --- a/lib/vquic/vquic.c +++ b/lib/vquic/vquic.c @@ -22,12 +22,25 @@ * ***************************************************************************/ +/* WIP, experimental: use recvmmsg() on linux + * we have no configure check, yet + * and also it is only available for _GNU_SOURCE, which + * we do not use otherwise. +#define HAVE_SENDMMSG + */ +#if defined(HAVE_SENDMMSG) +#define _GNU_SOURCE +#include <sys/socket.h> +#undef _GNU_SOURCE +#endif + #include "curl_setup.h" #ifdef HAVE_FCNTL_H #include <fcntl.h> #endif #include "urldata.h" +#include "bufq.h" #include "dynbuf.h" #include "cfilters.h" #include "curl_log.h" @@ -51,6 +64,10 @@ #define QLOGMODE O_WRONLY|O_CREAT #endif +#define NW_CHUNK_SIZE (64 * 1024) +#define NW_SEND_CHUNKS 2 + + void Curl_quic_ver(char *p, size_t len) { #if defined(USE_NGTCP2) && defined(USE_NGHTTP3) @@ -62,17 +79,10 @@ void Curl_quic_ver(char *p, size_t len) #endif } -CURLcode vquic_ctx_init(struct cf_quic_ctx *qctx, size_t pktbuflen) +CURLcode vquic_ctx_init(struct cf_quic_ctx *qctx) { - qctx->num_blocked_pkt = 0; - qctx->num_blocked_pkt_sent = 0; - memset(&qctx->blocked_pkt, 0, sizeof(qctx->blocked_pkt)); - - qctx->pktbuflen = pktbuflen; - qctx->pktbuf = malloc(qctx->pktbuflen); - if(!qctx->pktbuf) - return CURLE_OUT_OF_MEMORY; - + Curl_bufq_init2(&qctx->sendbuf, NW_CHUNK_SIZE, NW_SEND_CHUNKS, + BUFQ_OPT_SOFT_LIMIT); #if defined(__linux__) && defined(UDP_SEGMENT) && defined(HAVE_SENDMSG) qctx->no_gso = FALSE; #else @@ -84,8 +94,7 @@ CURLcode vquic_ctx_init(struct cf_quic_ctx *qctx, size_t pktbuflen) void 
vquic_ctx_free(struct cf_quic_ctx *qctx) { - free(qctx->pktbuf); - qctx->pktbuf = NULL; + Curl_bufq_free(&qctx->sendbuf); } static CURLcode send_packet_no_gso(struct Curl_cfilter *cf, @@ -215,11 +224,11 @@ static CURLcode send_packet_no_gso(struct Curl_cfilter *cf, return CURLE_OK; } -CURLcode vquic_send_packet(struct Curl_cfilter *cf, - struct Curl_easy *data, - struct cf_quic_ctx *qctx, - const uint8_t *pkt, size_t pktlen, size_t gsolen, - size_t *psent) +CURLcode vquic_send_packets(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx, + const uint8_t *pkt, size_t pktlen, size_t gsolen, + size_t *psent) { if(qctx->no_gso && pktlen > gsolen) { return send_packet_no_gso(cf, data, qctx, pkt, pktlen, gsolen, psent); @@ -228,53 +237,271 @@ CURLcode vquic_send_packet(struct Curl_cfilter *cf, return do_sendmsg(cf, data, qctx, pkt, pktlen, gsolen, psent); } +CURLcode vquic_flush(struct Curl_cfilter *cf, struct Curl_easy *data, + struct cf_quic_ctx *qctx) +{ + const unsigned char *buf; + size_t blen, sent; + CURLcode result; + size_t gsolen; + + while(Curl_bufq_peek(&qctx->sendbuf, &buf, &blen)) { + gsolen = qctx->gsolen; + if(qctx->split_len) { + gsolen = qctx->split_gsolen; + if(blen > qctx->split_len) + blen = qctx->split_len; + } + DEBUGF(LOG_CF(data, cf, "vquic_send(len=%zu, gso=%zu)", + blen, gsolen)); + result = vquic_send_packets(cf, data, qctx, buf, blen, gsolen, &sent); + DEBUGF(LOG_CF(data, cf, "vquic_send(len=%zu, gso=%zu) -> %d, sent=%zu", + blen, gsolen, result, sent)); + if(result) { + if(result == CURLE_AGAIN) { + Curl_bufq_skip(&qctx->sendbuf, sent); + if(qctx->split_len) + qctx->split_len -= sent; + } + return result; + } + Curl_bufq_skip(&qctx->sendbuf, sent); + if(qctx->split_len) + qctx->split_len -= sent; + } + return CURLE_OK; +} -void vquic_push_blocked_pkt(struct Curl_cfilter *cf, - struct cf_quic_ctx *qctx, - const uint8_t *pkt, size_t pktlen, size_t gsolen) +CURLcode vquic_send(struct Curl_cfilter *cf, struct Curl_easy 
*data, + struct cf_quic_ctx *qctx, size_t gsolen) { - struct vquic_blocked_pkt *blkpkt; + qctx->gsolen = gsolen; + return vquic_flush(cf, data, qctx); +} - (void)cf; - assert(qctx->num_blocked_pkt < - sizeof(qctx->blocked_pkt) / sizeof(qctx->blocked_pkt[0])); +CURLcode vquic_send_tail_split(struct Curl_cfilter *cf, struct Curl_easy *data, + struct cf_quic_ctx *qctx, size_t gsolen, + size_t tail_len, size_t tail_gsolen) +{ + DEBUGASSERT(Curl_bufq_len(&qctx->sendbuf) > tail_len); + qctx->split_len = Curl_bufq_len(&qctx->sendbuf) - tail_len; + qctx->split_gsolen = gsolen; + qctx->gsolen = tail_gsolen; + DEBUGF(LOG_CF(data, cf, "vquic_send_tail_split: [%zu gso=%zu][%zu gso=%zu]", + qctx->split_len, qctx->split_gsolen, + tail_len, qctx->gsolen)); + return vquic_flush(cf, data, qctx); +} + +#ifdef HAVE_SENDMMSG +static CURLcode recvmmsg_packets(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx, + size_t max_pkts, + vquic_recv_pkt_cb *recv_cb, void *userp) +{ +#define MMSG_NUM 64 + struct iovec msg_iov[MMSG_NUM]; + struct mmsghdr mmsg[MMSG_NUM]; + uint8_t bufs[MMSG_NUM][2*1024]; + struct sockaddr_storage remote_addr[MMSG_NUM]; + size_t total_nread, pkts; + int mcount, i, n; + CURLcode result = CURLE_OK; + + DEBUGASSERT(max_pkts > 0); + pkts = 0; + total_nread = 0; + while(pkts < max_pkts) { + n = (int)CURLMIN(MMSG_NUM, max_pkts); + memset(&mmsg, 0, sizeof(mmsg)); + for(i = 0; i < n; ++i) { + msg_iov[i].iov_base = bufs[i]; + msg_iov[i].iov_len = (int)sizeof(bufs[i]); + mmsg[i].msg_hdr.msg_iov = &msg_iov[i]; + mmsg[i].msg_hdr.msg_iovlen = 1; + mmsg[i].msg_hdr.msg_name = &remote_addr[i]; + mmsg[i].msg_hdr.msg_namelen = sizeof(remote_addr[i]); + } - blkpkt = &qctx->blocked_pkt[qctx->num_blocked_pkt++]; + while((mcount = recvmmsg(qctx->sockfd, mmsg, n, 0, NULL)) == -1 && + SOCKERRNO == EINTR) + ; + if(mcount == -1) { + if(SOCKERRNO == EAGAIN || SOCKERRNO == EWOULDBLOCK) { + DEBUGF(LOG_CF(data, cf, "ingress, recvmmsg -> EAGAIN")); + goto out; + } + 
if(!cf->connected && SOCKERRNO == ECONNREFUSED) { + const char *r_ip; + int r_port; + Curl_cf_socket_peek(cf->next, data, NULL, NULL, + &r_ip, &r_port, NULL, NULL); + failf(data, "QUIC: connection to %s port %u refused", + r_ip, r_port); + result = CURLE_COULDNT_CONNECT; + goto out; + } + failf(data, "QUIC: recvmsg() unexpectedly returned %d (errno=%d)", + mcount, SOCKERRNO); + result = CURLE_RECV_ERROR; + goto out; + } - blkpkt->pkt = pkt; - blkpkt->pktlen = pktlen; - blkpkt->gsolen = gsolen; + DEBUGF(LOG_CF(data, cf, "recvmmsg() -> %d packets", mcount)); + pkts += mcount; + for(i = 0; i < mcount; ++i) { + total_nread += mmsg[i].msg_len; + result = recv_cb(bufs[i], mmsg[i].msg_len, + mmsg[i].msg_hdr.msg_name, mmsg[i].msg_hdr.msg_namelen, + 0, userp); + if(result) + goto out; + } + } + +out: + DEBUGF(LOG_CF(data, cf, "recvd %zu packets with %zd bytes -> %d", + pkts, total_nread, result)); + return result; } -CURLcode vquic_send_blocked_pkt(struct Curl_cfilter *cf, +#elif defined(HAVE_SENDMSG) +static CURLcode recvmsg_packets(struct Curl_cfilter *cf, struct Curl_easy *data, - struct cf_quic_ctx *qctx) + struct cf_quic_ctx *qctx, + size_t max_pkts, + vquic_recv_pkt_cb *recv_cb, void *userp) { - size_t sent; - CURLcode curlcode; - struct vquic_blocked_pkt *blkpkt; + struct iovec msg_iov; + struct msghdr msg; + uint8_t buf[64*1024]; + struct sockaddr_storage remote_addr; + size_t total_nread, pkts; + ssize_t nread; + CURLcode result = CURLE_OK; - (void)cf; - for(; qctx->num_blocked_pkt_sent < qctx->num_blocked_pkt; - ++qctx->num_blocked_pkt_sent) { - blkpkt = &qctx->blocked_pkt[qctx->num_blocked_pkt_sent]; - curlcode = vquic_send_packet(cf, data, qctx, blkpkt->pkt, - blkpkt->pktlen, blkpkt->gsolen, &sent); - - if(curlcode) { - if(curlcode == CURLE_AGAIN) { - blkpkt->pkt += sent; - blkpkt->pktlen -= sent; + msg_iov.iov_base = buf; + msg_iov.iov_len = (int)sizeof(buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &msg_iov; + msg.msg_iovlen = 1; + + 
DEBUGASSERT(max_pkts > 0); + for(pkts = 0, total_nread = 0; pkts < max_pkts;) { + msg.msg_name = &remote_addr; + msg.msg_namelen = sizeof(remote_addr); + while((nread = recvmsg(qctx->sockfd, &msg, 0)) == -1 && + SOCKERRNO == EINTR) + ; + if(nread == -1) { + if(SOCKERRNO == EAGAIN || SOCKERRNO == EWOULDBLOCK) { + DEBUGF(LOG_CF(data, cf, "ingress, recvmsg -> EAGAIN")); + goto out; } - return curlcode; + if(!cf->connected && SOCKERRNO == ECONNREFUSED) { + const char *r_ip; + int r_port; + Curl_cf_socket_peek(cf->next, data, NULL, NULL, + &r_ip, &r_port, NULL, NULL); + failf(data, "QUIC: connection to %s port %u refused", + r_ip, r_port); + result = CURLE_COULDNT_CONNECT; + goto out; + } + failf(data, "QUIC: recvmsg() unexpectedly returned %zd (errno=%d)", + nread, SOCKERRNO); + result = CURLE_RECV_ERROR; + goto out; + } + + ++pkts; + total_nread += (size_t)nread; + result = recv_cb(buf, (size_t)nread, msg.msg_name, msg.msg_namelen, + 0, userp); + if(result) + goto out; + } + +out: + DEBUGF(LOG_CF(data, cf, "recvd %zu packets with %zd bytes -> %d", + pkts, total_nread, result)); + return result; +} + +#else /* HAVE_SENDMMSG || HAVE_SENDMSG */ +CURLcode recvfrom_packets(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx, + size_t max_pkts, + vquic_recv_pkt_cb *recv_cb, void *userp) +{ + uint8_t buf[64*1024]; + int bufsize = (int)sizeof(buf); + struct sockaddr_storage remote_addr; + socklen_t remote_addrlen = sizeof(remote_addr); + size_t total_nread, pkts; + ssize_t nread; + CURLcode result = CURLE_OK; + + DEBUGASSERT(max_pkts > 0); + for(pkts = 0, total_nread = 0; pkts < max_pkts;) { + while((nread = recvfrom(qctx->sockfd, (char *)buf, bufsize, 0, + (struct sockaddr *)&remote_addr, + &remote_addrlen)) == -1 && + SOCKERRNO == EINTR) + ; + if(nread == -1) { + if(SOCKERRNO == EAGAIN || SOCKERRNO == EWOULDBLOCK) { + DEBUGF(LOG_CF(data, cf, "ingress, recvfrom -> EAGAIN")); + goto out; + } + if(!cf->connected && SOCKERRNO == ECONNREFUSED) { + const 
char *r_ip; + int r_port; + Curl_cf_socket_peek(cf->next, data, NULL, NULL, + &r_ip, &r_port, NULL, NULL); + failf(data, "QUIC: connection to %s port %u refused", + r_ip, r_port); + result = CURLE_COULDNT_CONNECT; + goto out; + } + failf(data, "QUIC: recvfrom() unexpectedly returned %zd (errno=%d)", + nread, SOCKERRNO); + result = CURLE_RECV_ERROR; + goto out; } + + ++pkts; + total_nread += (size_t)nread; + result = recv_cb(buf, (size_t)nread, &remote_addr, remote_addrlen, + 0, userp); + if(result) + goto out; } - qctx->num_blocked_pkt = 0; - qctx->num_blocked_pkt_sent = 0; +out: + DEBUGF(LOG_CF(data, cf, "recvd %zu packets with %zd bytes -> %d", + pkts, total_nread, result)); + return result; +} +#endif /* !HAVE_SENDMMSG && !HAVE_SENDMSG */ - return CURLE_OK; +CURLcode vquic_recv_packets(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx, + size_t max_pkts, + vquic_recv_pkt_cb *recv_cb, void *userp) +{ +#if defined(HAVE_SENDMMSG) + return recvmmsg_packets(cf, data, qctx, max_pkts, recv_cb, userp); +#elif defined(HAVE_SENDMSG) + return recvmsg_packets(cf, data, qctx, max_pkts, recv_cb, userp); +#else + return recvfrom_packets(cf, data, qctx, max_pkts, recv_cb, userp); +#endif } /* diff --git a/lib/vquic/vquic_int.h b/lib/vquic/vquic_int.h index 42aba39b0..f0129e19d 100644 --- a/lib/vquic/vquic_int.h +++ b/lib/vquic/vquic_int.h @@ -25,47 +25,63 @@ ***************************************************************************/ #include "curl_setup.h" +#include "bufq.h" #ifdef ENABLE_QUIC -struct vquic_blocked_pkt { - const uint8_t *pkt; - size_t pktlen; - size_t gsolen; -}; +#define MAX_PKT_BURST 10 +#define MAX_UDP_PAYLOAD_SIZE 1452 struct cf_quic_ctx { - curl_socket_t sockfd; - struct sockaddr_storage local_addr; - socklen_t local_addrlen; - struct vquic_blocked_pkt blocked_pkt[2]; - uint8_t *pktbuf; - /* the number of entries in blocked_pkt */ - size_t num_blocked_pkt; - size_t num_blocked_pkt_sent; - /* the packets blocked by sendmsg 
(EAGAIN or EWOULDBLOCK) */ - size_t pktbuflen; - /* the number of processed entries in blocked_pkt */ - bool no_gso; + curl_socket_t sockfd; /* connected UDP socket */ + struct sockaddr_storage local_addr; /* address socket is bound to */ + socklen_t local_addrlen; /* length of local address */ + + struct bufq sendbuf; /* buffer for sending one or more packets */ + size_t gsolen; /* length of individual packets in send buf */ + size_t split_len; /* if != 0, buffer length after which GSO differs */ + size_t split_gsolen; /* length of individual packets after split_len */ + bool no_gso; /* do not use gso on sending */ }; -CURLcode vquic_ctx_init(struct cf_quic_ctx *qctx, size_t pktbuflen); +CURLcode vquic_ctx_init(struct cf_quic_ctx *qctx); void vquic_ctx_free(struct cf_quic_ctx *qctx); -CURLcode vquic_send_packet(struct Curl_cfilter *cf, - struct Curl_easy *data, - struct cf_quic_ctx *qctx, - const uint8_t *pkt, size_t pktlen, size_t gsolen, - size_t *psent); +CURLcode vquic_send_packets(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx, + const uint8_t *pkt, size_t pktlen, size_t gsolen, + size_t *psent); void vquic_push_blocked_pkt(struct Curl_cfilter *cf, struct cf_quic_ctx *qctx, const uint8_t *pkt, size_t pktlen, size_t gsolen); -CURLcode vquic_send_blocked_pkt(struct Curl_cfilter *cf, - struct Curl_easy *data, - struct cf_quic_ctx *qctx); +CURLcode vquic_send_blocked_pkts(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx); + +CURLcode vquic_send(struct Curl_cfilter *cf, struct Curl_easy *data, + struct cf_quic_ctx *qctx, size_t gsolen); + +CURLcode vquic_send_tail_split(struct Curl_cfilter *cf, struct Curl_easy *data, + struct cf_quic_ctx *qctx, size_t gsolen, + size_t tail_len, size_t tail_gsolen); +CURLcode vquic_flush(struct Curl_cfilter *cf, struct Curl_easy *data, + struct cf_quic_ctx *qctx); + + +typedef CURLcode vquic_recv_pkt_cb(const unsigned char *pkt, size_t pktlen, + struct sockaddr_storage 
*remote_addr, + socklen_t remote_addrlen, int ecn, + void *userp); + +CURLcode vquic_recv_packets(struct Curl_cfilter *cf, + struct Curl_easy *data, + struct cf_quic_ctx *qctx, + size_t max_pkts, + vquic_recv_pkt_cb *recv_cb, void *userp); #endif /* !ENABLE_QUIC */ diff --git a/tests/http/test_02_download.py b/tests/http/test_02_download.py index 4b131594b..5804adaf8 100644 --- a/tests/http/test_02_download.py +++ b/tests/http/test_02_download.py @@ -114,6 +114,8 @@ class TestDownload: httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 shaky here") curl = CurlClient(env=env) urln = f'https://{env.authority_for(env.domain1, proto)}/data.json?[0-499]' r = curl.http_download(urls=[urln], alpn_proto=proto) @@ -223,6 +225,8 @@ class TestDownload: httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") count = 20 urln = f'https://{env.authority_for(env.domain1, proto)}/data-10m?[0-{count-1}]' curl = CurlClient(env=env) diff --git a/tests/http/test_03_goaway.py b/tests/http/test_03_goaway.py index e40ae35b0..5da60aa83 100644 --- a/tests/http/test_03_goaway.py +++ b/tests/http/test_03_goaway.py @@ -81,6 +81,8 @@ class TestGoAway: @pytest.mark.skipif(condition=not Env.have_h3(), reason="h3 not supported") def test_03_02_h3_goaway(self, env: Env, httpd, nghttpx, repeat): proto = 'h3' + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") count = 3 self.r = None def long_run(): diff --git a/tests/http/test_05_errors.py b/tests/http/test_05_errors.py index dc14d3bd0..f27ba8c39 100644 --- a/tests/http/test_05_errors.py +++ b/tests/http/test_05_errors.py @@ -52,6 +52,8 @@ class TestErrors: proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and 
env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") count = 1 curl = CurlClient(env=env) urln = f'https://{env.authority_for(env.domain1, proto)}' \ @@ -73,8 +75,8 @@ class TestErrors: proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") - if proto == 'h3' and env.curl_uses_lib('quiche'): - pytest.skip("quiche not reliable, sometimes reports success") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") count = 20 curl = CurlClient(env=env) urln = f'https://{env.authority_for(env.domain1, proto)}' \ diff --git a/tests/http/test_07_upload.py b/tests/http/test_07_upload.py index 40f178a7c..795e5f2fe 100644 --- a/tests/http/test_07_upload.py +++ b/tests/http/test_07_upload.py @@ -52,6 +52,8 @@ class TestUpload: def test_07_01_upload_1_small(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 fails here") data = '0123456789' curl = CurlClient(env=env) url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-0]' @@ -66,6 +68,8 @@ class TestUpload: def test_07_02_upload_1_large(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 fails here") fdata = os.path.join(env.gen_dir, 'data-100k') curl = CurlClient(env=env) url = f'https://{env.authority_for(env.domain1, proto)}/curltest/echo?id=[0-0]' @@ -81,6 +85,8 @@ class TestUpload: def test_07_10_upload_sequential(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") count = 50 data = '0123456789' curl = CurlClient(env=env) @@ -97,6 +103,8 @@ class TestUpload: def test_07_11_upload_parallel(self, env: Env, httpd, nghttpx, 
repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") # limit since we use a separate connection in h1 count = 50 data = '0123456789' @@ -115,6 +123,8 @@ class TestUpload: def test_07_20_upload_seq_large(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") fdata = os.path.join(env.gen_dir, 'data-100k') count = 50 curl = CurlClient(env=env) @@ -133,6 +143,8 @@ class TestUpload: def test_07_12_upload_seq_large(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") fdata = os.path.join(env.gen_dir, 'data-10m') count = 2 curl = CurlClient(env=env) @@ -151,6 +163,8 @@ class TestUpload: def test_07_20_upload_parallel(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") # limit since we use a separate connection in h1 count = 50 data = '0123456789' @@ -169,8 +183,8 @@ class TestUpload: def test_07_21_upload_parallel_large(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") - if proto == 'h3' and env.curl_uses_lib('quiche'): - pytest.skip("quiche stalls on parallel, large uploads, unless --trace is used???") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 stalls here") fdata = os.path.join(env.gen_dir, 'data-100k') # limit since we use a separate connection in h1 count = 50 @@ -187,6 +201,8 @@ class TestUpload: def test_07_30_put_100k(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): 
pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 fails here") fdata = os.path.join(env.gen_dir, 'data-100k') count = 1 curl = CurlClient(env=env) @@ -206,6 +222,8 @@ class TestUpload: def test_07_31_put_10m(self, env: Env, httpd, nghttpx, repeat, proto): if proto == 'h3' and not env.have_h3(): pytest.skip("h3 not supported") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 fails here") fdata = os.path.join(env.gen_dir, 'data-10m') count = 1 curl = CurlClient(env=env) diff --git a/tests/http/test_08_caddy.py b/tests/http/test_08_caddy.py index 66c7c900e..6ce34ec89 100644 --- a/tests/http/test_08_caddy.py +++ b/tests/http/test_08_caddy.py @@ -57,6 +57,7 @@ class TestCaddy: @pytest.fixture(autouse=True, scope='class') def _class_scope(self, env, caddy): self._make_docs_file(docs_dir=caddy.docs_dir, fname='data1.data', fsize=1024*1024) + self._make_docs_file(docs_dir=caddy.docs_dir, fname='data5.data', fsize=5*1024*1024) self._make_docs_file(docs_dir=caddy.docs_dir, fname='data10.data', fsize=10*1024*1024) self._make_docs_file(docs_dir=caddy.docs_dir, fname='data100.data', fsize=100*1024*1024) @@ -65,6 +66,8 @@ class TestCaddy: def test_08_01_download_1(self, env: Env, caddy: Caddy, repeat, proto): if proto == 'h3' and not env.have_h3_curl(): pytest.skip("h3 not supported in curl") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 itself crashes") curl = CurlClient(env=env) url = f'https://{env.domain1}:{caddy.port}/data.json' r = curl.http_download(urls=[url], alpn_proto=proto) @@ -77,6 +80,8 @@ class TestCaddy: repeat, proto): if proto == 'h3' and not env.have_h3_curl(): pytest.skip("h3 not supported in curl") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 itself crashes") count = 50 curl = CurlClient(env=env) urln = f'https://{env.domain1}:{caddy.port}/data1.data?[0-{count-1}]' @@ -92,7 +97,9 @@ class TestCaddy: repeat, proto): if proto == 'h3' and 
not env.have_h3_curl(): pytest.skip("h3 not supported in curl") - count = 50 + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 itself crashes") + count = 20 curl = CurlClient(env=env) urln = f'https://{env.domain1}:{caddy.port}/data1.data?[0-{count-1}]' r = curl.http_download(urls=[urln], alpn_proto=proto, extra_args=[ @@ -106,14 +113,31 @@ class TestCaddy: else: assert r.total_connects == 1 + # download 5MB files sequentially + @pytest.mark.parametrize("proto", ['h2', 'h3']) + def test_08_04a_download_10mb_sequential(self, env: Env, caddy: Caddy, + repeat, proto): + if proto == 'h3' and not env.have_h3_curl(): + pytest.skip("h3 not supported in curl") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 itself crashes") + count = 40 + curl = CurlClient(env=env) + urln = f'https://{env.domain1}:{caddy.port}/data5.data?[0-{count-1}]' + r = curl.http_download(urls=[urln], alpn_proto=proto) + assert r.exit_code == 0 + r.check_stats(count=count, exp_status=200) + # sequential transfers will open 1 connection + assert r.total_connects == 1 + # download 10MB files sequentially @pytest.mark.parametrize("proto", ['h2', 'h3']) - def test_08_04_download_10mb_sequential(self, env: Env, caddy: Caddy, + def test_08_04b_download_10mb_sequential(self, env: Env, caddy: Caddy, repeat, proto): if proto == 'h3' and not env.have_h3_curl(): pytest.skip("h3 not supported in curl") - if proto == 'h3' and env.curl_uses_lib('quiche'): - pytest.skip("quiche stalls after a certain amount of data") + if proto == 'h3' and env.curl_uses_lib('msh3'): + pytest.skip("msh3 itself crashes") count = 20 curl = CurlClient(env=env) urln = f'https://{env.domain1}:{caddy.port}/data10.data?[0-{count-1}]' @@ -129,8 +153,8 @@ class TestCaddy: repeat, proto): if proto == 'h3' and not env.have_h3_curl(): pytest.skip("h3 not supported in curl") - if proto == 'h3' and env.curl_uses_lib('quiche'): - pytest.skip("quiche stalls after a certain amount of data") + if proto == 'h3' 
and env.curl_uses_lib('msh3'): + pytest.skip("msh3 itself crashes") count = 50 curl = CurlClient(env=env) urln = f'https://{env.domain1}:{caddy.port}/data10.data?[0-{count-1}]' diff --git a/tests/unit/unit2601.c b/tests/unit/unit2601.c index de2a9f9d6..b01027400 100644 --- a/tests/unit/unit2601.c +++ b/tests/unit/unit2601.c @@ -243,4 +243,5 @@ UNITTEST_START check_bufq(8, 8000, 10, 1234, 1234, BUFQ_OPT_NONE); check_bufq(8, 1024, 4, 129, 127, BUFQ_OPT_NO_SPARES); + return 0; UNITTEST_STOP |