diff --git a/fw/cache.c b/fw/cache.c index 79c69acb2..e7679f801 100644 --- a/fw/cache.c +++ b/fw/cache.c @@ -27,7 +27,6 @@ #include #include #include -#include #undef DEBUG #if DBG_CACHE > 0 diff --git a/fw/hpack.c b/fw/hpack.c index 999e7529f..cc861db10 100644 --- a/fw/hpack.c +++ b/fw/hpack.c @@ -804,7 +804,7 @@ tfw_hpack_set_entry(TfwPool *__restrict h_pool, TfwMsgParseIter *__restrict it, } T_DBG3("%s: entry created, d_hdr->nchunks=%u, d_hdr->len=%lu," - " d_hdr->flags=%hu, it->nm_len=%lu, it->nm_num=%u, it->tag=%u\n", + " d_hdr->flags=%hu, it->nm_len=%u, it->nm_num=%u, it->tag=%u\n", __func__, d_hdr->nchunks, d_hdr->len, d_hdr->flags, it->nm_len, it->nm_num, it->tag); @@ -1176,6 +1176,7 @@ tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, goto err_dt; et->window = htbl_sz; + et->min_window = HPACK_ENC_TABLE_MAX_SIZE; et->rb_size = HPACK_ENC_TABLE_MAX_SIZE; if (!(et->pool = __tfw_pool_new(HPACK_ENC_TABLE_MAX_SIZE, owner))) goto err_et; @@ -3718,71 +3719,49 @@ tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, * into the HTTP/2 HPACK format. */ int -tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr) +tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, + bool dyn_indexing) { - return __tfw_hpack_encode(resp, hdr, true, true, true); + return __tfw_hpack_encode(resp, hdr, true, dyn_indexing, true); } void -tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, unsigned short new_size) +tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, + unsigned int requested_size) { - if (new_size > HPACK_ENC_TABLE_MAX_SIZE) { - T_WARN("Client requests hpack table size (%hu), which is " - "greater than HPACK_ENC_TABLE_MAX_SIZE.", new_size); - new_size = HPACK_ENC_TABLE_MAX_SIZE; - } - T_DBG3("%s: tbl->rb_len=%hu, tbl->size=%hu, tbl->window=%hu," - " new_size=%hu\n", __func__, tbl->rb_len, tbl->size, - tbl->window, new_size); + " requested_size=%u\n", __func__, tbl->rb_len, tbl->size, + tbl->window, requested_size); - /* - * RFC7541#section-4.2: - * Multiple updates to the maximum table size can occur between the - * transmission of two header blocks. In the case that this size is - * changed more than once in this interval, the smallest maximum table - * size that occurs in that interval MUST be signaled in a dynamic - * table size update. - */ - if (tbl->window != new_size && (likely(!tbl->wnd_changed) - || unlikely(!tbl->window) || new_size < tbl->window)) - { - if (tbl->size > new_size) - tfw_hpack_rbuf_calc(tbl, new_size, NULL, - (TfwHPackETblIter *)tbl); - WARN_ON_ONCE(tbl->rb_len > tbl->size); + if (requested_size == tbl->window) + return; - tbl->window = new_size; - tbl->wnd_changed = true; - } -} + unsigned short new_size = min_t(unsigned int, requested_size, + HPACK_ENC_TABLE_MAX_SIZE); + BUILD_BUG_ON(HPACK_ENC_TABLE_MAX_SIZE > USHRT_MAX || + sizeof(new_size) != sizeof(tbl->window)); -int -tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream) -{ - TfwHPackInt tmp = {}; - TfwStr dst = {}; - char *data; - unsigned int _; - int r = 0; - - WARN_ON_ONCE(!tbl->wnd_changed); - write_int(tbl->window, 0x1F, 0x20, &tmp); - - data = ss_skb_data_ptr_by_offset(stream->xmit.skb_head, - FRAME_HEADER_SIZE); - BUG_ON(!data); + /** + * RFC7541#section-4.2: + * + * The smallest maximum table size that occurs in that interval MUST be + * signaled in a dynamic table size update. The final maximum size is + * always signaled, resulting in at most two dynamic table size updates. + * This ensures that the decoder is able to perform eviction based on + * reductions in dynamic table size. + */ + if (new_size < tbl->min_window) + tbl->min_window = new_size; - r = ss_skb_get_room_w_frag(stream->xmit.skb_head, - stream->xmit.skb_head, - data, tmp.sz, &dst, &_); - if (unlikely(r)) - return r; + if (tbl->size > new_size) + tfw_hpack_rbuf_calc(tbl, new_size, NULL, + (TfwHPackETblIter *)tbl); - memcpy_fast(dst.data, tmp.buf, tmp.sz); - stream->xmit.h_len += tmp.sz; - tbl->wnd_changed = false; + tbl->window = new_size; + tbl->wnd_changed = true; + WARN_ON_ONCE(tbl->rb_len > tbl->size); - return 0; + T_DBG3("%s: New hpack encoder table size has been changed min=%u " + "new=%u\n", __func__, tbl->min_window, tbl->window); } diff --git a/fw/hpack.h b/fw/hpack.h index 1a139b5bb..19dcd2721 100644 --- a/fw/hpack.h +++ b/fw/hpack.h @@ -90,6 +90,7 @@ typedef struct { * * @window - maximum pseudo-length of the dynamic table (in bytes); this * value used as threshold to flushing old entries; + * @min_window - minimum applied @window before sending dynamic table update; * @wnd_changed - flag indicates, that window was changed by settings update; * @rbuf - pointer to the ring buffer; * @root - pointer to the root node of binary tree; @@ -100,6 +101,7 @@ typedef struct { typedef struct { TFW_HPACK_ETBL_COMMON; unsigned short window; + unsigned short min_window; bool wnd_changed; char *rbuf; TfwHPackNode *root; @@ -302,11 +304,12 @@ void write_int(unsigned long index, unsigned short max, unsigned short mask, int tfw_hpack_init(TfwHPack *__restrict hp, TfwClientMem *owner, unsigned int htbl_sz); void tfw_hpack_clean(TfwHPack *__restrict hp); -int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr); +int tfw_hpack_transform(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, + bool dyn_indexing); int tfw_hpack_encode(TfwHttpResp *__restrict resp, TfwStr *__restrict hdr, bool use_pool, bool dyn_indexing); void tfw_hpack_set_rbuf_size(TfwHPackETbl *__restrict tbl, - unsigned short new_size); + unsigned int new_size); int tfw_hpack_decode(TfwHPack *__restrict hp, unsigned char *__restrict src, unsigned long n, TfwHttpReq *__restrict req, unsigned int *__restrict parsed); @@ -314,7 +317,6 @@ int tfw_hpack_cache_decode_expand(TfwHPack *__restrict hp, TfwHttpResp *__restrict resp, unsigned char *__restrict src, unsigned long n, TfwDecodeCacheIter *__restrict cd_iter); -int tfw_hpack_enc_tbl_write_sz(TfwHPackETbl *__restrict tbl, TfwStream *stream); static inline unsigned int tfw_hpack_int_size(unsigned long index, unsigned short max) diff --git a/fw/http.c b/fw/http.c index 7cf2f90f0..16df20a05 100644 --- a/fw/http.c +++ b/fw/http.c @@ -85,7 +85,6 @@ #include #include #include -#include #undef DEBUG #if DBG_HTTP > 0 @@ -1818,31 +1817,12 @@ do { \ } } -static void -__tfw_http_free_cleanup(TfwHttpMsgCleanup *cleanup) -{ - int i; - struct sk_buff *skb; - - while ((skb = ss_skb_dequeue(&cleanup->skb_head))) - __ss_kfree_skb(skb); - - for (i = 0; i < cleanup->pages_sz; i++) - /* - * Pass "true" even for non recyclable pages, relying on check - * pp_magic == PP_SIGNATURE in napi_pp_put_page(), which avoid - * recycling of non page_pool pages. Overhead seems the same - * as to have/maintain flag for each fragment. - */ - skb_page_unref(cleanup->pages[i], true); -} - static void __tfw_http_req_cleanup(TfwHttpReq *req) { if (!req->cleanup) return; - __tfw_http_free_cleanup(req->cleanup); + ss_skb_free_cleanup(req->cleanup); req->cleanup = NULL; } @@ -3927,7 +3907,7 @@ tfw_h1_adjust_req(TfwHttpReq *req) req->vhost, TFW_VHOST_HDRMOD_REQ); - req->cleanup = tfw_pool_alloc(hm->pool, sizeof(TfwHttpMsgCleanup)); + req->cleanup = tfw_pool_alloc(hm->pool, sizeof(TfwSkbCleanup)); if (unlikely(!req->cleanup)) return -ENOMEM; req->cleanup->pages_sz = 0; @@ -4281,10 +4261,10 @@ tfw_h2_adjust_req(TfwHttpReq *req) bool need_cl = req->body.len && TFW_STR_EMPTY(&ht->tbl[TFW_HTTP_HDR_CONTENT_LENGTH]); - req->cleanup = tfw_pool_alloc(req->pool, sizeof(TfwHttpMsgCleanup)); + req->cleanup = tfw_pool_alloc(req->pool, sizeof(TfwSkbCleanup)); if (unlikely(!req->cleanup)) return -ENOMEM; - memset(req->cleanup, 0, sizeof(TfwHttpMsgCleanup)); + memset(req->cleanup, 0, sizeof(TfwSkbCleanup)); if (need_cl) { cl_data_len = tfw_ultoa(req->body.len, cl_data, TFW_ULTOA_BUF_SIZ); @@ -4624,7 +4604,7 @@ tfw_http_resp_get_conn_flags(TfwHttpResp *resp) * headers will be avoided. */ static int -tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) +tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwSkbCleanup *cleanup) { void *opaque_data = TFW_SKB_CB(resp->msg.skb_head)->opaque_data; TfwMsgIter *iter = &resp->iter; @@ -4650,7 +4630,7 @@ tfw_http_resp_set_empty_skb_head(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) } static int -tfw_h1_resp_cutoff_headers(TfwHttpResp *resp, TfwHttpMsgCleanup *cleanup) +tfw_h1_resp_cutoff_headers(TfwHttpResp *resp, TfwSkbCleanup *cleanup) { TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpReq *req = resp->req; @@ -4748,7 +4728,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp) TfwHttpReq *req = resp->req; TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwMsgIter *iter = &resp->iter; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; const TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, TFW_VHOST_HDRMOD_RESP); @@ -4801,7 +4781,7 @@ tfw_http_adjust_resp(TfwHttpResp *resp) r = tfw_http_msg_expand_from_pool(hm, &STR_CRLF); clean: - __tfw_http_free_cleanup(&cleanup); + ss_skb_free_cleanup(&cleanup); return r; } @@ -5397,7 +5377,7 @@ tfw_h2_hpack_encode_headers(TfwHttpResp *resp, const TfwHdrMods *h_mods) || tgt->flags & TFW_STR_TRAILER_HDR) continue; - r = tfw_hpack_transform(resp, tgt); + r = tfw_hpack_transform(resp, tgt, true); if (unlikely(r)) return r; } @@ -5905,7 +5885,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) TfwHttpReq *req = resp->req; TfwHttpMsg *hm = (TfwHttpMsg *)resp; TfwHttpTransIter *mit = &resp->mit; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwStr codings = {}; const TfwHdrMods *h_mods = tfw_vhost_get_hdr_mods(req->location, req->vhost, @@ -6007,7 +5987,7 @@ tfw_h2_resp_encode_headers(TfwHttpResp *resp) req, resp); SS_SKB_QUEUE_DUMP(&resp->msg.skb_head); - __tfw_http_free_cleanup(&cleanup); + ss_skb_free_cleanup(&cleanup); return r; } diff --git a/fw/http.h b/fw/http.h index c77100f83..b6c9bd334 100644 --- a/fw/http.h +++ b/fw/http.h @@ -343,19 +343,6 @@ typedef struct { long m_date; } TfwHttpCond; -/** - * Represents the data that should be cleaned up after message transformation. - * - * @skb_head - head of skb list that must be freed; - * @pages - pages that must be freed; - * @pages_sz - current number of @pages; - */ -typedef struct { - struct sk_buff *skb_head; - netmem_ref pages[MAX_SKB_FRAGS]; - unsigned char pages_sz; -} TfwHttpMsgCleanup; - /** * HTTP Request. * @@ -404,7 +391,7 @@ struct tfw_http_req_t { TfwHttpSess *sess; TfwClient *peer; void *stale_ce; - TfwHttpMsgCleanup *cleanup; + TfwSkbCleanup *cleanup; TfwHttpCond cond; TfwMsgParseIter pit; HttpTfh tfh; @@ -815,5 +802,4 @@ void tfw_http_extract_request_authority(TfwHttpReq *req); bool tfw_http_mark_is_in_whitlist(unsigned int mark); char *tfw_http_resp_status_line(int status, size_t *len); int tfw_h2_on_send_resp(void *conn, struct sk_buff **skb_head); - #endif /* __TFW_HTTP_H__ */ diff --git a/fw/http2.c b/fw/http2.c index aedd48780..eda81c582 100644 --- a/fw/http2.c +++ b/fw/http2.c @@ -99,9 +99,8 @@ tfw_h2_apply_settings_entry(TfwH2Ctx *ctx, unsigned short id, switch (id) { case HTTP2_SETTINGS_TABLE_SIZE: - dest->hdr_tbl_sz = min_t(unsigned int, - val, HPACK_ENC_TABLE_MAX_SIZE); - tfw_hpack_set_rbuf_size(&ctx->hpack.enc_tbl, dest->hdr_tbl_sz); + tfw_hpack_set_rbuf_size(&ctx->hpack.enc_tbl, val); + dest->hdr_tbl_sz = ctx->hpack.enc_tbl.window; break; case HTTP2_SETTINGS_ENABLE_PUSH: @@ -627,7 +626,7 @@ tfw_h2_hpack_encode_trailer_headers(TfwHttpResp *resp) T_DBG3("%s: hid=%hu, d_num=%hu, nchunks=%u\n", __func__, hid, d_num, ht->tbl[hid].nchunks); - r = tfw_hpack_transform(resp, tgt); + r = tfw_hpack_transform(resp, tgt, false); if (unlikely(r)) goto finish; } diff --git a/fw/http_frame.c b/fw/http_frame.c index 1988bc583..231fb60cf 100644 --- a/fw/http_frame.c +++ b/fw/http_frame.c @@ -199,7 +199,19 @@ do { \ return T_BAD; \ } else if (res == STREAM_FSM_RES_TERM_STREAM) { \ WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); \ - return tfw_h2_current_stream_send_rst((ctx), err); \ + if (ctx->cur_stream != ctx->cur_send_headers) { \ + return tfw_h2_current_stream_send_rst((ctx), \ + err); \ + } else { \ + unsigned int id = ctx->cur_stream->id; \ + /** + * If Tempesta is already sending headers, + * only update the stream state and schedule + * sending an RST_STREAM frame, but do not + * remove the stream from the scheduling queue. + */ \ + return tfw_h2_send_rst_stream(ctx, id, err); \ + } \ } \ return T_OK; \ } \ @@ -227,15 +239,6 @@ ctx_new_settings_flags[] = { [HTTP2_SETTINGS_MAX_HDR_LIST_SIZE] = 0x20 }; -static void -tfw_h2_on_tcp_entail_ack(void *conn, struct sk_buff *skb_head) -{ - TfwH2Ctx *ctx = tfw_h2_context_unsafe((TfwConn *)conn); - - if (test_bit(HTTP2_SETTINGS_NEED_TO_APPLY, ctx->settings_to_apply)) - tfw_h2_apply_new_settings(ctx); -} - static int tfw_h2_on_send_goaway(void *conn, struct sk_buff **skb_head) { @@ -294,6 +297,82 @@ tfw_h2_on_send_dflt(void *conn, struct sk_buff **skb_head) return 0; } +/** + * Similar to __tfw_h2_send_frame(), but used to send frames directly, + * bypassing the work queue while the socket is held under the lock. Fits to + * WINDOW_UPDATE, PING and SETTINGS/ack frames, because: + * 1. The frame may be generated on local cpu. + * 2. Response to one of the listed frames may be sent on the same cpu where + * the frame has been received. + */ +static int +__tfw_h2_send_frame_local(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data) +{ + int r; + TfwMsgIter it; + TfwMsg msg = {}; + unsigned char buf[FRAME_HEADER_SIZE]; + TfwStr *hdr_str = TFW_STR_CHUNK(data, 0); + TfwConn *conn = (TfwConn *)ctx->conn; + unsigned char tls_type = TTLS_MSG_APPLICATION_DATA; + + if (hdr_str->data) { + WARN_ONCE(1, "Frame already has data.\n"); + return -EINVAL; + } + hdr_str->data = buf; + hdr_str->len = FRAME_HEADER_SIZE; + + if (data != hdr_str) + data->len += FRAME_HEADER_SIZE; + + tfw_h2_pack_frame_header(buf, hdr); + + T_DBG2("Preparing local HTTP/2 message with %lu bytes data\n", + data->len); + + /** + * TODO: 2136. + * + * This is subject for optimization, we can append the frame to the + * write_queue or postponed queue in case if they are not empty. Avoid + * allocation of the new skb. Also we can use, for instance pool or + * pg_skb_alloc() to allocate fragment and expand this fragment if we + * have a lot of frames to receive and to response. This is good + * strategy for PING frames. + */ + msg.len = data->len; + r = tfw_msg_iter_setup(&it, CLIENT_MEM_FROM_CONN(conn), &msg.skb_head, + msg.len); + if (unlikely(r)) + goto err; + + r = tfw_msg_iter_write(&it, data); + if (unlikely(r)) + goto err; + + ss_skb_setup_head_of_list(msg.skb_head, msg.skb_head->mark, tls_type); + + if (ctx->cur_send_headers) { + ss_skb_queue_splice(&ctx->cur_send_headers->xmit.postponed, + &msg.skb_head); + } else { + struct sock *sk = conn->sk; + + ss_skb_queue_splice(&conn->write_queue, &msg.skb_head); + sock_set_flag(sk, SOCK_TEMPESTA_HAS_DATA); + SS_IN_USE_PROTECT({ + tcp_push_pending_frames(sk); + }); + } + + return 0; + +err: + ss_skb_queue_purge(&msg.skb_head); + return r; +} + /** * Prepare and send HTTP/2 frame to the client; @hdr must contain * the valid data to fill in the frame's header; @data may carry @@ -353,11 +432,6 @@ __tfw_h2_send_frame(TfwH2Ctx *ctx, TfwFrameHdr *hdr, TfwStr *data, TFW_SKB_CB(msg.skb_head)->on_send = tfw_h2_on_send_dflt; } - if (hdr->type == HTTP2_SETTINGS && hdr->flags == HTTP2_F_ACK) { - TFW_SKB_CB(msg.skb_head)->on_tcp_entail = - tfw_h2_on_tcp_entail_ack; - } - if ((r = tfw_connection_send(conn, &msg))) goto err; /* @@ -414,7 +488,7 @@ tfw_h2_send_ping(TfwH2Ctx *ctx) WARN_ON_ONCE(ctx->rlen != FRAME_PING_SIZE); - return tfw_h2_send_frame(ctx, &hdr, &data); + return __tfw_h2_send_frame_local(ctx, &hdr, &data); } @@ -441,7 +515,7 @@ tfw_h2_send_wnd_update(TfwH2Ctx *ctx, unsigned int id, unsigned int wnd_incr) *(unsigned int *)incr_buf = htonl(wnd_incr); - return tfw_h2_send_frame(ctx, &hdr, &data); + return __tfw_h2_send_frame_local(ctx, &hdr, &data); } static inline int @@ -502,7 +576,7 @@ tfw_h2_send_settings_init(TfwH2Ctx *ctx) hdr.length += sizeof(field[0]); } - return tfw_h2_send_frame(ctx, &hdr, &data); + return __tfw_h2_send_frame_local(ctx, &hdr, &data); } static inline int @@ -516,7 +590,7 @@ tfw_h2_send_settings_ack(TfwH2Ctx *ctx) .flags = HTTP2_F_ACK }; - return tfw_h2_send_frame(ctx, &hdr, &data); + return __tfw_h2_send_frame_local(ctx, &hdr, &data); } int @@ -667,15 +741,13 @@ tfw_h2_headers_process(TfwH2Ctx *ctx) T_DBG("Invalid dependency: new stream with %u depends on" " itself\n", hdr->stream_id); - ctx->state = HTTP2_IGNORE_FRAME_DATA; - - if (likely(!ctx->cur_stream)) { - return tfw_h2_send_rst_stream(ctx, hdr->stream_id, - HTTP2_ECODE_PROTO); - } - - WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); - return tfw_h2_current_stream_send_rst(ctx, HTTP2_ECODE_PROTO); + /* + * RFC 7540 states that it MUST be treated as a stream-level + * error, however, it doesn’t make sense to continue servicing + * a suspicious connection. + */ + tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); + return T_BAD; } if (likely(!ctx->cur_stream)) { @@ -777,18 +849,6 @@ tfw_h2_priority_process(TfwH2Ctx *ctx) return T_OK; } - if (ctx->cur_stream->state == HTTP2_STREAM_IDLE) { - /* - * According to RFC 9113 we should response with stream - * error of type PROTOCOL ERROR here, but we can't send - * RST_STREAM for idle stream. - * RFC 9113 doesn't describe this case, so terminate - * connection. - */ - tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); - return T_BAD; - } - /* * Stream cannot depend on itself (see RFC 7540 section 5.1.2 for * details). @@ -796,12 +856,14 @@ tfw_h2_priority_process(TfwH2Ctx *ctx) T_DBG("Invalid dependency: new stream with %u depends on itself\n", hdr->stream_id); - if (tfw_h2_stream_fsm_ignore_err(ctx, ctx->cur_stream, - HTTP2_RST_STREAM, 0)) - return -EPERM; + /* + * RFC 7540 states that it MUST be treated as a stream-level error, + * however, it doesn’t make sense to continue servicing a suspicious + * connection. + */ + tfw_h2_conn_terminate(ctx, HTTP2_ECODE_PROTO); - WARN_ON_ONCE(hdr->stream_id != ctx->cur_stream->id); - return tfw_h2_current_stream_send_rst(ctx, HTTP2_ECODE_PROTO); + return T_BAD; } static inline void @@ -877,6 +939,7 @@ tfw_h2_goaway_process(TfwH2Ctx *ctx) T_DBG("HTTP/2 connection is closed by client with error code:" " %u, ID of last processed stream: %u\n", err_code, last_id); + WARN_ON(err_code == HTTP2_ECODE_COMPRESSION); SET_TO_READ(ctx); return 0; } @@ -1638,7 +1701,8 @@ tfw_h2_frame_recv(void *data, unsigned char *buf, unsigned int len, T_FSM_STATE(HTTP2_RECV_FRAME_RST_STREAM) { FRAME_FSM_READ_SRVC(ctx->to_read); - tfw_h2_rst_stream_process(ctx); + if (ctx->cur_stream != ctx->cur_send_headers) + tfw_h2_rst_stream_process(ctx); FRAME_FSM_EXIT(T_OK); } @@ -1652,6 +1716,10 @@ tfw_h2_frame_recv(void *data, unsigned char *buf, unsigned int len, if (ctx->to_read) FRAME_FSM_MOVE(HTTP2_RECV_FRAME_SETTINGS); + if (test_bit(HTTP2_SETTINGS_NEED_TO_APPLY, + ctx->settings_to_apply)) + tfw_h2_apply_new_settings(ctx); + if ((ret = tfw_h2_send_settings_ack(ctx))) FRAME_FSM_EXIT(ret); @@ -2007,51 +2075,48 @@ tfw_h2_frame_process(TfwConn *c, struct sk_buff *skb, struct sk_buff **next) #undef TFW_H2_CONN_PROCESS_RESULT } +/** + * Note: @snd_wnd underflow is possible. The caller is responsible for ensuring + * that overflow does not occur. + */ +static inline unsigned int +__tfw_h2_snd_wnd_limit(unsigned long snd_wnd) +{ + return min(TLS_MAX_PAYLOAD_SIZE, snd_wnd - TLS_MAX_OVERHEAD); +} + +/** + * Note: @snd_wnd_budget underflow is possible. The caller is responsible for + * ensuring that overflow does not occur. + */ static inline unsigned int -tfw_h2_calc_frame_length(TfwH2Ctx *ctx, TfwStream *stream, TfwFrameType type, - unsigned int len, unsigned int max_len) +__tfw_h2_calc_data_frame_len(TfwH2Ctx *ctx, TfwStream *stream, + unsigned int snd_wnd_budget) { unsigned int length; + unsigned long body_len = stream->xmit.b_len; - length = min3(ctx->rsettings.max_frame_sz, len, max_len); - if (type == HTTP2_DATA) { - length = min3(length, (unsigned int)ctx->rem_wnd, - (unsigned int)stream->rem_wnd); - } + snd_wnd_budget -= FRAME_HEADER_SIZE; + + length = min3(ctx->rsettings.max_frame_sz, body_len, snd_wnd_budget); + length = min3(length, (unsigned int)ctx->rem_wnd, + (unsigned int)stream->rem_wnd); return length; } -static inline char -tfw_h2_calc_frame_flags(TfwStream *stream, TfwFrameType type, - bool trailers) +static inline unsigned int +__total_data_bytes_to_send(unsigned int frame_length) { - unsigned char flags = 0; - - if (!stream->xmit.b_len && !stream->xmit.t_len - && (type == HTTP2_HEADERS || type == HTTP2_DATA) - && !tfw_h2_stream_is_eos_sent(stream)) - flags |= HTTP2_F_END_STREAM; - - if (!stream->xmit.b_len && stream->xmit.t_len - && type == HTTP2_HEADERS && trailers) - flags |= HTTP2_F_END_STREAM; - - if (!stream->xmit.h_len && type != HTTP2_DATA && !trailers) - flags |= HTTP2_F_END_HEADERS; - - if (!stream->xmit.t_len && type != HTTP2_DATA && trailers) - flags |= HTTP2_F_END_HEADERS; - - return flags; + return frame_length + FRAME_HEADER_SIZE; } static inline int -tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, - TfwFrameType type, unsigned int frame_length) +tfw_h2_insert_frame_header(TfwH2Ctx *ctx, TfwStream *stream, TfwFrameType type, + unsigned int frame_length, unsigned char flags, + unsigned int *bytes_to_sent) { TfwFrameHdr frame_hdr = {}; - bool trailers = false; char *data; int r = 0; @@ -2071,8 +2136,11 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, } data = ss_skb_data_ptr_by_offset(stream->xmit.skb_head, - stream->xmit.frame_length); - BUG_ON(!data); + stream->xmit.bytes_to_send); + if (unlikely(!data)) { + WARN_ONCE(1, "Can't find offset in skb."); + return -EPIPE; + } if (type == HTTP2_CONTINUATION || type == HTTP2_DATA) { TfwStr dst = {}; @@ -2088,52 +2156,56 @@ tfw_h2_insert_frame_header(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, data = dst.data; } - if (type == HTTP2_DATA) { - ctx->rem_wnd -= frame_length; - ctx->data_bytes_sent += frame_length; - stream->rem_wnd -= frame_length; - stream->xmit.b_len -= frame_length; - } else if (stream->xmit.h_len) { - stream->xmit.h_len -= frame_length; - } else if (stream->xmit.t_len) { - stream->xmit.t_len -= frame_length; - trailers = true; - } - frame_hdr.length = frame_length; frame_hdr.stream_id = stream->id; frame_hdr.type = type; - frame_hdr.flags = tfw_h2_calc_frame_flags(stream, type, trailers); + frame_hdr.flags = flags; tfw_h2_pack_frame_header(data, &frame_hdr); - stream->xmit.frame_length += frame_length + FRAME_HEADER_SIZE; - switch (tfw_h2_stream_fsm_ignore_err(ctx, stream, type, - frame_hdr.flags)) - { - case STREAM_FSM_RES_OK: - break; - case STREAM_FSM_RES_IGNORE: - fallthrough; - case STREAM_FSM_RES_TERM_STREAM: - /* Send previosly successfully prepared frames if exist. */ - stream->xmit.frame_length -= frame_length + FRAME_HEADER_SIZE; - if (stream->xmit.frame_length) { - r = tfw_h2_entail_stream_skb(sk, stream, - &stream->xmit.frame_length, - true); - } - stream->xmit.frame_length += frame_length + FRAME_HEADER_SIZE; - /* - * Purge stream send queue, but leave postponed - * skbs and rst stream/goaway/tls alert if exist. - */ - tfw_h2_stream_purge_send_queue(stream); - return r; - case STREAM_FSM_RES_TERM_CONN: + *bytes_to_sent += __total_data_bytes_to_send(frame_length); + + return r; +} + +static inline int +tfw_h2_insert_enc_tbl_sz(TfwHPackETbl *tbl, struct sk_buff *skb_head, + unsigned int offset, unsigned int *acc_len) +{ + TfwHPackInt min_val = {}, max_val = {}; + TfwStr dst = {}; + char *data; + unsigned int _; + int r = 0; + unsigned int size; + + WARN_ON_ONCE(!tbl->wnd_changed); + if (tbl->window > tbl->min_window) + write_int(tbl->min_window, 0x1F, 0x20, &min_val); + write_int(tbl->window, 0x1F, 0x20, &max_val); + + data = ss_skb_data_ptr_by_offset(skb_head, + offset + FRAME_HEADER_SIZE); + if (unlikely(!data)) { + WARN_ONCE(1, "Can't find offset in skb."); return -EPIPE; } - return r; + size = min_val.sz + max_val.sz; + r = ss_skb_get_room_w_frag(skb_head, skb_head, data, size, &dst, &_); + if (unlikely(r)) + return r; + + if (tbl->window > tbl->min_window) { + memcpy(dst.data, min_val.buf, min_val.sz); + dst.data += min_val.sz; + } + + memcpy(dst.data, max_val.buf, max_val.sz); + *acc_len += size; + tbl->min_window = HPACK_ENC_TABLE_MAX_SIZE; + tbl->wnd_changed = false; + + return 0; } static int @@ -2146,7 +2218,7 @@ tfw_h2_stream_send_postponed(struct sock *sk, struct sk_buff **skb_head, * We send all data from `conn->write_queue` before call * `tfw_h2_make_frames`. So there is only one case when * `conn->write_queue can be not empty here - we send - * postponed frames from `HTTP2_SEND_FRAMES` state and + * postponed frames from `HTTP2_SEND_HEADERS_FRAME` state and * then call this function again from HTTP2_MAKE_FRAMES_FINISH. * If `conn->write_queue` is not empty, we should not entail new * data to socket write queue (it should be sent later after data @@ -2174,60 +2246,104 @@ tfw_h2_stream_send_postponed(struct sock *sk, struct sk_buff **skb_head, return 0; } +static __always_inline int +__tfw_h2_make_headers_frame(TfwH2Ctx *ctx, TfwStream *stream) +{ + int r; + TfwFrameType type = HTTP2_HEADERS; + unsigned int max_payload_size = ctx->rsettings.max_frame_sz + - FRAME_HEADER_SIZE; + unsigned char flags = 0; + + if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { + r = tfw_h2_insert_enc_tbl_sz(&ctx->hpack.enc_tbl, + stream->xmit.skb_head, + stream->xmit.bytes_to_send, + &stream->xmit.h_len); + if (unlikely(r < 0)) { + T_WARN("Failed to encode hpack dynamic table size %d\n", + r); + return r; + } + } + + unsigned int frame_length = min(max_payload_size, stream->xmit.h_len); + + stream->xmit.h_len -= frame_length; + + if (!stream->xmit.h_len) + flags |= HTTP2_F_END_HEADERS; + if (!stream->xmit.b_len && !stream->xmit.t_len) + flags |= HTTP2_F_END_STREAM; + + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, type, flags); + ctx->cur_send_headers = stream; + if (unlikely(r)) { + stream->xmit.h_len += frame_length; + return r; + } + + return tfw_h2_insert_frame_header(ctx, stream, type, frame_length, + flags, + &stream->xmit.headers_frame_length); +} + +static __always_inline int +__tfw_h2_make_continuation_frame(TfwH2Ctx *ctx, TfwStream *stream) +{ + TfwFrameType type = HTTP2_CONTINUATION; + unsigned int max_payload_size = ctx->rsettings.max_frame_sz + - FRAME_HEADER_SIZE; + unsigned char flags = 0; + + unsigned int frame_length = min(max_payload_size, stream->xmit.h_len); + + stream->xmit.h_len -= frame_length; + if (!stream->xmit.h_len) + flags |= HTTP2_F_END_HEADERS; + + return tfw_h2_insert_frame_header(ctx, stream, type, frame_length, + flags, + &stream->xmit.headers_frame_length); +} + +static inline int +__tfw_h2_make_data_frame(TfwH2Ctx *ctx, TfwStream *stream, + unsigned int frame_length) +{ + int r; + TfwFrameType type = HTTP2_DATA; + unsigned char flags = 0; + + stream->xmit.b_len -= frame_length; + + if (!stream->xmit.b_len && !stream->xmit.t_len) + flags |= HTTP2_F_END_STREAM; + + r = tfw_h2_stream_fsm_ignore_err(ctx, stream, type, flags); + if (unlikely(r)) { + stream->xmit.b_len += frame_length; + return r; + } + + ctx->rem_wnd -= frame_length; + ctx->data_bytes_sent += frame_length; + stream->rem_wnd -= frame_length; + + return tfw_h2_insert_frame_header(ctx, stream, type, frame_length, + flags, + &stream->xmit.bytes_to_send); +} + static int tfw_h2_stream_xmit_process(struct sock *sk, TfwH2Ctx *ctx, TfwStream *stream, bool stream_is_exclusive, unsigned int mss_now, unsigned long *snd_wnd, bool *stop) { int r = 0; - TfwFrameType frame_type; - unsigned int frame_length; - bool is_trailer_cont = false; unsigned int min_to_send = tfw_h2_calc_min_to_send(sk, ctx, mss_now); - T_FSM_INIT(stream->xmit.state, "HTTP/2 make frames"); - -#define ADJUST_BLOCKED_STREAMS_AND_EXIT(len, type) \ -do { \ - /* \ - * If Tempesta FW stop to make frames, because of exceeded \ - * stream->rem_wnd, mark such stream as blocked. \ - */ \ - BUG_ON(stream->xmit.is_blocked); \ - stream->xmit.is_blocked = \ - (type == HTTP2_DATA && stream->rem_wnd <= len); \ - ctx->sched.blocked_streams += stream->xmit.is_blocked; \ - *stop = true; \ - T_FSM_EXIT(); \ -} while(0) - -#define CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(type, len) \ -do { \ - unsigned int max_len; \ - unsigned int min_len; \ - \ - if (*snd_wnd <= FRAME_HEADER_SIZE + TLS_MAX_OVERHEAD) { \ - *stop = true; \ - T_FSM_EXIT(); \ - } \ - max_len = min(TLS_MAX_PAYLOAD_SIZE, *snd_wnd - TLS_MAX_OVERHEAD); \ - max_len -= FRAME_HEADER_SIZE; \ - min_len = min(min_to_send, (unsigned int)len); \ - frame_length = tfw_h2_calc_frame_length(ctx, stream, type, len, \ - max_len); \ - /* \ - * If the lenght of data to send is less then `min_to_send` \ - * use it as a minimum bytes to send. \ - */ \ - if (frame_length < min_len) \ - ADJUST_BLOCKED_STREAMS_AND_EXIT(min_len, type); \ - frame_type = type; \ -} while(0) -#define FRAME_XMIT_FSM_NEXT(frame_length, state) \ -do { \ - *snd_wnd -= frame_length + FRAME_HEADER_SIZE; \ - T_FSM_JMP(state); \ -} while(0) + T_FSM_INIT(stream->xmit.state, "HTTP/2 make frames"); T_FSM_START(stream->xmit.state) { @@ -2249,134 +2365,186 @@ do { \ } T_FSM_STATE(HTTP2_MAKE_HEADERS_FRAMES) { - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, - stream->xmit.h_len); - if (unlikely(ctx->hpack.enc_tbl.wnd_changed)) { - r = tfw_hpack_enc_tbl_write_sz(&ctx->hpack.enc_tbl, - stream); - if (unlikely(r < 0)) { - T_WARN("Failed to encode hpack dynamic" - "table size %d", r); - return r; - } + r = __tfw_h2_make_headers_frame(ctx, stream); + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); + + T_FSM_JMP(HTTP2_SEND_HEADERS_FRAME); + } + + T_FSM_STATE(HTTP2_MAKE_CONTINUATION_FRAMES) { + r = __tfw_h2_make_continuation_frame(ctx, stream); + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); + + T_FSM_JMP(HTTP2_SEND_HEADERS_FRAME); + } + + T_FSM_STATE(HTTP2_MAKE_TRAILER_FRAMES) { + stream->xmit.h_len = stream->xmit.t_len; + stream->xmit.t_len = 0; + + r = __tfw_h2_make_headers_frame(ctx, stream); + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); + + T_FSM_JMP(HTTP2_SEND_HEADERS_FRAME); + } + + T_FSM_STATE(HTTP2_SEND_HEADERS_FRAME) { + if (*snd_wnd <= TLS_MAX_OVERHEAD) { + *stop = true; + T_FSM_EXIT(); } - r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, - frame_length); - if (unlikely(r)) { - T_WARN("Failed to make headers frame %d", r); - return r; + unsigned int snd_wnd_budget = __tfw_h2_snd_wnd_limit(*snd_wnd); + unsigned int bytes_to_send = + min(snd_wnd_budget, stream->xmit.headers_frame_length); + unsigned int is_last_chunk = + bytes_to_send == stream->xmit.headers_frame_length; + + if (bytes_to_send < min_to_send && !is_last_chunk) { + *stop = true; + T_FSM_EXIT(); } - FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); - } + stream->xmit.headers_frame_length -= bytes_to_send; + stream->xmit.bytes_to_send += bytes_to_send; + *snd_wnd -= bytes_to_send; - T_FSM_STATE(HTTP2_MAKE_CONTINUATION_FRAMES) { - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_CONTINUATION, - stream->xmit.h_len); - r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, - frame_length); + /* + * When prepared (framed) headers are finished don't force + * skb splitting, go to the next frame and try to place the new + * frame header to the current skb. It saves one ss_skb_split + * call. + */ + const bool has_prepared_headers = + !!stream->xmit.headers_frame_length; + const bool should_split = + has_prepared_headers || stream->xmit.postponed; + + r = tfw_h2_entail_stream_skb(sk, stream, + &stream->xmit.bytes_to_send, + should_split); if (unlikely(r)) { - T_WARN("Failed to make continuation frame %d", r); + T_WARN("Failed to send frame %d", r); return r; } - FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); - } + /* + * Stream still has remaining framed headers, so it returns to + * the beginning of the state to check available send window. It + * also returns to the beginning of the state when both the + * send window and headers are greater then TLS_MAX_PAYLOAD_SIZE, + * in order to prepare another chunk of TLS_MAX_PAYLOAD_SIZE. + */ + if (has_prepared_headers) + T_FSM_JMP(HTTP2_SEND_HEADERS_FRAME); - T_FSM_STATE(HTTP2_MAKE_DATA_FRAMES) { - if (tfw_h2_conn_or_stream_wnd_is_exceeded(ctx, stream)) - ADJUST_BLOCKED_STREAMS_AND_EXIT(0, HTTP2_DATA); - - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_DATA, - stream->xmit.b_len); - r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, - frame_length); - if (unlikely (r)) { - T_WARN("Failed to make data frame %d", r); - return r; + if (stream->xmit.h_len) + T_FSM_JMP(HTTP2_MAKE_CONTINUATION_FRAMES); + + ctx->cur_send_headers = NULL; + + /* + * We are ready to send postponed frames, send them ASAP to + * ensure that the client applies the new settings before + * receiving DATA frames. + */ + if (unlikely(stream->xmit.postponed)) { + struct sk_buff **head = &stream->xmit.postponed; + + WARN_ONCE(stream->xmit.bytes_to_send, + "sending postponed frames breaking headers" + " block."); + r = tfw_h2_stream_send_postponed(sk, head, mss_now, + snd_wnd); + if (unlikely(r)) { + T_WARN("Failed to send postponed frames %d", r); + return r; + } } - ctx->data_frames_sent++; - FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); + if (stream->xmit.b_len) + T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); + + if (stream->xmit.t_len) + T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); + + T_FSM_JMP(HTTP2_MAKE_FRAMES_FINISH); } - T_FSM_STATE(HTTP2_MAKE_TRAILER_FRAMES) { - is_trailer_cont = true; - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_HEADERS, - stream->xmit.t_len); - r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, - frame_length); - if (unlikely(r)) { - T_WARN("Failed to make trail headers frame %d", r); - return r; + T_FSM_STATE(HTTP2_MAKE_DATA_FRAMES) { + if (*snd_wnd <= FRAME_HEADER_SIZE + TLS_MAX_OVERHEAD) { + *stop = true; + T_FSM_EXIT(); } - FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); - } + if (tfw_h2_conn_or_stream_wnd_is_exceeded(ctx, stream)) { + /* + * If Tempesta FW stop to make frames, because of exceeded + * stream->rem_wnd, mark such stream as blocked. + */ + WARN_ON(stream->xmit.is_blocked); + stream->xmit.is_blocked = stream->rem_wnd <= 0; + ctx->sched.blocked_streams += stream->xmit.is_blocked; + *stop = ctx->rem_wnd <= 0; + T_FSM_EXIT(); + } - T_FSM_STATE(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES) { - is_trailer_cont = true; - CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE(HTTP2_CONTINUATION, - stream->xmit.t_len); - r = tfw_h2_insert_frame_header(sk, ctx, stream, frame_type, - frame_length); - if (unlikely(r)) { - T_WARN("Failed to make trail continuation frame %d", r); - return r; + unsigned int snd_wnd_budget = __tfw_h2_snd_wnd_limit(*snd_wnd); + unsigned int frame_length = + __tfw_h2_calc_data_frame_len(ctx, stream, + snd_wnd_budget); + unsigned int is_last_chunk = + frame_length == stream->xmit.b_len; + + /* + * Calculate min_to_send only for send window, not HTTP2 + * connection window. This allows us to not postpone streams + * that have small window, we want to send such streams and + * move to the next stream in the queue if tcp send window is + * enough. + */ + if (snd_wnd_budget < min_to_send && !is_last_chunk) { + /* + * NOTE: As possible optimization, in this case we + * would not stop sending and try to find the next + * stream with a small data. However this may lead to + * re-scheduling without effect if there is no such + * streams. + */ + *stop = true; + T_FSM_EXIT(); } - FRAME_XMIT_FSM_NEXT(frame_length, HTTP2_SEND_FRAMES); + r = __tfw_h2_make_data_frame(ctx, stream, frame_length); + if (unlikely(r)) + T_FSM_JMP(HTTP2_FRAMING_FAILED); + + ctx->data_frames_sent++; + *snd_wnd -= __total_data_bytes_to_send(frame_length); + fallthrough; } - T_FSM_STATE(HTTP2_SEND_FRAMES) { - if (likely(stream->xmit.frame_length)) { - r = tfw_h2_entail_stream_skb(sk, stream, - &stream->xmit.frame_length, - false); + T_FSM_STATE(HTTP2_SEND_DATA_FRAMES) { + if (likely(stream->xmit.bytes_to_send)) { + r = tfw_h2_entail_stream_skb(sk, stream, + &stream->xmit.bytes_to_send, + false); if (unlikely(r)) { T_WARN("Failed to send frame %d", r); return r; } } - if (stream->xmit.h_len) { - T_FSM_JMP(HTTP2_MAKE_CONTINUATION_FRAMES); - } else { - if (unlikely(stream->xmit.postponed) - && !stream->xmit.frame_length - && !ctx->cur_send_headers) - { - struct sk_buff **head = &stream->xmit.postponed; + if (stream->xmit.b_len) + T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, - snd_wnd); - if (unlikely(r)) { - T_WARN("Failed to send postponed" - " frames %d", r); - return r; - } - } - if (stream->xmit.b_len) { - T_FSM_JMP(HTTP2_MAKE_DATA_FRAMES); - } else if (stream->xmit.t_len) { - if (likely(!is_trailer_cont)) { - T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); - } else { - T_FSM_JMP(HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES); - } - } else { - /* - * If we there is no headers, data or trailer - * frames to send, all data should be entailed - * to the socket write queue at the beginning - * of this state. - */ - WARN_ON(stream->xmit.frame_length); - fallthrough; - } - } + if (stream->xmit.t_len) + T_FSM_JMP(HTTP2_MAKE_TRAILER_FRAMES); + + fallthrough; } T_FSM_STATE(HTTP2_MAKE_FRAMES_FINISH) { @@ -2389,23 +2557,76 @@ do { \ if (unlikely(stream->xmit.skb_head)) { struct sk_buff **head = &stream->xmit.skb_head; - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, + r = tfw_h2_stream_send_postponed(sk, head, mss_now, snd_wnd); if (unlikely(r)) { - T_WARN("Failed to send postponed" - " frames %d", r); + T_WARN("Failed to send postponed frames %d\n", + r); return r; } } if (stream == ctx->error) ctx->error = NULL; - /* - * Don't put exclusive streams in closed queue it - * will be immediately deleted in the caller function. - */ - if (!stream_is_exclusive) - tfw_h2_stream_add_closed(ctx, stream); + T_FSM_EXIT(); + } + + /* + * In this state we handle framing error. It may happen if we trying + * to send DATA frames to closed stream. e.g Stream closed by the + * client while receiving response, that is valid behavior for firefox. + */ + T_FSM_STATE(HTTP2_FRAMING_FAILED) { + switch (r) { + case STREAM_FSM_RES_IGNORE: + fallthrough; + case STREAM_FSM_RES_TERM_STREAM: + /* + * In this case r is positive, set it to zero to not + * return positive r from this function. That doesn't + * reset connection but stops stream scheduling even + * if we have enough send window. + */ + r = 0; + /* Send previosly successfully prepared frames if exist. */ + if (stream->xmit.bytes_to_send) { + r = tfw_h2_entail_stream_skb(sk, stream, + &stream->xmit.bytes_to_send, + true); + if (unlikely(r)) + return r; + } + + /** + * Purge stream send queue, but leave postponed + * skbs and rst stream/goaway/tls alert if exist. + */ + tfw_h2_stream_purge_send_queue(stream); + + if (unlikely(stream->xmit.postponed) && + !ctx->cur_send_headers) { + struct sk_buff **head = &stream->xmit.postponed; + + r = tfw_h2_stream_send_postponed(sk, head, + mss_now, + snd_wnd); + if (unlikely(r)) { + T_WARN("Failed to send postponed" + " frames %d", r); + return r; + } + } + T_FSM_JMP(HTTP2_MAKE_FRAMES_FINISH); + case STREAM_FSM_RES_TERM_CONN: + return -EPIPE; + default: + /* + * Framing error not occurred but framing failed state + * reached. + */ + WARN_ON_ONCE(!r); + return -EPIPE; + } + T_FSM_EXIT(); } @@ -2413,35 +2634,20 @@ do { \ T_FSM_FINISH(r, stream->xmit.state); - if (stream->xmit.frame_length) { + if (stream->xmit.bytes_to_send) { r = tfw_h2_entail_stream_skb(sk, stream, - &stream->xmit.frame_length, + &stream->xmit.bytes_to_send, true); if (unlikely(r)) { T_WARN("Failed to send frame %d", r); return r; } - WARN_ON(stream->xmit.frame_length); - if (unlikely(stream->xmit.postponed) - && !ctx->cur_send_headers) - { - struct sk_buff **head = &stream->xmit.postponed; - - r = tfw_h2_stream_send_postponed(sk, head, - mss_now, - snd_wnd); - if (unlikely(r)) { - T_WARN("Failed to send postponed" - " frames %d", r); - return r; - } - } } return r; #undef FRAME_XMIT_FSM_NEXT -#undef CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE +#undef CALC_FRAME_LENGTH_AND_SET_FRAME_TYPE_OR_EXIT #undef ADJUST_BLOCKED_STREAMS_AND_EXIT } @@ -2496,6 +2702,8 @@ tfw_h2_make_frames(struct sock *sk, TfwH2Ctx *ctx, unsigned int mss_now, if (stream_is_exclusive) { tfw_h2_stream_clean(ctx, stream); } else { + tfw_h2_stream_add_closed(ctx, stream); + TfwStreamSchedEntry *parent = stream->sched->parent; diff --git a/fw/http_msg.c b/fw/http_msg.c index 09440a53d..4971ce20d 100644 --- a/fw/http_msg.c +++ b/fw/http_msg.c @@ -1303,52 +1303,6 @@ tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, return r; } -static inline void -__tfw_http_msg_move_frags(struct sk_buff *skb, int frag_idx, - TfwHttpMsgCleanup *cleanup) -{ - int i, len; - struct skb_shared_info *si = skb_shinfo(skb); - - for (i = 0, len = 0; i < frag_idx; i++) { - cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); - cleanup->pages_sz++; - len += skb_frag_size(&si->frags[i]); - } - - si->nr_frags -= frag_idx; - ss_skb_adjust_data_len(skb, -len); - memmove(&si->frags, &si->frags[frag_idx], - (si->nr_frags) * sizeof(skb_frag_t)); -} - -static inline void -__tfw_http_msg_rm_all_frags(struct sk_buff *skb, TfwHttpMsgCleanup *cleanup) -{ - int i, len; - struct skb_shared_info *si = skb_shinfo(skb); - - for (i = 0; i < si->nr_frags; i++) - cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); - - len = skb->data_len; - cleanup->pages_sz = si->nr_frags; - si->nr_frags = 0; - ss_skb_adjust_data_len(skb, -len); -} - -static inline void -__tfw_http_msg_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_idx]; - const int len = nbegin - (char*)skb_frag_address(frag); - - /* Add offset and decrease fragment's size */ - skb_frag_off_add(frag, len); - skb_frag_size_sub(frag, len); - ss_skb_adjust_data_len(skb, -len); -} - /* * Delete SKBs and paged fragments related to @hm that contains message * headers. SKBs and fragments will be "unlinked" and placed to @cleanup. @@ -1356,7 +1310,7 @@ __tfw_http_msg_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin * as source for message trasformation. */ int -tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) +tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup) { int i, r = 0; char *begin, *end; @@ -1383,8 +1337,7 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) it->skb, body); break; } else { - ss_skb_put(it->skb, -skb_headlen(it->skb)); - it->skb->tail_lock = 1; + ss_skb_remove_linear_data(it->skb); } } @@ -1402,14 +1355,14 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) * fragments from skb where LF is located. */ if (!body) { - __tfw_http_msg_rm_all_frags(it->skb, cleanup); + ss_skb_rm_all_frags(it->skb, cleanup); goto end; } else if (off != begin) { /* * Fragment contains headers and body. * Set beginning of frag as beginning of body. */ - __tfw_http_msg_shrink_frag(it->skb, i, off); + ss_skb_shrink_frag(it->skb, i, off); } /* @@ -1418,7 +1371,7 @@ tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup) * from skb. */ if (i >= 1) - __tfw_http_msg_move_frags(it->skb, i, cleanup); + ss_skb_shift_frags(it->skb, i, cleanup); goto end; } diff --git a/fw/http_msg.h b/fw/http_msg.h index 22ed7375f..574155529 100644 --- a/fw/http_msg.h +++ b/fw/http_msg.h @@ -164,7 +164,7 @@ int tfw_h2_msg_expand_from_pool_lc(TfwHttpMsg *hm, const TfwStr *str, TfwHttpTransIter *mit); int __hdr_name_cmp(const TfwStr *hdr, const TfwStr *cmp_hdr); int __http_hdr_lookup(TfwHttpMsg *hm, const TfwStr *hdr); -int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwHttpMsgCleanup* cleanup); +int tfw_http_msg_cutoff_headers(TfwHttpMsg *hm, TfwSkbCleanup *cleanup); #define TFW_H2_MSG_HDR_ADD(hm, name, val, idx) \ tfw_h2_msg_hdr_add(hm, name, sizeof(name) - 1, val, \ diff --git a/fw/http_stream.c b/fw/http_stream.c index 923ea20e0..126f4180a 100644 --- a/fw/http_stream.c +++ b/fw/http_stream.c @@ -141,7 +141,7 @@ void tfw_h2_stream_purge_send_queue(TfwStream *stream) { unsigned long len = stream->xmit.h_len + stream->xmit.b_len + - stream->xmit.t_len + stream->xmit.frame_length; + stream->xmit.t_len + stream->xmit.bytes_to_send; struct sk_buff *skb; while (len) { @@ -152,7 +152,7 @@ tfw_h2_stream_purge_send_queue(TfwStream *stream) ss_kfree_skb(skb); } stream->xmit.h_len = stream->xmit.b_len = stream->xmit.t_len - = stream->xmit.frame_length = 0; + = stream->xmit.bytes_to_send = 0; } void @@ -348,8 +348,7 @@ do { \ #define TFW_H2_FSM_TYPE_CHECK(ctx, stream, op, type) \ do { \ - if ((ctx->cur_##op##_headers \ - && (type != HTTP2_CONTINUATION && type != HTTP2_RST_STREAM)) \ + if ((ctx->cur_##op##_headers && type != HTTP2_CONTINUATION) \ || (!ctx->cur_##op##_headers && type == HTTP2_CONTINUATION)) { \ *err = HTTP2_ECODE_PROTO; \ res = STREAM_FSM_RES_TERM_CONN; \ @@ -504,7 +503,6 @@ do { \ * should be DATA frame. */ if (send) { - ctx->cur_send_headers = NULL; if (tfw_h2_stream_is_eos_sent(stream)) { new_state = HTTP2_STREAM_LOC_HALF_CLOSED; @@ -519,9 +517,7 @@ do { \ } } } else { - if (send) - ctx->cur_send_headers = stream; - else + if (!send) ctx->cur_recv_headers = stream; } break; @@ -534,7 +530,6 @@ do { \ && flags & HTTP2_F_END_STREAM) { if (send) { - ctx->cur_send_headers = NULL; new_state = HTTP2_STREAM_LOC_HALF_CLOSED; } else { @@ -554,7 +549,6 @@ do { \ * frame. */ if (send) { - ctx->cur_send_headers = stream; stream->state |= HTTP2_STREAM_SEND_END_OF_STREAM; } else { @@ -564,9 +558,7 @@ do { \ } } else { - if (send) - ctx->cur_send_headers = stream; - else + if (!send) ctx->cur_recv_headers = stream; } break; @@ -670,12 +662,10 @@ do { \ * END_STREAM flag set. */ case HTTP2_F_END_STREAM: - ctx->cur_send_headers = stream; stream->state |= HTTP2_STREAM_SEND_END_OF_STREAM; break; case HTTP2_F_END_HEADERS | HTTP2_F_END_STREAM: - ctx->cur_send_headers = NULL; SET_STATE(HTTP2_STREAM_CLOSED); break; case HTTP2_F_END_HEADERS: @@ -683,11 +673,9 @@ do { \ * Headers are ended, next frame in the * stream should be DATA frame. */ - ctx->cur_send_headers = NULL; break; default: - ctx->cur_send_headers = stream; break; } } else if (type == HTTP2_DATA) { @@ -753,7 +741,7 @@ do { \ break; case HTTP2_STREAM_CLOSED: - T_WARN("%s, stream fully closed: stream->id=%u, type=%hhu," + T_DBG3("%s, stream fully closed: stream->id=%u, type=%hhu," " flags=0x%hhx\n", __func__, stream->id, type, flags); if (send) { res = STREAM_FSM_RES_IGNORE; @@ -763,16 +751,12 @@ do { \ res = STREAM_FSM_RES_TERM_CONN; } } - break; default: BUG(); } finish: - if (type == HTTP2_RST_STREAM || res == STREAM_FSM_RES_TERM_STREAM) - tfw_h2_conn_reset_stream_on_close(ctx, stream); - T_DBG4("exit %s: strm [%p] state %d(%s), res %d\n", __func__, stream, tfw_h2_get_stream_state(stream), __h2_strm_st_n(stream), res); @@ -847,7 +831,8 @@ tfw_h2_stream_init_for_xmit(TfwHttpResp *resp, TfwStreamXmitState state, stream->xmit.b_len = b_len; stream->xmit.t_len = 0; stream->xmit.state = state; - stream->xmit.frame_length = 0; + stream->xmit.bytes_to_send = 0; + stream->xmit.headers_frame_length = 0; stream->xmit.is_blocked = false; spin_unlock(&ctx->lock); diff --git a/fw/http_stream.h b/fw/http_stream.h index 4e9b2c5d4..78b80fe89 100644 --- a/fw/http_stream.h +++ b/fw/http_stream.h @@ -62,11 +62,12 @@ typedef enum { HTTP2_RELEASE_RESPONSE, HTTP2_MAKE_HEADERS_FRAMES, HTTP2_MAKE_CONTINUATION_FRAMES, + HTTP2_SEND_HEADERS_FRAME, HTTP2_MAKE_DATA_FRAMES, HTTP2_MAKE_TRAILER_FRAMES, - HTTP2_MAKE_TRAILER_CONTINUATION_FRAMES, - HTTP2_SEND_FRAMES, + HTTP2_SEND_DATA_FRAMES, HTTP2_MAKE_FRAMES_FINISH, + HTTP2_FRAMING_FAILED } TfwStreamXmitState; static const char *__tfw_strm_st_names[] = { @@ -116,31 +117,34 @@ typedef enum { * Last http2 response info, used to prepare frames * in `xmit` callbacks. * - * @resp - responce, that should be sent; - * @skb_head - head of skb list that must be sent; - * @postponed - head of skb list that must be sent - * after sending headers for this stream; - * @h_len - length of headers in http2 response; - * @t_len - length of trailer headers in http2 response; - * @frame_length - length of current sending frame, or 0 - * if we send some service frames (for - * example RST STREAM after all pending data); - * @b_len - length of body in http2 response; - * @is_blocked - stream is blocked; - * @state - current stream xmit state (what type of - * frame should be made for this stream); - * @is_trailer_cont - need to send trailer CONTINUATION frames; + * @resp - response, that should be sent; + * @skb_head - head of skb list that must be sent; + * @postponed - head of skb list that must be sent + * after sending headers for this stream; + * @h_len - length of headers in http2 response; + * @t_len - length of trailer headers in http2 response; + * @bytes_to_send - length of current sending data, or 0 + * if we send some service frames (for + * example RST_STREAM after all pending data); + * @b_len - length of body in http2 response; + * @is_blocked - stream is blocked; + * @state - current stream xmit state (what type of + * frame should be made for this stream); + * @is_trailer_cont - need to send trailer CONTINUATION frames; + * @headers_frame_length - length of the current framed HEADERS/CONTINUATION + * frame; */ typedef struct { TfwHttpResp *resp; struct sk_buff *skb_head; struct sk_buff *postponed; unsigned int h_len; - unsigned int frame_length; + unsigned int bytes_to_send; u64 b_len : 59; u64 is_blocked : 1; u64 state : 4; unsigned int t_len; + unsigned int headers_frame_length; } TfwHttpXmit; /** diff --git a/fw/ss_skb.c b/fw/ss_skb.c index d8cba68e3..7c6d555c7 100644 --- a/fw/ss_skb.c +++ b/fw/ss_skb.c @@ -25,7 +25,6 @@ */ #include #include -#include #include #include #include @@ -1718,8 +1717,7 @@ ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, if (!split_point) { /* Usage of linear portion of SKB is not expected */ - ss_skb_put(skb, -skb_headlen(skb)); - skb->tail_lock = 1; + ss_skb_remove_linear_data(skb); } else { unsigned int off = split_point - skb->data; diff --git a/fw/ss_skb.h b/fw/ss_skb.h index 5e78a4511..3326c546e 100644 --- a/fw/ss_skb.h +++ b/fw/ss_skb.h @@ -23,6 +23,7 @@ #define __TFW_SS_SKB_H__ #include +#include #include #include "str.h" @@ -59,6 +60,19 @@ struct tfw_skb_cb { #define TFW_SKB_CB(skb) ((struct tfw_skb_cb *)&((skb)->cb[0])) +/** + * Represents a data that should be cleaned up. + * + * @skb_head - head of skb list that must be freed; + * @pages - pages that must be freed; + * @pages_sz - current number of @pages; + */ +typedef struct { + struct sk_buff *skb_head; + netmem_ref pages[MAX_SKB_FRAGS]; + unsigned char pages_sz; +} TfwSkbCleanup; + void ss_skb_set_owner(struct sk_buff *skb, void (*destructor)(struct sk_buff *), TfwClientMem *owner, unsigned int delta); void ss_skb_adjust_client_mem(struct sk_buff *skb, int delta); @@ -545,6 +559,87 @@ int ss_skb_linear_transform(struct sk_buff *skb_head, struct sk_buff *skb, unsigned char *split_point); int ss_skb_realloc_headroom(struct sk_buff *skb); +static inline void +ss_skb_remove_linear_data(struct sk_buff *skb) +{ + ss_skb_put(skb, -skb_headlen(skb)); + skb->tail_lock = 1; +} + +/* Remove all paged fragments from @skb and move them into @cleanup. */ +static inline void +ss_skb_rm_all_frags(struct sk_buff *skb, TfwSkbCleanup *cleanup) +{ + int i, len; + struct skb_shared_info *si = skb_shinfo(skb); + + for (i = 0; i < si->nr_frags; i++) + cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); + + len = skb->data_len; + cleanup->pages_sz = si->nr_frags; + si->nr_frags = 0; + ss_skb_adjust_data_len(skb, -len); +} + +/* + * Remove paged fragments until @frag_idx and move them into @cleanup. Shift + * remaining fragments to the beginning of fragments array. + */ +static inline void +ss_skb_shift_frags(struct sk_buff *skb, int frag_idx, + TfwSkbCleanup *cleanup) +{ + int i, len; + struct skb_shared_info *si = skb_shinfo(skb); + + for (i = 0, len = 0; i < frag_idx; i++) { + cleanup->pages[i] = skb_frag_netmem(&si->frags[i]); + cleanup->pages_sz++; + len += skb_frag_size(&si->frags[i]); + } + + si->nr_frags -= frag_idx; + ss_skb_adjust_data_len(skb, -len); + memmove(&si->frags, &si->frags[frag_idx], + (si->nr_frags) * sizeof(skb_frag_t)); +} + +/* + * Shrink fragment with @frag_idx index, set @nbegin as the starting position + * of that fragment. + */ +static inline void +ss_skb_shrink_frag(struct sk_buff *skb, int frag_idx, const char *nbegin) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[frag_idx]; + const int len = nbegin - (char *)skb_frag_address(frag); + + /* Add offset and decrease fragment's size */ + skb_frag_off_add(frag, len); + skb_frag_size_sub(frag, len); + ss_skb_adjust_data_len(skb, -len); +} + +static inline void +ss_skb_free_cleanup(TfwSkbCleanup *cleanup) +{ + int i; + struct sk_buff *skb; + + while ((skb = ss_skb_dequeue(&cleanup->skb_head))) + __ss_kfree_skb(skb); + + for (i = 0; i < cleanup->pages_sz; i++) + /* + * Pass "true" even for non recyclable pages, relying on check + * pp_magic == PP_SIGNATURE in napi_pp_put_page(), which avoid + * recycling of non page_pool pages. Overhead seems the same + * as to have/maintain flag for each fragment. + */ + skb_page_unref(cleanup->pages[i], true); +} + #if defined(DEBUG) && (DEBUG >= 4) #define ss_skb_queue_for_each_do(queue, lambda) \ do { \ diff --git a/fw/t/unit/test_http_msg.c b/fw/t/unit/test_http_msg.c index 6a9e5a3cc..c2f6f5619 100644 --- a/fw/t/unit/test_http_msg.c +++ b/fw/t/unit/test_http_msg.c @@ -165,7 +165,7 @@ TEST(http_msg, cutoff_linear_headers_paged_body) TFW_STR_STRING("paged_body") }; TfwStr *head = &frags[0], *pgd = &frags[1]; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -200,7 +200,7 @@ TEST(http_msg, cutoff_linear_headers_and_linear_body) TFW_STR_STRING("paged_body2") }; TfwStr *head = &frags[0], *pgd = &frags[1]; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -232,7 +232,7 @@ TEST(http_msg, expand_from_pool_for_headers) }; TfwStr *hdr = &frags[0], *head = &frags[0], *pgd = &frags[1]; TfwHttpMsg *msg = (TfwHttpMsg *)resp; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i; @@ -287,7 +287,7 @@ TEST(http_msg, expand_from_pool_for_trailers) }; TfwStr *trailer = &frags[0], *head = &frags[1], *pgd = &frags[2]; TfwHttpMsg *msg = (TfwHttpMsg *)resp; - TfwHttpMsgCleanup cleanup = {}; + TfwSkbCleanup cleanup = {}; TfwMsgIter *it; int i;