diff --git a/scripts/rust-miri.ts b/scripts/rust-miri.ts index 177351ff25c..10ed54ee276 100644 --- a/scripts/rust-miri.ts +++ b/scripts/rust-miri.ts @@ -41,6 +41,7 @@ const MIRI_CRATES = [ "bun_http_types", "bun_md", "bun_paths", + "bun_picohttp", "bun_ptr", "bun_resolve_builtins", "bun_shell_parser", diff --git a/src/http/h2_client/Stream.rs b/src/http/h2_client/Stream.rs index f835c4e3b30..9e104061274 100644 --- a/src/http/h2_client/Stream.rs +++ b/src/http/h2_client/Stream.rs @@ -35,7 +35,9 @@ pub struct Stream { /// consistent across multiple HEADERS in one read; the resulting strings /// land here until `deliverStream` hands them to handleResponseMetadata. pub decoded_bytes: Vec, - pub decoded_headers: Vec, + /// Self-referential: each `Header`'s name/value point into `decoded_bytes` + /// (lifetime-erased to `'static`); valid until `decoded_bytes` is mutated. + pub decoded_headers: Vec>, /// Final (non-1xx) status code; 0 until the response HEADERS arrive. pub status_code: u32, diff --git a/src/http/h3_client/Stream.rs b/src/http/h3_client/Stream.rs index 9fe49ff1aad..7c9998da6b4 100644 --- a/src/http/h3_client/Stream.rs +++ b/src/http/h3_client/Stream.rs @@ -24,11 +24,11 @@ pub struct Stream { // FFI handle into lsquic; bound from `callbacks.onStreamOpen`, closed via `abort`. pub qstream: Option>, - /// Slices into the lsquic-owned hset buffer; valid only for the duration - /// of the `onStreamHeaders` callback that populated it. `cloneMetadata` - /// deep-copies synchronously inside that callback, so nothing reads these - /// after they go stale. - pub decoded_headers: Vec, + /// Slices into the lsquic-owned hset buffer (lifetime-erased to + /// `'static`); valid only for the duration of the `onStreamHeaders` + /// callback that populated it. `cloneMetadata` deep-copies synchronously + /// inside that callback, so nothing reads these after they go stale. + pub decoded_headers: Vec>, pub body_buffer: Vec, pub status_code: u16, diff --git a/src/http/h3_client/callbacks.rs b/src/http/h3_client/callbacks.rs index 0cd7615c1fb..513f451971f 100644 --- a/src/http/h3_client/callbacks.rs +++ b/src/http/h3_client/callbacks.rs @@ -216,8 +216,15 @@ extern "C" fn on_stream_headers(s: *mut quic::Stream) { i += 1; continue; }; - let name = h.name_bytes(); - let value = h.value_bytes(); + // SAFETY: `name`/`value` point into the lsquic-owned hset buffer, + // which stays alive for the duration of this callback; + // `decoded_headers` is documented as valid only within this callback + // and is deep-copied (`cloneMetadata`) before the callback returns. + // Widened to `'static` so the headers can be stored in + // `Stream::decoded_headers`. + let name: &'static [u8] = unsafe { bun_ptr::detach_lifetime(h.name_bytes()) }; + // SAFETY: same hset-buffer invariant as `name` above. + let value: &'static [u8] = unsafe { bun_ptr::detach_lifetime(h.value_bytes()) }; if name.first() == Some(&b':') { if name == b":status" { status = bun_core::fmt::parse_int::(value, 10).unwrap_or(0); diff --git a/src/http/lib.rs b/src/http/lib.rs index 504cb015e1c..245e6090cbf 100644 --- a/src/http/lib.rs +++ b/src/http/lib.rs @@ -794,26 +794,27 @@ pub type GenHttpContext = http_context::HTTPContext; // ── header constants ──────────────────────────────────────────────────── const HOST_HEADER_NAME: &[u8] = b"Host"; const CONTENT_LENGTH_HEADER_NAME: &[u8] = b"Content-Length"; -const CHUNKED_ENCODED_HEADER: picohttp::Header = +const CHUNKED_ENCODED_HEADER: picohttp::Header<'static> = picohttp::Header::new(b"Transfer-Encoding", b"chunked"); -const CONNECTION_HEADER: picohttp::Header = picohttp::Header::new(b"Connection", b"keep-alive"); -const ACCEPT_HEADER: picohttp::Header = picohttp::Header::new(b"Accept", b"*/*"); +const CONNECTION_HEADER: picohttp::Header<'static> = + picohttp::Header::new(b"Connection", b"keep-alive"); +const ACCEPT_HEADER: picohttp::Header<'static> = picohttp::Header::new(b"Accept", b"*/*"); const ACCEPT_ENCODING_NO_COMPRESSION: &[u8] = b"identity"; const ACCEPT_ENCODING_COMPRESSION: &[u8] = b"gzip, deflate, br, zstd"; -const ACCEPT_ENCODING_HEADER_COMPRESSION: picohttp::Header = +const ACCEPT_ENCODING_HEADER_COMPRESSION: picohttp::Header<'static> = picohttp::Header::new(b"Accept-Encoding", ACCEPT_ENCODING_COMPRESSION); -const ACCEPT_ENCODING_HEADER_NO_COMPRESSION: picohttp::Header = +const ACCEPT_ENCODING_HEADER_NO_COMPRESSION: picohttp::Header<'static> = picohttp::Header::new(b"Accept-Encoding", ACCEPT_ENCODING_NO_COMPRESSION); -const ACCEPT_ENCODING_HEADER: picohttp::Header = if FeatureFlags::DISABLE_COMPRESSION_IN_HTTP_CLIENT -{ - ACCEPT_ENCODING_HEADER_NO_COMPRESSION -} else { - ACCEPT_ENCODING_HEADER_COMPRESSION -}; +const ACCEPT_ENCODING_HEADER: picohttp::Header<'static> = + if FeatureFlags::DISABLE_COMPRESSION_IN_HTTP_CLIENT { + ACCEPT_ENCODING_HEADER_NO_COMPRESSION + } else { + ACCEPT_ENCODING_HEADER_COMPRESSION + }; -fn get_user_agent_header() -> picohttp::Header { +fn get_user_agent_header() -> picohttp::Header<'static> { let ua = OVERRIDDEN_DEFAULT_USER_AGENT.get().copied().unwrap_or(b""); picohttp::Header::new( b"User-Agent", @@ -852,11 +853,12 @@ static PRINT_EVERY_I: AtomicUsize = AtomicUsize::new(0); // we always rewrite the entire HTTP request when write() returns EAGAIN // so we can reuse this buffer const MAX_REQUEST_HEADERS: usize = 256; -static SHARED_REQUEST_HEADERS_BUF: bun_core::RacyCell<[picohttp::Header; MAX_REQUEST_HEADERS]> = - bun_core::RacyCell::new([picohttp::Header::ZERO; MAX_REQUEST_HEADERS]); +static SHARED_REQUEST_HEADERS_BUF: bun_core::RacyCell< + [picohttp::Header<'static>; MAX_REQUEST_HEADERS], +> = bun_core::RacyCell::new([picohttp::Header::ZERO; MAX_REQUEST_HEADERS]); // this doesn't need to be stack memory because it is immediately cloned after use -static SHARED_RESPONSE_HEADERS_BUF: bun_core::RacyCell<[picohttp::Header; 256]> = +static SHARED_RESPONSE_HEADERS_BUF: bun_core::RacyCell<[picohttp::Header<'static>; 256]> = bun_core::RacyCell::new([picohttp::Header::ZERO; 256]); // the first packet for Transfer-Encoding: chunked @@ -875,12 +877,13 @@ static SINGLE_PACKET_SMALL_BUFFER: bun_core::RacyCell<[u8; 16 * 1024]> = mod scratch { use super::*; #[inline] - pub(super) fn request_headers() -> &'static mut [picohttp::Header; MAX_REQUEST_HEADERS] { + pub(super) fn request_headers() -> &'static mut [picohttp::Header<'static>; MAX_REQUEST_HEADERS] + { // SAFETY: see module-level INVARIANT. unsafe { &mut *SHARED_REQUEST_HEADERS_BUF.get() } } #[inline] - pub(super) fn response_headers() -> &'static mut [picohttp::Header; 256] { + pub(super) fn response_headers() -> &'static mut [picohttp::Header<'static>; 256] { // SAFETY: see module-level INVARIANT. unsafe { &mut *SHARED_RESPONSE_HEADERS_BUF.get() } } @@ -2056,7 +2059,7 @@ impl<'a> HTTPClient<'a> { let mut override_connection_header = false; let mut override_user_agent = false; let mut add_transfer_encoding = true; - let mut original_content_length: Option<&[u8]> = None; + let mut original_content_length: Option<&'static [u8]> = None; // Reserve slots for default headers that may be appended after user headers // (Connection, User-Agent, Accept, Host, Accept-Encoding, Content-Length/Transfer-Encoding). @@ -2064,7 +2067,11 @@ impl<'a> HTTPClient<'a> { const MAX_USER_HEADERS: usize = MAX_REQUEST_HEADERS - MAX_DEFAULT_HEADERS; for (i, head) in header_names.iter().enumerate() { - let name = self.header_str(*head); + // SAFETY: `header_str` returns a slice into `self.header_buf`, which + // outlives the returned `Request` (see the SAFETY block on the + // `Request` literal at the end of this fn). Widened to `'static` so + // the `Header` can be stored in the `'static` scratch buffer. + let name: &'static [u8] = unsafe { bun_ptr::detach_lifetime(self.header_str(*head)) }; // Hash it as lowercase let hash = hash_header_name(name); @@ -2079,7 +2086,10 @@ impl<'a> HTTPClient<'a> { match hash { h if h == hash_header_const(b"Content-Length") => { // Content-Length is always consumed (never written to the buffer). - original_content_length = Some(self.header_str(header_values[i])); + // SAFETY: same `header_buf` widening as `name` above. + original_content_length = Some(unsafe { + bun_ptr::detach_lifetime(self.header_str(header_values[i])) + }); continue; } h if h == hash_header_const(b"Connection") => { @@ -2154,8 +2164,10 @@ impl<'a> HTTPClient<'a> { continue; } - request_headers_buf[header_count] = - picohttp::Header::new(name, self.header_str(header_values[i])); + // SAFETY: same `header_buf` widening as `name` above. + let value: &'static [u8] = + unsafe { bun_ptr::detach_lifetime(self.header_str(header_values[i])) }; + request_headers_buf[header_count] = picohttp::Header::new(name, value); header_count += 1; } @@ -2176,8 +2188,11 @@ impl<'a> HTTPClient<'a> { } if !override_host_header { - request_headers_buf[header_count] = - picohttp::Header::new(HOST_HEADER_NAME, self.url.host); + // SAFETY: `url.host` borrows `self.url`, which outlives the + // returned `Request` (see the SAFETY block on the `Request` literal + // at the end of this fn). + let host: &'static [u8] = unsafe { bun_ptr::detach_lifetime(self.url.host) }; + request_headers_buf[header_count] = picohttp::Header::new(HOST_HEADER_NAME, host); header_count += 1; } @@ -3097,8 +3112,23 @@ impl<'a> HTTPClient<'a> { } let shared_resp = scratch::response_headers(); + // SAFETY: the parsed response borrows `SHARED_RESPONSE_HEADERS_BUF` + // and the bytes behind `to_read` (`incoming_data` for the duration + // of this callback, or `response_message_buffer` which lives on + // `self.state`). Both outlive every *read* of the response: it is + // only read within this callback (`handle_response_metadata`) and + // by `clone_metadata()`, which deep-copies it. The early-return + // paths that skip `clone_metadata()` (redirect, proxy CONNECT) + // overwrite or clear `state.pending_response` before the next read + // (`Response::default()` at the top of the next parse; + // `ProxyTunnel` sets it to `None`). Widened to `'static` so it can + // be parsed into the `'static` scratch buffer and stored in + // `state.pending_response`. This matches the pre-existing + // `detach_lifetime()` that was applied to the parsed response + // here before `Header` carried a lifetime. + let parse_buf: &'static [u8] = unsafe { bun_ptr::detach_lifetime(to_read!()) }; let response = match picohttp::Response::parse_parts( - to_read!(), + parse_buf, shared_resp, Some(&mut amount_read), ) { @@ -3125,12 +3155,8 @@ impl<'a> HTTPClient<'a> { }; // we save the successful parsed response - // SAFETY: response borrows SHARED_RESPONSE_HEADERS_BUF / response_message_buffer, - // both of which outlive this fn; widen to 'static for storage. - // Rebind `response` to the detached `'static` copy so it no longer - // borrows `to_read` (lets the `to_read` reassignment below pass - // borrowck — `RawSlice::slice` ties output to `&to_read`). - let response = unsafe { response.detach_lifetime() }; + // (`response` is already `Response<'static>` — `parse_buf` and the + // scratch buffer were both widened to `'static` above.) self.state.pending_response = Some(response); let bytes_read = @@ -3429,7 +3455,10 @@ impl<'a> HTTPClient<'a> { let headers_buf = bun_core::heap::release( vec![picohttp::Header::ZERO; response.headers.list.len()].into_boxed_slice(), ); - let cloned_response = response.clone(headers_buf, &mut builder); + // SAFETY: the cloned response's slices alias `builder`'s heap + // buffer, whose ownership is transferred to `owned_buf` below and + // stored alongside the response in `HTTPResponseMetadata`. + let cloned_response = unsafe { response.clone(headers_buf, &mut builder) }; // we clean the temporary response since cloned_metadata is now the owner self.state.pending_response = None; diff --git a/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs b/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs index a4b06e31f69..5a7c422faf5 100644 --- a/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs +++ b/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs @@ -105,7 +105,11 @@ pub struct HTTPClient { // suffix of `input_body_buf`; stored here as the suffix length so we don't // hold a self-referential slice. to_send_len: usize, - headers_buf: [picohttp::Header; 128], + /// Scratch storage for `Response::parse`. The parsed headers point into + /// the response bytes (`data` / `self.body`), are lifetime-erased to + /// `'static`, and are only read synchronously inside the receive callback + /// that parsed them. + headers_buf: [picohttp::Header<'static>; 128], body: Vec, /// Owned NUL-terminated hostname for SNI; empty when unset. hostname: ZBox, @@ -952,7 +956,13 @@ impl HTTPClient { } } - let response = match picohttp::Response::parse(body, &mut me.headers_buf) { + // SAFETY: `body` aliases `data` (live for this callback) or `me.body`; + // the parsed response (and `me.headers_buf`, which its headers point + // into alongside `body`) is only read synchronously below, before + // either buffer is mutated or freed. Widened to `'static` to match the + // `'static` element type of the `me.headers_buf` scratch array. + let parse_buf: &'static [u8] = unsafe { bun_ptr::detach_lifetime(body) }; + let response = match picohttp::Response::parse(parse_buf, &mut me.headers_buf) { Ok(r) => r, Err(picohttp::ParseResponseError::MalformedHttpResponse) => { // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. @@ -1011,7 +1021,11 @@ impl HTTPClient { } // Parse the response to find the end of headers - let response = match picohttp::Response::parse(body, &mut me.headers_buf) { + // SAFETY: same invariant as the `handle_data` parse — `body` and + // `me.headers_buf` are only read synchronously below, before either is + // mutated or freed. + let parse_buf: &'static [u8] = unsafe { bun_ptr::detach_lifetime(body) }; + let response = match picohttp::Response::parse(parse_buf, &mut me.headers_buf) { Ok(r) => r, Err(picohttp::ParseResponseError::MalformedHttpResponse) => { // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. @@ -1248,7 +1262,11 @@ impl HTTPClient { } } - let response = match picohttp::Response::parse(body, &mut me.headers_buf) { + // SAFETY: same invariant as the `handle_data` parse — `body` and + // `me.headers_buf` are only read synchronously below, before either is + // mutated or freed. + let parse_buf: &'static [u8] = unsafe { bun_ptr::detach_lifetime(body) }; + let response = match picohttp::Response::parse(parse_buf, &mut me.headers_buf) { Ok(r) => r, Err(picohttp::ParseResponseError::MalformedHttpResponse) => { // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. diff --git a/src/picohttp/lib.rs b/src/picohttp/lib.rs index ec88eb35e54..67a704a762f 100644 --- a/src/picohttp/lib.rs +++ b/src/picohttp/lib.rs @@ -92,23 +92,28 @@ use bun_core::strings; /// Zig used `name: []const u8` / `value: []const u8` and relied on Zig's slice /// ABI being `{ptr, len}`. Rust `&[u8]` has no guaranteed field order in /// `#[repr(C)]`, so we spell the fields out and expose `.name()` / `.value()`. +/// +/// `'buf` is the lifetime of the parse buffer (or whatever storage the +/// name/value pointers point into); `name()`/`value()` return `&'buf [u8]` so +/// the borrow is tied to that storage rather than to the `Header` itself. #[repr(C)] #[derive(Clone, Copy)] -pub struct Header { +pub struct Header<'buf> { name_ptr: *const u8, name_len: usize, value_ptr: *const u8, value_len: usize, + _buf: core::marker::PhantomData<&'buf [u8]>, } -impl Default for Header { +impl Default for Header<'_> { #[inline] fn default() -> Self { Self::ZERO } } -impl Header { +impl<'buf> Header<'buf> { /// All-zero sentinel — name/value are empty slices. Used by callers to /// initialize fixed-size header arrays before filling them. /// @@ -121,32 +126,35 @@ impl Header { name_len: 0, value_ptr: core::ptr::null(), value_len: 0, + _buf: core::marker::PhantomData, }; - /// Construct a `Header` from borrowed name/value slices. The caller is - /// responsible for keeping the backing storage alive for as long as the - /// `Header` is read (matches the Zig `[]const u8` field semantics). + /// Construct a `Header` borrowing `name`/`value`. The returned `Header` + /// cannot outlive the backing storage (matches the Zig `[]const u8` field + /// semantics, but compiler-checked). #[inline] - pub const fn new(name: &[u8], value: &[u8]) -> Self { + pub const fn new(name: &'buf [u8], value: &'buf [u8]) -> Self { Self { name_ptr: name.as_ptr(), name_len: name.len(), value_ptr: value.as_ptr(), value_len: value.len(), + _buf: core::marker::PhantomData, } } #[inline] - pub fn name(&self) -> &[u8] { + pub fn name(&self) -> &'buf [u8] { // picohttpparser sets `name = NULL, name_len = 0` for multiline / // continuation headers. `ffi::slice` tolerates the (null, 0) shape. - // SAFETY: ptr/len originate from picohttpparser pointing into the - // caller-provided buffer, or from StringBuilder::append. + // SAFETY: ptr/len point into the `'buf` storage this `Header` borrows + // (`Header::new` ties them to `'buf`; the parse functions tie the + // picohttpparser out-pointers to the input buffer's lifetime). unsafe { bun_core::ffi::slice(self.name_ptr, self.name_len) } } #[inline] - pub fn value(&self) -> &[u8] { + pub fn value(&self) -> &'buf [u8] { // Defensive: picohttpparser always points `value` into `buf` on // success; `ffi::slice` tolerates the (null, 0) shape. // SAFETY: same as name() @@ -162,18 +170,38 @@ impl Header { builder.count(self.value()); } - pub fn clone(&self, builder: &mut StringBuilder) -> Header { + /// Copy name/value into `builder` and return a `Header` pointing at the + /// copies. The returned lifetime is unbound (same contract as + /// [`StringBuilder::append_raw`]). + /// + /// # Safety + /// The returned `Header` aliases `builder`'s heap buffer; the caller must + /// keep the builder (or its moved-out buffer) alive and unmodified for as + /// long as the returned `Header` is read. + pub unsafe fn clone<'b>(&self, builder: &mut StringBuilder) -> Header<'b> { // SAFETY: returned slices alias `builder`'s heap buffer; caller of the // outer `clone` keeps the builder (or its moved-out buffer) alive for // the lifetime of the cloned `Header` (see PORT NOTE on `StringBuilder`). let name = unsafe { builder.append_raw(self.name()) }; // SAFETY: same buffer-lifetime invariant as `name` above. let value = unsafe { builder.append_raw(self.value()) }; + Header::new(name, value) + } + + /// Widen the borrow to `'static` for self-referential / static-buffer + /// storage. Field-by-field move (no bitwise reinterpret). + /// + /// # Safety + /// Caller guarantees the storage `name()`/`value()` point into outlives + /// every read through the returned value. + #[inline] + pub unsafe fn detach_lifetime(self) -> Header<'static> { Header { - name_ptr: name.as_ptr(), - name_len: name.len(), - value_ptr: value.as_ptr(), - value_len: value.len(), + name_ptr: self.name_ptr, + name_len: self.name_len, + value_ptr: self.value_ptr, + value_len: self.value_len, + _buf: core::marker::PhantomData, } } @@ -182,7 +210,7 @@ impl Header { } } -impl fmt::Display for Header { +impl fmt::Display for Header<'_> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // NOTE: pretty_fmt! is the comptime ANSI-tag expander (`` → escape // codes). bun_core's current impl is a passthrough TODO(port) until the @@ -217,11 +245,13 @@ impl fmt::Display for Header { } } -const _: () = assert!(core::mem::size_of::
() == core::mem::size_of::()); -const _: () = assert!(core::mem::align_of::
() == core::mem::align_of::()); +const _: () = + assert!(core::mem::size_of::>() == core::mem::size_of::()); +const _: () = + assert!(core::mem::align_of::>() == core::mem::align_of::()); pub struct HeaderCurlFormatter<'a> { - header: &'a Header, + header: &'a Header<'a>, } impl fmt::Display for HeaderCurlFormatter<'_> { @@ -246,7 +276,7 @@ impl fmt::Display for HeaderCurlFormatter<'_> { #[derive(Clone, Copy, Default)] pub struct HeaderList<'a> { - pub list: &'a [Header], + pub list: &'a [Header<'a>], // TODO(port): Zig field is `[]Header` (mutable slice) but only ever read // through `*const List`; using `&'a [Header]` here. Revisit if a caller // mutates through it. @@ -301,7 +331,7 @@ pub struct Request<'a> { pub method: &'a [u8], pub path: &'a [u8], pub minor_version: usize, - pub headers: &'a [Header], + pub headers: &'a [Header<'a>], pub bytes_read: u32, } @@ -314,9 +344,24 @@ impl<'a> Request<'a> { } } - pub fn clone(&self, headers: &'a mut [Header], builder: &mut StringBuilder) -> Request<'a> { + /// Deep-copy `method`/`path`/headers into `builder` and return a `Request` + /// whose slices point at the copies (the header *list* itself lives in + /// `headers`). + /// + /// # Safety + /// The returned `Request`'s `method`/`path`/header contents alias + /// `builder`'s heap buffer; the caller must keep the builder (or its + /// moved-out buffer) alive and unmodified for as long as the returned + /// `Request` is read. + pub unsafe fn clone( + &self, + headers: &'a mut [Header<'a>], + builder: &mut StringBuilder, + ) -> Request<'a> { for (i, header) in self.headers.iter().enumerate() { - headers[i] = header.clone(builder); + // SAFETY: forwarded caller contract — `builder` outlives the + // returned `Request`. + headers[i] = unsafe { header.clone(builder) }; } Request { @@ -347,13 +392,22 @@ impl<'a> Request<'a> { // SAFETY: caller contract. path: unsafe { &*core::ptr::from_ref::<[u8]>(self.path) }, minor_version: self.minor_version, - // SAFETY: caller contract. - headers: unsafe { &*core::ptr::from_ref::<[Header]>(self.headers) }, + // SAFETY: caller contract. `Header<'a>` and `Header<'static>` have + // identical layout (the lifetime only lives in `PhantomData`). + headers: unsafe { + core::slice::from_raw_parts( + self.headers.as_ptr().cast::>(), + self.headers.len(), + ) + }, bytes_read: self.bytes_read, } } - pub fn parse(buf: &'a [u8], src: &'a mut [Header]) -> Result, ParseRequestError> { + pub fn parse( + buf: &'a [u8], + src: &'a mut [Header<'a>], + ) -> Result, ParseRequestError> { let mut method_ptr: *const u8 = core::ptr::null(); let mut method_len: usize = 0; let mut path_ptr: *const u8 = core::ptr::null(); @@ -570,8 +624,15 @@ impl<'a> Response<'a> { // SAFETY: caller contract. status: unsafe { &*core::ptr::from_ref::<[u8]>(self.status) }, headers: HeaderList { - // SAFETY: caller contract. - list: unsafe { &*core::ptr::from_ref::<[Header]>(self.headers.list) }, + // SAFETY: caller contract. `Header<'a>` and `Header<'static>` + // have identical layout (the lifetime only lives in + // `PhantomData`). + list: unsafe { + core::slice::from_raw_parts( + self.headers.list.as_ptr().cast::>(), + self.headers.list.len(), + ) + }, }, bytes_read: self.bytes_read, } @@ -585,13 +646,27 @@ impl<'a> Response<'a> { } } - pub fn clone(&self, headers: &'a mut [Header], builder: &mut StringBuilder) -> Response<'a> { + /// Deep-copy `status` and the header contents into `builder` and return a + /// `Response` whose slices point at the copies (the header *list* itself + /// lives in `headers`). + /// + /// # Safety + /// The returned `Response`'s `status`/header contents alias `builder`'s + /// heap buffer; the caller must keep the builder (or its moved-out buffer) + /// alive and unmodified for as long as the returned `Response` is read. + pub unsafe fn clone( + &self, + headers: &'a mut [Header<'a>], + builder: &mut StringBuilder, + ) -> Response<'a> { let mut that = *self; // SAFETY: see `Header::clone` — caller keeps `builder` alive. that.status = unsafe { builder.append_raw(self.status) }; for (i, header) in self.headers.list.iter().enumerate() { - headers[i] = header.clone(builder); + // SAFETY: forwarded caller contract — `builder` outlives the + // returned `Response`. + headers[i] = unsafe { header.clone(builder) }; } that.headers.list = &headers[0..self.headers.list.len()]; @@ -601,7 +676,7 @@ impl<'a> Response<'a> { pub fn parse_parts( buf: &'a [u8], - src: &'a mut [Header], + src: &'a mut [Header<'a>], offset: Option<&mut usize>, ) -> Result, ParseResponseError> { let mut minor_version: c_int = 1; @@ -653,7 +728,10 @@ impl<'a> Response<'a> { } } - pub fn parse(buf: &'a [u8], src: &'a mut [Header]) -> Result, ParseResponseError> { + pub fn parse( + buf: &'a [u8], + src: &'a mut [Header<'a>], + ) -> Result, ParseResponseError> { let mut offset: usize = 0; let response = Self::parse_parts(buf, src, Some(&mut offset))?; Ok(response) @@ -699,11 +777,14 @@ bun_core::impl_tag_error!(ParseHeadersError); bun_core::named_error_set!(ParseHeadersError); pub struct Headers<'a> { - pub headers: &'a [Header], + pub headers: &'a [Header<'a>], } impl<'a> Headers<'a> { - pub fn parse(buf: &'a [u8], src: &'a mut [Header]) -> Result, ParseHeadersError> { + pub fn parse( + buf: &'a [u8], + src: &'a mut [Header<'a>], + ) -> Result, ParseHeadersError> { let mut num_headers: usize = src.len(); // SAFETY: src is layout-compatible with phr_header (asserted above). @@ -755,4 +836,90 @@ pub use c::phr_parse_response; pub use c::struct_phr_chunked_decoder; pub use c::struct_phr_header; +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn header_new_round_trips_name_and_value() { + let buf = b"Content-Type: text/plain".to_vec(); + let header = Header::new(&buf[..12], &buf[14..]); + assert_eq!(header.name(), b"Content-Type"); + assert_eq!(header.value(), b"text/plain"); + assert!(!header.is_multiline()); + } + + #[test] + fn header_zero_is_empty() { + let header = Header::ZERO; + assert_eq!(header.name(), b""); + assert_eq!(header.value(), b""); + assert!(header.is_multiline()); + assert_eq!(Header::default().name(), b""); + } + + // `strings::eql_case_insensitive_ascii` calls libc `strncasecmp`, which + // Miri cannot interpret. + #[cfg(not(miri))] + #[test] + fn header_list_lookup_is_case_insensitive() { + let storage = [Header::new(b"Content-Type", b"text/html"), Header::ZERO]; + let list = HeaderList { list: &storage }; + assert_eq!(list.get(b"content-type"), Some(&b"text/html"[..])); + assert_eq!(list.get(b"missing"), None); + assert_eq!( + list.get_if_other_is_absent(b"content-type", b"etag"), + Some(&b"text/html"[..]) + ); + assert_eq!(list.get_if_other_is_absent(b"etag", b"content-type"), None); + } + + #[test] + fn header_clone_copies_into_builder() { + let name = b"X-Custom".to_vec(); + let value = b"hello world".to_vec(); + let original = Header::new(&name, &value); + + let mut builder = StringBuilder::default(); + original.count(&mut builder); + builder.allocate().unwrap(); + // SAFETY: `builder` outlives `cloned` (dropped at end of scope, after + // the last read of `cloned`). + let cloned: Header<'_> = unsafe { original.clone(&mut builder) }; + drop((name, value)); + assert_eq!(cloned.name(), b"X-Custom"); + assert_eq!(cloned.value(), b"hello world"); + drop(builder); + } + + // NOTE: `Request::parse` / `Response::parse` / `Headers::parse` call the + // vendored picohttpparser C library, whose objects are only linked into + // the final binary by the CMake build — `cargo test` / `cargo miri test` + // cannot link or interpret them, so the C parse round-trip is covered by + // the HTTP client/server JS test suites instead. The test below mirrors + // the parser's output shape (a header array whose entries point into the + // request buffer) without the foreign call. + #[test] + fn request_headers_point_into_parse_buffer() { + let buf = b"GET /foo HTTP/1.1\r\nHost: example.com\r\nAccept: */*\r\n\r\n".to_vec(); + let mut storage = [Header::ZERO; 8]; + storage[0] = Header::new(&buf[19..23], &buf[25..36]); + storage[1] = Header::new(&buf[38..44], &buf[46..49]); + let request = Request { + method: &buf[0..3], + path: &buf[4..8], + minor_version: 1, + headers: &storage[0..2], + bytes_read: buf.len() as u32, + }; + assert_eq!(request.method, b"GET"); + assert_eq!(request.path, b"/foo"); + assert_eq!(request.headers.len(), 2); + assert_eq!(request.headers[0].name(), b"Host"); + assert_eq!(request.headers[0].value(), b"example.com"); + assert_eq!(request.headers[1].name(), b"Accept"); + assert_eq!(request.headers[1].value(), b"*/*"); + } +} + // ported from: src/picohttp/picohttp.zig diff --git a/src/runtime/webcore/fetch.rs b/src/runtime/webcore/fetch.rs index 717ca433322..c5f6732f541 100644 --- a/src/runtime/webcore/fetch.rs +++ b/src/runtime/webcore/fetch.rs @@ -1884,19 +1884,23 @@ fn fetch_impl( let mut header_buffer: [picohttp::Header; SignResult::MAX_HEADERS + 1] = [picohttp::Header::ZERO; SignResult::MAX_HEADERS + 1]; + // The mixed-in `Content-Type` header borrows `content_type`, which + // points into the *current* `headers` value — deep-copy into the + // replacement `Headers` before assigning over (and thereby dropping) + // the old one. if let Some(range_) = &range { let new_headers = result.mix_with_header( &mut header_buffer, picohttp::Header::new(b"range", range_.as_bytes()), ); - set_headers(&mut headers, new_headers); + headers = Some(Headers::from_pico_http_headers(new_headers)); } else if let Some(ct) = content_type { if !ct.is_empty() { let new_headers = result.mix_with_header( &mut header_buffer, picohttp::Header::new(b"Content-Type", ct), ); - set_headers(&mut headers, new_headers); + headers = Some(Headers::from_pico_http_headers(new_headers)); } else { set_headers(&mut headers, result.headers()); } diff --git a/src/s3_signing/credentials.rs b/src/s3_signing/credentials.rs index ba39111d3db..1c73ecf416f 100644 --- a/src/s3_signing/credentials.rs +++ b/src/s3_signing/credentials.rs @@ -27,15 +27,23 @@ macro_rules! alloc_print { use bun_core::fmt::hex_lower as HexLower; #[inline] -fn pico_header_empty() -> PicoHeader { +fn pico_header_empty() -> PicoHeader<'static> { PicoHeader::ZERO } -// TODO(port): bun_picohttp::Header::new — fields are private; constructing via -// repr(C) layout-pun until a public ctor lands. Layout is asserted in bun_picohttp. +/// Build a header destined for `SignResult::_headers`, widening the borrow to +/// `'static` for the self-referential storage (the values point into the +/// sibling `Box<[u8]>` fields of the same `SignResult`, which are heap-stable +/// across moves). +/// +/// # Safety +/// `name`/`value` must point into storage that outlives the `SignResult` the +/// returned header is stored in (its own `Box<[u8]>` fields or `'static` +/// data), and that storage must not be mutated while the header is readable. #[inline] -fn pico_header_new(name: &[u8], value: &[u8]) -> PicoHeader { - PicoHeader::new(name, value) +unsafe fn pico_header_new(name: &[u8], value: &[u8]) -> PicoHeader<'static> { + // SAFETY: forwarded caller contract — see `# Safety` above. + unsafe { PicoHeader::new(name, value).detach_lifetime() } } // ────────────────────────────────────────────────────────────────────────── @@ -878,57 +886,76 @@ impl S3Credentials { // bun_picohttp::Header stores raw `*const u8, len` (verified), so the heap pointers stay // valid across SignResult moves. SAFETY relies on the Box<[u8]> fields not being mutated // after the corresponding header is written. - result._headers[0] = pico_header_new(b"x-amz-content-sha256", aws_content_hash); - result._headers[1] = pico_header_new(b"x-amz-date", &result.amz_date); - result._headers[2] = pico_header_new(b"Host", &result.host); - result._headers[3] = pico_header_new(b"Authorization", &result.authorization); + // + // SAFETY (every `pico_header_new` below): the value points either at + // `'static` data, at a caller-owned slice that outlives the returned + // `SignResult` (`aws_content_hash`, `acl_value`, `storage_class_value`), + // or into one of `result`'s own `Box<[u8]>` fields, whose heap storage + // is stable across moves of `result` and lives until `result` is + // dropped. None of those fields are mutated after the header is written. + // + // SAFETY: see block comment above. + result._headers[0] = unsafe { pico_header_new(b"x-amz-content-sha256", aws_content_hash) }; + // SAFETY: see block comment above. + result._headers[1] = unsafe { pico_header_new(b"x-amz-date", &result.amz_date) }; + // SAFETY: see block comment above. + result._headers[2] = unsafe { pico_header_new(b"Host", &result.host) }; + // SAFETY: see block comment above. + result._headers[3] = unsafe { pico_header_new(b"Authorization", &result.authorization) }; if let Some(acl_value) = acl { + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"x-amz-acl", acl_value); + unsafe { pico_header_new(b"x-amz-acl", acl_value) }; result._headers_len += 1; } if let Some(token) = session_token { let session_token_value = Box::<[u8]>::from(token); + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"x-amz-security-token", &session_token_value); + unsafe { pico_header_new(b"x-amz-security-token", &session_token_value) }; result.session_token = session_token_value; result._headers_len += 1; } if let Some(storage_class_value) = storage_class { + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"x-amz-storage-class", storage_class_value); + unsafe { pico_header_new(b"x-amz-storage-class", storage_class_value) }; result._headers_len += 1; } if let Some(cd) = content_disposition { let content_disposition_value = Box::<[u8]>::from(cd); + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"content-disposition", &content_disposition_value); + unsafe { pico_header_new(b"content-disposition", &content_disposition_value) }; result.content_disposition = content_disposition_value; result._headers_len += 1; } if let Some(ce) = content_encoding { let content_encoding_value = Box::<[u8]>::from(ce); + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"content-encoding", &content_encoding_value); + unsafe { pico_header_new(b"content-encoding", &content_encoding_value) }; result.content_encoding = content_encoding_value; result._headers_len += 1; } if let Some(c_md5) = content_md5.as_deref() { let content_md5_value = Box::<[u8]>::from(c_md5); + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"content-md5", &content_md5_value); + unsafe { pico_header_new(b"content-md5", &content_md5_value) }; result.content_md5 = content_md5_value; result._headers_len += 1; } if request_payer { + // SAFETY: see block comment above. result._headers[result._headers_len as usize] = - pico_header_new(b"x-amz-request-payer", b"requester"); + unsafe { pico_header_new(b"x-amz-request-payer", b"requester") }; result._headers_len += 1; } @@ -1022,24 +1049,27 @@ pub struct SignResult { pub acl: Option, pub storage_class: Option, pub request_payer: bool, - // TODO(port): self-referential — entries borrow from the Box<[u8]> fields above. PicoHeader - // must be a raw (ptr,len) pair; see note in sign_request. - pub _headers: [PicoHeader; Self::MAX_HEADERS], + // TODO(port): self-referential — entries borrow from the Box<[u8]> fields above + // (lifetime-erased to `'static`); see note in sign_request. + pub _headers: [PicoHeader<'static>; Self::MAX_HEADERS], pub _headers_len: u8, } impl SignResult { pub const MAX_HEADERS: usize = 11; - pub fn headers(&self) -> &[PicoHeader] { + /// The header borrows are narrowed from the stored (lifetime-erased) + /// `'static` to `&self` so a copied-out `Header` cannot be read after this + /// `SignResult` is dropped. + pub fn headers(&self) -> &[PicoHeader<'_>] { &self._headers[0..self._headers_len as usize] } - pub fn mix_with_header<'b>( + pub fn mix_with_header<'b, 'h>( &self, - headers_buffer: &'b mut [PicoHeader], - header: PicoHeader, - ) -> &'b [PicoHeader] { + headers_buffer: &'b mut [PicoHeader<'h>], + header: PicoHeader<'h>, + ) -> &'b [PicoHeader<'h>] { // copy the headers to buffer let len = self._headers_len as usize; for (i, existing_header) in self._headers[0..len].iter().enumerate() {