diff --git a/packages/bun-uws/src/Http3Context.h b/packages/bun-uws/src/Http3Context.h index 4c8865290dd..2b4a4086997 100644 --- a/packages/bun-uws/src/Http3Context.h +++ b/packages/bun-uws/src/Http3Context.h @@ -26,6 +26,12 @@ struct Http3Context { Http3ContextData *cd = (Http3ContextData *) us_quic_socket_context_ext(us_quic_stream_context(s)); Http3Response *res = (Http3Response *) s; Http3ResponseData *rd = res->getHttpResponseData(); + /* lsquic re-fires on_stream_headers for every HEADERS block on + * the stream; only the first one is the request. Re-running + * reset()/route() for trailers would wipe the live response's + * onAborted/userData and dispatch the handler a second time. + * state is 0 only before the first reset() on this stream. */ + if (rd->state != 0) return; rd->reset(); Http3Request req(s); diff --git a/packages/bun-uws/src/HttpParser.h b/packages/bun-uws/src/HttpParser.h index f90f74d9f18..dc08aa896ef 100644 --- a/packages/bun-uws/src/HttpParser.h +++ b/packages/bun-uws/src/HttpParser.h @@ -570,8 +570,9 @@ namespace uWS } - bool isHTTPMethod = (__builtin_expect(data[1] == '/', 1)); - bool isConnect = !isHTTPMethod && ((data - start) == 7 && memcmp(start, "CONNECT", 7) == 0); + /* RFC 9112 3: exactly one SP separates method and request-target */ + bool isHTTPMethod = (__builtin_expect(data[0] == 32 && data[1] == '/', 1)); + bool isConnect = !isHTTPMethod && ((data - start) == 7 && data[0] == 32 && memcmp(start, "CONNECT", 7) == 0); /* Also accept proxy-style absolute URLs (http://... or https://...) as valid request targets */ bool isProxyStyleURL = !isHTTPMethod && !isConnect && data[0] == 32 && isHTTPorHTTPSPrefixForProxies(data + 1, end) == 1; if (isHTTPMethod || isConnect || isProxyStyleURL) [[likely]] { diff --git a/src/ast/e.rs b/src/ast/e.rs index 0c772fe2abc..89e6043c321 100644 --- a/src/ast/e.rs +++ b/src/ast/e.rs @@ -902,17 +902,22 @@ pub struct Rope { } impl Rope { pub fn append(&mut self, expr: Expr, bump: &Bump) -> Result<*mut Rope, AllocError> { - if let Some(mut next) = core::ptr::NonNull::new(self.next).map(StoreRef::from_non_null) { - // Arena-allocated Rope nodes are uniquely owned by the chain at this - // point in TOML parsing; route through `StoreRef::DerefMut` (the - // arena-backed handle whose deref is centralised in `nodes.rs`). - return next.append(expr, bump); + // Walk to the tail iteratively: recursing once per node overflows the + // native stack on adversarially deep ropes (e.g. an `.npmrc` section + // header with thousands of dot-separated segments). + // + // Arena-allocated Rope nodes are uniquely owned by the chain at this + // point; route through `StoreRef::DerefMut` (the arena-backed handle + // whose deref is centralised in `nodes.rs`). + let mut tail = StoreRef::from_bump(self); + while let Some(next) = core::ptr::NonNull::new(tail.next).map(StoreRef::from_non_null) { + tail = next; } let rope: *mut Rope = bump.alloc(Rope { head: expr, next: core::ptr::null_mut(), }); - self.next = rope; + tail.next = rope; Ok(rope) } diff --git a/src/base64/lib.rs b/src/base64/lib.rs index e73c2c3abd6..dfa9ac5c89f 100644 --- a/src/base64/lib.rs +++ b/src/base64/lib.rs @@ -303,8 +303,8 @@ pub mod vlq { const U7_MAX: u8 = 127; // base64 stores values up to 7 bits - const BASE64_LUT: [u8; U7_MAX as usize] = { - let mut bytes = [U7_MAX; U7_MAX as usize]; + const BASE64_LUT: [u8; U7_MAX as usize + 1] = { + let mut bytes = [U7_MAX; U7_MAX as usize + 1]; let mut i = 0; while i < BASE64.len() { bytes[BASE64[i] as usize] = i as u8; diff --git a/src/brotli/lib.rs b/src/brotli/lib.rs index 7b0d0feb77a..249d90422f5 100644 --- a/src/brotli/lib.rs +++ b/src/brotli/lib.rs @@ -59,6 +59,9 @@ pub struct BrotliReaderArrayList<'a> { pub state: ReaderState, pub total_out: usize, pub total_in: usize, + /// Decompression-bomb guard: `read_all` errors instead of growing the + /// output past this many bytes. Defaults to unbounded. + pub max_output_size: usize, pub flush_op: c::BrotliEncoderOperation, pub finish_flush_op: c::BrotliEncoderOperation, pub full_flush_op: c::BrotliEncoderOperation, @@ -152,6 +155,7 @@ impl<'a> BrotliReaderArrayList<'a> { state: ReaderState::Uninitialized, total_out: 0, total_in: 0, + max_output_size: usize::MAX, flush_op, finish_flush_op, full_flush_op, @@ -204,6 +208,13 @@ impl<'a> BrotliReaderArrayList<'a> { unsafe { bun_core::vec::commit_spare(self.list_ptr, bytes_written) }; self.total_in += bytes_read; + // Enforce the cap after every write so a chunk that ends the + // stream (`success`) cannot push the output past the limit. + if self.list_ptr.len() > self.max_output_size { + self.state = ReaderState::Error; + return Err(err!("BrotliDecompressionError")); + } + match result { c::BrotliDecoderResult::success => { debug_assert!(BrotliDecoder::is_finished(self.brotli())); @@ -237,6 +248,10 @@ impl<'a> BrotliReaderArrayList<'a> { return Err(err!("ShortRead")); } c::BrotliDecoderResult::needs_more_output => { + if self.list_ptr.len() >= self.max_output_size { + self.state = ReaderState::Error; + return Err(err!("BrotliDecompressionError")); + } let target = self.list_ptr.capacity() + 4096; self.list_ptr .reserve(target.saturating_sub(self.list_ptr.len())); diff --git a/src/bun_core/util.rs b/src/bun_core/util.rs index 05a6967d529..172840cdc2e 100644 --- a/src/bun_core/util.rs +++ b/src/bun_core/util.rs @@ -5786,21 +5786,53 @@ pub mod form_data { /// `FormData.getBoundary` — borrow the `boundary=` value out of a /// `Content-Type` header. Returns `None` on malformed quoting. + /// + /// Parameters are `;`-delimited per RFC 7231 and the parameter *name* must + /// be exactly `boundary`, so a different parameter (`xboundary=FAKE`) or a + /// `boundary=` substring inside another parameter's value is not picked up + /// by an unanchored substring search. A `;` inside a quoted parameter + /// value (RFC 7230 quoted-string, `\` escapes the next byte) does not + /// delimit parameters. pub fn get_boundary(content_type: &[u8]) -> Option<&[u8]> { - let idx = ::bstr::ByteSlice::find(content_type, b"boundary=")?; - let begin = &content_type[idx + b"boundary=".len()..]; - if begin.is_empty() { - return None; + let mut rest = content_type; + loop { + let semi = index_of_unquoted_semicolon(rest)?; + rest = &rest[semi + 1..]; + let Some(begin) = + crate::strings_impl::trim_left(rest, b" \t").strip_prefix(b"boundary=") + else { + continue; + }; + if begin.is_empty() { + return None; + } + let end = crate::strings_impl::index_of_char(begin, b';').unwrap_or(begin.len()); + if begin[0] == b'"' { + if end > 1 && begin[end - 1] == b'"' { + return Some(&begin[1..end - 1]); + } + // Opening quote with no matching closing quote — malformed. + return None; + } + return Some(&begin[..end]); } - let end = crate::strings_impl::index_of_char(begin, b';').unwrap_or(begin.len()); - if begin[0] == b'"' { - if end > 1 && begin[end - 1] == b'"' { - return Some(&begin[1..end - 1]); + } + + /// Index of the next `;` in `s` that is not inside an RFC 7230 + /// quoted-string (`\` escapes the following byte inside quotes). + fn index_of_unquoted_semicolon(s: &[u8]) -> Option { + let mut in_quotes = false; + let mut i = 0; + while i < s.len() { + match s[i] { + b'"' => in_quotes = !in_quotes, + b'\\' if in_quotes => i += 1, + b';' if !in_quotes => return Some(i), + _ => {} } - // Opening quote with no matching closing quote — malformed. - return None; + i += 1; } - Some(&begin[..end]) + None } /// `FormData.AsyncFormData` — heap-allocated, owns its `Encoding`. diff --git a/src/bundler/linker_context/postProcessCSSChunk.rs b/src/bundler/linker_context/postProcessCSSChunk.rs index db469638061..54c558b0bfd 100644 --- a/src/bundler/linker_context/postProcessCSSChunk.rs +++ b/src/bundler/linker_context/postProcessCSSChunk.rs @@ -73,8 +73,22 @@ pub fn post_process_css_chunk( j.push_static(b"/* "); line_offset.advance(b"/* "); - j.push_static(pretty); - line_offset.advance(pretty); + // A `*/` in the path would terminate the comment early and let the + // rest of the path be parsed as CSS in the bundled output. + if bun_core::strings::contains(pretty, b"*/") { + let mut escaped = Vec::with_capacity(pretty.len() + 1); + for &byte in pretty { + if byte == b'/' && escaped.last() == Some(&b'*') { + escaped.push(b'\\'); + } + escaped.push(byte); + } + line_offset.advance(&escaped); + j.push_owned(escaped.into_boxed_slice()); + } else { + j.push_static(pretty); + line_offset.advance(pretty); + } j.push_static(b" */\n"); line_offset.advance(b" */\n"); diff --git a/src/bundler/linker_context/postProcessJSChunk.rs b/src/bundler/linker_context/postProcessJSChunk.rs index f1757d01638..8148d71b907 100644 --- a/src/bundler/linker_context/postProcessJSChunk.rs +++ b/src/bundler/linker_context/postProcessJSChunk.rs @@ -680,8 +680,19 @@ pub fn post_process_js_chunk( } } - j.push_static(pretty); - line_offset.advance(pretty); + // A `*/` in the path would terminate the block comment early and + // turn the rest of the path into generated JavaScript. + if matches!(comment_type, CommentType::Multiline) && strings::contains(pretty, b"*/") { + let mut sanitized = pretty.to_vec(); + while let Some(i) = strings::index_of(&sanitized, b"*/") { + sanitized[i + 1] = b'_'; + } + line_offset.advance(&sanitized); + j.push_owned(sanitized.into_boxed_slice()); + } else { + j.push_static(pretty); + line_offset.advance(pretty); + } if emit_targets_in_commands { j.push_static(b" ("); diff --git a/src/http/Decompressor.rs b/src/http/Decompressor.rs index e706db24449..6c550c9c20e 100644 --- a/src/http/Decompressor.rs +++ b/src/http/Decompressor.rs @@ -54,6 +54,11 @@ unsafe fn seat<'a>(input: &'a [u8], out: &'a mut Vec) -> (&'static [u8], &'s } } +/// Decompression-bomb guard for response bodies inflated on the HTTP thread: +/// a hostile server must not be able to expand a tiny compressed payload into +/// an unbounded allocation. +const MAX_DECOMPRESSED_BODY_SIZE: usize = 1024 * 1024 * 1024; + impl Decompressor { // PORT NOTE: Zig `deinit` called `that.deinit()` on the active reader and // reset to `.none`. The boxed readers' `Drop` impls call `end()`, so an @@ -77,7 +82,7 @@ impl Decompressor { let (input, out) = unsafe { seat(buffer, &mut body_out_str.list) }; match encoding { Encoding::Gzip | Encoding::Deflate => { - let reader = ZlibReaderArrayList::init_with_options_and_list_allocator( + let mut reader = ZlibReaderArrayList::init_with_options_and_list_allocator( input, out, // PORT NOTE: Zig passed `body_out_str.allocator` and @@ -97,26 +102,29 @@ impl Decompressor { ..Default::default() }, )?; + reader.max_output_size = MAX_DECOMPRESSED_BODY_SIZE; *self = Decompressor::Zlib(reader); return Ok(()); } Encoding::Brotli => { - let reader = BrotliReaderArrayList::new_with_options( + let mut reader = BrotliReaderArrayList::new_with_options( input, out, // PORT NOTE: Zig passed `body_out_str.allocator`; dropped per §Allocators. &Default::default(), )?; + reader.max_output_size = MAX_DECOMPRESSED_BODY_SIZE; *self = Decompressor::Brotli(reader); return Ok(()); } Encoding::Zstd => { - let reader = ZstdReaderArrayList::init_with_list_allocator( + let mut reader = ZstdReaderArrayList::init_with_list_allocator( input, out, // PORT NOTE: Zig passed `body_out_str.allocator` and // `bun.http.default_allocator`; dropped per §Allocators. )?; + reader.max_output_size = MAX_DECOMPRESSED_BODY_SIZE; *self = Decompressor::Zstd(reader); return Ok(()); } diff --git a/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs b/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs index a4b06e31f69..41d4b0a8f5c 100644 --- a/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs +++ b/src/http_jsc/websocket_client/WebSocketUpgradeClient.rs @@ -993,6 +993,11 @@ impl HTTPClient { let me = unsafe { &mut *this }; let mut body = data; if !me.body.is_empty() { + if me.body.len().saturating_add(data.len()) > bun_http::max_http_header_size() { + // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. + unsafe { Self::terminate(this, ErrorCode::InvalidResponse) }; + return; + } me.body.extend_from_slice(data); body = &me.body; } @@ -1020,6 +1025,11 @@ impl HTTPClient { } Err(picohttp::ParseResponseError::ShortRead) => { if me.body.is_empty() { + if data.len() > bun_http::max_http_header_size() { + // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. + unsafe { Self::terminate(this, ErrorCode::InvalidResponse) }; + return; + } me.body.extend_from_slice(data); } return; @@ -1233,6 +1243,11 @@ impl HTTPClient { // Process as if it came directly from the socket let mut body = data; if !me.body.is_empty() { + if me.body.len().saturating_add(data.len()) > bun_http::max_http_header_size() { + // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. + unsafe { Self::terminate(this, ErrorCode::InvalidResponse) }; + return; + } me.body.extend_from_slice(data); body = &me.body; } @@ -1257,6 +1272,11 @@ impl HTTPClient { } Err(picohttp::ParseResponseError::ShortRead) => { if me.body.is_empty() { + if data.len() > bun_http::max_http_header_size() { + // SAFETY: `me`'s last use is above; no `&mut Self` spans this call. + unsafe { Self::terminate(this, ErrorCode::InvalidResponse) }; + return; + } me.body.extend_from_slice(data); } return; diff --git a/src/ini/lib.rs b/src/ini/lib.rs index 07f2685af13..bab318fc112 100644 --- a/src/ini/lib.rs +++ b/src/ini/lib.rs @@ -257,6 +257,13 @@ mod draft { type OOM = Result; + /// Hard cap on dot-separated segments in a section-header rope. The rope is + /// consumed by `E::Object::get_or_put_object`, which recurses once per + /// `rope.next` link, so an unbounded header overflows the stack. Past the + /// cap the remainder of the header (dots included) becomes the final + /// segment. Mirrors `MAX_DOTTED_KEY_SEGMENTS` in the TOML parser. + const MAX_SECTION_ROPE_SEGMENTS: usize = 512; + // ────────────────────────────────────────────────────────────────────────── // Parser // ────────────────────────────────────────────────────────────────────────── @@ -692,6 +699,7 @@ mod draft { // RopeT is *Rope when usage==Section, else unit. In Rust we just // keep an Option<&mut Rope> and ignore it for non-section usages. let mut rope: Option<&'a mut Rope> = None; + let mut rope_parts: usize = 0; let mut i: usize = 0; 'walk: while i < val.len() { @@ -779,8 +787,10 @@ mod draft { did_any_escape = true; } b'.' => { - if usage == Usage::Section { + if usage == Usage::Section && rope_parts < MAX_SECTION_ROPE_SEGMENTS + { self.commit_rope_part(bump, ropealloc, &mut unesc, &mut rope)?; + rope_parts += 1; } else { unesc.push(b'.'); } @@ -1069,10 +1079,11 @@ mod draft { next: ptr::null_mut(), }); + let mut segments: usize = 1; while dot_idx + 1 < key.len() { let next_dot_idx = match next_dot(&key[dot_idx + 1..]) { - Some(n) => dot_idx + 1 + n, - None => { + Some(n) if segments < MAX_SECTION_ROPE_SEGMENTS => dot_idx + 1 + n, + _ => { let rest = &key[dot_idx + 1..]; let _ = rope_head .append(Expr::init(E::EString::init(rest), Loc::EMPTY), ropealloc)?; @@ -1082,6 +1093,7 @@ mod draft { let part = &key[dot_idx + 1..next_dot_idx]; let _ = rope_head.append(Expr::init(E::EString::init(part), Loc::EMPTY), ropealloc)?; + segments += 1; dot_idx = next_dot_idx; } diff --git a/src/install/PackageInstaller.rs b/src/install/PackageInstaller.rs index 9ecf887a8bd..a0be3a71913 100644 --- a/src/install/PackageInstaller.rs +++ b/src/install/PackageInstaller.rs @@ -357,6 +357,32 @@ impl<'a> LazyPackageDestinationDir<'a> { } } +/// A dependency alias becomes the install destination inside `node_modules` +/// (the existing entry is renamed aside, deleted, and re-created). Reject +/// anything that could escape `node_modules`: empty names, `.`/`..` +/// components, absolute paths, drive letters, backslashes, NUL bytes, and any +/// separator other than the single `/` in a scoped name (`@scope/name`). +fn alias_is_safe_install_target(alias: &[u8]) -> bool { + if alias.is_empty() + || alias.len() >= MAX_PATH_BYTES + || alias.contains(&b'\\') + || alias.contains(&b':') + || alias.contains(&0) + { + return false; + } + + let mut component_count = 0usize; + for component in alias.split(|&c| c == b'/') { + component_count += 1; + if component.is_empty() || component == b"." || component == b".." { + return false; + } + } + + component_count == 1 || (component_count == 2 && alias[0] == b'@') +} + impl<'a> PackageInstaller<'a> { // ────────────────────────────────────────────────────────────────────── // BACKREF accessors @@ -1164,6 +1190,25 @@ impl<'a> PackageInstaller<'a> { } let alias = self.lockfile().buffers.dependencies.as_slice()[dependency_id as usize].name; + + // The alias is used as a path relative to `node_modules` for delete, + // rename, and create operations. Refuse anything that could escape it. + if !alias_is_safe_install_target(alias.slice(string_buf!())) { + if log_level != Options::LogLevel::Silent { + Output::pretty_errorln(format_args!( + "error: refusing to install dependency with unsafe name {}", + bstr::BStr::new(alias.slice(string_buf!())), + )); + } + self.summary.fail += 1; + self.increment_tree_install_count( + !IS_PENDING_PACKAGE_INSTALL, + self.current_tree_id, + log_level, + ); + return; + } + // PORT NOTE: `PackageInstall` stores both `destination_dir_subpath: &mut ZStr` // and `destination_dir_subpath_buf: &mut [u8]` aliasing the same bytes (Zig // slices don't enforce noalias). Derive BOTH from a single `*mut PathBuffer` @@ -1731,10 +1776,11 @@ impl<'a> PackageInstaller<'a> { { break 'brk (true, true); } - if self - .lockfile() - .has_trusted_dependency(alias.slice(string_buf!()), resolution) - { + if self.lockfile().has_trusted_dependency( + alias.slice(string_buf!()), + pkg_name.slice(string_buf!()), + resolution, + ) { break 'brk (true, false); } break 'brk (false, false); diff --git a/src/install/TarballStream.rs b/src/install/TarballStream.rs index 6be4de6a529..e205a1b0520 100644 --- a/src/install/TarballStream.rs +++ b/src/install/TarballStream.rs @@ -144,6 +144,13 @@ pub struct TarballStream { want_first_dirname: bool, npm_mode: bool, + /// Symlinks created so far during this extraction. Later entries whose + /// path traverses one of these are skipped: the per-target check in + /// `make_symlink` is purely lexical, so once a link is on disk the kernel + /// would follow it and a chained link could escape the extraction root. + #[cfg(unix)] + created_symlinks: Vec>, + bytes_received: usize, entry_count: u32, fail: Option, @@ -237,6 +244,8 @@ impl TarballStream { resolved_github_dirname: b"", want_first_dirname, npm_mode, + #[cfg(unix)] + created_symlinks: Vec::new(), bytes_received: 0, entry_count: 0, fail: None, @@ -800,6 +809,17 @@ impl TarballStream { let path_slice: &[OSPathChar] = &path[..]; let dest = self.dest.unwrap(); + // Reject any entry whose path traverses a symlink created earlier in + // this extraction; the kernel would follow it and the entry could land + // outside the extraction root. Same defense as the buffered extractor + // in `Archiver::extract_to_dir`. + #[cfg(unix)] + if bun_libarchive::path_traverses_created_symlink(path_slice, &self.created_symlinks) { + self.phase = Phase::WantData; + self.out_fd = None; + return Ok(()); + } + match kind { FileKind::Directory => { make_directory(entry, dest, path, path_slice); @@ -808,16 +828,24 @@ impl TarballStream { } FileKind::SymLink => { #[cfg(unix)] - make_symlink(entry, dest, path, path_slice); + if make_symlink(entry, dest, path, path_slice) { + self.created_symlinks.push(path_slice.to_vec()); + } self.phase = Phase::WantData; self.out_fd = None; } FileKind::File => { #[cfg(windows)] let mode: Mode = 0; + // Mask to permission bits so setuid/setgid/sticky bits from the + // archive never reach `openat`'s mode argument. #[cfg(not(windows))] - let mode: Mode = Mode::try_from(entry.perm() | 0o666).expect("int cast"); - let fd = open_output_file(dest, path, path_slice, mode)?; + let mode: Mode = Mode::try_from((entry.perm() & 0o777) | 0o666).expect("int cast"); + #[cfg(unix)] + let nofollow = !self.created_symlinks.is_empty(); + #[cfg(not(unix))] + let nofollow = false; + let fd = open_output_file(dest, path, path_slice, mode, nofollow)?; self.entry_count += 1; #[cfg(any(target_os = "linux", target_os = "android"))] @@ -1264,8 +1292,20 @@ fn open_output_file( path: OSPathZ, path_slice: &[OSPathChar], mode: Mode, + nofollow: bool, ) -> Result { - let flags = O::WRONLY | O::CREAT | O::TRUNC; + // `path_traverses_created_symlink` is a lexical check: on filesystems that + // alias differently-encoded names (Unicode NFC/NFD normalization on + // APFS/HFS+), a path component can reach a created symlink without + // byte-matching its recorded path. Once this extraction has created any + // symlink, ask the kernel to refuse to follow symlinks while opening file + // entries. `NOFOLLOW_ANY` is 0 on non-Darwin targets. Same defense as the + // buffered extractor in `Archiver::extract_to_dir`. + let flags = if nofollow { + O::WRONLY | O::CREAT | O::TRUNC | O::NOFOLLOW_ANY + } else { + O::WRONLY | O::CREAT | O::TRUNC + }; #[cfg(windows)] { let _ = mode; @@ -1339,13 +1379,20 @@ fn make_directory(entry: &mut lib::Entry, dest_fd: Fd, path: OSPathZ, path_slice } } +/// Returns `true` only when a symlink was actually created on disk, so the +/// caller can record it in `created_symlinks`. #[cfg(unix)] -fn make_symlink(entry: &mut lib::Entry, dest_fd: Fd, path: OSPathZ, path_slice: &[OSPathChar]) { +fn make_symlink( + entry: &mut lib::Entry, + dest_fd: Fd, + path: OSPathZ, + path_slice: &[OSPathChar], +) -> bool { let target = entry.symlink(); // Same safety rule as `isSymlinkTargetSafe` in the buffered path: // reject absolute targets and anything that escapes via `..`. if target.is_empty() || target[0] == b'/' { - return; + return false; } { let symlink_dir = bun_paths::dirname(path_slice).unwrap_or(b""); @@ -1356,19 +1403,19 @@ fn make_symlink(entry: &mut lib::Entry, dest_fd: Fd, path: OSPathZ, path_slice: &[symlink_dir, target.as_bytes()], ); if !resolved.starts_with(b"/packages/") { - return; + return false; } } match bun_sys::symlinkat(target, dest_fd, path) { - Ok(()) => {} + Ok(()) => true, Err(e) if matches!(e.get_errno(), bun_sys::E::EPERM | bun_sys::E::ENOENT) => { let Some(dir) = bun_paths::dirname(path_slice) else { - return; + return false; }; let _ = dest_fd.make_path(dir); - let _ = bun_sys::symlinkat(target, dest_fd, path); + bun_sys::symlinkat(target, dest_fd, path).is_ok() } - Err(_) => {} + Err(_) => false, } } diff --git a/src/install/bin.rs b/src/install/bin.rs index 3ee27a7d7e1..96158eb6630 100644 --- a/src/install/bin.rs +++ b/src/install/bin.rs @@ -757,6 +757,45 @@ pub fn normalized_bin_name(name: &[u8]) -> &[u8] { name } +/// True when a `bin` entry's target value would resolve outside the package +/// directory (absolute path or `..` traversal). The bin *value* is taken +/// verbatim from package.json, so without this check a malicious package could +/// point a bin link at (and chmod) an arbitrary file on disk (the bug class +/// npm fixed as CVE-2019-16775). +pub fn bin_target_escapes_package_dir(target: &[u8]) -> bool { + if path::is_absolute(target) { + return true; + } + // Windows drive-relative paths (`C:foo`, `C:..\evil`) are not "absolute" + // (no separator after the colon) and their `C:..` component is not a bare + // `..`, so they would slip past the depth walk below while still resolving + // outside the package directory. A colon in the *first* component can only + // be a drive prefix (or an NTFS alternate-data-stream on the leading + // segment) — reject it. Colons in later components are left alone so Unix + // filenames containing `:` keep working. + if target + .split(|&b| b == b'/' || b == b'\\') + .next() + .is_some_and(|first| first.contains(&b':')) + { + return true; + } + let mut depth: isize = 0; + for component in target.split(|&b| b == b'/' || b == b'\\') { + match component { + b"" | b"." => {} + b".." => { + depth -= 1; + if depth < 0 { + return true; + } + } + _ => depth += 1, + } + } + false +} + pub struct Linker<'a> { pub bin: Bin, @@ -1436,7 +1475,7 @@ impl<'a> Linker<'a> { Tag::File => { let file = self.bin.value.file; let target = file.slice(self.string_buf); - if target.is_empty() { + if target.is_empty() || bin_target_escapes_package_dir(target) { return; } @@ -1481,7 +1520,10 @@ impl<'a> Linker<'a> { let name = named[0].slice(self.string_buf); let normalized_name = normalized_bin_name(name); let target = named[1].slice(self.string_buf); - if normalized_name.is_empty() || target.is_empty() { + if normalized_name.is_empty() + || target.is_empty() + || bin_target_escapes_package_dir(target) + { return; } if normalized_name.len() >= self.abs_dest_buf.len().saturating_sub(dest_off) { @@ -1524,7 +1566,10 @@ impl<'a> Linker<'a> { let normalized_bin_dest = normalized_bin_name(bin_dest); let bin_target = self.extern_string_buf[(i + 1) as usize].slice(self.string_buf); - if bin_target.is_empty() || normalized_bin_dest.is_empty() { + if bin_target.is_empty() + || normalized_bin_dest.is_empty() + || bin_target_escapes_package_dir(bin_target) + { i += 2; continue; } @@ -1564,7 +1609,7 @@ impl<'a> Linker<'a> { Tag::Dir => { let dir = self.bin.value.dir; let target = dir.slice(self.string_buf); - if target.is_empty() { + if target.is_empty() || bin_target_escapes_package_dir(target) { return; } diff --git a/src/install/isolated_install.rs b/src/install/isolated_install.rs index 2c3b7c8b885..1894b07dd41 100644 --- a/src/install/isolated_install.rs +++ b/src/install/isolated_install.rs @@ -1305,10 +1305,12 @@ pub fn install_isolated_packages( pkg_name_hashes[pkg_id as usize], ) }; - if lockfile.has_trusted_dependency(dep_name, pkg_res) - || trusted_from_update.contains( - &(dep_name_hash as crate::TruncatedPackageNameHash), - ) + if lockfile.has_trusted_dependency( + dep_name, + pkg_names[pkg_id as usize].slice(string_buf), + pkg_res, + ) || trusted_from_update + .contains(&(dep_name_hash as crate::TruncatedPackageNameHash)) { break 'eligible false; } diff --git a/src/install/isolated_install/Installer.rs b/src/install/isolated_install/Installer.rs index f9a3736653c..982e975a2c1 100644 --- a/src/install/isolated_install/Installer.rs +++ b/src/install/isolated_install/Installer.rs @@ -1632,7 +1632,11 @@ impl Task { { break 'brk (true, true); } - if lockfile.has_trusted_dependency(dep.name.slice(string_buf), &pkg_res) { + if lockfile.has_trusted_dependency( + dep.name.slice(string_buf), + pkg_name.slice(string_buf), + &pkg_res, + ) { break 'brk (true, false); } break 'brk (false, false); diff --git a/src/install/lockfile.rs b/src/install/lockfile.rs index c2a94c4b404..f3dd6fabbd2 100644 --- a/src/install/lockfile.rs +++ b/src/install/lockfile.rs @@ -3349,14 +3349,22 @@ pub mod default_trusted_dependencies { } impl Lockfile { - pub fn has_trusted_dependency(&self, name: &[u8], resolution: &Resolution) -> bool { + pub fn has_trusted_dependency( + &self, + alias: &[u8], + pkg_name: &[u8], + resolution: &Resolution, + ) -> bool { if let Some(trusted_dependencies) = &self.trusted_dependencies { - let hash = SemverStringBuilder::string_hash(name) as u32; + let hash = SemverStringBuilder::string_hash(alias) as u32; return trusted_dependencies.contains(&hash); } - // Only allow default trusted dependencies for npm packages - resolution.tag == ResolutionTag::Npm && default_trusted_dependencies::has(name) + // Only allow default trusted dependencies for npm packages. Check the + // resolved package's real name, not the dependency alias, so an + // `npm:`-aliased package can't inherit trust from a default-trusted + // name. + resolution.tag == ResolutionTag::Npm && default_trusted_dependencies::has(pkg_name) } } diff --git a/src/install/lockfile/Package.rs b/src/install/lockfile/Package.rs index dec11f6a1f1..901bab9889a 100644 --- a/src/install/lockfile/Package.rs +++ b/src/install/lockfile/Package.rs @@ -3484,6 +3484,40 @@ pub mod serializer { } } } + if matches!(field, PackageField::Meta) { + // Same hardening as `Resolution` above: `Meta` embeds two + // `#[repr(u8)]` enums (`Origin` = 0..=2 and + // `HasInstallScript` = 0..=2). Copying an out-of-range byte + // into either field and reading it back as the enum would + // be immediate UB, so check the raw stream bytes first. + let stride = mem::size_of::(); + let origin_at = mem::offset_of!(Meta, origin); + let install_script_at = mem::offset_of!(Meta, has_install_script); + debug_assert!(stride != 0 && src.len().is_multiple_of(stride)); + for raw in src.chunks_exact(stride) { + if !matches!(raw[origin_at], 0 | 1 | 2) + || !matches!(raw[install_script_at], 0 | 1 | 2) + { + return Err(bun_core::err!( + "Lockfile validation failed: invalid package meta" + )); + } + } + } + if matches!(field, PackageField::Bin) { + // `Bin.tag` is a `#[repr(u8)]` enum with discriminants + // 0..=4; validate it the same way before the copy. + let stride = mem::size_of::(); + let tag_at = mem::offset_of!(Bin, tag); + debug_assert!(stride != 0 && src.len().is_multiple_of(stride)); + for raw in src.chunks_exact(stride) { + if !matches!(raw[tag_at], 0 | 1 | 2 | 3 | 4) { + return Err(bun_core::err!( + "Lockfile validation failed: invalid bin tag" + )); + } + } + } bytes.copy_from_slice(src); stream.pos = end_pos; if matches!(field, PackageField::Meta) { diff --git a/src/install/lockfile/Package/Scripts.rs b/src/install/lockfile/Package/Scripts.rs index 165c2b3ff36..dd02cbcfad7 100644 --- a/src/install/lockfile/Package/Scripts.rs +++ b/src/install/lockfile/Package/Scripts.rs @@ -309,20 +309,20 @@ impl Scripts { ) -> Result, bun_core::Error> { // TODO(port): narrow error set if self.has_any() { - let add_node_gyp_rebuild_script = if lockfile - .has_trusted_dependency(folder_name, resolution) - && self.install.is_empty() - && self.preinstall.is_empty() - { - // `defer save.restore()` — `save()` returns an RAII guard that - // restores the path length on Drop and derefs to the path. - let mut save = folder_path.save(); - let _ = save.append(b"binding.gyp"); // OOM/capacity: Zig aborts; port keeps fire-and-forget - - bun_sys::exists(save.slice()) - } else { - false - }; + let add_node_gyp_rebuild_script = + if lockfile.has_trusted_dependency(folder_name, folder_name, resolution) + && self.install.is_empty() + && self.preinstall.is_empty() + { + // `defer save.restore()` — `save()` returns an RAII guard that + // restores the path length on Drop and derefs to the path. + let mut save = folder_path.save(); + let _ = save.append(b"binding.gyp"); // OOM/capacity: Zig aborts; port keeps fire-and-forget + + bun_sys::exists(save.slice()) + } else { + false + }; return Ok(self.create_list( lockfile, diff --git a/src/js/internal/debugger.ts b/src/js/internal/debugger.ts index 4b400473692..46df6908d5a 100644 --- a/src/js/internal/debugger.ts +++ b/src/js/internal/debugger.ts @@ -349,6 +349,12 @@ class Debugger { }); } + if (!isOriginAllowed(headers.get("Origin"))) { + return new Response(null, { + status: 403, // Forbidden + }); + } + const data: Connection = { refEventLoop: headers.get("Ref-Event-Loop") === "0", }; @@ -591,6 +597,34 @@ function parseUrl(input: string): URL { return url; } +// Browsers always send an `Origin` header on WebSocket handshakes, so rejecting +// unexpected web origins prevents a malicious website from connecting to the +// inspector and evaluating code. This matters most when the user passes an +// explicit pathname to --inspect, which replaces the random UUID pathname that +// otherwise acts as a bearer token. Non-browser clients (IDEs, CLI tools) do +// not send an `Origin` header and are unaffected. +function isOriginAllowed(origin: string | null): boolean { + if (!origin) { + return true; + } + let url: URL; + try { + url = new URL(origin); + } catch { + // Includes the opaque "null" origin sent by sandboxed iframes and file://. + return false; + } + const { protocol, hostname } = url; + if (protocol !== "http:" && protocol !== "https:") { + // Privileged schemes (e.g. devtools://) cannot be claimed by a web page. + return true; + } + if (url.origin === "https://debug.bun.sh") { + return true; + } + return hostname === "localhost" || hostname === "[::1]" || /^127(\.\d{1,3}){3}$/.test(hostname); +} + function randomId() { return crypto.randomUUID(); } diff --git a/src/js/internal/validators.ts b/src/js/internal/validators.ts index f10b6c30522..038b1b1f7eb 100644 --- a/src/js/internal/validators.ts +++ b/src/js/internal/validators.ts @@ -21,10 +21,19 @@ function checkIsHttpToken(val) { This regex validates any string surrounded by angle brackets (not necessarily a valid URI reference) followed by zero or more link-params separated by semicolons. + + The parameter name excludes "=" so it cannot overlap with the optional + "=value" suffix; otherwise inputs like "<>;a=b;a=b;...;a=b " trigger + catastrophic backtracking. */ -const linkValueRegExp = /^(?:<[^>]*>)(?:\s*;\s*[^;"\s]+(?:=(")?[^;"\s]*\1)?)*$/; +const linkValueRegExp = /^(?:<[^>]*>)(?:\s*;\s*[^;"\s=]+(?:=(")?[^;"\s]*\1)?)*$/; +const linkValueForbiddenCharsRegExp = /[\r\n]/; function validateLinkHeaderFormat(value, name) { - if (typeof value === "undefined" || !RegExpPrototypeExec.$call(linkValueRegExp, value)) { + if ( + typeof value === "undefined" || + !RegExpPrototypeExec.$call(linkValueRegExp, value) || + RegExpPrototypeExec.$call(linkValueForbiddenCharsRegExp, value) !== null + ) { throw $ERR_INVALID_ARG_VALUE( name, value, diff --git a/src/js/node/net.ts b/src/js/node/net.ts index a4c9a777e59..30906ac0c8c 100644 --- a/src/js/node/net.ts +++ b/src/js/node/net.ts @@ -936,6 +936,9 @@ Socket.prototype.connect = function connect(...args) { tls.requestCert = true; tls.session = session || tls.session; this.servername = tls.servername; + if (checkServerIdentity !== undefined) { + validateFunction(checkServerIdentity, "options.checkServerIdentity"); + } tls.checkServerIdentity = checkServerIdentity || tls.checkServerIdentity; this[bunTLSConnectOptions] = tls; if (!connection && tls.socket) { diff --git a/src/js/node/tls.ts b/src/js/node/tls.ts index c4ef991ef83..c684f4f00a7 100644 --- a/src/js/node/tls.ts +++ b/src/js/node/tls.ts @@ -5,7 +5,7 @@ const Duplex = require("internal/streams/duplex"); const addServerName = $newZigFunction("Listener.zig", "jsAddServerName", 3); const { throwNotImplemented } = require("internal/shared"); const { throwOnInvalidTLSArray } = require("internal/tls"); -const { validateString } = require("internal/validators"); +const { validateString, validateFunction } = require("internal/validators"); const { Server: NetServer, Socket: NetSocket } = net; @@ -532,6 +532,9 @@ function TLSSocket(socket?, options?) { this.secureConnecting = true; this._secureEstablished = false; this._securePending = true; + if (options.checkServerIdentity !== undefined) { + validateFunction(options.checkServerIdentity, "options.checkServerIdentity"); + } this[kcheckServerIdentity] = options.checkServerIdentity || checkServerIdentity; this[ksession] = options.session || null; } diff --git a/src/js/node/wasi.ts b/src/js/node/wasi.ts index a0ae57df5c5..b91fa0c3ba1 100644 --- a/src/js/node/wasi.ts +++ b/src/js/node/wasi.ts @@ -915,6 +915,22 @@ var require_wasi = __commonJS({ } return stats; }; + // Resolve a guest-supplied path against the directory backing `stats` and + // reject anything that escapes it (absolute paths, ".." traversal). + const RESOLVE_PATH = (stats, p) => { + if (!stats.path) { + throw new types_1.WASIError(constants_1.WASI_EINVAL); + } + const base = path.resolve(stats.path); + const resolved = path.resolve(base, p); + if (resolved !== base) { + const rel = path.relative(base, resolved); + if (rel === ".." || rel.startsWith(`..${path.sep}`) || path.isAbsolute(rel)) { + throw new types_1.WASIError(constants_1.WASI_ENOTCAPABLE); + } + } + return resolved; + }; const CPUTIME_START = Bun.nanoseconds(); const timeOrigin = Math.trunc(performance.timeOrigin * 1e6); const now = clockId => { @@ -1357,7 +1373,7 @@ var require_wasi = __commonJS({ } this.refreshMemory(); const p = Buffer.from(this.memory.buffer, pathPtr, pathLen).toString(); - fs.mkdirSync(path.resolve(stats.path, p)); + fs.mkdirSync(RESOLVE_PATH(stats, p)); return constants_1.WASI_ESUCCESS; }), path_filestat_get: wrap((fd, flags, pathPtr, pathLen, bufPtr) => { @@ -1367,11 +1383,12 @@ var require_wasi = __commonJS({ } this.refreshMemory(); const p = Buffer.from(this.memory.buffer, pathPtr, pathLen).toString(); + const resolved = RESOLVE_PATH(stats, p); let rstats; if (flags) { - rstats = fs.statSync(path.resolve(stats.path, p)); + rstats = fs.statSync(resolved); } else { - rstats = fs.lstatSync(path.resolve(stats.path, p)); + rstats = fs.lstatSync(resolved); } this.view.setBigUint64(bufPtr, BigInt(rstats.dev), true); bufPtr += 8; @@ -1419,7 +1436,7 @@ var require_wasi = __commonJS({ mtim = n; } const p = Buffer.from(this.memory.buffer, pathPtr, pathLen).toString(); - fs.utimesSync(path.resolve(stats.path, p), new Date(atim), new Date(mtim)); + fs.utimesSync(RESOLVE_PATH(stats, p), new Date(atim), new Date(mtim)); return constants_1.WASI_ESUCCESS; }), path_link: wrap((oldFd, _oldFlags, oldPath, oldPathLen, newFd, newPath, newPathLen) => { @@ -1431,13 +1448,13 @@ var require_wasi = __commonJS({ this.refreshMemory(); const op = Buffer.from(this.memory.buffer, oldPath, oldPathLen).toString(); const np = Buffer.from(this.memory.buffer, newPath, newPathLen).toString(); - fs.linkSync(path.resolve(ostats.path, op), path.resolve(nstats.path, np)); + fs.linkSync(RESOLVE_PATH(ostats, op), RESOLVE_PATH(nstats, np)); return constants_1.WASI_ESUCCESS; }), path_open: wrap( (dirfd, _dirflags, pathPtr, pathLen, oflags, fsRightsBase, fsRightsInheriting, fsFlags, fdPtr) => { try { - CHECK_FD(dirfd, constants_1.WASI_RIGHT_PATH_OPEN); + const stats = CHECK_FD(dirfd, constants_1.WASI_RIGHT_PATH_OPEN); fsRightsBase = BigInt(fsRightsBase); fsRightsInheriting = BigInt(fsRightsInheriting); const read = @@ -1512,17 +1529,61 @@ var require_wasi = __commonJS({ if (p.startsWith("proc/")) { throw new types_1.WASIError(constants_1.WASI_EBADF); } - const fullUnresolved = path.resolve(p); + const fullUnresolved = RESOLVE_PATH(stats, p); let full; try { full = fs.realpathSync(fullUnresolved); } catch (e) { if (e?.code === "ENOENT") { - full = fullUnresolved; + // The final component may legitimately not exist yet (e.g. + // O_CREAT), but the rest of the path must not be redirected + // by symlinks: resolve the parent directory and re-attach + // the final component. A dangling symlink as the final + // component would still redirect the create, so reject it. + const parentDir = path.dirname(fullUnresolved); + const lastComponent = path.basename(fullUnresolved); + let realParent = parentDir; + try { + realParent = fs.realpathSync(parentDir); + } catch (e2) { + if (e2?.code !== "ENOENT") throw e2; + } + full = path.join(realParent, lastComponent); + let finalIsLink = false; + try { + finalIsLink = fs.lstatSync(full).isSymbolicLink(); + } catch {} + if (finalIsLink) { + throw new types_1.WASIError(constants_1.WASI_ENOTCAPABLE); + } } else { throw e; } } + // RESOLVE_PATH is a lexical check on the guest-supplied path; + // realpathSync above follows symlinks on disk, so a symlink + // inside the directory can still point the resolved path + // outside of it. Re-check containment before the path is + // opened or recorded as a new directory base in FD_MAP. The + // resolved path must stay under the directory's lexical + // location or its own resolved location (the latter matters + // when the preopened directory is itself reached via a + // symlink). + { + const contained = base => { + if (full === base) return true; + const rel = path.relative(base, full); + return rel !== ".." && !rel.startsWith(`..${path.sep}`) && !path.isAbsolute(rel); + }; + const lexicalBase = path.resolve(stats.path); + let realBase = lexicalBase; + try { + realBase = fs.realpathSync(lexicalBase); + } catch {} + if (!contained(lexicalBase) && !contained(realBase)) { + throw new types_1.WASIError(constants_1.WASI_ENOTCAPABLE); + } + } let isDirectory; if (write) { try { @@ -1548,6 +1609,9 @@ var require_wasi = __commonJS({ stat(this, newfd); this.view.setUint32(fdPtr, newfd, true); } catch (e) { + if (e instanceof types_1.WASIError) { + return e.errno; + } console.error(e); } return constants_1.WASI_ESUCCESS; @@ -1560,7 +1624,7 @@ var require_wasi = __commonJS({ } this.refreshMemory(); const p = Buffer.from(this.memory.buffer, pathPtr, pathLen).toString(); - const full = path.resolve(stats.path, p); + const full = RESOLVE_PATH(stats, p); const r = fs.readlinkSync(full); const used = Buffer.from(this.memory.buffer).write(r, buf, bufLen); this.view.setUint32(bufused, used, true); @@ -1573,7 +1637,7 @@ var require_wasi = __commonJS({ } this.refreshMemory(); const p = Buffer.from(this.memory.buffer, pathPtr, pathLen).toString(); - fs.rmdirSync(path.resolve(stats.path, p)); + fs.rmdirSync(RESOLVE_PATH(stats, p)); return constants_1.WASI_ESUCCESS; }), path_rename: wrap((oldFd, oldPath, oldPathLen, newFd, newPath, newPathLen) => { @@ -1585,7 +1649,7 @@ var require_wasi = __commonJS({ this.refreshMemory(); const op = Buffer.from(this.memory.buffer, oldPath, oldPathLen).toString(); const np = Buffer.from(this.memory.buffer, newPath, newPathLen).toString(); - fs.renameSync(path.resolve(ostats.path, op), path.resolve(nstats.path, np)); + fs.renameSync(RESOLVE_PATH(ostats, op), RESOLVE_PATH(nstats, np)); return constants_1.WASI_ESUCCESS; }), path_symlink: wrap((oldPath, oldPathLen, fd, newPath, newPathLen) => { @@ -1596,7 +1660,7 @@ var require_wasi = __commonJS({ this.refreshMemory(); const op = Buffer.from(this.memory.buffer, oldPath, oldPathLen).toString(); const np = Buffer.from(this.memory.buffer, newPath, newPathLen).toString(); - fs.symlinkSync(op, path.resolve(stats.path, np)); + fs.symlinkSync(op, RESOLVE_PATH(stats, np)); return constants_1.WASI_ESUCCESS; }), path_unlink_file: wrap((fd, pathPtr, pathLen) => { @@ -1606,7 +1670,7 @@ var require_wasi = __commonJS({ } this.refreshMemory(); const p = Buffer.from(this.memory.buffer, pathPtr, pathLen).toString(); - fs.unlinkSync(path.resolve(stats.path, p)); + fs.unlinkSync(RESOLVE_PATH(stats, p)); return constants_1.WASI_ESUCCESS; }), poll_oneoff: (sin, sout, nsubscriptions, neventsPtr) => { diff --git a/src/jsc/bindings/node/http/NodeHTTPParser.cpp b/src/jsc/bindings/node/http/NodeHTTPParser.cpp index 487210cdd51..75e5ebd8462 100644 --- a/src/jsc/bindings/node/http/NodeHTTPParser.cpp +++ b/src/jsc/bindings/node/http/NodeHTTPParser.cpp @@ -392,7 +392,10 @@ int HTTPParser::onHeaderField(const char* at, size_t length) if (m_numFields == kMaxHeaderFieldsCount) { // ran out of space - flush to javascript land flush(); - RETURN_IF_EXCEPTION(scope, 0); + if (scope.exception()) [[unlikely]] { + llhttp_set_error_reason(&m_parserData, "HPE_USER:JS Exception"); + return HPE_USER; + } m_numFields = 1; m_numValues = 0; } diff --git a/src/jsc/bindings/sqlite/JSSQLStatement.cpp b/src/jsc/bindings/sqlite/JSSQLStatement.cpp index 4343288aefe..d09de269a82 100644 --- a/src/jsc/bindings/sqlite/JSSQLStatement.cpp +++ b/src/jsc/bindings/sqlite/JSSQLStatement.cpp @@ -1211,6 +1211,11 @@ JSC_DEFINE_HOST_FUNCTION(jsSQLStatementDeserialize, (JSC::JSGlobalObject * lexic sqlite3* db = nullptr; if (sqlite3_open_v2(":memory:", &db, openFlags, nullptr) != SQLITE_OK) { + // Ownership of `data` is only transferred by sqlite3_deserialize below; + // on open failure it must be released here. A failed open can still + // return a partially-initialized handle that needs sqlite3_close(). + sqlite3_free(data); + sqlite3_close(db); throwException(lexicalGlobalObject, scope, createError(lexicalGlobalObject, "Failed to open SQLite"_s)); return {}; } @@ -1225,15 +1230,18 @@ JSC_DEFINE_HOST_FUNCTION(jsSQLStatementDeserialize, (JSC::JSGlobalObject * lexic } status = sqlite3_deserialize(db, "main", reinterpret_cast(data), byteLength, byteLength, deserializeFlags); + // SQLITE_DESERIALIZE_FREEONCLOSE transfers ownership of `data` to SQLite, + // which frees it itself when deserialization fails. Do not free it again here. if (status == SQLITE_BUSY) { - sqlite3_free(data); + sqlite3_close(db); throwException(lexicalGlobalObject, scope, createError(lexicalGlobalObject, "SQLITE_BUSY"_s)); return {}; } if (status != SQLITE_OK) { - sqlite3_free(data); - throwException(lexicalGlobalObject, scope, createError(lexicalGlobalObject, status == SQLITE_ERROR ? "unable to deserialize database"_s : sqliteString(sqlite3_errstr(status)))); + auto message = status == SQLITE_ERROR ? "unable to deserialize database"_s : sqliteString(sqlite3_errstr(status)); + sqlite3_close(db); + throwException(lexicalGlobalObject, scope, createError(lexicalGlobalObject, message)); return {}; } diff --git a/src/jsc/bindings/webcore/SerializedScriptValue.cpp b/src/jsc/bindings/webcore/SerializedScriptValue.cpp index 631226598d7..180351b7888 100644 --- a/src/jsc/bindings/webcore/SerializedScriptValue.cpp +++ b/src/jsc/bindings/webcore/SerializedScriptValue.cpp @@ -4067,9 +4067,15 @@ class CloneDeserializer : public CloneBase { if (primeCount < 2) return false; + // Each additional prime is encoded as three length-prefixed byte vectors, so it + // requires at least 3 * sizeof(uint32_t) bytes of remaining input. Reject counts + // that could not possibly be satisfied to avoid a huge up-front allocation. + if (static_cast(primeCount - 2) > static_cast(m_end - m_ptr) / (3 * sizeof(uint32_t))) + return false; + CryptoKeyRSAComponents::PrimeInfo firstPrimeInfo; CryptoKeyRSAComponents::PrimeInfo secondPrimeInfo; - Vector otherPrimeInfos(primeCount - 2); + Vector otherPrimeInfos; if (!read(firstPrimeInfo.primeFactor)) return false; @@ -4082,12 +4088,14 @@ class CloneDeserializer : public CloneBase { if (!read(secondPrimeInfo.factorCRTCoefficient)) return false; for (unsigned i = 2; i < primeCount; ++i) { - if (!read(otherPrimeInfos[i - 2].primeFactor)) + CryptoKeyRSAComponents::PrimeInfo info; + if (!read(info.primeFactor)) return false; - if (!read(otherPrimeInfos[i - 2].factorCRTExponent)) + if (!read(info.factorCRTExponent)) return false; - if (!read(otherPrimeInfos[i - 2].factorCRTCoefficient)) + if (!read(info.factorCRTCoefficient)) return false; + otherPrimeInfos.append(WTF::move(info)); } auto keyData = CryptoKeyRSAComponents::createPrivateWithAdditionalData(modulus, exponent, privateExponent, firstPrimeInfo, secondPrimeInfo, otherPrimeInfos); @@ -4637,14 +4645,15 @@ class CloneDeserializer : public CloneBase { return JSValue(); } - BIO* bio = nullptr; - if (!read(&bio, pemSize)) { + BIO* rawBio = nullptr; + if (!read(&rawBio, pemSize)) { fail(); return JSValue(); } + ncrypto::BIOPointer bio(rawBio); if (keyType == CryptoKeyType::Public) { - EVP_PKEY* pkey = PEM_read_bio_PUBKEY(bio, nullptr, nullptr, nullptr); + EVP_PKEY* pkey = PEM_read_bio_PUBKEY(bio.get(), nullptr, nullptr, nullptr); if (!pkey) { fail(); return JSValue(); @@ -4654,7 +4663,7 @@ class CloneDeserializer : public CloneBase { return JSPublicKeyObject::create(vm, structure, m_globalObject, WTF::move(keyObject)); } - EVP_PKEY* pkey = PEM_read_bio_PrivateKey(bio, nullptr, nullptr, nullptr); + EVP_PKEY* pkey = PEM_read_bio_PrivateKey(bio.get(), nullptr, nullptr, nullptr); if (!pkey) { fail(); return JSValue(); @@ -4701,6 +4710,11 @@ class CloneDeserializer : public CloneBase { #endif } + if (lengthInUint64 > static_cast(m_end - m_ptr) / sizeof(uint64_t)) { + fail(); + return JSValue(); + } + #if USE(BIGINT32) static_assert(sizeof(JSBigInt::Digit) == sizeof(uint64_t)); if (lengthInUint64 == 1) { @@ -4962,7 +4976,10 @@ class CloneDeserializer : public CloneBase { if (!readStringData(flags)) return JSValue(); auto reFlags = Yarr::parseFlags(flags->string()); - ASSERT(reFlags.has_value()); + if (!reFlags.has_value()) { + fail(); + return JSValue(); + } VM& vm = m_lexicalGlobalObject->vm(); RegExp* regExp = RegExp::create(vm, pattern->string(), reFlags.value()); return RegExpObject::create(vm, m_globalObject->regExpStructure(), regExp); diff --git a/src/libarchive/lib.rs b/src/libarchive/lib.rs index 3dbe9655f45..ed9c6f002dc 100644 --- a/src/libarchive/lib.rs +++ b/src/libarchive/lib.rs @@ -1364,7 +1364,7 @@ fn is_symlink_target_safe( /// during later `mkdirat`/`openat`/`symlinkat` calls — such entries must be /// rejected rather than resolved. #[cfg(unix)] -fn path_traverses_created_symlink(path: &[u8], created_symlinks: &[Vec]) -> bool { +pub fn path_traverses_created_symlink(path: &[u8], created_symlinks: &[Vec]) -> bool { if created_symlinks.is_empty() { return false; } @@ -1993,10 +1993,27 @@ impl Archiver { #[cfg(not(windows))] let mode: bun_sys::Mode = bun_sys::Mode::try_from( // SAFETY: entry valid - lib::Entry::opaque_ref(entry).perm() | 0o666, + (lib::Entry::opaque_ref(entry).perm() & 0o777) | 0o666, ) .unwrap(); + // `path_traverses_created_symlink` is a lexical check: on + // filesystems that alias differently-encoded names (Unicode + // NFC/NFD normalization on APFS/HFS+), a path component can + // reach a created symlink without byte-matching its recorded + // path. Once this extraction has created any symlink, ask the + // kernel to refuse to follow symlinks while opening file + // entries. `NOFOLLOW_ANY` is 0 on non-Darwin targets. + #[cfg(unix)] + let flags = { + let mut flags = + bun_sys::O::WRONLY | bun_sys::O::CREAT | bun_sys::O::TRUNC; + if !created_symlinks.is_empty() { + flags |= bun_sys::O::NOFOLLOW_ANY; + } + flags + }; + #[cfg(not(unix))] let flags = bun_sys::O::WRONLY | bun_sys::O::CREAT | bun_sys::O::TRUNC; #[cfg(windows)] diff --git a/src/runtime/api/bun/h2_frame_parser.rs b/src/runtime/api/bun/h2_frame_parser.rs index 3166f2891b1..2701a6506f5 100644 --- a/src/runtime/api/bun/h2_frame_parser.rs +++ b/src/runtime/api/bun/h2_frame_parser.rs @@ -4442,6 +4442,20 @@ impl H2FrameParser { if stream_identifier & 1 == 0 { return None; } + // Bound per-connection stream state before allocating: a peer flooding + // tiny HEADERS frames with fresh stream ids would otherwise grow + // `streams` (and the JS objects pinned by `streamStart`) without limit. + // Mirrors the maxSessionMemory check on the PING and request() paths. + if self.get_session_memory_usage() > self.max_session_memory.get() as usize { + self.send_go_away( + stream_identifier, + ErrorCode::ENHANCE_YOUR_CALM, + b"ENHANCE_YOUR_CALM", + self.last_stream_id.get(), + true, + ); + return None; + } self.handle_received_stream_id(stream_identifier) } @@ -5518,7 +5532,22 @@ impl H2FrameParser { impl H2FrameParser { // get memory usage in MB fn get_session_memory_usage(&self) -> usize { - (self.write_buffer.get().len_u32() as usize + self.queued_data_size.get() as usize) + // Count only live streams: entries stay in the map until connection + // teardown, so counting every entry would grow monotonically over the + // life of a keep-alive connection and eventually trip the session cap + // for a well-behaved peer making sequential requests. + let live_streams = self + .streams + .get() + .iter() + .filter(|(_, item)| { + // SAFETY: item is &*mut Stream from streams.iter(); the boxed Stream outlives the iteration + unsafe { &***item }.state != StreamState::CLOSED + }) + .count(); + (self.write_buffer.get().len_u32() as usize + + self.queued_data_size.get() as usize + + live_streams * core::mem::size_of::()) / 1024 / 1024 } diff --git a/src/runtime/api/bun/js_bun_spawn_bindings.rs b/src/runtime/api/bun/js_bun_spawn_bindings.rs index 4c066e428f5..d4ff66ddba4 100644 --- a/src/runtime/api/bun/js_bun_spawn_bindings.rs +++ b/src/runtime/api/bun/js_bun_spawn_bindings.rs @@ -257,6 +257,24 @@ fn get_argv( cmds_array.next()?.unwrap(), )?; + // CreateProcessW runs `.bat`/`.cmd` files through `cmd.exe`, which + // re-tokenizes the command line with shell metacharacter rules + // (BatBadBut, CVE-2024-24576 / CVE-2024-27980). libuv's MSVCRT-style + // quoting cannot make that safe, so reject arguments that cmd.exe would + // reinterpret. + let is_batch_file = cfg!(windows) && bun_which::is_batch_file(argv0_result.argv0.as_bytes()); + if is_batch_file && bun_which::batch_arg_has_cmd_metachars(argv0_result.arg0.as_bytes()) { + return Err(global_this + .err( + jsc::ErrorCode::INVALID_ARG_VALUE, + format_args!( + "The command name contains a cmd.exe special character and cannot be safely passed to a .bat/.cmd file. Received {}", + bun_fmt::quote(argv0_result.arg0.as_bytes()) + ), + ) + .throw()); + } + *argv0 = Some(argv0_result.argv0.as_ptr()); argv.push(argv0_result.arg0.as_ptr()); // Transfer ownership to the caller's backing store so the pointers above @@ -283,6 +301,18 @@ fn get_argv( } let owned = arg.to_owned_slice_z(); + if is_batch_file && bun_which::batch_arg_has_cmd_metachars(owned.as_bytes()) { + return Err(global_this + .err( + jsc::ErrorCode::INVALID_ARG_VALUE, + format_args!( + "The argument 'args[{}]' contains a cmd.exe special character and cannot be safely passed to a .bat/.cmd file. Received {}", + arg_index, + bun_fmt::quote(owned.as_bytes()) + ), + ) + .throw()); + } argv.push(owned.as_ptr()); storage.push(owned); arg_index += 1; diff --git a/src/runtime/bake/DevServer.rs b/src/runtime/bake/DevServer.rs index 689678d70e3..f638ed92975 100644 --- a/src/runtime/bake/DevServer.rs +++ b/src/runtime/bake/DevServer.rs @@ -1415,25 +1415,16 @@ pub enum DevHandlerId { MemoryVisualizer, } -/// DNS-rebinding guard for `/_bun/...` internal routes. A rebound origin +/// DNS-rebinding guard for `/_bun/...` internal routes and the Chrome +/// DevTools `/.well-known/...` route. A rebound origin /// (`attacker.com` → 127.0.0.1) presents `Host: attacker.com`; rejecting /// non-loopback / non-IP / non-configured hostnames prevents the attacker's /// page from reading bundled source via same-origin fetch. -fn is_allowed_dev_host(dev: &DevServer, req: &Request) -> bool { +pub(crate) fn is_allowed_dev_host(dev: &DevServer, req: &Request) -> bool { let Some(host) = req.header(b"host") else { return false; }; - let host = if host.first() == Some(&b'[') { - match strings::index_of_scalar(host, b']') { - Some(end) => &host[..=end], - None => host, - } - } else { - match strings::last_index_of_char(host, b':') { - Some(colon) => &host[..colon], - None => host, - } - }; + let host = host_without_port(host); if strings::eql_case_insensitive_ascii(host, b"localhost", true) { return true; } @@ -1466,6 +1457,67 @@ fn is_allowed_dev_host(dev: &DevServer, req: &Request) -> bool { false } +/// `host[":" port]` / `"[" v6 "]" [":" port]` → host (brackets retained for IPv6). +/// Malformed authorities (missing `]`, empty or non-numeric port, trailing +/// garbage) yield an empty slice so callers fail closed. +fn host_without_port(host: &[u8]) -> &[u8] { + let (host, rest) = if host.first() == Some(&b'[') { + match strings::index_of_scalar(host, b']') { + Some(end) => (&host[..=end], &host[end + 1..]), + None => return b"", + } + } else { + match strings::last_index_of_char(host, b':') { + Some(colon) => (&host[..colon], &host[colon..]), + None => (host, &host[host.len()..]), + } + }; + match rest { + [] => host, + [b':', port @ ..] if !port.is_empty() && port.iter().all(u8::is_ascii_digit) => host, + _ => b"", + } +} + +/// Cross-origin guard for the HMR WebSocket. WebSocket handshakes are exempt +/// from the same-origin policy, so any page the developer visits could open +/// `ws://localhost:/_bun/hmr` and subscribe to hot-update payloads (the +/// bundled source) — the browser still sends `Host: localhost`, so +/// `is_allowed_dev_host` alone does not stop it. Browsers always include an +/// `Origin` header on WebSocket handshakes; require its host to be the +/// request's own host or a localhost name. Requests without an `Origin` +/// header (non-browser clients) are allowed. +fn is_allowed_dev_origin(req: &Request) -> bool { + let Some(origin) = req.header(b"origin") else { + return true; + }; + // An origin is `scheme "://" host [":" port]`; opaque origins serialize + // to `null` and are rejected here. + let Some(scheme_end) = strings::index_of(origin, b"://") else { + return false; + }; + let origin_host = host_without_port(&origin[scheme_end + 3..]); + if strings::eql_case_insensitive_ascii(origin_host, b"localhost", true) { + return true; + } + const DOT_LOCALHOST: &[u8] = b".localhost"; + if origin_host.len() > DOT_LOCALHOST.len() + && strings::eql_case_insensitive_ascii( + &origin_host[origin_host.len() - DOT_LOCALHOST.len()..], + DOT_LOCALHOST, + true, + ) + { + return true; + } + match req.header(b"host") { + Some(host) => { + strings::eql_case_insensitive_ascii(origin_host, host_without_port(host), true) + } + None => false, + } +} + fn host_forbidden(resp: AnyResponse) { resp.corked(move || { resp.write_status(b"403 Forbidden"); @@ -1473,6 +1525,16 @@ fn host_forbidden(resp: AnyResponse) { }); } +fn origin_forbidden(resp: AnyResponse) { + resp.corked(move || { + resp.write_status(b"403 Forbidden"); + resp.end( + b"Blocked: Origin header does not match the dev server", + false, + ); + }); +} + /// `extern "C"` trampoline: recovers `&mut DevServer` from user-data and wraps /// the raw `uws_res` as `AnyResponse`, then calls the handler for `ID`. extern "C" fn dev_route_tramp( @@ -1600,6 +1662,9 @@ impl bun_uws_sys::web_socket::WebSocketUpgradeServer for D if !is_allowed_dev_host(this, req) { return host_forbidden(res.as_any_response()); } + if !is_allowed_dev_origin(req) { + return origin_forbidden(res.as_any_response()); + } let dw = bun_core::heap::into_raw(HmrSocket::new(this, res)); let _ = this.active_websocket_connections.insert(dw, ()); let _ = res.upgrade( diff --git a/src/runtime/bake/DevServer/ErrorReportRequest.rs b/src/runtime/bake/DevServer/ErrorReportRequest.rs index db780cde5b8..08a69ea17b6 100644 --- a/src/runtime/bake/DevServer/ErrorReportRequest.rs +++ b/src/runtime/bake/DevServer/ErrorReportRequest.rs @@ -135,16 +135,16 @@ impl ErrorReportRequest { let dev: &DevServer = unsafe { &*ctx }.dev.get(); // Read payload, assemble ZigException - let name = read_string32(&mut reader)?; - let message = read_string32(&mut reader)?; - let browser_url = read_string32(&mut reader)?; + let name = sanitize_for_terminal(read_string32(&mut reader)?, &arena); + let message = sanitize_for_terminal(read_string32(&mut reader)?, &arena); + let browser_url = sanitize_for_terminal(read_string32(&mut reader)?, &arena); let stack_count = reader.read_int_le::()?.min(255); // does not support more than 255 let mut frames: Vec = Vec::with_capacity(stack_count as usize); for _ in 0..stack_count { let line = reader.read_int_le::()?; let column = reader.read_int_le::()?; - let function_name = read_string32(&mut reader)?; - let file_name = read_string32(&mut reader)?; + let function_name = sanitize_for_terminal(read_string32(&mut reader)?, &arena); + let file_name = sanitize_for_terminal(read_string32(&mut reader)?, &arena); frames.push(ZigStackFrame { function_name: BunString::init(function_name), source_url: BunString::init(file_name), @@ -577,4 +577,46 @@ fn read_string32<'a>( Ok(s) } +/// The report body is attacker-controlled: `/_bun/report_error` accepts a +/// CORS "simple request" POST from any origin, and these strings are printed +/// to the developer's terminal. Replace C0 control bytes (except `\t`/`\n`) +/// and DEL so the payload cannot inject ANSI/OSC escape sequences (cursor +/// movement, OSC 52 clipboard writes, hyperlinks). UTF-8-encoded C1 controls +/// (U+0080..=U+009F, i.e. `0xC2 0x80..=0x9F`) are also replaced: xterm-family +/// terminals decode them back to C1, so `0xC2 0x9B` would otherwise act as CSI. +fn sanitize_for_terminal<'a>(s: &'a [u8], arena: &'a Arena) -> &'a [u8] { + fn is_disallowed(prev: u8, b: u8) -> bool { + // Lone 0x80..=0x9F bytes are continuation bytes of legitimate + // multi-byte characters and must not be blanked; only the encoded C1 + // form (a 0xC2 lead byte followed by 0x80..=0x9F) reaches the + // terminal as a control. + (b < 0x20 && b != b'\t' && b != b'\n') + || b == 0x7f + || (prev == 0xc2 && (0x80..=0x9f).contains(&b)) + } + let mut prev = 0u8; + if !s.iter().any(|&b| { + let bad = is_disallowed(prev, b); + prev = b; + bad + }) { + return s; + } + let copy = arena.alloc_slice_copy(s); + let mut prev = 0u8; + for i in 0..copy.len() { + let cur = copy[i]; + if is_disallowed(prev, cur) { + copy[i] = b' '; + // For an encoded C1 control, blank the 0xC2 lead byte too so the + // output stays valid UTF-8 instead of leaving a dangling lead byte. + if prev == 0xc2 && i > 0 { + copy[i - 1] = b' '; + } + } + prev = cur; + } + copy +} + // ported from: src/bake/DevServer/ErrorReportRequest.zig diff --git a/src/runtime/bake/mod.rs b/src/runtime/bake/mod.rs index be41b115f2d..e39b7b63bf7 100644 --- a/src/runtime/bake/mod.rs +++ b/src/runtime/bake/mod.rs @@ -20,6 +20,7 @@ pub(crate) mod bake_body; #[path = "DevServer.rs"] mod dev_server_body; pub(crate) use dev_server_body::get_deinit_count_for_testing; +pub(crate) use dev_server_body::is_allowed_dev_host; #[path = "FrameworkRouter.rs"] pub(crate) mod framework_router_body; diff --git a/src/runtime/cli/bunx_command.rs b/src/runtime/cli/bunx_command.rs index f2d7c11906a..1cf8ae96a34 100644 --- a/src/runtime/cli/bunx_command.rs +++ b/src/runtime/cli/bunx_command.rs @@ -279,6 +279,21 @@ impl BunxCommand { #[cfg(windows)] const NANOSECONDS_CACHE_VALID: i128 = (Self::SECONDS_CACHE_VALID as i128) * 1_000_000_000; + /// `bin` keys (and the `name` fallback) in package.json are command + /// names, not paths. The bunx cache lives in a world-writable temp dir, + /// so a crafted package.json there could yield a key like + /// `../../../../tmp/x` or `/tmp/x`; `bun_which::which` resolves + /// slash-containing names against the cwd, escaping `node_modules/.bin` + /// and skipping the cache-ownership check before execution. Reject + /// anything that isn't a plain file name. + fn is_safe_bin_name(name: &[u8]) -> bool { + !name.is_empty() + && name != b"." + && name != b".." + && strings::index_of_char(name, b'/').is_none() + && strings::index_of_char(name, b'\\').is_none() + } + fn get_bin_name_from_subpath( transpiler: &mut Transpiler, dir_fd: Fd, @@ -308,7 +323,7 @@ impl BunxCommand { for prop in object.properties.slice() { if let Some(key) = &prop.key { if let Some(bin_name) = key.as_string(&bump) { - if bin_name.is_empty() { + if !Self::is_safe_bin_name(bin_name) { continue; } return Ok(Box::<[u8]>::from(bin_name)); @@ -319,7 +334,16 @@ impl BunxCommand { ExprData::EString(_) => { if let Some(name_expr) = expr.get(b"name") { if let Some(name) = name_expr.as_string(&bump) { - return Ok(Box::<[u8]>::from(name)); + // A scoped `name` (`@scope/pkg`) is legitimate here; + // the command name is its unscoped portion. + let bin_name = if name.is_empty() { + name + } else { + bun_install::dependency::unscoped_package_name(name) + }; + if Self::is_safe_bin_name(bin_name) { + return Ok(Box::<[u8]>::from(bin_name)); + } } } } diff --git a/src/runtime/cli/create_command.rs b/src/runtime/cli/create_command.rs index 21fc332968e..7fc891a77cc 100644 --- a/src/runtime/cli/create_command.rs +++ b/src/runtime/cli/create_command.rs @@ -1428,6 +1428,11 @@ impl CreateCommand { let mut npm_client_: Option = None; + // Remember whether the user explicitly opted out (`--no-install`) + // before this is widened to also cover dependency-less templates: + // the flag must skip template tasks, but a template with no + // dependencies should still run its documented postinstall hooks. + let user_skipped_install = create_options.skip_install; create_options.skip_install = create_options.skip_install || !has_dependencies; if !create_options.skip_git { @@ -1511,7 +1516,7 @@ impl CreateCommand { let _ = process?; } - if !postinstall_tasks.is_empty() { + if !user_skipped_install && !postinstall_tasks.is_empty() { for task in &postinstall_tasks { exec_task(task, destination, path_env, npm_client_); } diff --git a/src/runtime/cli/pm_trusted_command.rs b/src/runtime/cli/pm_trusted_command.rs index 02597fd3043..f80827aa60e 100644 --- a/src/runtime/cli/pm_trusted_command.rs +++ b/src/runtime/cli/pm_trusted_command.rs @@ -95,8 +95,9 @@ impl UntrustedCommand { // called alias because a dependency name is not always the package name let alias = dep.name.slice(buf); + let pkg_name = packages.items_name()[package_id as usize].slice(buf); let resolution = &resolutions[package_id as usize]; - if !lockfile.has_trusted_dependency(alias, resolution) { + if !lockfile.has_trusted_dependency(alias, pkg_name, resolution) { untrusted_dep_ids.put(dep_id, ())?; } } @@ -325,8 +326,9 @@ impl TrustCommand { } let alias = dep.name.slice(buf); + let pkg_name = packages.items_name()[package_id as usize].slice(buf); let resolution = &resolutions[package_id as usize]; - if !lockfile.has_trusted_dependency(alias, resolution) { + if !lockfile.has_trusted_dependency(alias, pkg_name, resolution) { untrusted_dep_ids.put(dep_id, ())?; } } @@ -407,7 +409,11 @@ impl TrustCommand { for package_name_from_cli in &packages_to_trust { if strings::eql_long(package_name_from_cli, alias, true) - && !lockfile.has_trusted_dependency(alias, resolution) + && !lockfile.has_trusted_dependency( + alias, + packages.items_name()[package_id as usize].slice(buf), + resolution, + ) { break 'brk false; } diff --git a/src/runtime/crypto/pwhash.rs b/src/runtime/crypto/pwhash.rs index 2b5a49193ae..fdff648fde3 100644 --- a/src/runtime/crypto/pwhash.rs +++ b/src/runtime/crypto/pwhash.rs @@ -435,7 +435,8 @@ pub mod bcrypt { let computed = vendor::bcrypt(u32::from(rounds_log), salt, &buf[..used]); // Zig: `if (!mem.eql(u8, &hash, expected_hash)) return PasswordVerificationFailed`. - if computed[..DK_LENGTH] == expected { + // Compare in constant time like the `$2b$` path (BoringSSL `CRYPTO_memcmp`). + if bun_boringssl_sys::constant_time_eq(&computed[..DK_LENGTH], &expected) { Ok(()) } else { Err(bun_core::err!("PasswordVerificationFailed")) diff --git a/src/runtime/server/server_body.rs b/src/runtime/server/server_body.rs index 749440a6640..45949392b25 100644 --- a/src/runtime/server/server_body.rs +++ b/src/runtime/server/server_body.rs @@ -3402,7 +3402,15 @@ where } let authorized = 'brk: { - if self.dev_server.is_none() { + let Some(dev_server) = self.dev_server.as_deref() else { + break 'brk false; + }; + + // The loopback source-IP check below is not enough on its own: a + // DNS-rebound origin connects from 127.0.0.1 but presents the + // attacker's hostname in `Host`. Apply the same Host allowlist as + // the `/_bun/*` routes before disclosing the project root path. + if !bake::is_allowed_dev_host(dev_server, req) { break 'brk false; } diff --git a/src/runtime/shell/states/Cmd.rs b/src/runtime/shell/states/Cmd.rs index 528c9000cd6..fa01e49d5ac 100644 --- a/src/runtime/shell/states/Cmd.rs +++ b/src/runtime/shell/states/Cmd.rs @@ -523,6 +523,30 @@ impl Cmd { format_args!("bun: command not found: {}\n", bstr::BStr::new(&first_arg)), ); }; + // CreateProcessW runs `.bat`/`.cmd` files through `cmd.exe`, which + // re-tokenizes the command line with shell metacharacter rules + // (BatBadBut). libuv's MSVCRT-style quoting cannot make that safe, so + // reject arguments that cmd.exe would reinterpret. + if cfg!(windows) && bun_which::is_batch_file(&resolved) { + let unsafe_arg: Option> = interp + .as_cmd(this) + .args + .iter() + .skip(1) + .find(|a| bun_which::batch_arg_has_cmd_metachars(&a[..a.len() - 1])) + .map(|a| a[..a.len() - 1].to_vec()); + if let Some(arg) = unsafe_arg { + drop(spawn_args); + return Builtin::cmd_write_failing_error( + interp, + this, + format_args!( + "bun: refusing to pass argument with cmd.exe special characters to a batch file: {}\n", + bstr::BStr::new(&arg) + ), + ); + } + } // Replace argv[0] with the resolved absolute path (NUL-terminated for // `execve`). resolved.push(0); diff --git a/src/runtime/shell/states/Expansion.rs b/src/runtime/shell/states/Expansion.rs index 6a4367626bd..503503236c2 100644 --- a/src/runtime/shell/states/Expansion.rs +++ b/src/runtime/shell/states/Expansion.rs @@ -31,6 +31,12 @@ pub struct Expansion { /// boundary is hit (IFS split / glob result), it is flushed into `out` /// via `push_current_out`. Spec: Expansion.zig `current_out`. pub current_out: Vec, + /// Byte offsets in `current_out` written by literal `BraceBegin`/`Comma`/ + /// `BraceEnd` atoms. Only these positions may act as brace-expansion + /// metacharacters in `do_brace_expand`; `{`/`,`/`}` bytes from any other + /// source (JS interpolation, quoted text, `$var`, command substitution) + /// are data and must not change the expansion structure. + pub brace_meta_offsets: Vec, pub child_script: Option, /// Whether the in-flight command substitution was `"$(...)"` (no IFS /// splitting on its result). Only meaningful while `state == CmdSubst`. @@ -104,6 +110,7 @@ impl Expansion { word_idx: 0, out: ExpansionOut::default(), current_out: Vec::new(), + brace_meta_offsets: Vec::new(), child_script: None, cmd_subst_quoted: false, has_quoted_empty: false, @@ -179,6 +186,7 @@ impl Expansion { shell, simple, &mut me.current_out, + &mut me.brace_meta_offsets, &mut me.has_quoted_empty, true, event_loop, @@ -224,6 +232,7 @@ impl Expansion { // All sub-atoms expanded — post-process leading tilde then finish. if leading_tilde { let home = me.base.shell().get_homedir(); + let len_before = me.current_out.len(); match me.current_out.first() { Some(b'/') | Some(b'\\') => { me.current_out.splice(0..0, home.slice().iter().copied()); @@ -236,6 +245,17 @@ impl Expansion { } None => {} } + // The first two arms prepend; shift the recorded brace + // metacharacter offsets so they keep pointing at the same + // bytes. The `extend_from_slice` arm only runs when + // `current_out` (and therefore `brace_meta_offsets`) is + // empty, so the shift is a no-op there. + let prepended = (me.current_out.len() - len_before) as u32; + if prepended != 0 { + for off in &mut me.brace_meta_offsets { + *off += prepended; + } + } home.deref(); } // Spec (Expansion.zig next() lines 209-221): brace expansion @@ -264,7 +284,24 @@ impl Expansion { /// `expand_simple_no_io`) and push each variant as a separate argv word. fn do_brace_expand(me: &mut Expansion) { use bun_shell_parser::braces; - let brace_str = &me.current_out[..]; + // Only the `{`/`,`/`}` bytes recorded in `brace_meta_offsets` (written + // by literal BraceBegin/Comma/BraceEnd atoms) are brace-expansion + // metacharacters. Bytes from Text/Var/cmd-subst expansion — notably JS + // `${...}` interpolations — are data: backslash-escape them so the + // brace lexer cannot be steered into emitting extra argv words. + let mut escaped: Vec = Vec::with_capacity(me.current_out.len()); + let mut next_meta = 0usize; + for (i, &b) in me.current_out.iter().enumerate() { + if next_meta < me.brace_meta_offsets.len() + && me.brace_meta_offsets[next_meta] as usize == i + { + next_meta += 1; + } else if matches!(b, b'{' | b'}' | b',' | b'\\') { + escaped.push(b'\\'); + } + escaped.push(b); + } + let brace_str = &escaped[..]; let mut lexer_output = if bun_core::is_all_ascii(brace_str) { bun_core::handle_oom(braces::Lexer::tokenize(brace_str)) } else { @@ -359,6 +396,7 @@ impl Expansion { shell: &ShellExecEnv, atom: &ast::SimpleAtom, out: &mut Vec, + brace_meta_offsets: &mut Vec, has_quoted_empty: &mut bool, expand_tilde: bool, event_loop: EventLoopHandle, @@ -393,9 +431,18 @@ impl Expansion { } ast::SimpleAtom::Asterisk => out.push(b'*'), ast::SimpleAtom::DoubleAsterisk => out.extend_from_slice(b"**"), - ast::SimpleAtom::BraceBegin => out.push(b'{'), - ast::SimpleAtom::BraceEnd => out.push(b'}'), - ast::SimpleAtom::Comma => out.push(b','), + ast::SimpleAtom::BraceBegin => { + brace_meta_offsets.push(out.len() as u32); + out.push(b'{'); + } + ast::SimpleAtom::BraceEnd => { + brace_meta_offsets.push(out.len() as u32); + out.push(b'}'); + } + ast::SimpleAtom::Comma => { + brace_meta_offsets.push(out.len() as u32); + out.push(b','); + } ast::SimpleAtom::Tilde => { if expand_tilde { let home = shell.get_homedir(); @@ -419,6 +466,7 @@ impl Expansion { me.out.bounds.push(me.out.buf.len() as u32); } me.out.buf.append(&mut me.current_out); + me.brace_meta_offsets.clear(); } /// Spec: Expansion.zig `postSubshellExpansion` + `convertNewlinesToSpaces`. diff --git a/src/runtime/valkey_jsc/js_valkey.rs b/src/runtime/valkey_jsc/js_valkey.rs index 98890974bd7..0c0dee561cc 100644 --- a/src/runtime/valkey_jsc/js_valkey.rs +++ b/src/runtime/valkey_jsc/js_valkey.rs @@ -727,6 +727,7 @@ impl JSValkeyClient { idle_timeout_interval_ms: options.idle_timeout_ms, write_buffer: Default::default(), read_buffer: Default::default(), + reply_scanner: Default::default(), retry_attempts: 0, auto_flusher: Default::default(), }), @@ -851,6 +852,7 @@ impl JSValkeyClient { idle_timeout_interval_ms: client.idle_timeout_interval_ms, write_buffer: Default::default(), read_buffer: Default::default(), + reply_scanner: Default::default(), retry_attempts: 0, auto_flusher: Default::default(), }), diff --git a/src/runtime/valkey_jsc/valkey.rs b/src/runtime/valkey_jsc/valkey.rs index 6246d0b1b80..e096e0986f1 100644 --- a/src/runtime/valkey_jsc/valkey.rs +++ b/src/runtime/valkey_jsc/valkey.rs @@ -280,6 +280,8 @@ pub struct ValkeyClient { // Buffer management pub write_buffer: OffsetByteList, pub read_buffer: OffsetByteList, + /// Resumable end-of-reply scanner over `read_buffer.remaining()`. + pub reply_scanner: protocol::ReplyScanner, /// In-flight commands, after the data has been written to the network socket // TODO(port): `Queue` is `std.fifo.LinearFifo(PromisePair, .Dynamic)` in Zig — assume @@ -777,25 +779,40 @@ impl ValkeyClient { break; // Buffer processed completely } + // Incrementally check whether a complete reply is buffered + // before running the allocating tree parser. The scanner + // resumes from its saved position, so the elements of a + // partially-received aggregate are not re-parsed on every + // socket callback (which is quadratic in the element count). + match self.reply_scanner.scan(remaining_buffer) { + Ok(protocol::ScanResult::Complete) => {} + Ok(protocol::ScanResult::NeedMoreData) => { + // Need more data in the buffer, wait for next onData call + if cfg!(debug_assertions) { + debug!( + "read_buffer: needs more data ({} bytes available)", + remaining_buffer.len() + ); + } + return Ok(()); + } + Err(err) => { + self.fail(b"Failed to read data (buffer path)", err)?; + return Ok(()); + } + } + let mut reader = protocol::ValkeyReader::init(remaining_buffer); let before_read_pos = reader_pos(&reader); let value = match reader.read_value() { Ok(v) => v, Err(err) => { - if err == RedisError::InvalidResponse { - // Need more data in the buffer, wait for next onData call - if cfg!(debug_assertions) { - debug!( - "read_buffer: needs more data ({} bytes available)", - remaining_buffer.len() - ); - } - return Ok(()); - } else { - self.fail(b"Failed to read data (buffer path)", err)?; - return Ok(()); - } + // The scanner verified a complete reply is buffered, so + // a parse failure here (including `InvalidResponse`) is + // a protocol error, not a short read. + self.fail(b"Failed to read data (buffer path)", err)?; + return Ok(()); } }; // PORT NOTE: `defer value.deinit(allocator)` — RESPValue should impl Drop. @@ -810,6 +827,7 @@ impl ValkeyClient { } self.read_buffer.consume(bytes_consumed as u32); + self.reply_scanner.reset(); let mut value_to_handle = value; // Use temp var for defer // TODO(port): narrow error set — Zig caller passes err to fail() which takes RedisError; @@ -843,6 +861,7 @@ impl ValkeyClient { current_data_slice.len() - before_read_pos ); } + self.reply_scanner.reset(); self.read_buffer .write(¤t_data_slice[before_read_pos..]) .expect("failed to write remaining stack data to buffer"); @@ -1265,6 +1284,7 @@ impl ValkeyClient { self.socket = socket; self.write_buffer.clear_and_free(); self.read_buffer.clear_and_free(); + self.reply_scanner.reset(); // A fresh socket has opened, so reset per-connection state. Without // this, `send()` would permanently reject with "Connection has failed" // after a previous connection exhausted retries (#29925), and the diff --git a/src/runtime/webcore/Blob.rs b/src/runtime/webcore/Blob.rs index 607ce8df5f4..37287a4ba3c 100644 --- a/src/runtime/webcore/Blob.rs +++ b/src/runtime/webcore/Blob.rs @@ -78,6 +78,15 @@ pub extern "C" fn blob_store_array_buffer_deallocator(_bytes: *mut c_void, ctx: } } +/// WHATWG File API §3.1: a Blob/File `type` is only used when every character +/// is in the range U+0020 to U+007E; otherwise it is treated as the empty +/// string. Stricter than `is_all_ascii`: also rejects control characters such +/// as CR/LF, which would otherwise be stored in `content_type` and written +/// verbatim into outgoing HTTP headers. +fn is_valid_blob_type(slice: &[u8]) -> bool { + slice.iter().all(|&c| matches!(c, 0x20..=0x7E)) +} + /// Result delivered to `ReadBytesHandler::on_read_bytes`. pub enum ReadBytesResult { /// global-allocator-owned by the callback. @@ -1336,7 +1345,7 @@ impl BlobExt for Blob { } let content_type_str = content_type.to_slice(global_this)?; let slice = content_type_str.slice(); - if strings::is_all_ascii(slice) { + if is_valid_blob_type(slice) { self.free_content_type(); self.content_type_was_set.set(true); @@ -1775,7 +1784,7 @@ impl BlobExt for Blob { } let content_type_str = content_type.to_slice(global_this)?; let slice = content_type_str.slice(); - if strings::is_all_ascii(slice) { + if is_valid_blob_type(slice) { self.free_content_type(); self.content_type_was_set.set(true); // SAFETY: see other `mime_type` call sites. @@ -2107,7 +2116,7 @@ impl BlobExt for Blob { let zig_str = content_type_.get_zig_string(global_this)?; let slicer = zig_str.to_slice(); let slice = slicer.slice(); - if !strings::is_all_ascii(slice) { + if !is_valid_blob_type(slice) { break 'inner; } @@ -2463,7 +2472,7 @@ impl BlobExt for Blob { if content_type.is_string() { let content_type_str = content_type.to_slice(global_this)?; let slice = content_type_str.slice(); - if !strings::is_all_ascii(slice) { + if !is_valid_blob_type(slice) { break 'inner; } blob.content_type_was_set.set(true); @@ -5584,7 +5593,7 @@ pub fn jsdom_file_construct_( if content_type.is_string() { let content_type_str = content_type.to_slice(global_this)?; let slice = content_type_str.slice(); - if !strings::is_all_ascii(slice) { + if !is_valid_blob_type(slice) { break 'inner; } blob.content_type_was_set.set(true); @@ -5684,7 +5693,7 @@ pub fn construct_bun_file( if file_type.is_string() { let str = file_type.to_slice(global_object)?; let slice = str.slice(); - if !strings::is_all_ascii(slice) { + if !is_valid_blob_type(slice) { break 'inner; } blob.content_type_was_set.set(true); diff --git a/src/semver/SemverQuery.rs b/src/semver/SemverQuery.rs index 5ee4cb2c15c..195f4c671f4 100644 --- a/src/semver/SemverQuery.rs +++ b/src/semver/SemverQuery.rs @@ -33,6 +33,17 @@ pub struct Query { pub next: Option>, } +impl Drop for Query { + fn drop(&mut self) { + // Unlink the chain iteratively so the derived recursive drop glue + // can't overflow the stack on very long AND chains. + let mut next = self.next.take(); + while let Some(mut node) = next { + next = node.next.take(); + } + } +} + pub struct QueryFormatter<'a> { query: &'a Query, buffer: &'a [u8], @@ -133,6 +144,17 @@ unsafe impl Send for List {} // `&List` exposes no unsynchronized interior mutability. unsafe impl Sync for List {} +impl Drop for List { + fn drop(&mut self) { + // Unlink the chain iteratively so the derived recursive drop glue + // can't overflow the stack on very long OR chains. + let mut next = self.next.take(); + while let Some(mut node) = next { + next = node.next.take(); + } + } +} + impl Clone for List { fn clone(&self) -> Self { let mut out = List { @@ -369,8 +391,8 @@ impl Group { writer.write_str("\"") } - // PORT NOTE: `deinit` deleted — `next: Option>` chains are freed by Drop. - // PERF(port): recursive Box drop could overflow stack on very long chains. + // PORT NOTE: `deinit` deleted — `next: Option>` chains are freed by the + // iterative `Drop` impls on `Query` and `List`. pub fn get_exact_version(&self) -> Option { let range = &self.head.head.range; @@ -402,7 +424,8 @@ impl Group { }, next: None, }, - ..Default::default() + tail: None, + next: None, }, ..Default::default() } diff --git a/src/shell_parser/parse.rs b/src/shell_parser/parse.rs index 0961042fb71..b626a983e1f 100644 --- a/src/shell_parser/parse.rs +++ b/src/shell_parser/parse.rs @@ -3485,7 +3485,21 @@ impl<'bump, const ENCODING: StringEncoding> Lexer<'bump, ENCODING> { })); return Ok(()); } - self.append_string_to_str_pool(bunstr) + let start = self.j; + self.append_string_to_str_pool(bunstr)?; + // Interpolated values are data, not shell syntax. If the value would + // begin its Text token with `~`, flush it as a quoted-text token so the + // parser does not re-interpret it as tilde expansion. Values that + // cannot be misread stay coalesced with the surrounding word so that + // literal source text after the interpolation (e.g. `${name}~bak`) + // keeps its meaning. + if self.chars.state == CharState::Normal && self.strpool.get(start as usize) == Some(&b'~') + { + self.tokens + .push(Token::DoubleQuotedText(TextRange { start, end: self.j })); + self.word_start = self.j; + } + Ok(()) } fn looks_like_js_obj_ref(&mut self) -> bool { diff --git a/src/sql/mysql/protocol/StackReader.rs b/src/sql/mysql/protocol/StackReader.rs index b0b7670fcd3..8c88244fefe 100644 --- a/src/sql/mysql/protocol/StackReader.rs +++ b/src/sql/mysql/protocol/StackReader.rs @@ -27,7 +27,10 @@ impl<'a> StackReader<'a> { } pub fn ensure_capacity(&self, length: usize) -> bool { - self.buffer.len() >= (self.offset.get() + length) + self.offset + .get() + .checked_add(length) + .is_some_and(|end| self.buffer.len() >= end) } pub fn init( diff --git a/src/sql_jsc/mysql/MySQLConnection.rs b/src/sql_jsc/mysql/MySQLConnection.rs index 53a44a3d49a..0f28e938a8f 100644 --- a/src/sql_jsc/mysql/MySQLConnection.rs +++ b/src/sql_jsc/mysql/MySQLConnection.rs @@ -706,13 +706,14 @@ impl MySQLConnection { self.tls_status = TLSStatus::SslNotAvailable; match self.ssl_mode { - SSLMode::VerifyCa | SSLMode::VerifyFull => { + // The server did not advertise CLIENT_SSL. `require` and + // stricter must fail rather than silently continue in + // plaintext (matches the Postgres driver's TLSNotAvailable + // handling). + SSLMode::Require | SSLMode::VerifyCa | SSLMode::VerifyFull => { return Err(AnyMySQLError::AuthenticationFailed); } - // require behaves like prefer for postgres.js compatibility, - // allowing graceful fallback to non-SSL when the server - // doesn't support it. - SSLMode::Require | SSLMode::Prefer | SSLMode::Disable => {} + SSLMode::Prefer | SSLMode::Disable => {} } } // Send auth response @@ -1415,6 +1416,14 @@ impl MySQLConnection { // Can't be 0 return Err(AnyMySQLError::UnexpectedPacket); } + // field_count is a server-controlled lenenc int (up to 2^64-1) and is + // used below to size a Vec (~256 bytes each). MySQL + // hard-caps a result set at 4096 columns (MAX_FIELDS), so anything + // larger is a malformed/hostile packet trying to make us commit + // gigabytes from a ~13-byte response. + if header.field_count > 4096 { + return Err(AnyMySQLError::UnexpectedPacket); + } if statement.columns.len() as u64 != header.field_count { bun_core::scoped_log!( MySQLConnection, diff --git a/src/sql_jsc/postgres/PostgresSQLConnection.rs b/src/sql_jsc/postgres/PostgresSQLConnection.rs index 129a983e73b..3baefd0281e 100644 --- a/src/sql_jsc/postgres/PostgresSQLConnection.rs +++ b/src/sql_jsc/postgres/PostgresSQLConnection.rs @@ -2863,6 +2863,19 @@ impl PostgresSQLConnection { } } protocol::Authentication::Ok => { + // RFC 5802 §5: once a SCRAM exchange has begun, the + // server must prove itself via SASLFinal (whose + // signature check above resets the state to `None`) + // before AuthenticationOk is acceptable. Accepting Ok + // mid-exchange would let a MITM skip the + // server-signature verification. + if matches!( + self.authentication_state.get(), + AuthenticationState::Sasl(_) + ) { + debug!("AuthenticationOk before SASL exchange completed"); + return Err(AnyPostgresError::UnexpectedMessage); + } debug!("Authentication OK"); self.authentication_state.with_mut(|s| s.zero()); self.authentication_state.set(AuthenticationState::Ok); diff --git a/src/sys/lib.rs b/src/sys/lib.rs index 053313cafd0..f71adefb9b3 100644 --- a/src/sys/lib.rs +++ b/src/sys/lib.rs @@ -1289,6 +1289,12 @@ pub mod O { pub const EVTONLY: i32 = libc::O_EVTONLY; #[cfg(not(target_os = "macos"))] pub const EVTONLY: i32 = 0; + // Darwin-only: fail with ELOOP if *any* path component is a symlink, not + // just the final one like O_NOFOLLOW. 0 elsewhere so the bit-or is a no-op. + #[cfg(target_os = "macos")] + pub const NOFOLLOW_ANY: i32 = libc::O_NOFOLLOW_ANY; + #[cfg(not(target_os = "macos"))] + pub const NOFOLLOW_ANY: i32 = 0; } // ────────────────────────────────────────────────────────────────────────── // `File` / `Dir` — high-level handles. Extracted to file.rs / dir.rs diff --git a/src/valkey/valkey_protocol.rs b/src/valkey/valkey_protocol.rs index fc218bcf7b2..76e4fb5d21f 100644 --- a/src/valkey/valkey_protocol.rs +++ b/src/valkey/valkey_protocol.rs @@ -215,11 +215,18 @@ impl fmt::Display for RESPValue { pub struct ValkeyReader<'a> { buffer: &'a [u8], pos: usize, + /// Bytes of aggregate `Vec` preallocation still allowed for the current + /// `read_value` call. See `take_prealloc_budget`. + prealloc_budget: usize, } impl<'a> ValkeyReader<'a> { pub fn init(buffer: &'a [u8]) -> ValkeyReader<'a> { - ValkeyReader { buffer, pos: 0 } + ValkeyReader { + buffer, + pos: 0, + prealloc_budget: buffer.len(), + } } /// Current read offset into the underlying buffer. @@ -331,7 +338,20 @@ impl<'a> ValkeyReader<'a> { /// attacker-chosen size. const MAX_BULK_LEN: i64 = 512 * 1024 * 1024; + /// Caps an aggregate's `Vec::with_capacity` so the total bytes reserved + /// across the whole parse — every nesting level combined — never exceed + /// the input buffer's size. Re-applying a per-level "remaining buffer" + /// cap at each of up to `MAX_NESTING_DEPTH` levels would let a hostile + /// server amplify a few KB of nested aggregate headers carrying huge + /// declared lengths into gigabytes of reserved capacity. + fn take_prealloc_budget(&mut self, len: usize, element_size: usize) -> usize { + let cap = len.min(self.prealloc_budget / element_size.max(1)); + self.prealloc_budget -= cap * element_size; + cap + } + pub fn read_value(&mut self) -> Result { + self.prealloc_budget = self.buffer.len() - self.pos; self.read_value_with_depth(0) } @@ -384,7 +404,8 @@ impl<'a> ValkeyReader<'a> { return Ok(RESPValue::Array(Vec::new())); } let len = usize::try_from(len).expect("int cast"); - let mut array = Vec::with_capacity(len.min(self.buffer.len() - self.pos)); + let mut array = + Vec::with_capacity(self.take_prealloc_budget(len, size_of::())); // errdefer cleanup handled by Vec Drop on `?` let mut i: usize = 0; while i < len { @@ -436,7 +457,8 @@ impl<'a> ValkeyReader<'a> { } let len = usize::try_from(len).expect("int cast"); - let mut entries = Vec::with_capacity(len.min(self.buffer.len() - self.pos)); + let mut entries = + Vec::with_capacity(self.take_prealloc_budget(len, size_of::())); // errdefer cleanup handled by Vec Drop on `?` let mut i: usize = 0; while i < len { @@ -458,7 +480,8 @@ impl<'a> ValkeyReader<'a> { } let len = usize::try_from(len).expect("int cast"); - let mut set = Vec::with_capacity(len.min(self.buffer.len() - self.pos)); + let mut set = + Vec::with_capacity(self.take_prealloc_budget(len, size_of::())); // errdefer cleanup handled by Vec Drop on `?` let mut i: usize = 0; while i < len { @@ -477,7 +500,8 @@ impl<'a> ValkeyReader<'a> { } let len = usize::try_from(len).expect("int cast"); - let mut attrs = Vec::with_capacity(len.min(self.buffer.len() - self.pos)); + let mut attrs = + Vec::with_capacity(self.take_prealloc_budget(len, size_of::())); // errdefer cleanup handled by Vec Drop on `?` let mut i: usize = 0; while i < len { @@ -526,7 +550,8 @@ impl<'a> ValkeyReader<'a> { // Read the rest of the data let data_len = usize::try_from(len - 1).expect("int cast"); - let mut data = Vec::with_capacity(data_len.min(self.buffer.len() - self.pos)); + let mut data = + Vec::with_capacity(self.take_prealloc_budget(data_len, size_of::())); // errdefer cleanup handled by Vec Drop on `?` let mut i: usize = 0; while i < data_len { @@ -548,6 +573,177 @@ impl<'a> ValkeyReader<'a> { } } +/// Outcome of an incremental [`ReplyScanner::scan`] pass. +pub enum ScanResult { + /// A complete top-level reply is buffered and safe to hand to + /// [`ValkeyReader::read_value`]. + Complete, + /// The buffer does not yet contain a complete reply. + NeedMoreData, +} + +/// Incrementally locates the end of the next complete RESP reply without +/// materializing any values. +/// +/// `on_data` re-runs the tree parser over the accumulated read buffer on every +/// socket callback. Without this scanner, an aggregate reply (`*N`, `%N`, `~N`, +/// `>N`, `|N`) whose elements arrive in separate TCP segments is re-parsed from +/// its header each time — O(N^2) element parses for an N-element reply, which a +/// hostile server can use to pin the JS thread. The scanner persists its byte +/// offset and the stack of in-progress aggregates across calls so each buffered +/// byte is examined a bounded number of times; the allocating parser only runs +/// once a full reply is known to be present. +#[derive(Default)] +pub struct ReplyScanner { + /// Byte offset of the next unscanned element, relative to the start of the + /// buffer passed to [`ReplyScanner::scan`]. + pos: usize, + /// Remaining child-value count for each in-progress aggregate, outermost + /// first. + stack: Vec, +} + +impl ReplyScanner { + /// Discard all progress. Must be called whenever the underlying buffer is + /// consumed or replaced. + pub fn reset(&mut self) { + self.pos = 0; + self.stack.clear(); + } + + /// Resume scanning `buffer` (the connection's accumulated, unconsumed read + /// buffer) for the end of the next complete reply. `buffer` must be the + /// same byte stream as the previous call with zero or more bytes appended. + pub fn scan(&mut self, buffer: &[u8]) -> Result { + loop { + let mut reader = ValkeyReader { + buffer, + pos: self.pos, + prealloc_budget: 0, + }; + let children = match Self::scan_one(&mut reader, self.stack.len()) { + Ok(children) => children, + // `InvalidResponse` is the parser's "ran out of bytes" sentinel. + Err(RedisError::InvalidResponse) => return Ok(ScanResult::NeedMoreData), + Err(err) => return Err(err), + }; + self.pos = reader.pos; + if let Some(children) = children + && children > 0 + { + self.stack.push(children); + continue; + } + // A scalar or empty aggregate finished; unwind every aggregate it + // completes. + while let Some(remaining) = self.stack.last_mut() { + *remaining -= 1; + if *remaining > 0 { + break; + } + self.stack.pop(); + } + if self.stack.is_empty() { + return Ok(ScanResult::Complete); + } + } + } + + /// Skip a single element starting at `reader.pos`. Returns `Some(n)` for an + /// aggregate expecting `n` further child values, or `None` for a + /// fully-skipped scalar. `InvalidResponse` means the element is not yet + /// fully buffered. + fn scan_one(reader: &mut ValkeyReader<'_>, depth: usize) -> Result, RedisError> { + let type_byte = reader.read_byte()?; + let ty = RESPType::from_byte(type_byte).ok_or(RedisError::InvalidResponseType)?; + match ty { + RESPType::SimpleString + | RESPType::Error + | RESPType::Integer + | RESPType::Null + | RESPType::Double + | RESPType::Boolean + | RESPType::BigNumber => { + let _ = reader.read_until_crlf()?; + Ok(None) + } + RESPType::BulkString | RESPType::BlobError | RESPType::VerbatimString => { + let invalid = match ty { + RESPType::BlobError => RedisError::InvalidBlobError, + RESPType::VerbatimString => RedisError::InvalidVerbatimString, + _ => RedisError::InvalidBulkString, + }; + let len = reader.read_integer()?; + if len < 0 { + // Only `$-1` (RESP2 null bulk string) is legal; the tree + // parser rejects negative `!`/`=` lengths. + return if ty == RESPType::BulkString { + Ok(None) + } else { + Err(invalid) + }; + } + if len > ValkeyReader::MAX_BULK_LEN { + return Err(invalid); + } + let len = usize::try_from(len).expect("int cast"); + // The payload plus its trailing CRLF must be fully buffered. + if reader.buffer.len() - reader.pos < len + 2 { + return Err(RedisError::InvalidResponse); + } + if reader.buffer[reader.pos + len] != b'\r' + || reader.buffer[reader.pos + len + 1] != b'\n' + { + return Err(invalid); + } + reader.pos += len + 2; + Ok(None) + } + RESPType::Array | RESPType::Set | RESPType::Push => { + if depth >= ValkeyReader::MAX_NESTING_DEPTH { + return Err(RedisError::NestingDepthExceeded); + } + let len = reader.read_integer()?; + // Mirror the tree parser: only `*-1` (RESP2 null array) is a + // legal non-positive aggregate length here. + match ty { + RESPType::Array if len < 0 => Ok(Some(0)), + RESPType::Set if len < 0 => Err(RedisError::InvalidSet), + RESPType::Push if len <= 0 => Err(RedisError::InvalidPush), + _ => Ok(Some(u64::try_from(len).expect("int cast"))), + } + } + RESPType::Map => { + if depth >= ValkeyReader::MAX_NESTING_DEPTH { + return Err(RedisError::NestingDepthExceeded); + } + let len = reader.read_integer()?; + if len < 0 { + return Err(RedisError::InvalidMap); + } + Ok(Some( + u64::try_from(len).expect("int cast").saturating_mul(2), + )) + } + RESPType::Attribute => { + if depth >= ValkeyReader::MAX_NESTING_DEPTH { + return Err(RedisError::NestingDepthExceeded); + } + let len = reader.read_integer()?; + if len < 0 { + return Err(RedisError::InvalidAttribute); + } + Ok(Some( + u64::try_from(len) + .expect("int cast") + .saturating_mul(2) + .saturating_add(1), + )) + } + } + } +} + pub struct MapEntry { pub key: RESPValue, pub value: RESPValue, diff --git a/src/which/lib.rs b/src/which/lib.rs index 524eac570a3..5433511a4ac 100644 --- a/src/which/lib.rs +++ b/src/which/lib.rs @@ -139,6 +139,43 @@ pub fn ends_with_extension(str: &[u8]) -> bool { false } +/// Returns true when `path` names a Windows batch script (`.cmd` / `.bat`). +/// +/// `CreateProcessW` runs these through `cmd.exe`, which re-tokenizes the +/// command line with shell metacharacter rules ("BatBadBut", +/// CVE-2024-24576 / CVE-2024-27980). Spawn paths must not pass untrusted +/// arguments to one without checking [`batch_arg_has_cmd_metachars`]. +pub fn is_batch_file(path: &[u8]) -> bool { + // Windows strips trailing ASCII spaces and periods from the final path + // component, so `foo.cmd.` / `foo.cmd ` still run `foo.cmd` through + // cmd.exe (CVE-2024-43402). Trim them before checking the extension. + let mut end = path.len(); + while end > 0 && matches!(path[end - 1], b' ' | b'.') { + end -= 1; + } + if end < 4 || path[end - 4] != b'.' { + return false; + } + let file_ext = &path[end - 3..end]; + strings::eql_case_insensitive_asciii_check_length(file_ext, b"cmd") + || strings::eql_case_insensitive_asciii_check_length(file_ext, b"bat") +} + +/// Returns true when `arg` contains a byte `cmd.exe` would reinterpret while +/// re-tokenizing the command line of a `.bat`/`.cmd` invocation: `"` breaks +/// out of libuv's MSVCRT-style quoting, `%` expands environment variables +/// even inside quotes, and the rest are command separators / redirection / +/// escape characters in unquoted positions. None of these can be escaped for +/// `cmd.exe`, so callers must reject the spawn instead. +pub fn batch_arg_has_cmd_metachars(arg: &[u8]) -> bool { + arg.iter().any(|&c| { + matches!( + c, + b'"' | b'%' | b'&' | b'|' | b'<' | b'>' | b'^' | b'\r' | b'\n' + ) + }) +} + /// Check if the WPathBuffer holds a existing file path, checking also for windows extensions variants like .exe, .cmd and .bat (internally used by which_win) fn search_bin( buf: &mut WPathBuffer, diff --git a/src/zlib/lib.rs b/src/zlib/lib.rs index ea4e463fa42..c474c7fcc87 100644 --- a/src/zlib/lib.rs +++ b/src/zlib/lib.rs @@ -367,6 +367,9 @@ pub struct ZlibReaderArrayList<'a> { pub zlib: zStream_struct, // PORT NOTE: allocator field dropped (global mimalloc) pub state: ZlibReaderArrayListState, + /// Decompression-bomb guard: `read_all` errors instead of growing the + /// output past this many bytes. Defaults to unbounded. + pub max_output_size: usize, } impl<'a> Drop for ZlibReaderArrayList<'a> { @@ -414,6 +417,7 @@ impl<'a> ZlibReaderArrayList<'a> { list_ptr: list, zlib: bun_core::ffi::zeroed(), state: ZlibReaderArrayListState::Uninitialized, + max_output_size: usize::MAX, }); let list_len = zlib_reader.list_ptr.len(); @@ -510,10 +514,20 @@ impl<'a> ZlibReaderArrayList<'a> { // flush parameter). if self.zlib.avail_out == 0 { + let produced = self.zlib.total_out as usize; + let remaining_budget = self.max_output_size.saturating_sub(produced); + if remaining_budget == 0 { + self.state = ZlibReaderArrayListState::Error; + return Err(ZlibError::ZlibError); + } // SAFETY: zlib writes the tail; len is truncated to `total_out` before any read. - let (next_out, avail_out) = unsafe { self.list_ptr.reserve_expand_tail(4096) }; + let (next_out, avail_out) = unsafe { + self.list_ptr + .reserve_expand_tail(remaining_budget.min(4096)) + }; self.zlib.next_out = next_out; - self.zlib.avail_out = avail_out as uInt; + // Clamp so a single inflate call cannot write past `max_output_size`. + self.zlib.avail_out = avail_out.min(remaining_budget) as uInt; } // Try to inflate even if avail_in is 0, as this could be a valid empty gzip stream diff --git a/src/zstd/lib.rs b/src/zstd/lib.rs index a0a1c76b960..875d95a8e01 100644 --- a/src/zstd/lib.rs +++ b/src/zstd/lib.rs @@ -312,6 +312,9 @@ pub struct ZstdReaderArrayList<'a> { pub state: State, pub total_out: usize, pub total_in: usize, + /// Decompression-bomb guard: `read_all` errors instead of growing the + /// output past this many bytes. Defaults to unbounded. + pub max_output_size: usize, } impl<'a> ZstdReaderArrayList<'a> { @@ -343,6 +346,7 @@ impl<'a> ZstdReaderArrayList<'a> { state: State::Uninitialized, total_out: 0, total_in: 0, + max_output_size: usize::MAX, })) } @@ -380,6 +384,14 @@ impl<'a> ZstdReaderArrayList<'a> { return Ok(()); } + // Decompression-bomb guard: clamp the output space handed to a single + // ZSTD_decompressStream call so one call can never write past the cap. + let remaining_output = self.max_output_size.saturating_sub(self.list_ptr.len()); + if remaining_output == 0 { + self.state = State::Error; + return Err(ZstdError::ZstdDecompressionError); + } + // SAFETY: write-only spare; ZSTD_decompressStream initializes the // first `out_buf.pos` bytes. let spare = unsafe { bun_core::vec::reserve_spare_bytes(self.list_ptr, 4096) }; @@ -390,7 +402,7 @@ impl<'a> ZstdReaderArrayList<'a> { }; let mut out_buf = c::ZSTD_outBuffer { dst: spare.as_mut_ptr().cast::(), - size: spare.len(), + size: spare.len().min(remaining_output), pos: 0, }; diff --git a/test/js/bun/shell/bunshell.test.ts b/test/js/bun/shell/bunshell.test.ts index f6a668f349b..c597aad0e89 100644 --- a/test/js/bun/shell/bunshell.test.ts +++ b/test/js/bun/shell/bunshell.test.ts @@ -483,6 +483,17 @@ describe("bunshell", () => { .stdout("/home/user/bin\n") .runAsTest("cmd subst as last atom"); }); + + describe("interpolated values", async () => { + // A `~` that comes from an interpolated value is data, not syntax. + TestBuilder.command`echo ${"~"}/x`.stdout("~/x\n").runAsTest("interpolated tilde stays literal"); + TestBuilder.command`echo ${"~/secret"}`.stdout("~/secret\n").runAsTest("interpolated tilde path stays literal"); + // A literal `~` in the source after an interpolation keeps its meaning. + TestBuilder.command`echo ${"a b"}~/x`.stdout("a b~/x\n").runAsTest("literal tilde after interpolated value"); + TestBuilder.command`echo ${"a b"}~bak` + .stdout("a b~bak\n") + .runAsTest("backup-suffix idiom after interpolated value"); + }); }); // Ported from GNU bash "quote.tests" diff --git a/test/js/node/http/early-hints-crlf-injection.test.ts b/test/js/node/http/early-hints-crlf-injection.test.ts index ad24729659a..38678d6d3e2 100644 --- a/test/js/node/http/early-hints-crlf-injection.test.ts +++ b/test/js/node/http/early-hints-crlf-injection.test.ts @@ -134,4 +134,47 @@ describe.concurrent("writeEarlyHints", () => { expect(stdout).toContain("body:ok"); expect(exitCode).toBe(0); }); + + test("rejects pathological link value without catastrophic backtracking", async () => { + await using proc = Bun.spawn({ + cmd: [ + bunExe(), + "-e", + ` + const http = require("node:http"); + const server = http.createServer((req, res) => { + try { + res.writeEarlyHints({ + link: "" + ";a=b".repeat(32) + " ", + }); + console.log("FAIL: no error thrown"); + process.exit(1); + } catch (e) { + console.log("error_code:" + e.code); + res.writeHead(200); + res.end("ok"); + } + }); + server.listen(0, () => { + http.get({ port: server.address().port }, (res) => { + let data = ""; + res.on("data", (c) => data += c); + res.on("end", () => { + console.log("body:" + data); + server.close(); + }); + }); + }); + `, + ], + env: bunEnv, + stderr: "pipe", + }); + + const [stdout, stderr, exitCode] = await Promise.all([proc.stdout.text(), proc.stderr.text(), proc.exited]); + + expect(stdout).toContain("error_code:ERR_INVALID_ARG_VALUE"); + expect(stdout).toContain("body:ok"); + expect(exitCode).toBe(0); + }); });