Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/bun.js/VirtualMachine.zig
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,13 @@
return this.rareData().mimeTypeFromString(this.allocator, str);
}

/// Interning lookup for Blob/File `type` that preserves the raw MIME
/// string (no charset substitution). See `rare_data.mimeTypeInternedValue`.
pub fn mimeTypeInternedValue(this: *VirtualMachine, str: []const u8) ?[]const u8 {
return this.rareData().mimeTypeInternedValue(this.allocator, str);
}

pub fn onAfterEventLoop(this: *VirtualMachine) void {

Check failure on line 355 in src/bun.js/VirtualMachine.zig

View check run for this annotation

Claude / Claude Code Review

S3File construction paths still canonicalize MIME types

S3File construction paths still call the old `mimeType()` helper, which routes through `MimeType.Compact.toMimeType()` and silently rewrites `text/plain` → `text/plain;charset=utf-8`, etc. Any S3-backed blob created with a well-known MIME type will have its `.type` property canonicalized, violating the same WHATWG File API semantics this PR fixes. Fix by replacing `mimeType(str.slice())` with `mimeTypeInternedValue(slice)` at both S3File.zig lines 286 and 330, matching the pattern applied throug
if (this.after_event_loop_callback) |cb| {
const ctx = this.after_event_loop_callback_ctx;
this.after_event_loop_callback = null;
Expand Down
24 changes: 24 additions & 0 deletions src/bun.js/rare_data.zig
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,30 @@ pub fn mimeTypeFromString(this: *RareData, allocator: std.mem.Allocator, str: []
return null;
}

/// Look up a MIME type string in the interned table and return its raw
/// (uncanonicalized) static-string slice, if it exists.
///
/// Unlike `mimeTypeFromString`, this does NOT substitute canonical
/// charset-appended forms (e.g. `text/plain` is returned as-is, not as
/// `text/plain;charset=utf-8`). Use this where the WHATWG File/Blob API
/// requires preserving the user-supplied MIME type verbatim.
///
/// Returns a slice into a static `_bytes` blob — safe to store without
/// allocation tracking.
pub fn mimeTypeInternedValue(this: *RareData, allocator: std.mem.Allocator, str: []const u8) ?[]const u8 {
if (this.mime_types == null) {
this.mime_types = bun.http.MimeType.createHashTable(
allocator,
) catch |err| bun.handleOom(err);
}

if (this.mime_types.?.get(str)) |entry| {
return entry.slice();
}

return null;
}

pub const HotMap = struct {
_map: bun.StringArrayHashMap(Entry),

Expand Down
48 changes: 33 additions & 15 deletions src/bun.js/webcore/Blob.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1829,8 +1829,11 @@
}
blob.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
blob.content_type = mime.value;
// WHATWG File API: the stored `type` must be the
// lowercased input verbatim — do NOT canonicalize into
// charset-appended forms like `text/plain;charset=utf-8`.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -1929,8 +1932,10 @@
break :inner;
}
blob.content_type_was_set = true;
if (vm.mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (vm.mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -2323,11 +2328,14 @@
if (strings.isAllASCII(slice)) {
if (this.content_type_allocated) {
bun.default_allocator.free(this.content_type);
this.content_type_allocated = false;
}
this.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
this.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
this.content_type = interned;
} else {
const content_type_buf = bun.handleOom(bun.default_allocator.alloc(u8, slice.len));
this.content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -2666,11 +2674,14 @@
if (strings.isAllASCII(slice)) {
if (this.content_type_allocated) {
bun.default_allocator.free(this.content_type);
this.content_type_allocated = false;
}
this.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
this.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
this.content_type = interned;
} else {
const content_type_buf = bun.handleOom(bun.default_allocator.alloc(u8, slice.len));
this.content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -2923,14 +2934,16 @@
defer slicer.deinit();
const slice = slicer.slice();
if (!strings.isAllASCII(slice)) {
break :inner;
}

if (globalThis.bunVM().mimeType(slice)) |mime| {
content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
content_type = interned;
break :inner;
}

Check notice on line 2946 in src/bun.js/webcore/Blob.zig

View check run for this annotation

Claude / Claude Code Review

getSlice: content_type_was_set stays false for interned MIME types

In `blob.slice(start, end, type)`, passing a known MIME type (e.g. `'text/plain'`) leaves `content_type_was_set=false` on the returned slice when the parent blob has no type, causing `getMimeTypeOrContentType()` to skip the user-supplied type and return the store MIME instead — producing wrong Content-Type headers on HTTP responses. This is a pre-existing issue; the old `mimeType()` path had the same omission, and the PR modifies this exact code without fixing the flag.
content_type_was_allocated = slice.len > 0;
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -3354,8 +3367,10 @@
}
blob.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
blob.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -3519,15 +3534,18 @@
pub fn dupeWithContentType(this: *const Blob, include_content_type: bool) Blob {
if (this.store != null) this.store.?.ref();
var duped = this.*;
duped.setNotHeapAllocated();
if (duped.content_type_allocated and duped.isHeapAllocated() and !include_content_type) {

// for now, we just want to avoid a use-after-free here
if (jsc.VirtualMachine.get().mimeType(duped.content_type)) |mime| {
duped.content_type = mime.value;
// for now, we just want to avoid a use-after-free here.
// Use the interned value so the content_type isn't silently
// canonicalized into a charset-appended form (which would break
// WHATWG File API semantics).
if (jsc.VirtualMachine.get().mimeTypeInternedValue(duped.content_type)) |interned| {
duped.content_type = interned;
} else {
// TODO: fix this
// this is a bug.

Check failure on line 3548 in src/bun.js/webcore/Blob.zig

View check run for this annotation

Claude / Claude Code Review

dupeWithContentType: use-after-free guard is unreachable dead code

The use-after-free fix in `dupeWithContentType` is unreachable dead code: `duped.setNotHeapAllocated()` sets `#ref_count` to zero, but both subsequent guards check `duped.isHeapAllocated()` (which tests `raw_value != 0`), so both branches are permanently false. The `mimeTypeInternedValue` substitution this PR introduces into that block never executes, and when `include_content_type=true` with a heap-allocated `content_type`, the pointer is silently shared between the original and duped blob with
// it means whenever
duped.content_type = "";
}
Expand Down
14 changes: 8 additions & 6 deletions test/js/bun/util/inspect.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -587,24 +587,26 @@ describe("console.logging class displays names and extends", async () => {
});

it("console.log on a Blob shows name", () => {
// Per WHATWG File API, `text/plain` must NOT be canonicalized to
// `text/plain;charset=utf-8` — see #29257.
const blob = new Blob(["foo"], { type: "text/plain" });
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n type: "text/plain"\n}');
blob.name = "bar";
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "bar",\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "bar",\n type: "text/plain"\n}');
blob.name = "foobar";
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "foobar",\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "foobar",\n type: "text/plain"\n}');

const file = new File(["foo"], "bar.txt", { type: "text/plain" });
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "bar.txt",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "bar.txt",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
file.name = "foobar";
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "foobar",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "foobar",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
file.name = "";
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
});

Expand Down
22 changes: 12 additions & 10 deletions test/js/web/structured-clone-blob-file.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ describe("structuredClone with Blob and File", () => {
const cloned = structuredClone(blob);
expect(cloned).toBeInstanceOf(Blob);
expect(cloned.size).toBe(11);
expect(cloned.type).toBe("text/plain;charset=utf-8");
// Per WHATWG File API, `text/plain` must NOT be canonicalized to
// `text/plain;charset=utf-8` — see #29257.
expect(cloned.type).toBe("text/plain");

const originalText = await blob.text();
const clonedText = await cloned.text();
Expand Down Expand Up @@ -63,11 +65,11 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.first).toBeInstanceOf(Blob);
expect(cloned.first.size).toBe(5);
expect(cloned.first.type).toBe("text/plain;charset=utf-8");
expect(cloned.first.type).toBe("text/plain");

expect(cloned.second).toBeInstanceOf(Blob);
expect(cloned.second.size).toBe(5);
expect(cloned.second.type).toBe("text/html;charset=utf-8");
expect(cloned.second.type).toBe("text/html");
});

test("deeply nested Blob", () => {
Expand All @@ -92,7 +94,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned).toBeInstanceOf(File);
expect(cloned.name).toBe("test.txt");
expect(cloned.size).toBe(7);
expect(cloned.type).toBe("text/plain;charset=utf-8");
expect(cloned.type).toBe("text/plain");
expect(cloned.lastModified).toBe(1234567890000);
});

Expand All @@ -103,7 +105,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned).toBeInstanceOf(File);
expect(cloned.name).toBe("test.txt");
expect(cloned.size).toBe(7);
expect(cloned.type).toBe("text/plain;charset=utf-8");
expect(cloned.type).toBe("text/plain");
expect(cloned.lastModified).toBeGreaterThan(0);
});

Expand All @@ -125,7 +127,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned.file).toBeInstanceOf(File);
expect(cloned.file.name).toBe("test.txt");
expect(cloned.file.size).toBe(4);
expect(cloned.file.type).toBe("text/plain;charset=utf-8");
expect(cloned.file.type).toBe("text/plain");
});

test("multiple Files in object", () => {
Expand All @@ -136,11 +138,11 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.txt).toBeInstanceOf(File);
expect(cloned.txt.name).toBe("hello.txt");
expect(cloned.txt.type).toBe("text/plain;charset=utf-8");
expect(cloned.txt.type).toBe("text/plain");

expect(cloned.html).toBeInstanceOf(File);
expect(cloned.html.name).toBe("world.html");
expect(cloned.html.type).toBe("text/html;charset=utf-8");
expect(cloned.html.type).toBe("text/html");
});
});

Expand All @@ -153,12 +155,12 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.blob).toBeInstanceOf(Blob);
expect(cloned.blob.size).toBe(12);
expect(cloned.blob.type).toBe("text/plain;charset=utf-8");
expect(cloned.blob.type).toBe("text/plain");

expect(cloned.file).toBeInstanceOf(File);
expect(cloned.file.name).toBe("test.txt");
expect(cloned.file.size).toBe(12);
expect(cloned.file.type).toBe("text/plain;charset=utf-8");
expect(cloned.file.type).toBe("text/plain");
});

test("array with mixed Blob and File", () => {
Expand Down
68 changes: 68 additions & 0 deletions test/regression/issue/29257.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import { expect, test } from "bun:test";

// https://github.com/oven-sh/bun/issues/29257
//
// Bun was rewriting `text/plain` (and `text/css`, `text/html`,
// `application/json`, ...) to their charset-appended canonical forms
// (`text/plain;charset=utf-8`, etc.) when the user set the `type` on a
// Blob/File at construction time.
//
// Per the WHATWG File API (https://w3c.github.io/FileAPI/#blob), user
// agents must NOT append a charset parameter to the media type.

test("new File(..., { type: 'text/plain' }).type is preserved verbatim", () => {
const file = new File([], "empty.txt", { type: "text/plain" });
expect(file.type).toBe("text/plain");
});

test("new Blob([], { type: 'text/plain' }).type is preserved verbatim", () => {
const blob = new Blob([], { type: "text/plain" });
expect(blob.type).toBe("text/plain");
});

test("File/Blob type is preserved for other types Bun used to canonicalize", () => {
// These are the types Compact.toMimeType() substitutes into
// charset-appended forms for HTTP responses. None of them should leak
// the substitution into the File/Blob `type` property.
const types = [
"text/plain",
"text/css",
"text/html",
"text/javascript",
"application/json",
"application/javascript",
];
for (const type of types) {
expect(new File([], "x", { type }).type).toBe(type);
expect(new Blob([], { type }).type).toBe(type);
}
});

test("File/Blob type with explicit charset is preserved verbatim", () => {
// A user who explicitly passes a charset parameter should get it back
// unchanged — not silently swapped for a different canonical form.
const file = new File([], "x.txt", { type: "text/plain;charset=utf-8" });
expect(file.type).toBe("text/plain;charset=utf-8");

const blob = new Blob([], { type: "text/plain;charset=utf-8" });
expect(blob.type).toBe("text/plain;charset=utf-8");
});

test("File/Blob type is lowercased (per WHATWG spec)", () => {
// The spec requires lowercasing but not charset canonicalization.
expect(new File([], "x", { type: "TEXT/PLAIN" }).type).toBe("text/plain");
expect(new Blob([], { type: "Text/Plain" }).type).toBe("text/plain");
});

test("uncommon MIME types still round-trip unchanged", () => {
// Types not in the interning table take the copyLowercase path. They
// should also round-trip verbatim (lowercased).
const file = new File([], "x", { type: "application/x-custom-type" });
expect(file.type).toBe("application/x-custom-type");
});

test("Bun.file(path, { type: 'text/plain' }).type is preserved verbatim", () => {
// Covers the `constructBunFile` path in Blob.zig.
const file = Bun.file(import.meta.path, { type: "text/plain" });
expect(file.type).toBe("text/plain");
});
Loading