Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/bun.js/VirtualMachine.zig
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,12 @@ pub fn mimeType(this: *VirtualMachine, str: []const u8) ?bun.http.MimeType {
return this.rareData().mimeTypeFromString(this.allocator, str);
}

/// Looks up `str` in the interned MIME table held by this VM's rare data and
/// returns the table's raw string for it, with no charset substitution.
/// Meant for Blob/File `type`; see `rare_data.mimeTypeInternedValue`.
/// Yields `null` when the lookup finds no entry for `str`.
pub fn mimeTypeInternedValue(this: *VirtualMachine, str: []const u8) ?[]const u8 {
    const rare = this.rareData();
    return rare.mimeTypeInternedValue(this.allocator, str);
}

pub fn onAfterEventLoop(this: *VirtualMachine) void {
if (this.after_event_loop_callback) |cb| {
const ctx = this.after_event_loop_callback_ctx;
Expand Down
24 changes: 24 additions & 0 deletions src/bun.js/rare_data.zig
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,30 @@ pub fn mimeTypeFromString(this: *RareData, allocator: std.mem.Allocator, str: []
return null;
}

/// Returns the interned, raw spelling of the MIME type `str`, or `null` when
/// `str` is not a key in the MIME table.
///
/// The table is created with `allocator` the first time it is needed and is
/// cached in `this.mime_types`; a failure while creating it is handed to
/// `bun.handleOom`.
///
/// In contrast to `mimeTypeFromString`, the entry is NOT replaced by its
/// canonical charset-appended form (`text/plain` stays `text/plain` instead
/// of turning into `text/plain;charset=utf-8`). Call this wherever the WHATWG
/// File/Blob API requires the user-supplied MIME type to be kept verbatim.
///
/// The result is the entry's `slice()`, which points into a static `_bytes`
/// blob, so it can be stored without allocation tracking.
pub fn mimeTypeInternedValue(this: *RareData, allocator: std.mem.Allocator, str: []const u8) ?[]const u8 {
    // Build the lookup table on demand if no earlier call has done so.
    if (this.mime_types == null) {
        this.mime_types = bun.http.MimeType.createHashTable(allocator) catch |err| bun.handleOom(err);
    }

    // A miss means the caller has to fall back to its own copy of the string.
    const entry = this.mime_types.?.get(str) orelse return null;
    return entry.slice();
}

pub const HotMap = struct {
_map: bun.StringArrayHashMap(Entry),

Expand Down
56 changes: 42 additions & 14 deletions src/bun.js/webcore/Blob.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1829,8 +1829,11 @@ pub fn JSDOMFile__construct_(globalThis: *jsc.JSGlobalObject, callframe: *jsc.Ca
}
blob.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
blob.content_type = mime.value;
// WHATWG File API: the stored `type` must be the
// lowercased input verbatim — do NOT canonicalize into
// charset-appended forms like `text/plain;charset=utf-8`.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -1929,8 +1932,10 @@ pub fn constructBunFile(
break :inner;
}
blob.content_type_was_set = true;
if (vm.mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (vm.mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -2323,11 +2328,14 @@ pub fn doWrite(this: *Blob, globalThis: *jsc.JSGlobalObject, callframe: *jsc.Cal
if (strings.isAllASCII(slice)) {
if (this.content_type_allocated) {
bun.default_allocator.free(this.content_type);
this.content_type_allocated = false;
}
this.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
this.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
this.content_type = interned;
} else {
const content_type_buf = bun.handleOom(bun.default_allocator.alloc(u8, slice.len));
this.content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -2666,11 +2674,14 @@ pub fn getWriter(
if (strings.isAllASCII(slice)) {
if (this.content_type_allocated) {
bun.default_allocator.free(this.content_type);
this.content_type_allocated = false;
}
this.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
this.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
this.content_type = interned;
} else {
const content_type_buf = bun.handleOom(bun.default_allocator.alloc(u8, slice.len));
this.content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -2843,7 +2854,13 @@ pub fn getSliceFrom(this: *Blob, globalThis: *jsc.JSGlobalObject, relativeStart:
blob.content_type = content_type;
}
blob.content_type_allocated = content_type_was_allocated;
blob.content_type_was_set = this.content_type_was_set or content_type_was_allocated;
// A user-supplied `content_type` argument to `.slice(start, end, type)`
// may come from the interning table (static, not allocated) — the
// previous `or content_type_was_allocated` condition missed that case
// and left the slice's `content_type_was_set` false, so HTTP serving
// of the slice fell back to the store MIME type. Check the string
// length directly instead.
blob.content_type_was_set = this.content_type_was_set or blob.content_type.len > 0;

var blob_ = Blob.new(blob);
return blob_.toJS(globalThis);
Expand Down Expand Up @@ -2926,8 +2943,10 @@ pub fn getSlice(
break :inner;
}

if (globalThis.bunVM().mimeType(slice)) |mime| {
content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
content_type = interned;
break :inner;
}

Expand Down Expand Up @@ -3354,8 +3373,10 @@ pub fn constructor(globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) b
}
blob.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
blob.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -3520,8 +3541,15 @@ pub fn dupeWithContentType(this: *const Blob, include_content_type: bool) Blob {
if (this.store != null) this.store.?.ref();
var duped = this.*;
duped.setNotHeapAllocated();
// NOTE: both branches below are currently unreachable — `setNotHeapAllocated`
// above zeroes the ref count so `duped.isHeapAllocated()` is always false.
// That means neither the use-after-free workaround (first branch) nor the
// `content_type` duplication for `include_content_type=true` (second branch)
// ever runs at runtime. Left in place because fixing both guards (e.g. by
// checking `this.isHeapAllocated()` instead) would activate previously-dead
// behavior and is out of scope for the WHATWG-compliance fix; it needs
// its own testing and is tracked as a separate follow-up.
if (duped.content_type_allocated and duped.isHeapAllocated() and !include_content_type) {

// for now, we just want to avoid a use-after-free here
if (jsc.VirtualMachine.get().mimeType(duped.content_type)) |mime| {
duped.content_type = mime.value;
Expand Down
12 changes: 8 additions & 4 deletions src/bun.js/webcore/S3File.zig
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,10 @@ pub fn constructS3FileWithS3CredentialsAndOptions(
break :inner;
}
blob.content_type_was_set = true;
if (globalObject.bunVM().mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalObject.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -327,8 +329,10 @@ pub fn constructS3FileWithS3Credentials(
break :inner;
}
blob.content_type_was_set = true;
if (globalObject.bunVM().mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalObject.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down
14 changes: 8 additions & 6 deletions test/js/bun/util/inspect.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -587,24 +587,26 @@ describe("console.logging class displays names and extends", async () => {
});

it("console.log on a Blob shows name", () => {
// Per WHATWG File API, `text/plain` must NOT be canonicalized to
// `text/plain;charset=utf-8` — see #29257.
const blob = new Blob(["foo"], { type: "text/plain" });
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n type: "text/plain"\n}');
blob.name = "bar";
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "bar",\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "bar",\n type: "text/plain"\n}');
blob.name = "foobar";
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "foobar",\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "foobar",\n type: "text/plain"\n}');

const file = new File(["foo"], "bar.txt", { type: "text/plain" });
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "bar.txt",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "bar.txt",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
file.name = "foobar";
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "foobar",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "foobar",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
file.name = "";
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
});

Expand Down
22 changes: 12 additions & 10 deletions test/js/web/structured-clone-blob-file.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ describe("structuredClone with Blob and File", () => {
const cloned = structuredClone(blob);
expect(cloned).toBeInstanceOf(Blob);
expect(cloned.size).toBe(11);
expect(cloned.type).toBe("text/plain;charset=utf-8");
// Per WHATWG File API, `text/plain` must NOT be canonicalized to
// `text/plain;charset=utf-8` — see #29257.
expect(cloned.type).toBe("text/plain");

const originalText = await blob.text();
const clonedText = await cloned.text();
Expand Down Expand Up @@ -63,11 +65,11 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.first).toBeInstanceOf(Blob);
expect(cloned.first.size).toBe(5);
expect(cloned.first.type).toBe("text/plain;charset=utf-8");
expect(cloned.first.type).toBe("text/plain");

expect(cloned.second).toBeInstanceOf(Blob);
expect(cloned.second.size).toBe(5);
expect(cloned.second.type).toBe("text/html;charset=utf-8");
expect(cloned.second.type).toBe("text/html");
});

test("deeply nested Blob", () => {
Expand All @@ -92,7 +94,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned).toBeInstanceOf(File);
expect(cloned.name).toBe("test.txt");
expect(cloned.size).toBe(7);
expect(cloned.type).toBe("text/plain;charset=utf-8");
expect(cloned.type).toBe("text/plain");
expect(cloned.lastModified).toBe(1234567890000);
});

Expand All @@ -103,7 +105,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned).toBeInstanceOf(File);
expect(cloned.name).toBe("test.txt");
expect(cloned.size).toBe(7);
expect(cloned.type).toBe("text/plain;charset=utf-8");
expect(cloned.type).toBe("text/plain");
expect(cloned.lastModified).toBeGreaterThan(0);
});

Expand All @@ -125,7 +127,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned.file).toBeInstanceOf(File);
expect(cloned.file.name).toBe("test.txt");
expect(cloned.file.size).toBe(4);
expect(cloned.file.type).toBe("text/plain;charset=utf-8");
expect(cloned.file.type).toBe("text/plain");
});

test("multiple Files in object", () => {
Expand All @@ -136,11 +138,11 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.txt).toBeInstanceOf(File);
expect(cloned.txt.name).toBe("hello.txt");
expect(cloned.txt.type).toBe("text/plain;charset=utf-8");
expect(cloned.txt.type).toBe("text/plain");

expect(cloned.html).toBeInstanceOf(File);
expect(cloned.html.name).toBe("world.html");
expect(cloned.html.type).toBe("text/html;charset=utf-8");
expect(cloned.html.type).toBe("text/html");
});
});

Expand All @@ -153,12 +155,12 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.blob).toBeInstanceOf(Blob);
expect(cloned.blob.size).toBe(12);
expect(cloned.blob.type).toBe("text/plain;charset=utf-8");
expect(cloned.blob.type).toBe("text/plain");

expect(cloned.file).toBeInstanceOf(File);
expect(cloned.file.name).toBe("test.txt");
expect(cloned.file.size).toBe(12);
expect(cloned.file.type).toBe("text/plain;charset=utf-8");
expect(cloned.file.type).toBe("text/plain");
});

test("array with mixed Blob and File", () => {
Expand Down
80 changes: 80 additions & 0 deletions test/regression/issue/29257.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import { expect, test } from "bun:test";

// https://github.com/oven-sh/bun/issues/29257
//
// Bun was rewriting `text/plain` (and `text/css`, `text/html`,
// `application/json`, ...) to their charset-appended canonical forms
// (`text/plain;charset=utf-8`, etc.) when the user set the `type` on a
// Blob/File at construction time.
//
// Per the WHATWG File API (https://w3c.github.io/FileAPI/#blob), user
// agents must NOT append a charset parameter to the media type.

// A `type` given to the File constructor must be reported back unchanged.
test("new File(..., { type: 'text/plain' }).type is preserved verbatim", () => {
  expect(new File([], "empty.txt", { type: "text/plain" }).type).toBe("text/plain");
});

// A `type` given to the Blob constructor must be reported back unchanged.
test("new Blob([], { type: 'text/plain' }).type is preserved verbatim", () => {
  expect(new Blob([], { type: "text/plain" }).type).toBe("text/plain");
});

test("File/Blob type is preserved for other types Bun used to canonicalize", () => {
  // Compact.toMimeType() rewrites each of these into a charset-appended
  // form for HTTP responses. That rewrite must never show up in the
  // `type` property of a File or Blob.
  const formerlyCanonicalized = [
    "text/plain",
    "text/css",
    "text/html",
    "text/javascript",
    "application/json",
    "application/javascript",
  ];
  formerlyCanonicalized.forEach(type => {
    expect(new File([], "x", { type }).type).toBe(type);
    expect(new Blob([], { type }).type).toBe(type);
  });
});

test("File/Blob type with explicit charset is preserved verbatim", () => {
  // When the caller spells out a charset parameter, the exact same string
  // must come back — it must not be exchanged for another canonical form.
  const withCharset = "text/plain;charset=utf-8";

  expect(new File([], "x.txt", { type: withCharset }).type).toBe("text/plain;charset=utf-8");
  expect(new Blob([], { type: withCharset }).type).toBe("text/plain;charset=utf-8");
});

test("File/Blob type is lowercased (per WHATWG spec)", () => {
  // Lowercasing is mandated by the spec; charset canonicalization is not.
  const upperCaseFile = new File([], "x", { type: "TEXT/PLAIN" });
  const mixedCaseBlob = new Blob([], { type: "Text/Plain" });

  expect(upperCaseFile.type).toBe("text/plain");
  expect(mixedCaseBlob.type).toBe("text/plain");
});

test("uncommon MIME types still round-trip unchanged", () => {
  // A type missing from the interning table goes through the copyLowercase
  // path and must round-trip verbatim (lowercased) as well. File and Blob
  // are both exercised: they share the logic but are distinct call sites in
  // src/bun.js/webcore/Blob.zig.
  const custom = "application/x-custom-type";

  expect(new File([], "x", { type: custom }).type).toBe("application/x-custom-type");
  expect(new Blob([], { type: custom }).type).toBe("application/x-custom-type");
});

test("Bun.file(path, { type: 'text/plain' }).type is preserved verbatim", () => {
  // Exercises the `constructBunFile` path in Blob.zig.
  expect(Bun.file(import.meta.path, { type: "text/plain" }).type).toBe("text/plain");
});

test("Bun.s3.file(path, { type: 'text/plain' }).type is preserved verbatim", () => {
  // Exercises the S3File constructor paths in S3File.zig (same bug, other
  // file). Nothing is fetched over the network here; the test only reads
  // back the `type` that was passed in and expects it verbatim.
  expect(Bun.s3.file("test.txt", { type: "text/plain" }).type).toBe("text/plain");
});
Loading