Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/bun.js/VirtualMachine.zig
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,12 @@ pub fn mimeType(this: *VirtualMachine, str: []const u8) ?bun.http.MimeType {
return this.rareData().mimeTypeFromString(this.allocator, str);
}

/// Returns the interned MIME string matching `str` for use as a Blob/File
/// `type`, or `null` when the lookup finds no entry. The raw MIME string is
/// kept as-is (no charset substitution).
///
/// Forwards to the rare-data `mimeTypeInternedValue`, passing this VM's
/// allocator.
pub fn mimeTypeInternedValue(this: *VirtualMachine, str: []const u8) ?[]const u8 {
    const rare = this.rareData();
    return rare.mimeTypeInternedValue(this.allocator, str);
}

pub fn onAfterEventLoop(this: *VirtualMachine) void {
if (this.after_event_loop_callback) |cb| {
const ctx = this.after_event_loop_callback_ctx;
Expand Down
24 changes: 24 additions & 0 deletions src/bun.js/rare_data.zig
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,30 @@ pub fn mimeTypeFromString(this: *RareData, allocator: std.mem.Allocator, str: []
return null;
}

/// Resolve `str` against the interned MIME table and hand back the entry's
/// raw string, or `null` when the table has no matching entry.
///
/// This differs from `mimeTypeFromString` in that the value is returned
/// without swapping in a canonical charset-appended form: `text/plain`
/// stays `text/plain` rather than becoming `text/plain;charset=utf-8`.
/// Call it wherever the WHATWG File/Blob API needs the caller-provided
/// MIME type kept verbatim.
///
/// The table is created with `allocator` the first time it is needed and
/// then cached on `this`; an allocation failure is passed to
/// `bun.handleOom`. The returned slice is expected to point into the static
/// MIME data rather than into `allocator`-owned memory, so callers store it
/// without allocation tracking and must not free it.
pub fn mimeTypeInternedValue(this: *RareData, allocator: std.mem.Allocator, str: []const u8) ?[]const u8 {
    // Build the lookup table lazily; later calls reuse the cached instance.
    if (this.mime_types == null) {
        this.mime_types = bun.http.MimeType.createHashTable(allocator) catch |err| bun.handleOom(err);
    }

    // Guard-clause form: a miss ends the lookup immediately.
    const found = this.mime_types.?.get(str) orelse return null;
    return found.slice();
}

pub const HotMap = struct {
_map: bun.StringArrayHashMap(Entry),

Expand Down
77 changes: 63 additions & 14 deletions src/bun.js/webcore/Blob.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1829,8 +1829,18 @@ pub fn JSDOMFile__construct_(globalThis: *jsc.JSGlobalObject, callframe: *jsc.Ca
}
blob.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
blob.content_type = mime.value;
// WHATWG File API: the stored `type` must be the
// lowercased input verbatim — do NOT canonicalize into
// charset-appended forms like `text/plain;charset=utf-8`.
//
// `blob` may have come from `get()` → `dupe()`, which
// shallow-copies a parent's `content_type_allocated=true`
// flag along with an aliased pointer. Reset the flag
// before overwriting so we don't mark a static slice
// (or a future copyLowercase buffer) with stale state.
blob.content_type_allocated = false;
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -1929,8 +1939,15 @@ pub fn constructBunFile(
break :inner;
}
blob.content_type_was_set = true;
if (vm.mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
//
// `findOrCreateFileFromPath` can return a duped blob
// (standalone module graph path), which shallow-copies
// `content_type_allocated=true`. Reset before overwrite.
blob.content_type_allocated = false;
if (vm.mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -2323,11 +2340,14 @@ pub fn doWrite(this: *Blob, globalThis: *jsc.JSGlobalObject, callframe: *jsc.Cal
if (strings.isAllASCII(slice)) {
if (this.content_type_allocated) {
bun.default_allocator.free(this.content_type);
this.content_type_allocated = false;
}
this.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
this.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
this.content_type = interned;
} else {
const content_type_buf = bun.handleOom(bun.default_allocator.alloc(u8, slice.len));
this.content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -2666,11 +2686,14 @@ pub fn getWriter(
if (strings.isAllASCII(slice)) {
if (this.content_type_allocated) {
bun.default_allocator.free(this.content_type);
this.content_type_allocated = false;
}
this.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
this.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
this.content_type = interned;
} else {
const content_type_buf = bun.handleOom(bun.default_allocator.alloc(u8, slice.len));
this.content_type = strings.copyLowercase(slice, content_type_buf);
Expand Down Expand Up @@ -2843,7 +2866,13 @@ pub fn getSliceFrom(this: *Blob, globalThis: *jsc.JSGlobalObject, relativeStart:
blob.content_type = content_type;
}
blob.content_type_allocated = content_type_was_allocated;
blob.content_type_was_set = this.content_type_was_set or content_type_was_allocated;
// The slice's `content_type_was_set` is true if the parent already
// had one set *or* the caller of `.slice(start, end, type)` passed a
// non-empty `type` argument. Checking the argument slice directly
// (rather than the old `content_type_was_allocated` flag) is required
// for the interned path, which points at a static slice and leaves
// `content_type_was_allocated` false.
blob.content_type_was_set = this.content_type_was_set or content_type.len > 0;

var blob_ = Blob.new(blob);
return blob_.toJS(globalThis);
Expand Down Expand Up @@ -2926,8 +2955,10 @@ pub fn getSlice(
break :inner;
}

if (globalThis.bunVM().mimeType(slice)) |mime| {
content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
content_type = interned;
break :inner;
}

Expand Down Expand Up @@ -3354,8 +3385,16 @@ pub fn constructor(globalThis: *jsc.JSGlobalObject, callframe: *jsc.CallFrame) b
}
blob.content_type_was_set = true;

if (globalThis.bunVM().mimeType(slice)) |mime| {
blob.content_type = mime.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
//
// `blob` came from `get()` which may shallow-copy
// a parent's `content_type_allocated=true` via
// `dupe()`. Reset before overwrite so we do not
// mark a static slice as owned.
blob.content_type_allocated = false;
if (globalThis.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -3520,8 +3559,18 @@ pub fn dupeWithContentType(this: *const Blob, include_content_type: bool) Blob {
if (this.store != null) this.store.?.ref();
var duped = this.*;
duped.setNotHeapAllocated();
// NOTE: both branches below are currently unreachable — `setNotHeapAllocated`
// above zeroes the ref count so `duped.isHeapAllocated()` is always false.
// That means neither the use-after-free workaround (first branch) nor the
// `content_type` duplication for `include_content_type=true` (second branch)
// ever runs at runtime. Left in place because fixing both guards (e.g. by
// checking `this.isHeapAllocated()` instead) would activate previously-dead
// behavior and is out of scope for the WHATWG-compliance fix; it needs
// its own testing and is tracked as a separate follow-up. If/when the
// guards are revived, `jsc.VirtualMachine.get().mimeType(duped.content_type)`
// below must be swapped for `mimeTypeInternedValue` or it will silently
// re-introduce the charset canonicalization this PR removes elsewhere.
if (duped.content_type_allocated and duped.isHeapAllocated() and !include_content_type) {

// for now, we just want to avoid a use-after-free here
if (jsc.VirtualMachine.get().mimeType(duped.content_type)) |mime| {
duped.content_type = mime.value;
Expand Down
12 changes: 8 additions & 4 deletions src/bun.js/webcore/S3File.zig
Original file line number Diff line number Diff line change
Expand Up @@ -283,8 +283,10 @@ pub fn constructS3FileWithS3CredentialsAndOptions(
break :inner;
}
blob.content_type_was_set = true;
if (globalObject.bunVM().mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalObject.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down Expand Up @@ -327,8 +329,10 @@ pub fn constructS3FileWithS3Credentials(
break :inner;
}
blob.content_type_was_set = true;
if (globalObject.bunVM().mimeType(str.slice())) |entry| {
blob.content_type = entry.value;
// WHATWG File API: preserve the lowercased input
// verbatim; do not canonicalize to charset-appended forms.
if (globalObject.bunVM().mimeTypeInternedValue(slice)) |interned| {
blob.content_type = interned;
break :inner;
}
const content_type_buf = bun.handleOom(allocator.alloc(u8, slice.len));
Expand Down
4 changes: 3 additions & 1 deletion test/js/bun/s3/s3.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,8 +170,10 @@ describe.concurrent.skipIf(!r2Credentials.endpoint && !isCI)("Virtual Hosted-Sty
type: "text/plain",
})
.slice(10);
// Per WHATWG File API the user-supplied `text/plain` is preserved
// verbatim — no charset canonicalization. See #29257.
expect(Bun.inspect(file)).toBe(
'S3Ref ("bucket/filename.txt") {\n type: "text/plain;charset=utf-8",\n offset: 10,\n endpoint: "bucket.test.r2.cloudflarestorage.com",\n region: "auto",\n accessKeyId: "[REDACTED]",\n secretAccessKey: "[REDACTED]",\n partSize: 5242880,\n queueSize: 5,\n retry: 3\n}',
'S3Ref ("bucket/filename.txt") {\n type: "text/plain",\n offset: 10,\n endpoint: "bucket.test.r2.cloudflarestorage.com",\n region: "auto",\n accessKeyId: "[REDACTED]",\n secretAccessKey: "[REDACTED]",\n partSize: 5242880,\n queueSize: 5,\n retry: 3\n}',
);
}
});
Expand Down
14 changes: 8 additions & 6 deletions test/js/bun/util/inspect.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -587,24 +587,26 @@ describe("console.logging class displays names and extends", async () => {
});

it("console.log on a Blob shows name", () => {
// Per WHATWG File API, `text/plain` must NOT be canonicalized to
// `text/plain;charset=utf-8` — see #29257.
const blob = new Blob(["foo"], { type: "text/plain" });
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n type: "text/plain"\n}');
blob.name = "bar";
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "bar",\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "bar",\n type: "text/plain"\n}');
blob.name = "foobar";
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "foobar",\n type: "text/plain;charset=utf-8"\n}');
expect(Bun.inspect(blob)).toBe('Blob (3 bytes) {\n name: "foobar",\n type: "text/plain"\n}');

const file = new File(["foo"], "bar.txt", { type: "text/plain" });
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "bar.txt",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "bar.txt",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
file.name = "foobar";
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "foobar",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "foobar",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
file.name = "";
expect(Bun.inspect(file)).toBe(
`File (3 bytes) {\n name: "",\n type: "text/plain;charset=utf-8",\n lastModified: ${file.lastModified}\n}`,
`File (3 bytes) {\n name: "",\n type: "text/plain",\n lastModified: ${file.lastModified}\n}`,
);
});

Expand Down
22 changes: 12 additions & 10 deletions test/js/web/structured-clone-blob-file.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ describe("structuredClone with Blob and File", () => {
const cloned = structuredClone(blob);
expect(cloned).toBeInstanceOf(Blob);
expect(cloned.size).toBe(11);
expect(cloned.type).toBe("text/plain;charset=utf-8");
// Per WHATWG File API, `text/plain` must NOT be canonicalized to
// `text/plain;charset=utf-8` — see #29257.
expect(cloned.type).toBe("text/plain");

const originalText = await blob.text();
const clonedText = await cloned.text();
Expand Down Expand Up @@ -63,11 +65,11 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.first).toBeInstanceOf(Blob);
expect(cloned.first.size).toBe(5);
expect(cloned.first.type).toBe("text/plain;charset=utf-8");
expect(cloned.first.type).toBe("text/plain");

expect(cloned.second).toBeInstanceOf(Blob);
expect(cloned.second.size).toBe(5);
expect(cloned.second.type).toBe("text/html;charset=utf-8");
expect(cloned.second.type).toBe("text/html");
});

test("deeply nested Blob", () => {
Expand All @@ -92,7 +94,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned).toBeInstanceOf(File);
expect(cloned.name).toBe("test.txt");
expect(cloned.size).toBe(7);
expect(cloned.type).toBe("text/plain;charset=utf-8");
expect(cloned.type).toBe("text/plain");
expect(cloned.lastModified).toBe(1234567890000);
});

Expand All @@ -103,7 +105,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned).toBeInstanceOf(File);
expect(cloned.name).toBe("test.txt");
expect(cloned.size).toBe(7);
expect(cloned.type).toBe("text/plain;charset=utf-8");
expect(cloned.type).toBe("text/plain");
expect(cloned.lastModified).toBeGreaterThan(0);
});

Expand All @@ -125,7 +127,7 @@ describe("structuredClone with Blob and File", () => {
expect(cloned.file).toBeInstanceOf(File);
expect(cloned.file.name).toBe("test.txt");
expect(cloned.file.size).toBe(4);
expect(cloned.file.type).toBe("text/plain;charset=utf-8");
expect(cloned.file.type).toBe("text/plain");
});

test("multiple Files in object", () => {
Expand All @@ -136,11 +138,11 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.txt).toBeInstanceOf(File);
expect(cloned.txt.name).toBe("hello.txt");
expect(cloned.txt.type).toBe("text/plain;charset=utf-8");
expect(cloned.txt.type).toBe("text/plain");

expect(cloned.html).toBeInstanceOf(File);
expect(cloned.html.name).toBe("world.html");
expect(cloned.html.type).toBe("text/html;charset=utf-8");
expect(cloned.html.type).toBe("text/html");
});
});

Expand All @@ -153,12 +155,12 @@ describe("structuredClone with Blob and File", () => {

expect(cloned.blob).toBeInstanceOf(Blob);
expect(cloned.blob.size).toBe(12);
expect(cloned.blob.type).toBe("text/plain;charset=utf-8");
expect(cloned.blob.type).toBe("text/plain");

expect(cloned.file).toBeInstanceOf(File);
expect(cloned.file.name).toBe("test.txt");
expect(cloned.file.size).toBe(12);
expect(cloned.file.type).toBe("text/plain;charset=utf-8");
expect(cloned.file.type).toBe("text/plain");
});

test("array with mixed Blob and File", () => {
Expand Down
Loading
Loading