Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 130 additions & 0 deletions turbopack/crates/turbo-tasks-backend/src/backend/cell_data.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
//! Unified cell storage.
//!
//! Every task cell — whether its value type is bincode-serializable, hash-only,
//! derivable, or non-reconstructible — lives in a single `CellData` map keyed
//! by [`CellId`]. The map's bincode impl decides at encode time which entries
//! to persist, by consulting the global [`ValueType`] registry: entries whose
//! value type has no bincode function are omitted from the serialized output.
//!
//! This replaces the older split of `persistent_cell_data` /
//! `transient_cell_data` fields which routed every cell write through an
//! `is_serializable_cell_content: bool` that threaded through ~14 call sites.
//! By keying the bincode decision on the value type itself, the routing
//! collapses to an unconditional insert.
//!
//! The inner value is stored as [`SharedReference`] rather than
//! [`TypedSharedReference`] because the `CellId` key already carries the
//! [`ValueTypeId`] — duplicating it in each map entry would waste memory.
//! Encode / decode recover the value type from the key.

use std::{
hash::BuildHasherDefault,
ops::{Deref, DerefMut},
};

use auto_hash_map::AutoMap;
use bincode::{
Decode, Encode,
error::{DecodeError, EncodeError},
};
use rustc_hash::FxHasher;
use turbo_bincode::{
TurboBincodeDecode, TurboBincodeDecoder, TurboBincodeEncode, TurboBincodeEncoder,
impl_decode_for_turbo_bincode_decode, impl_encode_for_turbo_bincode_encode,
};
use turbo_tasks::{CellId, SharedReference, ShrinkToFit, ValueTypePersistence, registry};

// Concrete map type behind `CellData`. FxHasher is used because `CellId` keys
// are small, non-attacker-controlled integers, where Fx is much faster than
// the DoS-resistant default. The trailing `1` is AutoMap's const parameter —
// presumably the inline capacity before spilling to a real hash map; confirm
// against the `auto_hash_map` crate.
type InnerMap = AutoMap<CellId, SharedReference, BuildHasherDefault<FxHasher>, 1>;

/// Map of cell id → shared reference, with bincode that filters out entries
/// whose value type has no bincode function.
///
/// Values are untyped [`SharedReference`]s: the [`CellId`] key already carries
/// the value-type id, and encode/decode recover the concrete type from it.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CellData(InnerMap);

impl CellData {
pub fn new() -> Self {
Self::default()
}
}

// Expose the inner map's read API (`get`, `iter`, `len`, …) directly on
// `CellData`, so call sites don't reach through the tuple field.
impl Deref for CellData {
    type Target = InnerMap;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

// Mutable counterpart of the `Deref` impl: forwards `insert`, `remove`, etc.
// to the inner map.
impl DerefMut for CellData {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

// Releases excess capacity in the inner map; delegated straight through so
// `CellData` participates in the backend's memory-reclamation sweeps.
impl ShrinkToFit for CellData {
    fn shrink_to_fit(&mut self) {
        self.0.shrink_to_fit();
    }
}

impl TurboBincodeEncode for CellData {
    /// Writes `count-of-bincodable-entries` followed by each bincodable
    /// `(CellId, encoded-value)`. Entries whose value type is `Derivable` or
    /// `SessionStateful` (no bincode) are skipped; they will be reconstructed
    /// on the next task execution after restore.
    fn encode(&self, encoder: &mut TurboBincodeEncoder) -> Result<(), EncodeError> {
        // Gather the bincodable entries up front so the count written first
        // agrees with the entries that follow by construction. This trades
        // the second registry sweep for one temporary Vec — a cold path
        // (snapshot time only), so either cost is negligible.
        let bincodable: Vec<_> = self
            .0
            .iter()
            .filter_map(|(cell_id, reference)| {
                match registry::get_value_type(cell_id.type_id).persistence {
                    ValueTypePersistence::Bincodable(encode_fn, _) => {
                        Some((cell_id, encode_fn, reference))
                    }
                    _ => None,
                }
            })
            .collect();
        bincodable.len().encode(encoder)?;
        for (cell_id, encode_fn, reference) in bincodable {
            cell_id.encode(encoder)?;
            encode_fn(&*reference.0, encoder)?;
        }
        Ok(())
    }
}

impl<Context> TurboBincodeDecode<Context> for CellData {
    /// Reads the count written by [`CellData::encode`] and decodes each
    /// `(CellId, SharedReference)` entry by looking up the value type's
    /// bincode decode function.
    ///
    /// Missing cell types — or cells whose value type isn't `Bincodable` —
    /// are a decode error: the encoder filters them out, so they should not
    /// appear on the wire.
    fn decode(decoder: &mut TurboBincodeDecoder) -> Result<Self, DecodeError> {
        // Upper bound on the capacity reserved from the wire-provided count.
        // A corrupt or truncated snapshot could otherwise claim a huge count
        // and force an enormous allocation before a single entry is
        // validated; beyond this bound the map simply grows on demand.
        const MAX_PREALLOC: usize = 1024;

        let count = usize::decode(decoder)?;
        let mut map = InnerMap::with_capacity_and_hasher(
            count.min(MAX_PREALLOC),
            BuildHasherDefault::default(),
        );
        for _ in 0..count {
            let cell = CellId::decode(decoder)?;
            let value_type = registry::get_value_type(cell.type_id);
            let ValueTypePersistence::Bincodable(_, decode_fn) = value_type.persistence else {
                return Err(DecodeError::OtherString(format!(
                    "cell of type {} has no bincode decoder",
                    value_type.ty.global_name
                )));
            };
            let reference = decode_fn(decoder)?;
            map.insert(cell, reference);
        }
        Ok(Self(map))
    }
}

// Bridge the Turbo-specific traits into plain bincode `Encode` / `Decode`
// impls for `CellData` — presumably thin delegating impls generated by
// `turbo_bincode`; confirm against that crate's macro definitions.
impl_encode_for_turbo_bincode_encode!(CellData);
impl_decode_for_turbo_bincode_decode!(CellData);
50 changes: 16 additions & 34 deletions turbopack/crates/turbo-tasks-backend/src/backend/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
mod cell_data;
mod counter_map;
mod operation;
mod storage;
Expand Down Expand Up @@ -857,7 +858,6 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
}

let ReadCellOptions {
is_serializable_cell_content,
tracking,
final_read_hint,
} = options;
Expand All @@ -878,17 +878,17 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
};

let content = if final_read_hint {
task.remove_cell_data(is_serializable_cell_content, cell)
task.remove_cell_data(&cell)
} else {
task.get_cell_data(is_serializable_cell_content, cell)
task.get_cell_data(&cell).cloned()
};
if let Some(content) = content {
if tracking.should_track(false) {
add_cell_dependency(task_id, task, reader, reader_task, cell, tracking.key());
}
return Ok(Ok(TypedCellContent(
cell.type_id,
CellContent(Some(content.reference)),
CellContent(Some(content)),
)));
}

Expand Down Expand Up @@ -2689,35 +2689,22 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
// Note: We do not mark the tasks as dirty here, as these tasks are unused or stale
// anyway and we want to avoid needless re-executions. When the cells become
// used again, they are invalidated from the update cell operation.
// Remove cell data for cells that no longer exist
let to_remove_persistent: Vec<_> = task
.iter_persistent_cell_data()
// Remove cell data for cells that no longer exist. Both
// bincode-able and non-bincode-able cells live in the single
// `cell_data` map; identifying stale entries is purely a CellId
// index check.
let to_remove: Vec<_> = task
.iter_cell_data()
.filter_map(|(cell, _)| {
cell_counters
.get(&cell.type_id)
.is_none_or(|start_index| cell.index >= *start_index)
.then_some(*cell)
})
.collect();

// Remove transient cell data for cells that no longer exist
let to_remove_transient: Vec<_> = task
.iter_transient_cell_data()
.filter_map(|(cell, _)| {
cell_counters
.get(&cell.type_id)
.is_none_or(|start_index| cell.index >= *start_index)
.then_some(*cell)
})
.collect();
removed_cell_data.reserve_exact(to_remove_persistent.len() + to_remove_transient.len());
for cell in to_remove_persistent {
if let Some(data) = task.remove_persistent_cell_data(&cell) {
removed_cell_data.push(data.into_untyped());
}
}
for cell in to_remove_transient {
if let Some(data) = task.remove_transient_cell_data(&cell) {
removed_cell_data.reserve_exact(to_remove.len());
for cell in to_remove {
if let Some(data) = task.remove_cell_data(&cell) {
removed_cell_data.push(data);
}
}
Expand Down Expand Up @@ -2904,14 +2891,13 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
&self,
task_id: TaskId,
cell: CellId,
options: ReadCellOptions,
_options: ReadCellOptions,
turbo_tasks: &dyn TurboTasksBackendApi<TurboTasksBackend<B>>,
) -> Result<TypedCellContent> {
let mut ctx = self.execute_context(turbo_tasks);
let task = ctx.task(task_id, TaskDataCategory::Data);
if let Some(content) = task.get_cell_data(options.is_serializable_cell_content, cell) {
debug_assert!(content.type_id == cell.type_id, "Cell type ID mismatch");
Ok(CellContent(Some(content.reference)).into_typed(cell.type_id))
if let Some(content) = task.get_cell_data(&cell).cloned() {
Ok(CellContent(Some(content)).into_typed(cell.type_id))
} else {
Ok(CellContent(None).into_typed(cell.type_id))
}
Expand Down Expand Up @@ -3041,7 +3027,6 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
&self,
task_id: TaskId,
cell: CellId,
is_serializable_cell_content: bool,
content: CellContent,
updated_key_hashes: Option<SmallVec<[u64; 2]>>,
content_hash: Option<CellHash>,
Expand All @@ -3052,7 +3037,6 @@ impl<B: BackingStorage> TurboTasksBackendInner<B> {
task_id,
cell,
content,
is_serializable_cell_content,
updated_key_hashes,
content_hash,
verification_mode,
Expand Down Expand Up @@ -3598,7 +3582,6 @@ impl<B: BackingStorage> Backend for TurboTasksBackend<B> {
&self,
task_id: TaskId,
cell: CellId,
is_serializable_cell_content: bool,
content: CellContent,
updated_key_hashes: Option<SmallVec<[u64; 2]>>,
content_hash: Option<CellHash>,
Expand All @@ -3608,7 +3591,6 @@ impl<B: BackingStorage> Backend for TurboTasksBackend<B> {
self.0.update_task_cell(
task_id,
cell,
is_serializable_cell_content,
content,
updated_key_hashes,
content_hash,
Expand Down
66 changes: 10 additions & 56 deletions turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ use tracing::info_span;
#[cfg(feature = "trace_prepare_tasks")]
use tracing::trace_span;
use turbo_tasks::{
CellId, DynTaskInputs, FxIndexMap, RawVc, TaskExecutionReason, TaskId, TaskPriority,
TurboTasksBackendApi, TurboTasksCallApi, TypedSharedReference, backend::CachedTaskType,
CellId, DynTaskInputs, FxIndexMap, RawVc, SharedReference, TaskExecutionReason, TaskId,
TaskPriority, TurboTasksBackendApi, TurboTasksCallApi, backend::CachedTaskType,
macro_helpers::NativeFunction,
};

Expand Down Expand Up @@ -1250,60 +1250,14 @@ pub trait TaskGuard: Debug + TaskStorageAccessors {
.unwrap_or_default();
dirty_count > clean_count
}
fn remove_cell_data(
&mut self,
is_serializable_cell_content: bool,
cell: CellId,
) -> Option<TypedSharedReference> {
if is_serializable_cell_content {
self.remove_persistent_cell_data(&cell)
} else {
self.remove_transient_cell_data(&cell)
.map(|sr| sr.into_typed(cell.type_id))
}
}
fn get_cell_data(
&self,
is_serializable_cell_content: bool,
cell: CellId,
) -> Option<TypedSharedReference> {
if is_serializable_cell_content {
self.get_persistent_cell_data(&cell).cloned()
} else {
self.get_transient_cell_data(&cell)
.map(|sr| sr.clone().into_typed(cell.type_id))
}
}
fn has_cell_data(&self, is_serializable_cell_content: bool, cell: CellId) -> bool {
if is_serializable_cell_content {
self.persistent_cell_data_contains(&cell)
} else {
self.transient_cell_data_contains(&cell)
}
}
/// Set cell data, returning the old value if any.
fn set_cell_data(
&mut self,
is_serializable_cell_content: bool,
cell: CellId,
value: TypedSharedReference,
) -> Option<TypedSharedReference> {
if is_serializable_cell_content {
self.insert_persistent_cell_data(cell, value)
} else {
self.insert_transient_cell_data(cell, value.into_untyped())
.map(|sr| sr.into_typed(cell.type_id))
}
}

/// Add new cell data (asserts that the cell is new and didn't exist before).
fn add_cell_data(
&mut self,
is_serializable_cell_content: bool,
cell: CellId,
value: TypedSharedReference,
) {
let old = self.set_cell_data(is_serializable_cell_content, cell, value);
/// Add new cell data. Panics if the cell already had a value.
///
/// The value type's serialization mode (including whether it's
/// bincode-able) is determined by `cell.type_id` via the `ValueType`
/// registry, not by a threaded bool — the `CellData` encoder filters
/// non-bincodable entries at snapshot time.
fn add_cell_data(&mut self, cell: CellId, value: SharedReference) {
    // `insert_cell_data` returns the previous value; `Some` here means the
    // caller violated the "cell is new" contract, which is a bug upstream.
    let old = self.insert_cell_data(cell, value);
    assert!(old.is_none(), "Cell data already exists for {cell:?}");
}

Expand Down
Loading
Loading