Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
- Aligned `core_lib::math::u256` user docs with unified LE stack limb ordering (`a0/b0` on top), removing conflicting `[b7..b0, a7..a0]` notation ([#3066](https://github.com/0xMiden/miden-vm/pull/3066)).
- Made all internal `core::math` procedures natively little-endian ([#3084](https://github.com/0xMiden/miden-vm/pull/3084)).
- [BREAKING] Updated the Miden crypto stack to `miden-crypto` v0.25, and switched SMT leaf hashing to use Poseidon2 domain separation so masm-side leaf digests match `SmtLeaf::hash()` ([#3095](https://github.com/0xMiden/miden-vm/pull/3095)).
- Improved performance of auxiliary trace generation ([#3119](https://github.com/0xMiden/miden-vm/pull/3119)).

## 0.22.3 (2026-05-01)

Expand Down
22 changes: 21 additions & 1 deletion air/src/lookup/aux_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ use super::{Challenges, LookupAir, ProverLookupBuilder, prover::build_lookup_fra
/// [`crate::trace::main_trace::ROW_MAJOR_CHUNK_SIZE`] so we stay consistent with the
/// repo's row-major tuning: ~512 rows × avg shape ~3 ≈ 1.5 K fractions per chunk and
/// ~24 KiB of chunk-local scratch, comfortably L1-resident on any modern x86/arm core.
const ACCUMULATE_ROWS_PER_CHUNK: usize = 512;
pub(crate) const ACCUMULATE_ROWS_PER_CHUNK: usize = 512;

// TOP-LEVEL DRIVER
// ================================================================================================
Expand Down Expand Up @@ -174,6 +174,26 @@ where
}
}

/// Assembles a `LookupFractions` out of buffers that were filled elsewhere
/// (e.g. per-chunk results concatenated after parallel row processing).
///
/// `shape` gives the per-column fraction shape, `num_rows` the total row count,
/// and `fractions` / `counts` the pre-populated row-major buffers.
#[cfg(feature = "concurrent")]
pub(super) fn from_parts(
    shape: Vec<usize>,
    num_rows: usize,
    fractions: Vec<(F, EF)>,
    counts: Vec<usize>,
) -> Self {
    // There must be exactly one count slot per (row, column) pair.
    let num_cols = shape.len();
    debug_assert_eq!(counts.len(), num_cols * num_rows);

    Self {
        shape,
        num_rows,
        num_cols,
        fractions,
        counts,
    }
}

/// Number of permutation columns.
pub fn num_columns(&self) -> usize {
self.num_cols
Expand Down
69 changes: 52 additions & 17 deletions air/src/lookup/prover.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,34 +156,69 @@ pub fn build_lookup_fractions<A, F, EF>(
challenges: &Challenges<EF>,
) -> LookupFractions<F, EF>
where
F: Field,
EF: ExtensionField<F>,
F: Field + Sync,
EF: ExtensionField<F> + Sync,
Comment thread
Nashtare marked this conversation as resolved.
Outdated
A: Sync,
for<'a> A: LookupAir<ProverLookupBuilder<'a, F, EF>>,
{
let num_rows = main_trace.height();
let width = main_trace.width();
let flat: &[F] = main_trace.values.borrow();

let shape = air.column_shape().to_vec();
let mut fractions = LookupFractions::from_shape(shape, num_rows);

// Per-row periodic slice, filled in place each row — no per-iteration allocation.
let mut periodic_row: Vec<F> = vec![F::ZERO; periodic_columns.len()];

for r in 0..num_rows {
let curr = &flat[r * width..(r + 1) * width];
let nxt_idx = (r + 1) % num_rows;
let next = &flat[nxt_idx * width..(nxt_idx + 1) * width];
let window = RowWindow::from_two_rows(curr, next);
// Fill one chunk of rows into a fresh per-chunk `LookupFractions`.
let process_chunk = |row_lo: usize, row_hi: usize| -> LookupFractions<F, EF> {
let mut chunk = LookupFractions::from_shape(shape.clone(), row_hi - row_lo);
let mut periodic_row: Vec<F> = vec![F::ZERO; periodic_columns.len()];
for r in row_lo..row_hi {
let curr = &flat[r * width..(r + 1) * width];
let nxt_idx = (r + 1) % num_rows;
let next = &flat[nxt_idx * width..(nxt_idx + 1) * width];
let window = RowWindow::from_two_rows(curr, next);
for (i, col) in periodic_columns.iter().enumerate() {
periodic_row[i] = col[r % col.len()];
}
let mut lb =
ProverLookupBuilder::new(window, &periodic_row, challenges, air, &mut chunk);
air.eval(&mut lb);
}
chunk
};

for (i, col) in periodic_columns.iter().enumerate() {
periodic_row[i] = col[r % col.len()];
#[cfg(not(feature = "concurrent"))]
let fractions = process_chunk(0, num_rows);

// Concatenation after parallel processing preserves global row order because chunks
// tile `0..num_rows` contiguously and each chunk's `fractions` / `counts` are
// row-major within the chunk.
#[cfg(feature = "concurrent")]
let fractions = {
use miden_crypto::parallel::*;

let num_cols = shape.len();
let rows_per_chunk = crate::lookup::aux_builder::ACCUMULATE_ROWS_PER_CHUNK;
let num_chunks = num_rows.div_ceil(rows_per_chunk);

let chunks: Vec<LookupFractions<F, EF>> = (0..num_chunks)
.into_par_iter()
.map(|chunk_idx| {
let row_lo = chunk_idx * rows_per_chunk;
let row_hi = (row_lo + rows_per_chunk).min(num_rows);
process_chunk(row_lo, row_hi)
})
.collect();

let total_fractions: usize = chunks.iter().map(|c| c.fractions.len()).sum();
let mut fractions_vec: Vec<(F, EF)> = Vec::with_capacity(total_fractions);
let mut counts_vec: Vec<usize> = Vec::with_capacity(num_rows * num_cols);
for chunk in chunks {
fractions_vec.extend(chunk.fractions);
counts_vec.extend(chunk.counts);
}

let mut lb =
ProverLookupBuilder::new(window, &periodic_row, challenges, air, &mut fractions);
air.eval(&mut lb);
}
LookupFractions::from_parts(shape, num_rows, fractions_vec, counts_vec)
};

debug_assert_eq!(
fractions.counts().len(),
Expand Down
2 changes: 1 addition & 1 deletion processor/src/trace/chiplets/memory/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ impl Memory {
};

let (delta_hi, delta_lo) = split_u32_into_u16(delta);
range.add_range_checks(row, &[delta_lo, delta_hi]);
range.add_range_checks(&[delta_lo, delta_hi]);

// word index decomposition range checks: prove addr is a valid 32-bit value
// by checking w0, w1, and 4*w1 are all in [0, 2^16).
Expand Down
4 changes: 2 additions & 2 deletions processor/src/trace/parallel/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,8 @@ fn initialize_range_checker(
let mut range_checker = RangeChecker::new();

// Add all u32 range checks recorded during execution
for (clk, values) in range_checker_replay.into_iter() {
range_checker.add_range_checks(clk, &values);
for (_clk, values) in range_checker_replay.into_iter() {
range_checker.add_range_checks(&values);
}

// Add all memory-related range checks
Expand Down
20 changes: 2 additions & 18 deletions processor/src/trace/range/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ use core::mem::MaybeUninit;

use miden_air::trace::RANGE_CHECK_TRACE_WIDTH;

use super::RowIndex;
use crate::{
Felt, ZERO,
utils::{assume_init_vec, uninit_vector},
Expand Down Expand Up @@ -50,10 +49,6 @@ pub struct RangeCheckTrace {
pub struct RangeChecker {
/// Tracks lookup count for each checked value.
lookups: BTreeMap<u16, usize>,
/// Range check lookups performed by all user operations, grouped and sorted by clock cycle.
/// Each cycle is mapped to a vector of the range checks requested at that cycle, which can
/// come from the stack, memory, or both.
cycle_lookups: BTreeMap<RowIndex, Vec<u16>>,
}

impl RangeChecker {
Expand All @@ -66,7 +61,7 @@ impl RangeChecker {
// range checker table are initialized. this simplifies trace table building later on.
lookups.insert(0, 0);
lookups.insert(u16::MAX, 0);
Self { lookups, cycle_lookups: BTreeMap::new() }
Self { lookups }
}

// TRACE MUTATORS
Expand All @@ -78,7 +73,7 @@ impl RangeChecker {
}

/// Adds range check lookups from the stack or memory to this [RangeChecker] instance.
pub fn add_range_checks(&mut self, clk: RowIndex, values: &[u16]) {
pub fn add_range_checks(&mut self, values: &[u16]) {
// range checks requests only come from memory or from the stack, which always request 2 or
// 4 lookups respectively.
debug_assert!(values.len() == 2 || values.len() == 4);
Expand All @@ -87,17 +82,6 @@ impl RangeChecker {
// add the specified value to the trace of this range checker's lookups.
self.add_value(*value);
}

// track the range check requests at each cycle
// TODO: optimize this to use a struct instead of vectors, e.g. (#2793):
// struct MemoryLookupValues {
// num_lookups: u8,
// lookup_values: [u16; 6],
// }
self.cycle_lookups
.entry(clk)
.and_modify(|entry| entry.extend_from_slice(values))
.or_insert_with(|| values.to_vec());
}

// EXECUTION TRACE GENERATION (INTERNAL)
Expand Down
Loading