diff --git a/Cargo.lock b/Cargo.lock index 1752b75fc6d..2a3f7ac758f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12705,7 +12705,6 @@ dependencies = [ "move-asm", "move-binary-format", "move-core-types", - "move-vm-types", "parking_lot 0.12.5", "specializer", ] @@ -18246,6 +18245,7 @@ dependencies = [ "datatest-stable", "legacy-move-compiler", "mono-move-core", + "mono-move-gas", "move-asm", "move-binary-format", "move-bytecode-verifier", diff --git a/third_party/move/mono-move/global-context/Cargo.toml b/third_party/move/mono-move/global-context/Cargo.toml index 8df46eaf63d..d20ae6d7a71 100644 --- a/third_party/move/mono-move/global-context/Cargo.toml +++ b/third_party/move/mono-move/global-context/Cargo.toml @@ -18,14 +18,13 @@ dashmap = { workspace = true } fxhash = { workspace = true } mono-move-alloc = { workspace = true } mono-move-core = { workspace = true } -mono-move-gas = { workspace = true } move-binary-format = { workspace = true } move-core-types = { workspace = true } -move-vm-types = { workspace = true } parking_lot = { workspace = true } specializer = { workspace = true } [dev-dependencies] +mono-move-gas = { workspace = true } mono-move-runtime = { workspace = true } mono-move-testsuite = { workspace = true } move-asm = { workspace = true } diff --git a/third_party/move/mono-move/global-context/src/context/executable.rs b/third_party/move/mono-move/global-context/src/context/executable.rs index 42e7bae2431..58249aaf1f9 100644 --- a/third_party/move/mono-move/global-context/src/context/executable.rs +++ b/third_party/move/mono-move/global-context/src/context/executable.rs @@ -13,17 +13,12 @@ use crate::{ use anyhow::{anyhow, bail}; use fxhash::FxBuildHasher; use mono_move_alloc::{ExecutableArena, ExecutableArenaPtr, GlobalArenaPtr}; -use mono_move_core::{ - ExecutableId, FrameLayoutInfo, Function, MicroOpGasSchedule, SortedSafePointEntries, - FRAME_METADATA_SIZE, -}; -use mono_move_gas::GasInstrumentor; +use mono_move_core::{ExecutableId, FrameLayoutInfo, Function, SortedSafePointEntries}; use move_binary_format::{ access::ModuleAccess, file_format::{SignatureToken, StructDefinition, StructFieldInformation, StructHandleIndex}, CompiledModule, }; -use move_vm_types::loaded_data::struct_name_indexing::StructNameIndex; use parking_lot::Mutex; use std::collections::HashMap; @@ -100,6 +95,8 @@ impl Executable { } // TODO: this is likely to change. Placeholder. +// TODO: refactor to own CompiledModule instead of borrowing it (needed for ModuleIR cache). +// Split mutable state into a separate struct to avoid borrow conflicts with self.module. #[allow(dead_code)] pub struct ExecutableBuilder<'a, 'guard, 'ctx> { // TODO: support scripts. @@ -159,51 +156,23 @@ impl<'a, 'guard, 'ctx> ExecutableBuilder<'a, 'guard, 'ctx> { self.resolve_struct_def(struct_def)?; } - // Specializer pipeline. - // TODO: Factor this out into specializer. - - let struct_name_table: Vec = (0..self.module.struct_handles.len()) - .map(|i| StructNameIndex::new(i as u32)) - .collect(); - let module_ir = specializer::destack(self.module.clone(), &struct_name_table)?; - let func_id_map = specializer::lower::build_func_id_map(&module_ir.module); + let lowered = specializer::destack_and_lower_module(self.module.clone())?; // Indexed by definition index. Generic functions that are not // lowered leave their slot as None. - let mut func_ptrs = vec![None; module_ir.functions.len()]; - for (def_idx, func_ir) in module_ir.functions.iter().enumerate() { - let name = module_ir.module.identifier_at(func_ir.name_idx); - let name = self.guard.intern_identifier_internal(name); - - // TODO: support generic functions. - if let Some(ctx) = - specializer::lower::try_build_context(&module_ir.module, func_ir, &func_id_map)? - { - let micro_ops = specializer::lower::lower_function(func_ir, &ctx)?; - let micro_ops = GasInstrumentor::new(MicroOpGasSchedule).run(micro_ops); - - // Compute frame layout. - let args_size = ctx.home_slots[..func_ir.num_params as usize] - .iter() - .map(|s| s.size as usize) - .sum::(); - let args_and_locals_size = ctx.frame_data_size as usize; - let extended_frame_size = ctx - .call_sites - .iter() - .flat_map(|cs| cs.arg_write_slots.iter().chain(cs.ret_read_slots.iter())) - .map(|s| (s.offset + s.size) as usize) - .max() - .unwrap_or(args_and_locals_size + FRAME_METADATA_SIZE); - - // Allocate micro-ops and frame layout in the executable arena. - let code = self.arena.alloc_slice_fill_iter(micro_ops); + let mut func_ptrs = vec![None; lowered.functions.len()]; + for (def_idx, lowered_fn) in lowered.functions.into_iter().enumerate() { + if let Some(lf) = lowered_fn { + let name = self + .guard + .intern_identifier_internal(self.module.identifier_at(lf.name_idx)); + let code = self.arena.alloc_slice_fill_iter(lf.code); let func = Function { name, code, - args_size, - args_and_locals_size, - extended_frame_size, + args_size: lf.args_size, + args_and_locals_size: lf.args_and_locals_size, + extended_frame_size: lf.extended_frame_size, // TODO: hardcoded for now. zero_frame: false, frame_layout: FrameLayoutInfo::empty(&self.arena), diff --git a/third_party/move/mono-move/specializer/Cargo.toml b/third_party/move/mono-move/specializer/Cargo.toml index 090318e2e9b..55c6d31c004 100644 --- a/third_party/move/mono-move/specializer/Cargo.toml +++ b/third_party/move/mono-move/specializer/Cargo.toml @@ -8,6 +8,7 @@ publish = false anyhow = { workspace = true } clap = { workspace = true } mono-move-core = { workspace = true } +mono-move-gas = { workspace = true } move-asm = { workspace = true } move-binary-format = { workspace = true } move-bytecode-verifier = { workspace = true } diff --git a/third_party/move/mono-move/specializer/src/bin/mseir-compiler.rs b/third_party/move/mono-move/specializer/src/bin/mseir-compiler.rs index 05d4ec62cf6..408df155c9d 100644 --- a/third_party/move/mono-move/specializer/src/bin/mseir-compiler.rs +++ b/third_party/move/mono-move/specializer/src/bin/mseir-compiler.rs @@ -63,7 +63,7 @@ fn print_stats(module_ir: &specializer::stackless_exec_ir::ModuleIR) { let mod_name = module.identifier_at(self_handle.name); let mod_prefix = format!("0x{}::{}", addr.short_str_lossless(), mod_name); - for func_ir in &module_ir.functions { + for func_ir in module_ir.functions.iter().flatten() { let func_name = module.identifier_at(func_ir.name_idx); // Find the matching FunctionDefinition to get bytecode stats. diff --git a/third_party/move/mono-move/specializer/src/destack/optimize.rs b/third_party/move/mono-move/specializer/src/destack/optimize.rs index db8a6f9cf60..fc5f983f15c 100644 --- a/third_party/move/mono-move/specializer/src/destack/optimize.rs +++ b/third_party/move/mono-move/specializer/src/destack/optimize.rs @@ -17,7 +17,7 @@ use shared_dsa::{UnorderedMap, UnorderedSet}; /// Optimize all functions in a module IR. /// Pre: slot allocation complete — no `Vid`s remain. pub fn optimize_module(module_ir: &mut ModuleIR) { - for func in &mut module_ir.functions { + for func in module_ir.functions.iter_mut().flatten() { eliminate_identity_moves(func); copy_propagation(func); eliminate_identity_moves(func); diff --git a/third_party/move/mono-move/specializer/src/destack/translate.rs b/third_party/move/mono-move/specializer/src/destack/translate.rs index a8e521f547e..330d28f235a 100644 --- a/third_party/move/mono-move/specializer/src/destack/translate.rs +++ b/third_party/move/mono-move/specializer/src/destack/translate.rs @@ -44,44 +44,45 @@ pub fn translate_module( let functions = module .function_defs .iter() - .filter_map(|fdef| { - fdef.code.as_ref().map(|code| -> Result { - let handle = module.function_handle_at(fdef.function); - let name_idx = handle.name; - let handle_idx = fdef.function; - let param_sig_toks = &module.signature_at(handle.parameters).0; - let local_sig_toks = &module.signature_at(code.locals).0; - let num_params = param_sig_toks.len() as u16; - let num_locals = local_sig_toks.len() as u16; - let all_sig_toks: Vec = param_sig_toks - .iter() - .chain(local_sig_toks.iter()) - .cloned() - .collect(); - // [TODO]: we currently convert signature tokens into the runtime type representation, but - // this will change to use more efficient cached type representations. - let local_types = convert_sig_tokens(&module, &all_sig_toks, struct_name_table); + .map(|fdef| { + let Some(code) = fdef.code.as_ref() else { + return Ok(None); + }; + let handle = module.function_handle_at(fdef.function); + let name_idx = handle.name; + let handle_idx = fdef.function; + let param_sig_toks = &module.signature_at(handle.parameters).0; + let local_sig_toks = &module.signature_at(code.locals).0; + let num_params = param_sig_toks.len() as u16; + let num_locals = local_sig_toks.len() as u16; + let all_sig_toks: Vec = param_sig_toks + .iter() + .chain(local_sig_toks.iter()) + .cloned() + .collect(); + // [TODO]: we currently convert signature tokens into the runtime type representation, but + // this will change to use more efficient cached type representations. + let local_types = convert_sig_tokens(&module, &all_sig_toks, struct_name_table); - // Pass: Bytecode -> Intra-Block SSA -> Fusion - let converter = SsaConverter::new(local_types, struct_name_table); - let ssa = converter - .convert_function(&module, &code.code)? - .with_fusion_passes(); + // Pass: Bytecode -> Intra-Block SSA -> Fusion + let converter = SsaConverter::new(local_types, struct_name_table); + let ssa = converter + .convert_function(&module, &code.code)? + .with_fusion_passes(); - // Pass: Greedy Slot Allocation (consumes SSA, remaps in-place) - let alloc = super::slot_alloc::allocate_slots(ssa)?; + // Pass: Greedy Slot Allocation (consumes SSA, remaps in-place) + let alloc = super::slot_alloc::allocate_slots(ssa)?; - Ok(FunctionIR { - name_idx, - handle_idx, - num_params, - num_locals, - num_home_slots: alloc.num_home_slots, - num_xfer_slots: alloc.num_xfer_slots, - blocks: alloc.blocks, - home_slot_types: alloc.home_slot_types, - }) - }) + Ok(Some(FunctionIR { + name_idx, + handle_idx, + num_params, + num_locals, + num_home_slots: alloc.num_home_slots, + num_xfer_slots: alloc.num_xfer_slots, + blocks: alloc.blocks, + home_slot_types: alloc.home_slot_types, + })) }) .collect::>>()?; diff --git a/third_party/move/mono-move/specializer/src/lib.rs b/third_party/move/mono-move/specializer/src/lib.rs index ecea28aaada..f6863fd928f 100644 --- a/third_party/move/mono-move/specializer/src/lib.rs +++ b/third_party/move/mono-move/specializer/src/lib.rs @@ -5,5 +5,8 @@ pub mod stackless_exec_ir; pub mod destack; pub mod lower; +pub mod pipeline; pub use destack::destack; +pub use lower::{LoweredFunction, LoweredModule}; +pub use pipeline::destack_and_lower_module; diff --git a/third_party/move/mono-move/specializer/src/lower/mod.rs b/third_party/move/mono-move/specializer/src/lower/mod.rs index dd814ded435..ed70e477efa 100644 --- a/third_party/move/mono-move/specializer/src/lower/mod.rs +++ b/third_party/move/mono-move/specializer/src/lower/mod.rs @@ -9,4 +9,28 @@ mod translate; pub use context::{build_func_id_map, try_build_context, LoweringContext, SlotInfo}; pub use display::MicroOpsFunctionDisplay; +use mono_move_core::MicroOp; +use move_binary_format::file_format::IdentifierIndex; pub use translate::lower_function; + +/// Result of lowering a single non-generic function. +// TODO: unify with `mono_move_core::Function` once the specializer has access to arenas. +pub struct LoweredFunction { + /// Function name, as an index into the module's identifier pool. + pub name_idx: IdentifierIndex, + /// Gas-instrumented micro-ops. + pub code: Vec, + /// Size of the argument region at the start of the frame. + pub args_size: usize, + /// Size of the arguments + locals region. + pub args_and_locals_size: usize, + /// Total frame footprint (args + locals + metadata + callee slots). + pub extended_frame_size: usize, +} + +/// Result of lowering an entire module. +pub struct LoweredModule { + /// Per-definition-index results. `None` for functions that were + /// not lowered (e.g., generic functions). + pub functions: Vec>, +} diff --git a/third_party/move/mono-move/specializer/src/pipeline.rs b/third_party/move/mono-move/specializer/src/pipeline.rs new file mode 100644 index 00000000000..bb53e230221 --- /dev/null +++ b/third_party/move/mono-move/specializer/src/pipeline.rs @@ -0,0 +1,65 @@ +// Copyright (c) Aptos Foundation +// Licensed pursuant to the Innovation-Enabling Source Code License, available at https://github.com/aptos-labs/aptos-core/blob/main/LICENSE + +//! High-level pipeline: destack → lower → gas instrument → frame layout. + +use crate::{ + destack, + lower::{build_func_id_map, lower_function, try_build_context, LoweredFunction, LoweredModule}, +}; +use anyhow::Result; +use mono_move_core::{MicroOpGasSchedule, FRAME_METADATA_SIZE}; +use mono_move_gas::GasInstrumentor; +use move_binary_format::CompiledModule; +use move_vm_types::loaded_data::struct_name_indexing::StructNameIndex; + +/// Run the full specializer pipeline: destack → lower → gas instrument → frame layout. +// TODO: extend with additional passes (e.g., monomorphization, GC safe-point layout). +pub fn destack_and_lower_module(module: CompiledModule) -> Result { + // Identity mapping: valid when loading a single module in isolation. + let struct_name_table: Vec = (0..module.struct_handles.len()) + .map(|i| StructNameIndex::new(i as u32)) + .collect(); + let module_ir = destack(module, &struct_name_table)?; + let func_id_map = build_func_id_map(&module_ir.module); + + let mut functions = Vec::with_capacity(module_ir.functions.len()); + for func_ir in &module_ir.functions { + let Some(func_ir) = func_ir else { + functions.push(None); + continue; + }; + let lowered = match try_build_context(&module_ir.module, func_ir, &func_id_map)? { + Some(ctx) => { + let micro_ops = lower_function(func_ir, &ctx)?; + let code = GasInstrumentor::new(MicroOpGasSchedule).run(micro_ops); + + let args_size = ctx.home_slots[..func_ir.num_params as usize] + .iter() + .map(|s| s.size as usize) + .sum::(); + let args_and_locals_size = ctx.frame_data_size as usize; + let extended_frame_size = ctx + .call_sites + .iter() + .flat_map(|cs| cs.arg_write_slots.iter().chain(cs.ret_read_slots.iter())) + .map(|s| (s.offset + s.size) as usize) + .max() + // Leaf function: no callee slots needed beyond metadata. + .unwrap_or(args_and_locals_size + FRAME_METADATA_SIZE); + + Some(LoweredFunction { + name_idx: func_ir.name_idx, + code, + args_size, + args_and_locals_size, + extended_frame_size, + }) + }, + None => None, + }; + functions.push(lowered); + } + + Ok(LoweredModule { functions }) +} diff --git a/third_party/move/mono-move/specializer/src/stackless_exec_ir/display.rs b/third_party/move/mono-move/specializer/src/stackless_exec_ir/display.rs index 6a597a3782a..ad5526c4153 100644 --- a/third_party/move/mono-move/specializer/src/stackless_exec_ir/display.rs +++ b/third_party/move/mono-move/specializer/src/stackless_exec_ir/display.rs @@ -31,7 +31,7 @@ impl fmt::Display for ModuleIR { name )?; - for func_ir in &self.functions { + for func_ir in self.functions.iter().flatten() { writeln!(f)?; display_function(f, module, func_ir)?; } diff --git a/third_party/move/mono-move/specializer/src/stackless_exec_ir/mod.rs b/third_party/move/mono-move/specializer/src/stackless_exec_ir/mod.rs index cba5e633771..0327c6c3941 100644 --- a/third_party/move/mono-move/specializer/src/stackless_exec_ir/mod.rs +++ b/third_party/move/mono-move/specializer/src/stackless_exec_ir/mod.rs @@ -307,6 +307,6 @@ impl FunctionIR { pub struct ModuleIR { /// The original compiled module for resolving pool indices. pub module: CompiledModule, - /// One per non-native FunctionDefinition. - pub functions: Vec, + /// Indexed by `FunctionDefinitionIndex`. `None` for native functions. + pub functions: Vec>, } diff --git a/third_party/move/mono-move/specializer/tests/testsuite.rs b/third_party/move/mono-move/specializer/tests/testsuite.rs index 8c3d3463539..f5e923619e8 100644 --- a/third_party/move/mono-move/specializer/tests/testsuite.rs +++ b/third_party/move/mono-move/specializer/tests/testsuite.rs @@ -34,7 +34,7 @@ fn format_micro_ops(module_ir: &ModuleIR) -> String { mod_name )); - for func_ir in &module_ir.functions { + for func_ir in module_ir.functions.iter().flatten() { let func_name = module.identifier_at(func_ir.name_idx).to_string(); match try_build_context(module, func_ir, &func_id_map) { Err(e) => {