diff --git a/crates/ark/src/lsp/find_references.rs b/crates/ark/src/lsp/find_references.rs index 92f665187..fe50b7c92 100644 --- a/crates/ark/src/lsp/find_references.rs +++ b/crates/ark/src/lsp/find_references.rs @@ -1,16 +1,11 @@ -// -// references.rs -// -// Copyright (C) 2022-2026 Posit Software, PBC. All rights reserved. -// -// - use std::path::Path; +use aether_lsp_utils::proto::from_proto; +use aether_lsp_utils::proto::to_proto; use anyhow::anyhow; use stdext::result::ResultExt; +use stdext::unwrap; use stdext::unwrap::IntoResult; -use stdext::*; use tower_lsp::lsp_types::Location; use tower_lsp::lsp_types::Position; use tower_lsp::lsp_types::Range; @@ -32,6 +27,68 @@ use crate::treesitter::ExtractOperatorType; use crate::treesitter::NodeType; use crate::treesitter::NodeTypeExt; +pub(crate) fn find_references( + params: ReferenceParams, + state: &WorldState, +) -> anyhow::Result> { + let uri = params.text_document_position.text_document.uri; + let position = params.text_document_position.position; + let include_declaration = params.context.include_declaration; + + let document = state.get_document(&uri)?; + + let mut locations: Vec = Vec::new(); + + let index = document.semantic_index(); + let root = document.syntax()?; + + // Intra-file resolution is precise via the semantic index + let offset = from_proto::offset_from_position( + position, + &document.line_index, + document.position_encoding, + )?; + let pos = oak_ide::FilePosition { + file: uri.clone(), + offset, + }; + let intra = oak_ide::find_references(&index, &root, &pos, include_declaration); + + for file_range in intra { + let Some(range) = to_proto::range( + file_range.range, + &document.line_index, + document.position_encoding, + ) + .log_err() else { + continue; + }; + locations.push(Location::new(file_range.file, range)); + } + + if !locations.is_empty() { + // If the intra-file pass resolved cleanly, we have the precise answer. + // The textual cross-file walk can only add unrefined external matches + // which, for a locally-bound symbol, would just be unrelated noise. + return Ok(locations); + } + + // Truly free variable: no within-file binding. Fall back to a textual walk + // over workspace folders (including the current file: intra-file gave + // nothing, so no dedup needed). No semantic refinement yet, so unrelated + // same-name symbols in other files leak through. + if let Ok(context) = build_context(&uri, position, state) { + for folder in state.workspace.folders.iter() { + if let Ok(path) = folder.to_file_path() { + lsp::log_info!("searching references in folder {}", path.display()); + find_references_in_folder(&context, &path, &mut locations, state); + } + } + } + + Ok(locations) +} + #[derive(Debug, PartialEq)] enum ReferenceKind { Symbol, // a regular R symbol @@ -94,21 +151,19 @@ fn found_match(node: &Node, contents: &str, context: &Context) -> bool { if !node.is_identifier() { return false; } - - let symbol = node.node_to_string(contents).unwrap(); + let Ok(symbol) = node.node_to_string(contents) else { + return false; + }; if symbol != context.symbol { return false; } - context.kind == node_reference_kind(node) } fn build_context(uri: &Url, position: Position, state: &WorldState) -> anyhow::Result { - // Unwrap the URL. let path = uri.file_path()?; - // Figure out the identifier we're looking for. - let context = with_document(path.as_path(), state, |document| { + with_document(path.as_path(), state, |document| { let ast = &document.ast; let contents = document.contents.as_str(); let point = document.tree_sitter_point_from_lsp_position(position)?; @@ -118,23 +173,32 @@ fn build_context(uri: &Url, position: Position, state: &WorldState) -> anyhow::R .descendant_for_point_range(point, point) .into_result()?; - // Check and see if we got an identifier. If we didn't, we might need to use - // some heuristics to look around. Unfortunately, it seems like if you double-click - // to select an identifier, and then use Right Click -> Find All References, the - // position received by the LSP maps to the _end_ of the selected range, which - // is technically not part of the associated identifier's range. In addition, we - // can't just subtract 1 from the position column since that would then fail to - // resolve the correct identifier when the cursor is located at the start of the - // identifier. + // Zero-width range queries at an identifier boundary return the + // wrapping node rather than the identifier itself. If the cursor is at + // the trailing edge of a selection (column past the last character), + // retry one column back. If it's at the leading edge (column on the + // first character), retry one column forward. if !node.is_identifier() && point.column > 0 { - let point = Point::new(point.row, point.column - 1); - node = ast + let back = Point::new(point.row, point.column - 1); + if let Some(retry) = ast .root_node() - .descendant_for_point_range(point, point) - .into_result()?; + .descendant_for_point_range(back, back) + .filter(|n| n.is_identifier()) + { + node = retry; + } + } + if !node.is_identifier() { + let fwd = Point::new(point.row, point.column + 1); + if let Some(retry) = ast + .root_node() + .descendant_for_point_range(fwd, fwd) + .filter(|n| n.is_identifier()) + { + node = retry; + } } - // double check that we found an identifier if !node.is_identifier() { return Err(anyhow!( "couldn't find an identifier associated with point {point:?}", @@ -142,14 +206,10 @@ fn build_context(uri: &Url, position: Position, state: &WorldState) -> anyhow::R } let kind = node_reference_kind(&node); - - // return identifier text contents let symbol = node.node_to_string(contents)?; Ok(Context { kind, symbol }) - }); - - context + }) } fn find_references_in_folder( @@ -167,7 +227,6 @@ fn find_references_in_folder( continue; } - lsp::log_info!("found R file {}", path.display()); let result = with_document(path, state, |document| { find_references_in_document(context, path, document, locations) }); @@ -201,30 +260,3 @@ fn find_references_in_document( }); Ok(()) } - -pub(crate) fn find_references( - params: ReferenceParams, - state: &WorldState, -) -> anyhow::Result> { - // Create our locations vector. - let mut locations: Vec = Vec::new(); - - // Extract relevant parameters. - let uri = params.text_document_position.text_document.uri; - let position = params.text_document_position.position; - - // Figure out what we're looking for. - let context = unwrap!(build_context(&uri, position, state), Err(err) => { - return Err(anyhow!("Failed to find build context at position {position:?}: {err:?}")); - }); - - // Now, start searching through workspace folders for references to that identifier. - for folder in state.workspace.folders.iter() { - if let Ok(path) = folder.to_file_path() { - lsp::log_info!("searching references in folder {}", path.display()); - find_references_in_folder(&context, &path, &mut locations, state); - } - } - - Ok(locations) -} diff --git a/crates/ark/src/lsp/tests.rs b/crates/ark/src/lsp/tests.rs index a0e609947..a5c232fb0 100644 --- a/crates/ark/src/lsp/tests.rs +++ b/crates/ark/src/lsp/tests.rs @@ -1 +1,2 @@ +mod find_references; mod goto_definition; diff --git a/crates/ark/src/lsp/tests/find_references.rs b/crates/ark/src/lsp/tests/find_references.rs new file mode 100644 index 000000000..bacb2b030 --- /dev/null +++ b/crates/ark/src/lsp/tests/find_references.rs @@ -0,0 +1,288 @@ +use tower_lsp::lsp_types; +use tower_lsp::lsp_types::Location; +use tower_lsp::lsp_types::ReferenceContext; +use tower_lsp::lsp_types::ReferenceParams; + +use crate::lsp::document::Document; +use crate::lsp::find_references::find_references; +use crate::lsp::state::WorldState; +use crate::lsp::util::test_path; + +fn make_params( + uri: lsp_types::Url, + line: u32, + character: u32, + include_decl: bool, +) -> ReferenceParams { + ReferenceParams { + text_document_position: lsp_types::TextDocumentPositionParams { + text_document: lsp_types::TextDocumentIdentifier { uri }, + position: lsp_types::Position::new(line, character), + }, + context: ReferenceContext { + include_declaration: include_decl, + }, + work_done_progress_params: Default::default(), + partial_result_params: Default::default(), + } +} + +fn make_state(uri: &lsp_types::Url, doc: &Document) -> WorldState { + let mut state = WorldState::default(); + state.documents.insert(uri.clone(), doc.clone()); + state +} + +fn range(start: (u32, u32), end: (u32, u32)) -> lsp_types::Range { + lsp_types::Range { + start: lsp_types::Position::new(start.0, start.1), + end: lsp_types::Position::new(end.0, end.1), + } +} + +#[test] +fn test_intra_file_use_and_def() { + let code = "foo <- 1\nfoo + foo\n"; + let doc = Document::new(code, None); + let uri = test_path("test.R"); + let state = make_state(&uri, &doc); + + // Cursor on the first use at (1, 0) + let params = make_params(uri.clone(), 1, 0, true); + let locs = find_references(params, &state).unwrap(); + let expected: Vec = vec![ + Location::new(uri.clone(), range((0, 0), (0, 3))), + Location::new(uri.clone(), range((1, 0), (1, 3))), + Location::new(uri, range((1, 6), (1, 9))), + ]; + assert_eq!(locs, expected); +} + +#[test] +fn test_excludes_declaration() { + let code = "foo <- 1\nfoo\n"; + let doc = Document::new(code, None); + let uri = test_path("test.R"); + let state = make_state(&uri, &doc); + + let params = make_params(uri.clone(), 1, 0, false); + let locs = find_references(params, &state).unwrap(); + assert_eq!(locs, vec![Location::new(uri, range((1, 0), (1, 3)))]); +} + +#[test] +fn test_no_identifier_returns_empty() { + let code = "x <- 1\n"; + let doc = Document::new(code, None); + let uri = test_path("test.R"); + let state = make_state(&uri, &doc); + + // Cursor on the `<-` operator: no identifier, no refs. + let params = make_params(uri, 0, 3, true); + let locs = find_references(params, &state).unwrap(); + assert!(locs.is_empty()); +} + +#[test] +fn test_cursor_past_trailing_edge_resolves_intra_file() { + // Locally-bound symbol with cursor at the trailing edge of the use + // (typical for double-click then "find references"). The intra-file + // pass must catch this via `Identifier::classify`'s offset retry -- + // otherwise it would fall through to the textual walk and pollute + // the result with cross-file noise. + let code = "foo <- 1\nfoo\n"; + let doc = Document::new(code, None); + let uri = test_path("intra_trailing.R"); + let state = make_state(&uri, &doc); + + // Cursor at line 1, column 3: one past the last character of `foo` + // (which spans columns 0..3). + let params = make_params(uri.clone(), 1, 3, true); + let locs = find_references(params, &state).unwrap(); + + let expected: Vec = vec![ + Location::new(uri.clone(), range((0, 0), (0, 3))), + Location::new(uri, range((1, 0), (1, 3))), + ]; + assert_eq!(locs, expected); +} + +#[test] +fn test_cursor_past_trailing_edge_resolves_via_fallback() { + // Some LSP clients send the cursor one past the last character of a + // selected identifier (typical for double-click then "find references"). + // We exercise this through the cross-file fallback because `mutate` is + // unbound: the intra-file pass returns nothing, and `build_context`'s + // boundary retry pulls the cursor back one column onto the identifier + // before the textual walk runs. + let dir = tempfile::tempdir().unwrap(); + + let file1_path = dir.path().join("a.R"); + std::fs::write(&file1_path, "mutate\n").unwrap(); + let file1_uri = lsp_types::Url::from_file_path(&file1_path).unwrap(); + + let file2_path = dir.path().join("b.R"); + std::fs::write(&file2_path, "mutate\n").unwrap(); + let file2_uri = lsp_types::Url::from_file_path(&file2_path).unwrap(); + + let doc1 = Document::new("mutate\n", None); + let mut state = make_state(&file1_uri, &doc1); + state.workspace.folders = vec![lsp_types::Url::from_directory_path(dir.path()).unwrap()]; + + // Cursor at column 6: one past the last character of `mutate` (which + // spans columns 0..6). + let params = make_params(file1_uri.clone(), 0, 6, true); + let locs = find_references(params, &state).unwrap(); + + assert!(locs + .iter() + .any(|l| l.uri == file1_uri && l.range == range((0, 0), (0, 6)))); + assert!(locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((0, 0), (0, 6)))); +} + +#[test] +fn test_cross_file_walks_for_unbound_symbol() { + // file1 uses `mutate` but doesn't define it locally. file2 also has + // `mutate`. Cursor on file1's `mutate` -- intra-file returns nothing + // (unbound), so the cross-file textual walk fires and picks up + // file2's occurrences. This is not ideal behaviour, it's part of the legacy + // Ark fallback handling. + let dir = tempfile::tempdir().unwrap(); + + let file1_path = dir.path().join("a.R"); + std::fs::write(&file1_path, "mutate\n").unwrap(); + let file1_uri = lsp_types::Url::from_file_path(&file1_path).unwrap(); + + let file2_path = dir.path().join("b.R"); + std::fs::write(&file2_path, "mutate\nmutate + 1\n").unwrap(); + let file2_uri = lsp_types::Url::from_file_path(&file2_path).unwrap(); + + let doc1 = Document::new("mutate\n", None); + let mut state = make_state(&file1_uri, &doc1); + state.workspace.folders = vec![lsp_types::Url::from_directory_path(dir.path()).unwrap()]; + + let params = make_params(file1_uri.clone(), 0, 0, true); + let locs = find_references(params, &state).unwrap(); + + // The current file's occurrence (textual walk picks it up too, + // since intra-file resolution gave nothing to dedup against). + assert!(locs + .iter() + .any(|l| l.uri == file1_uri && l.range == range((0, 0), (0, 6)))); + // file2's two `mutate` occurrences. + assert!(locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((0, 0), (0, 6)))); + assert!(locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((1, 0), (1, 6)))); +} + +#[test] +fn test_cross_file_walk_respects_dollar_kind() { + // Cursor on `bar` in `foo$bar`. Intra-file gives nothing (member names + // aren't tracked by the semantic index), so the cross-file walk fires. + // The walk's `ReferenceKind` discrimination should match `bar` only + // when it's also the RHS of a `$`, not plain identifier occurrences. + let dir = tempfile::tempdir().unwrap(); + + let file1_path = dir.path().join("a.R"); + std::fs::write(&file1_path, "foo <- list()\nfoo$bar\n").unwrap(); + let file1_uri = lsp_types::Url::from_file_path(&file1_path).unwrap(); + + let file2_path = dir.path().join("b.R"); + std::fs::write(&file2_path, "foo$bar\nbar\n").unwrap(); + let file2_uri = lsp_types::Url::from_file_path(&file2_path).unwrap(); + + let doc1 = Document::new("foo <- list()\nfoo$bar\n", None); + let mut state = make_state(&file1_uri, &doc1); + state.workspace.folders = vec![lsp_types::Url::from_directory_path(dir.path()).unwrap()]; + + // Cursor on `bar` (RHS of `$`) in file1, line 1 col 4. + let params = make_params(file1_uri.clone(), 1, 4, true); + let locs = find_references(params, &state).unwrap(); + + // file1's own `foo$bar` `bar`. + assert!(locs + .iter() + .any(|l| l.uri == file1_uri && l.range == range((1, 4), (1, 7)))); + // file2's `foo$bar` `bar` (Dollar kind, matches). + assert!(locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((0, 4), (0, 7)))); + // file2's plain `bar` (Symbol kind, does NOT match). + assert!(!locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((1, 0), (1, 3)))); +} + +#[test] +fn test_fixme_cross_file_walk_on_namespace_access() { + // Cursor on `mutate` in `dplyr::mutate`. Intra-file gives nothing + // (NamespaceAccess), so the cross-file walk fires. The kind is + // `Symbol` (parent is a namespace expression, not an extract + // operator), so the walk matches other plain-identifier occurrences + // of `mutate` but not `foo$mutate`. This is the noisy fallback + // behavior; cross-file resolution will refine it later. + let dir = tempfile::tempdir().unwrap(); + + let file1_path = dir.path().join("a.R"); + std::fs::write(&file1_path, "dplyr::mutate\n").unwrap(); + let file1_uri = lsp_types::Url::from_file_path(&file1_path).unwrap(); + + let file2_path = dir.path().join("b.R"); + std::fs::write(&file2_path, "mutate\nfoo$mutate\n").unwrap(); + let file2_uri = lsp_types::Url::from_file_path(&file2_path).unwrap(); + + let doc1 = Document::new("dplyr::mutate\n", None); + let mut state = make_state(&file1_uri, &doc1); + state.workspace.folders = vec![lsp_types::Url::from_directory_path(dir.path()).unwrap()]; + + // Cursor on `mutate` in `dplyr::mutate`, line 0 col 7. + let params = make_params(file1_uri.clone(), 0, 7, true); + let locs = find_references(params, &state).unwrap(); + + // file1's own `mutate` in `dplyr::mutate`. + assert!(locs + .iter() + .any(|l| l.uri == file1_uri && l.range == range((0, 7), (0, 13)))); + // file2's plain `mutate` (Symbol kind, matches). + assert!(locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((0, 0), (0, 6)))); + // file2's `foo$mutate` `mutate` (Dollar kind, does NOT match). + assert!(!locs + .iter() + .any(|l| l.uri == file2_uri && l.range == range((1, 4), (1, 10)))); +} + +#[test] +fn test_intra_file_match_skips_cross_file_walk() { + // file1 defines + uses `foo` locally. file2 has unrelated `foo`s. + // Cursor on file1's def -- intra-file resolves cleanly, so the + // textual walk is skipped and file2's `foo`s don't pollute the + // result. + let dir = tempfile::tempdir().unwrap(); + + let file1_path = dir.path().join("a.R"); + std::fs::write(&file1_path, "foo <- 1\nfoo\n").unwrap(); + let file1_uri = lsp_types::Url::from_file_path(&file1_path).unwrap(); + + let file2_path = dir.path().join("b.R"); + std::fs::write(&file2_path, "foo <- 99\nfoo\n").unwrap(); + let file2_uri = lsp_types::Url::from_file_path(&file2_path).unwrap(); + + let doc1 = Document::new("foo <- 1\nfoo\n", None); + let mut state = make_state(&file1_uri, &doc1); + state.workspace.folders = vec![lsp_types::Url::from_directory_path(dir.path()).unwrap()]; + + let params = make_params(file1_uri.clone(), 0, 0, true); + let locs = find_references(params, &state).unwrap(); + + // Exactly the two file1 refs. None from file2. + assert_eq!(locs.len(), 2); + assert!(locs.iter().all(|l| l.uri == file1_uri)); + assert!(locs.iter().all(|l| l.uri != file2_uri)); +} diff --git a/crates/oak_ide/src/find_references.rs b/crates/oak_ide/src/find_references.rs new file mode 100644 index 000000000..5b4e3c17b --- /dev/null +++ b/crates/oak_ide/src/find_references.rs @@ -0,0 +1,118 @@ +use std::collections::HashSet; + +use aether_syntax::RSyntaxNode; +use oak_semantic::semantic_index::SemanticIndex; +use oak_semantic::DefinitionId; +use oak_semantic::ScopeId; + +use crate::FilePosition; +use crate::FileRange; +use crate::Identifier; + +/// Find all in-file references to the symbol at offset. +/// +/// The target is usually a single def. It grows when a use is reached by +/// conditional defs, or when a free variable picks up multiple visible +/// defs from an enclosing scope. +/// +/// Returns an empty vec for: +/// - Non-identifier cursors (no `Identifier::classify` match). +/// - `pkg::sym` namespace access. TODO(salsa). +/// - Truly free variables. These are handled by the ark-layer cross-file +/// fallback, TODO(salsa). +/// +/// TODO(salsa): switch the candidate pool to a textual scan so the same +/// `candidates -> refine via name resolution` path works for intra-file +/// and cross-file uniformly (r-a / ty's approach). +/// +/// TODO(places): `foo$bar` / `foo@bar` member accesses aren't tracked by +/// the semantic index, so cursor on a member name returns empty here. +pub fn find_references( + index: &SemanticIndex, + root: &RSyntaxNode, + position: &FilePosition, + include_declaration: bool, +) -> Vec { + let Some(ident) = Identifier::classify(index, root, position.offset) else { + return Vec::new(); + }; + + // Compute the cursor's reaching defs. Same operation we'll run on + // every candidate use below. + let (target_defs, name): (HashSet<(ScopeId, DefinitionId)>, String) = match ident { + Identifier::Definition { + scope_id, + def_id, + name, + .. + } => ( + std::iter::once((scope_id, def_id)).collect(), + name.to_string(), + ), + Identifier::Use { + scope_id, + use_id, + name, + .. + } => ( + index.reaching_definitions(scope_id, use_id).collect(), + name.to_string(), + ), + Identifier::NamespaceAccess { .. } => return Vec::new(), + }; + + if target_defs.is_empty() { + return Vec::new(); + } + + let mut results: Vec = Vec::new(); + + // Definition sites come straight from `target_defs`. + if include_declaration { + for &(scope_id, def_id) in &target_defs { + let def = &index.definitions(scope_id)[def_id]; + results.push(FileRange { + file: position.file.clone(), + range: def.range(), + }); + } + } + + // Walk all uses in every scope and check for each use of the same name + // whether its binding set intersects the target. + for scope_id in index.scope_ids() { + let symbols = index.symbols(scope_id); + let Some(symbol_id) = symbols.id(&name) else { + // The scope doesn't have any uses for that symbol + continue; + }; + + for (use_id, use_site) in index.uses(scope_id).iter() { + if use_site.symbol() != symbol_id { + continue; + } + let intersects = index + .reaching_definitions(scope_id, use_id) + .any(|d| target_defs.contains(&d)); + if !intersects { + continue; + } + results.push(FileRange { + file: position.file.clone(), + range: use_site.range(), + }); + } + } + + // Defs are emitted in `target_defs` (HashSet) iteration order, which + // is non-deterministic. Sort by start offset so callers see source + // order regardless of how we collected results. + // + // TODO(salsa): once cross-file resolution lands, this becomes + // file-then-offset: current file first, then other files in some + // stable order (probably alphabetical by URL), with source order + // preserved within each file. + results.sort_by_key(|r| r.range.start()); + + results +} diff --git a/crates/oak_ide/src/goto_definition.rs b/crates/oak_ide/src/goto_definition.rs index 5d864e1e7..0d8bd38eb 100644 --- a/crates/oak_ide/src/goto_definition.rs +++ b/crates/oak_ide/src/goto_definition.rs @@ -23,10 +23,7 @@ pub fn goto_definition( }; match ident { - Identifier::Definition { scope_id, def_id } => { - let def = &index.definitions(scope_id)[def_id]; - let name = index.symbols(scope_id).symbol(def.symbol()).name(); - + Identifier::Definition { def, name, .. } => { vec![NavigationTarget { file: position.file.clone(), name: name.to_string(), @@ -34,9 +31,12 @@ pub fn goto_definition( focus_range: def.range(), }] }, - Identifier::Use { scope_id, use_id } => { - resolve_use(index, &position.file, scope_id, use_id) - }, + Identifier::Use { + scope_id, + use_id, + name, + .. + } => resolve_use(index, &position.file, scope_id, use_id, name), Identifier::NamespaceAccess { .. } => Vec::new(), } } @@ -46,10 +46,8 @@ fn resolve_use( file: &Url, scope_id: ScopeId, use_id: UseId, + name: &str, ) -> Vec { - let symbol = index.uses(scope_id)[use_id].symbol(); - let symbol_name = index.symbols(scope_id).symbol(symbol).name().to_string(); - // `reaching_definitions` unions the local use-def map with the // enclosing-scope snapshot when `may_be_unbound` is true. That // covers the conditional-local-plus-outer-binding case where both @@ -63,7 +61,7 @@ fn resolve_use( let def = &index.definitions(scope)[def_id]; NavigationTarget { file: file.clone(), - name: symbol_name.clone(), + name: name.to_string(), full_range: def.range(), focus_range: def.range(), } diff --git a/crates/oak_ide/src/identifier.rs b/crates/oak_ide/src/identifier.rs index 6ce0d0de3..e8463a689 100644 --- a/crates/oak_ide/src/identifier.rs +++ b/crates/oak_ide/src/identifier.rs @@ -2,26 +2,35 @@ use aether_syntax::AnyRSelector; use aether_syntax::RNamespaceExpression; use aether_syntax::RSyntaxKind; use aether_syntax::RSyntaxNode; +use aether_syntax::RSyntaxToken; use biome_rowan::AstNode; use biome_rowan::TextRange; use biome_rowan::TextSize; +use biome_rowan::TokenAtOffset; use oak_core::syntax_ext::RIdentifierExt; use oak_core::syntax_ext::RStringValueExt; +use oak_semantic::semantic_index::Definition; use oak_semantic::semantic_index::SemanticIndex; +use oak_semantic::semantic_index::Use; use oak_semantic::DefinitionId; use oak_semantic::ScopeId; use oak_semantic::UseId; +/// Semantic identity of identifier at cursor. #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Identifier { +pub enum Identifier<'index> { Definition { scope_id: ScopeId, def_id: DefinitionId, + def: &'index Definition, + name: &'index str, }, Use { scope_id: ScopeId, use_id: UseId, + use_site: &'index Use, + name: &'index str, }, NamespaceAccess { @@ -33,20 +42,43 @@ pub enum Identifier { }, } -impl Identifier { - pub fn classify(index: &SemanticIndex, root: &RSyntaxNode, offset: TextSize) -> Option { - let (scope_id, _) = index.scope_at(offset); +impl<'index> Identifier<'index> { + pub fn classify( + index: &'index SemanticIndex, + root: &RSyntaxNode, + offset: TextSize, + ) -> Option { + let offset = snap_to_name_at_boundary(root, offset); + Self::classify_at(index, root, offset) + } + fn classify_at( + index: &'index SemanticIndex, + root: &RSyntaxNode, + offset: TextSize, + ) -> Option { // `Import` definitions have empty ranges (no physical text position, - // since `source()` injects them) so `contains()` skips them. If the - // cursor is on the `source` symbol, the offset classifies instead as a - // use of `source` via the check below. - if let Some((def_id, _def)) = index.definitions(scope_id).contains(offset) { - return Some(Identifier::Definition { scope_id, def_id }); + // since `source()` injects them) so `definition_at()` skips them. If + // the cursor is on the `source` symbol, the offset classifies instead + // as a use of `source` via the check below. + if let Some((scope_id, def_id, def)) = index.definition_at(offset) { + let name = index.symbols(scope_id).symbol(def.symbol()).name(); + return Some(Identifier::Definition { + scope_id, + def_id, + def, + name, + }); } - if let Some((use_id, _)) = index.uses(scope_id).contains(offset) { - return Some(Identifier::Use { scope_id, use_id }); + if let Some((scope_id, use_id, use_site)) = index.use_at(offset) { + let name = index.symbols(scope_id).symbol(use_site.symbol()).name(); + return Some(Identifier::Use { + scope_id, + use_id, + use_site, + name, + }); } if let Some(namespace_access) = classify_namespace(root, offset) { @@ -57,7 +89,7 @@ impl Identifier { } } -fn classify_namespace(root: &RSyntaxNode, offset: TextSize) -> Option { +fn classify_namespace<'index>(root: &RSyntaxNode, offset: TextSize) -> Option> { let token = root.token_at_offset(offset).right_biased()?; let ns_expr = token @@ -90,3 +122,50 @@ fn selector_name(selector: &AnyRSelector) -> Option { _ => None, } } + +/// At a token boundary, snap `offset` to the start of an adjacent name-like +/// token (regular identifier or string-form-def literal) if either neighbor +/// qualifies. Otherwise return `offset` unchanged. +/// +/// LSP clients commonly land the cursor at the trailing edge of an identifier +/// (`offset == range.end`), e.g. after a double-click followed by a request. A +/// half-open `TextRange::contains` would miss this, so we snap the offset +/// towards a name-like token. Mirrors rust-analyzer's `pick_best_token` and +/// ty's `Tokens::at_offset` + token-kind ranking. +/// +/// Two cases to handle: +/// - `Between(left, right)`: cursor sits exactly between two tokens; pick +/// the name-like neighbor. +/// - `Single(token)`: cursor lies within a token's full range but past its +/// trimmed range (common when the identifier has trailing trivia like a +/// newline). Snap to the trimmed start so `contains` matches. +fn snap_to_name_at_boundary(root: &RSyntaxNode, offset: TextSize) -> TextSize { + match root.token_at_offset(offset) { + TokenAtOffset::None => offset, + TokenAtOffset::Single(token) => { + if is_name_token(&token) { + token.text_trimmed_range().start() + } else { + offset + } + }, + TokenAtOffset::Between(left, right) => { + if is_name_token(&left) { + left.text_trimmed_range().start() + } else if is_name_token(&right) { + right.text_trimmed_range().start() + } else { + offset + } + }, + } +} + +/// Tokens that participate in a name binding: regular identifiers and +/// string literals (which are valid def sites via `"foo" <- 1`). +fn is_name_token(token: &RSyntaxToken) -> bool { + matches!( + token.kind(), + RSyntaxKind::IDENT | RSyntaxKind::R_STRING_LITERAL + ) +} diff --git a/crates/oak_ide/src/lib.rs b/crates/oak_ide/src/lib.rs index 52555c00f..b5602d047 100644 --- a/crates/oak_ide/src/lib.rs +++ b/crates/oak_ide/src/lib.rs @@ -1,23 +1,35 @@ +mod find_references; mod goto_definition; mod identifier; use biome_rowan::TextRange; use biome_rowan::TextSize; +pub use find_references::find_references; pub use goto_definition::goto_definition; pub use identifier::Identifier; use url::Url; -/// A cursor location in the workspace: a file and an offset into it. +/// A cursor location in the workspace: a file and a byte offset into it. #[derive(Debug, Clone, PartialEq, Eq)] pub struct FilePosition { pub file: Url, pub offset: TextSize, } +/// A span in the workspace: a file and a byte range within it. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileRange { + pub file: Url, + pub range: TextRange, +} + /// A location in source code that the editor can navigate to. /// -/// Shared result type for IDE features like goto-definition, find-references, -/// etc. The LSP layer converts these uniformly into `LocationLink`s. +/// Shared result type for IDE features like goto-definition, hover, +/// etc., where the editor distinguishes the full extent of the binding +/// from the focus (the name selection). The LSP layer converts this into +/// `LocationLink`. For features without that distinction (find-refs, +/// document-highlight), use `FileRange` directly. #[derive(Debug, Clone, PartialEq, Eq)] pub struct NavigationTarget { pub file: Url, diff --git a/crates/oak_ide/tests/find_references.rs b/crates/oak_ide/tests/find_references.rs new file mode 100644 index 000000000..03dd70c83 --- /dev/null +++ b/crates/oak_ide/tests/find_references.rs @@ -0,0 +1,334 @@ +use aether_parser::parse; +use aether_parser::RParserOptions; +use aether_syntax::RSyntaxNode; +use biome_rowan::TextRange; +use biome_rowan::TextSize; +use oak_ide::find_references; +use oak_ide::FilePosition; +use oak_ide::FileRange; +use oak_semantic::build_index; +use oak_semantic::semantic_index::SemanticIndex; +use oak_semantic::NoopImportsResolver; +use url::Url; + +fn parse_source(source: &str) -> (RSyntaxNode, SemanticIndex) { + let parsed = parse(source, RParserOptions::default()); + let root = parsed.syntax(); + let index = build_index(&parsed.tree(), NoopImportsResolver); + (root, index) +} + +fn text_range(start: u32, end: u32) -> TextRange { + TextRange::new(TextSize::from(start), TextSize::from(end)) +} + +fn file_url(name: &str) -> Url { + Url::parse(&format!("file:///project/R/{name}")).unwrap() +} + +fn offset(n: u32) -> TextSize { + TextSize::from(n) +} + +fn pos(file: &Url, n: u32) -> FilePosition { + FilePosition { + file: file.clone(), + offset: offset(n), + } +} + +fn ranges(refs: Vec) -> Vec { + refs.into_iter().map(|r| r.range).collect() +} + +// --- Local resolution --- + +#[test] +fn test_local_simple() { + // "x <- 1\nx\n" + // 0123456 78 + let source = "x <- 1\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on the use at offset 7 + let refs = ranges(find_references(&idx, &root, &pos(&file, 7), true)); + assert_eq!(refs, vec![text_range(0, 1), text_range(7, 8)]); +} + +#[test] +fn test_local_excludes_declaration() { + let source = "x <- 1\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // With include_declaration = false, the def at 0..1 is dropped + let refs = ranges(find_references(&idx, &root, &pos(&file, 7), false)); + assert_eq!(refs, vec![text_range(7, 8)]); +} + +#[test] +fn test_from_definition_site() { + // Cursor on the def `x` in `x <- 1` should still return all refs + let source = "x <- 1\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let refs = ranges(find_references(&idx, &root, &pos(&file, 0), true)); + assert_eq!(refs, vec![text_range(0, 1), text_range(7, 8)]); +} + +#[test] +fn test_shadowing_excludes_outer() { + // Outer `x` and inner `x` are different bindings; refs of inner + // shouldn't include outer (and vice versa). + // + // "x <- 1\nf <- function() {\n x <- 2\n x\n}\n" + // 0 7 26 35 + // outer def at 0..1, outer never used + // inner def at 28..29, inner use at 37..38 + let source = "x <- 1\nf <- function() {\n x <- 2\n x\n}\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on the inner use `x` (offset 37). Should return inner pair only. + let inner_def = source.find("x <- 2").unwrap() as u32; + let inner_use = source.rfind('x').unwrap() as u32; + let refs = ranges(find_references(&idx, &root, &pos(&file, inner_use), true)); + assert_eq!(refs, vec![ + text_range(inner_def, inner_def + 1), + text_range(inner_use, inner_use + 1), + ]); + + // Cursor on the outer def `x` (offset 0). Outer has no uses, so just + // the def itself. + let refs = ranges(find_references(&idx, &root, &pos(&file, 0), true)); + assert_eq!(refs, vec![text_range(0, 1)]); +} + +#[test] +fn test_free_variable_in_inner_scope() { + // Free `x` inside `f` resolves to file-scope `x`. Refs should include + // the file-scope def and the inner use. + let source = "x <- 1\nf <- function() {\n x\n}\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let inner_use = source.rfind('x').unwrap() as u32; + let refs = ranges(find_references(&idx, &root, &pos(&file, inner_use), true)); + assert_eq!(refs, vec![ + text_range(0, 1), + text_range(inner_use, inner_use + 1) + ]); +} + +#[test] +fn test_multiple_uses() { + let source = "x <- 1\nx + x + x\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let refs = ranges(find_references(&idx, &root, &pos(&file, 0), true)); + assert_eq!(refs, vec![ + text_range(0, 1), + text_range(7, 8), + text_range(11, 12), + text_range(15, 16), + ]); +} + +#[test] +fn test_parameter_refs() { + let source = "f <- function(x) {\n x + x\n}\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on parameter `x` at offset 14 + let refs = ranges(find_references(&idx, &root, &pos(&file, 14), true)); + assert_eq!(refs.len(), 3); + assert_eq!(refs[0], text_range(14, 15)); +} + +#[test] +fn test_reassignment_separates_refs() { + // After `x <- 2`, the use of `x` binds to the second def. Find-refs on + // the second def returns just the def itself and the use that follows. + let source = "x <- 1\nx <- 2\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on second def `x` at offset 7 + let refs = ranges(find_references(&idx, &root, &pos(&file, 7), true)); + // Use at offset 14 binds to the second def. The first def at offset 0 + // is killed by the second. + assert_eq!(refs, vec![text_range(7, 8), text_range(14, 15)]); + + // Cursor on first def at offset 0. The only ref is the def itself + // (its use is shadowed by the second def). + let refs = ranges(find_references(&idx, &root, &pos(&file, 0), true)); + assert_eq!(refs, vec![text_range(0, 1)]); +} + +#[test] +fn test_conditional_defs_seen_from_enclosing_scope() { + // Outer scope has conditional defs; inner function uses the name as + // a free variable. The use's target def set has BOTH outer defs + // (multiple-element set via the enclosing snapshot). Refs should + // include both defs and the inner use. + // + // "if (TRUE) x <- 1 else x <- 2\nf <- function() x\n" + // 0 10 16 22 45 + let source = "if (TRUE) x <- 1 else x <- 2\nf <- function() x\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let use_offset = source.rfind('x').unwrap() as u32; + let refs = ranges(find_references(&idx, &root, &pos(&file, use_offset), true)); + assert_eq!(refs, vec![ + text_range(10, 11), + text_range(22, 23), + text_range(use_offset, use_offset + 1), + ]); +} + +#[test] +fn test_super_assignment_targets_outer_scope() { + // `x <<- 2` inside `f` defines `x` in the enclosing scope. A use of + // `x` outside `f` sees BOTH the original def and the super-assigned + // def (the use-def map adds `<<-` without clearing). + // + // "x <- 1\nf <- function() x <<- 2\nx\n" + // 0 23 31 + let source = "x <- 1\nf <- function() x <<- 2\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let super_def = source.find("x <<-").unwrap() as u32; + let outer_use = source.rfind('x').unwrap() as u32; + let refs = ranges(find_references(&idx, &root, &pos(&file, outer_use), true)); + // Both file-scope defs (`x <- 1` and the super-assigned `x`) and the + // use are returned. + assert_eq!(refs, vec![ + text_range(0, 1), + text_range(super_def, super_def + 1), + text_range(outer_use, outer_use + 1), + ]); +} + +#[test] +fn test_conditional_binding_includes_both_defs() { + let source = "if (TRUE) x <- 1 else x <- 2\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let use_offset = source.rfind('x').unwrap() as u32; + let refs = ranges(find_references(&idx, &root, &pos(&file, use_offset), true)); + // Both conditional defs reach the use, so both are returned. + assert_eq!(refs, vec![ + text_range(10, 11), + text_range(22, 23), + text_range(use_offset, use_offset + 1), + ]); +} + +// --- Boundary cursor --- + +#[test] +fn test_cursor_at_trailing_edge_resolves() { + // LSP clients often send the cursor at offset == range.end after a + // double-click followed by a request. `Identifier::classify` retries + // one byte earlier so the use is still found. + // + // "x <- 1\nx\n" + // 0 7 8 + // use of `x` spans 7..8; cursor at offset 8 is the trailing edge. + let source = "x <- 1\nx\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let refs = ranges(find_references(&idx, &root, &pos(&file, 8), true)); + assert_eq!(refs, vec![text_range(0, 1), text_range(7, 8)]); +} + +// --- No resolution --- + +#[test] +fn test_no_identifier_at_offset() { + let source = "x <- 1\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on `<-` operator + let refs = find_references(&idx, &root, &pos(&file, 3), true); + assert!(refs.is_empty()); +} + +#[test] +fn test_unbound_use_returns_empty() { + // `foo` has no local def: classification gives Use but the target def + // set is empty. Within-file find-refs returns nothing; cross-file + // textual fallback in the ark wrapper handles this case. + let source = "foo\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let refs = find_references(&idx, &root, &pos(&file, 0), true); + assert!(refs.is_empty()); +} + +#[test] +fn test_fixme_namespace_access_returns_empty() { + let source = "dplyr::mutate\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on `mutate` - NamespaceAccess, returns empty. + let refs = find_references(&idx, &root, &pos(&file, 7), true); + assert!(refs.is_empty()); +} + +// --- Dollar/at member access (places TODO) --- + +#[test] +fn test_fixme_dollar_lhs_resolves_only_to_variable() { + // `foo` on the LHS of `$` is a real variable use. Find-refs should + // include the def and the LHS use, but NOT the RHS member name. + let source = "foo <- list()\nfoo$foo\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on `foo` LHS at offset 14 + let refs = ranges(find_references(&idx, &root, &pos(&file, 14), true)); + // Def at 0..3, LHS use at 14..17. RHS `foo` (offset 18..21) is a + // member name, not tracked by the semantic index. + assert_eq!(refs, vec![text_range(0, 3), text_range(14, 17)]); +} + +#[test] +fn test_fixme_dollar_rhs_returns_empty() { + // Cursor on `bar` in `foo$bar` - member names aren't variable refs. + let source = "foo <- list()\nfoo$bar\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + let refs = find_references(&idx, &root, &pos(&file, 18), true); + assert!(refs.is_empty()); +} + +#[test] +fn test_string_def_returns_quoted_range_for_def() { + // `"foo" <- 1` defines `foo`. The semantic index records the def's + // range as covering the whole `"foo"` token (5 chars). Find-refs on + // the use of `foo` returns the def at the quoted range and uses at + // the bare range -- semantically correct, just visually unusual. + let source = "\"foo\" <- 1\nfoo\n"; + let file = file_url("test.R"); + let (root, idx) = parse_source(source); + + // Cursor on the bare `foo` use at offset 11 + let refs = ranges(find_references(&idx, &root, &pos(&file, 11), true)); + assert_eq!(refs, vec![ + text_range(0, 5), // covers `"foo"` (the def site) + text_range(11, 14), // covers `foo` (the use site) + ]); +} diff --git a/crates/oak_semantic/src/semantic_index.rs b/crates/oak_semantic/src/semantic_index.rs index 39727264c..1b830fa17 100644 --- a/crates/oak_semantic/src/semantic_index.rs +++ b/crates/oak_semantic/src/semantic_index.rs @@ -161,7 +161,7 @@ impl SemanticIndex { // Start at the file scope let mut current = ScopeId::from(0); 'outer: loop { - for child_id in self.child_scopes(current) { + for child_id in self.child_scope_ids(current) { if self.scopes[child_id].range.contains(offset) { current = child_id; continue 'outer; @@ -189,21 +189,26 @@ impl SemanticIndex { } /// Iterate direct child scopes of `scope`. - pub fn child_scopes(&self, scope: ScopeId) -> ChildScopesIter<'_> { - let descendants = &self.scopes[scope].descendants; - ChildScopesIter { + pub fn child_scope_ids(&self, scope_id: ScopeId) -> ChildScopeIdsIter<'_> { + let descendants = &self.scopes[scope_id].descendants; + ChildScopeIdsIter { index: self, current: descendants.start, end: descendants.end, } } + /// Iterate over every scope in the file (source order, file scope first). + pub fn scope_ids(&self) -> impl Iterator + '_ { + self.scopes.iter().map(|(id, _)| id) + } + /// Walk from `scope` up through ancestors to the file root. Note that /// `scope` itself is included in the ancestors. - pub fn ancestor_scopes(&self, scope: ScopeId) -> AncestorsIter<'_> { - AncestorsIter { + pub fn ancestor_scope_ids(&self, scope_id: ScopeId) -> AncestorScopeIdsIter<'_> { + AncestorScopeIdsIter { index: self, - current: Some(scope), + current: Some(scope_id), } } @@ -216,7 +221,7 @@ impl SemanticIndex { name: &str, scope: ScopeId, ) -> Option<(ScopeId, DefinitionId, &Definition)> { - for ancestor in self.ancestor_scopes(scope) { + for ancestor in self.ancestor_scope_ids(scope) { let Some(symbol_id) = self.symbol_tables[ancestor].id(name) else { continue; }; @@ -247,7 +252,7 @@ impl SemanticIndex { None } - /// All definitions that reach the use at `(scope, use_id)`. + /// All definitions that could reach the use at `(scope, use_id)`. /// /// The local use-def bindings always count. The enclosing-scope snapshot /// also counts when `may_be_unbound` is true. That happens when the local @@ -259,27 +264,21 @@ impl SemanticIndex { /// same name. pub fn reaching_definitions( &self, - scope: ScopeId, + scope_id: ScopeId, use_id: UseId, ) -> impl Iterator + '_ { - let bindings = self.use_def_map(scope).bindings_at_use(use_id); - let local = bindings.definitions().iter().map(move |&d| (scope, d)); + let bindings = self.use_def_map(scope_id).bindings_at_use(use_id); + let local = bindings.definitions().iter().map(move |&d| (scope_id, d)); let enclosing = if bindings.may_be_unbound() { - let symbol = self.uses(scope)[use_id].symbol(); - self.enclosing_bindings(scope, symbol) + let symbol_id = self.uses(scope_id)[use_id].symbol(); + self.enclosing_bindings(scope_id, symbol_id) } else { None }; - let enclosing_iter = - enclosing - .into_iter() - .flat_map(|(enclosing_scope, enclosing_bindings)| { - enclosing_bindings - .definitions() - .iter() - .map(move |&def| (enclosing_scope, def)) - }); + let enclosing_iter = enclosing.into_iter().flat_map(|(scope, bindings)| { + bindings.definitions().iter().map(move |&def| (scope, def)) + }); local.chain(enclosing_iter) } @@ -665,13 +664,13 @@ impl SemanticCall { // --- Iterators --- -pub struct ChildScopesIter<'a> { +pub struct ChildScopeIdsIter<'a> { index: &'a SemanticIndex, current: ScopeId, end: ScopeId, } -impl<'a> Iterator for ChildScopesIter<'a> { +impl<'a> Iterator for ChildScopeIdsIter<'a> { type Item = ScopeId; fn next(&mut self) -> Option { @@ -685,12 +684,12 @@ impl<'a> Iterator for ChildScopesIter<'a> { } } -pub struct AncestorsIter<'a> { +pub struct AncestorScopeIdsIter<'a> { index: &'a SemanticIndex, current: Option, } -impl<'a> Iterator for AncestorsIter<'a> { +impl<'a> Iterator for AncestorScopeIdsIter<'a> { type Item = ScopeId; fn next(&mut self) -> Option { diff --git a/crates/oak_semantic/tests/builder.rs b/crates/oak_semantic/tests/builder.rs index a7133b52b..ea15780e9 100644 --- a/crates/oak_semantic/tests/builder.rs +++ b/crates/oak_semantic/tests/builder.rs @@ -273,7 +273,7 @@ fn test_child_scopes() { let index = index("f <- function(x) x\ng <- function(y) y"); let file = ScopeId::from(0); - let children: Vec<_> = index.child_scopes(file).collect(); + let children: Vec<_> = index.child_scope_ids(file).collect(); assert_eq!(children.len(), 2); } @@ -284,7 +284,7 @@ fn test_ancestor_scopes() { let outer = ScopeId::from(1); let file = ScopeId::from(0); - let ancestors: Vec<_> = index.ancestor_scopes(inner).collect(); + let ancestors: Vec<_> = index.ancestor_scope_ids(inner).collect(); assert_eq!(ancestors, vec![inner, outer, file]); }