Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
86605a4
Draft data structures
lionel- Jul 8, 2025
adc048d
Inspect library paths on LSP startup
lionel- Jul 8, 2025
1f8b9ed
Extract structs from `Package`
lionel- Jul 8, 2025
9eab565
Add `NAMESPACE` parser
lionel- Jul 8, 2025
714fa44
Draft `Library`
lionel- Jul 8, 2025
3854690
Handle `library()` calls in diagnostics
lionel- Jul 10, 2025
b23af64
Simplify argument extraction
lionel- Jul 11, 2025
3fa816a
Take into account position of `library()` calls
lionel- Jul 11, 2025
8d2131d
Support `require()` too
lionel- Jul 11, 2025
128afee
Parse `DESCRIPTION` files
lionel- Jul 11, 2025
a5395e2
Extract `arguments()` iterator from `arguments_values()`
lionel- Jul 14, 2025
90ea54b
Bail when `character.only` is supplied
lionel- Jul 14, 2025
e06e5d1
Warn outside of `handle_package_attach_call()`
lionel- Jul 14, 2025
90e6604
Fix typos
lionel- Jul 15, 2025
4ca839c
Don't fail creation of `Package` if there is no `NAMESPACE` file
lionel- Jul 24, 2025
1209357
Flatten argument iterators at call site
lionel- Jul 24, 2025
d5ea6a9
Retrieve string from TS child
lionel- Jul 24, 2025
9016fb5
Rename `bulk_imports` to `package_imports`
lionel- Jul 25, 2025
7e3cb8d
Extract `TSQuery::from_query()`
lionel- Jul 25, 2025
144c4dc
Rename `TSQuery` to `TsQuery`
lionel- Jul 25, 2025
56d34f0
Add `TsQuery::all_captures()`
lionel- Jul 25, 2025
1886249
Add `TsQuery::captures_by`
lionel- Jul 25, 2025
e3c5529
Iterate lazily in `all_captures()`
lionel- Jul 25, 2025
249bc5c
Take references in `from_query()` and supply static queries
lionel- Jul 25, 2025
1c5bae4
Refactor iterators with cleaner logic
lionel- Jul 24, 2025
eb61a0f
Let unlikely case go through
lionel- Jul 24, 2025
dd9d846
Propagate error from `load_package()`
lionel- Jul 25, 2025
541ddb2
Don't clone depends
lionel- Jul 25, 2025
72ed267
Add test for multiple arguments
lionel- Jul 25, 2025
7f96aae
Support S4 exports
lionel- Jul 25, 2025
9f8277e
Give `contents` their own lifetime
lionel- Jul 25, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
380 changes: 370 additions & 10 deletions crates/ark/src/lsp/diagnostics.rs

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion crates/ark/src/lsp/diagnostics_syntax.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,12 @@ mod tests {
use crate::lsp::diagnostics::DiagnosticContext;
use crate::lsp::diagnostics_syntax::syntax_diagnostics;
use crate::lsp::documents::Document;
use crate::lsp::inputs::library::Library;

fn text_diagnostics(text: &str) -> Vec<Diagnostic> {
let document = Document::new(text, None);
let context = DiagnosticContext::new(&document.contents);
let library = Library::default();
let context = DiagnosticContext::new(&document.contents, &library);
let diagnostics = syntax_diagnostics(document.ast.root_node(), &context).unwrap();
diagnostics
}
Expand Down
41 changes: 23 additions & 18 deletions crates/ark/src/lsp/indexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use stdext::unwrap;
use stdext::unwrap::IntoResult;
use tower_lsp::lsp_types::Range;
use tree_sitter::Node;
use tree_sitter::Query;
use walkdir::DirEntry;
use walkdir::WalkDir;

Expand All @@ -29,7 +30,7 @@ use crate::lsp::traits::rope::RopeExt;
use crate::treesitter::BinaryOperatorType;
use crate::treesitter::NodeType;
use crate::treesitter::NodeTypeExt;
use crate::treesitter::TSQuery;
use crate::treesitter::TsQuery;

#[derive(Clone, Debug)]
pub enum IndexEntryData {
Expand Down Expand Up @@ -347,23 +348,27 @@ fn index_r6_class_methods(
entries: &mut Vec<IndexEntry>,
) -> anyhow::Result<()> {
// Tree-sitter query to match individual methods in R6Class public/private lists
let query_str = r#"
(argument
name: (identifier) @access
value: (call
function: (identifier) @_list_fn
arguments: (arguments
(argument
name: (identifier) @method_name
value: (function_definition) @method_fn
)
)
)
(#match? @access "public|private")
(#eq? @_list_fn "list")
)
"#;
let mut ts_query = TSQuery::new(query_str)?;
static R6_METHODS_QUERY: LazyLock<Query> = LazyLock::new(|| {
let query_str = r#"
(argument
name: (identifier) @access
value: (call
function: (identifier) @_list_fn
arguments: (arguments
(argument
name: (identifier) @method_name
value: (function_definition) @method_fn
)
)
)
(#match? @access "public|private")
(#eq? @_list_fn "list")
)
"#;
let language = &tree_sitter_r::LANGUAGE.into();
Query::new(language, query_str).expect("Failed to compile R6 methods query")
});
let mut ts_query = TsQuery::from_query(&*R6_METHODS_QUERY);

// We'll switch from Rope to String in the near future so let's not
// worry about this conversion now
Expand Down
147 changes: 147 additions & 0 deletions crates/ark/src/lsp/inputs/library.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
//
// library.rs
//
// Copyright (C) 2025 by Posit Software, PBC
//

use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::RwLock;

use super::package::Package;
use crate::lsp;

/// Lazily manages a list of known R packages by name
#[derive(Default, Clone, Debug)]
pub struct Library {
/// Paths to library directories, i.e. what `base::libPaths()` returns.
pub library_paths: Arc<Vec<PathBuf>>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub library_paths: Arc<Vec<PathBuf>>,
pub library_paths: Vec<PathBuf>,

Is it overkill to Arc this? Probably like 1 or 2 items right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yep I didn't Arc this to avoid cloning the data, but to express that this is a view into the worldstate. With an owned vector, the paths could have been modified along the way. Not sure it is that useful to make the distinction but that was the idea.


packages: Arc<RwLock<HashMap<String, Option<Arc<Package>>>>>,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
packages: Arc<RwLock<HashMap<String, Option<Arc<Package>>>>>,
packages: Arc<RwLock<HashMap<String, Option<Arc<Package>>>>>,

Why is Package itself Arced?

I was assuming that get() would return an immutable reference to a Package in the package list

i.e. what self.packages.read().unwrap().get(name) already returns

And if it's not cached yet, you write it to the cache but still return a get() reference

Then it feels like you don't need to Arc the Package because packages would be the sole owner

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Packages are meant to be shared across threads so we can't use &. We could make the references &'static but I think in the future we'll want to react to updated packages, so that doesn't feel like the right move.

}

impl Library {
    /// Creates a library that searches `library_paths` for packages,
    /// starting with an empty package cache.
    pub fn new(library_paths: Vec<PathBuf>) -> Self {
        Self {
            packages: Arc::new(RwLock::new(HashMap::new())),
            library_paths: Arc::new(library_paths),
        }
    }

    /// Get a package by name, loading and caching it if necessary.
    /// Returns `None` if the package can't be found or loaded.
    ///
    /// Load errors are logged rather than returned, and the failed lookup is
    /// cached as well so the package is not loaded again on later calls.
    ///
    /// # Panics
    ///
    /// Panics if the lock protecting the package cache is poisoned.
    pub fn get(&self, name: &str) -> Option<Arc<Package>> {
        // Try to get from cache first (could be `None` if we already tried to
        // load a non-existent or broken package). The read guard is dropped
        // at the end of this statement so no lock is held while loading.
        let cached = self.packages.read().unwrap().get(name).cloned();
        if let Some(entry) = cached {
            return entry;
        }

        // Not cached, try to load
        let loaded = match self.load_package(name) {
            Ok(pkg) => pkg.map(Arc::new),
            Err(err) => {
                lsp::log_error!("Can't load R package: {err:?}");
                None
            },
        };

        // No lock was held while loading, so another caller may have cached
        // an entry for `name` in the meantime. Keep that entry instead of
        // overwriting it so that all callers share the same `Arc<Package>`.
        let mut packages = self.packages.write().unwrap();
        packages.entry(name.to_string()).or_insert(loaded).clone()
    }

    /// Insert a package in the library for testing purposes.
    ///
    /// Consumes and returns the library so that calls can be chained.
    #[cfg(test)]
    pub fn insert(self, name: &str, package: Package) -> Self {
        self.packages
            .write()
            .unwrap()
            .insert(name.to_string(), Some(Arc::new(package)));
        self
    }

    /// Looks for package `name` in each library path, in order, and returns
    /// the first one found, or `Ok(None)` if no library path contains it.
    ///
    /// An error from `Package::load()` is propagated immediately, without
    /// trying the remaining library paths.
    fn load_package(&self, name: &str) -> anyhow::Result<Option<Package>> {
        for lib_path in self.library_paths.iter() {
            if let Some(pkg) = Package::load(lib_path, name)? {
                return Ok(Some(pkg));
            }
        }

        Ok(None)
    }
}

#[cfg(test)]
mod tests {
    use std::fs::{self};

    use tempfile::TempDir;

    use super::*;

    // Creates `<tempdir>/<pkg_name>/` holding a `DESCRIPTION` and a
    // `NAMESPACE` file with the supplied contents. The `TempDir` is returned
    // alongside the package directory so the caller keeps it alive.
    fn create_temp_package(
        pkg_name: &str,
        description: &str,
        namespace: &str,
    ) -> (TempDir, PathBuf) {
        let library_dir = TempDir::new().unwrap();
        let package_dir = library_dir.path().join(pkg_name);
        fs::create_dir(&package_dir).unwrap();

        let files = [("DESCRIPTION", description), ("NAMESPACE", namespace)];
        for (file_name, contents) in files {
            fs::write(package_dir.join(file_name), contents.as_bytes()).unwrap();
        }

        (library_dir, package_dir)
    }

    #[test]
    fn test_load_and_cache_package() {
        let pkg_name = "mypkg";
        let description = r#"
Package: mypkg
Version: 1.0
"#;
        let namespace = r#"
export(foo)
export(bar)
importFrom(pkg, baz)
"#;

        let (temp_dir, _pkg_dir) = create_temp_package(pkg_name, description, namespace);

        // The temporary directory is the library's one and only library path
        let library_paths = vec![temp_dir.path().to_path_buf()];
        let lib = Library::new(library_paths);

        // The first lookup has to read the package from disk
        let pkg = lib.get(pkg_name).unwrap();
        assert_eq!(pkg.description.name, "mypkg");

        // The second lookup goes through the cache code path (this only
        // exercises that path, it doesn't prove that caching took place)
        assert!(lib.get(pkg_name).is_some());

        // A missing package is reported as absent on the first lookup...
        assert!(lib.get("notapkg").is_none());
        // ...and again once the absence has been cached
        assert!(lib.get("notapkg").is_none());

        // The `NAMESPACE` file was parsed
        assert_eq!(pkg.namespace.exports, vec!["bar", "foo"]);
        assert_eq!(pkg.namespace.imports, vec!["baz"]);
    }
}
12 changes: 12 additions & 0 deletions crates/ark/src/lsp/inputs/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//
// mod.rs
//
// Copyright (C) 2025 by Posit Software, PBC
//
//

pub mod library;
pub mod package;
pub mod package_description;
pub mod package_namespace;
pub mod source_root;
64 changes: 64 additions & 0 deletions crates/ark/src/lsp/inputs/package.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
//
// package.rs
//
// Copyright (C) 2025 by Posit Software, PBC
//
//

use std::fs;
use std::path::PathBuf;

use crate::lsp::inputs::package_description::Description;
use crate::lsp::inputs::package_namespace::Namespace;

/// Represents an R package and its metadata relevant for static analysis.
#[derive(Clone, Debug)]
pub struct Package {
/// Path to the directory that contains `DESCRIPTION`. Can
/// be an installed package or a package source.
pub path: PathBuf,

/// Parsed contents of the package's `DESCRIPTION` file.
pub description: Description,
/// Parsed contents of the package's `NAMESPACE` file, or the default
/// namespace when the package doesn't have one.
pub namespace: Namespace,
}

impl Package {
/// Attempts to load a package from the given path and name.
pub fn load(lib_path: &std::path::Path, name: &str) -> anyhow::Result<Option<Self>> {
let package_path = lib_path.join(name);

let description_path = package_path.join("DESCRIPTION");
let namespace_path = package_path.join("NAMESPACE");

// Only consider libraries that have a folder named after the
// requested package and that contains a description file
if !description_path.is_file() {
return Ok(None);
}
Comment on lines +33 to +37
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You seem to also require a NAMESPACE below, so bail early here too?

Copy link
Contributor Author

@lionel- lionel- Jul 24, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm actually we probably should be robust to missing NAMESPACE files?

Edit: I went ahead and made NAMESPACE optional, with an info-level message when it's missing.


// This fails if there is no `Package` field, so we never load
// folders like bookdown projects as packages
let description_contents = fs::read_to_string(&description_path)?;
let description = Description::parse(&description_contents)?;

if description.name != name {
return Err(anyhow::anyhow!(
"`Package` field in `DESCRIPTION` doesn't match folder name '{name}'"
));
}

let namespace = if namespace_path.is_file() {
let namespace_contents = fs::read_to_string(&namespace_path)?;
Namespace::parse(&namespace_contents)?
} else {
tracing::info!("Package `{name}` doesn't contain a NAMESPACE file, using defaults");
Namespace::default()
};

Ok(Some(Package {
path: package_path,
description,
namespace,
}))
}
}
Loading
Loading