diff --git a/crates/cairo-lang-doc/src/db.rs b/crates/cairo-lang-doc/src/db.rs index 07dd2b85b71..a303bd175c3 100644 --- a/crates/cairo-lang-doc/src/db.rs +++ b/crates/cairo-lang-doc/src/db.rs @@ -154,16 +154,18 @@ fn extract_item_outer_documentation<'db>( ) -> Option { // Get the text of the item (trivia + definition) let raw_text = item_id.stable_location(db)?.syntax_node(db).get_text(db); - Some( - raw_text + let comment_lines: Vec<&str> = raw_text .lines() .filter(|line| !line.trim().is_empty()) // Takes all the lines before the definition. // Anything other than doc comments will be filtered out later. .take_while_ref(|line| is_comment_line(line) || line.trim_start().starts_with("#")) - .filter_map(|line| extract_comment_from_code_line(line, &["///"])) - .join("\n"), - ) + .filter_map(|line| extract_comment_line_content(line, &["///"])) + .collect(); + if comment_lines.is_empty() { + return None; + } + Some(dedent_comment_block(&comment_lines)) } /// Gets the module level comments of the item. @@ -187,12 +189,16 @@ fn extract_item_module_level_documentation<'db>( /// Only gets the comments inside the item. fn extract_item_inner_documentation_from_raw_text(raw_text: String) -> String { - raw_text + let comment_lines: Vec<&str> = raw_text .lines() .filter(|line| !line.trim().is_empty()) .skip_while(|line| is_comment_line(line)) - .filter_map(|line| extract_comment_from_code_line(line, &["//!"])) - .join("\n") + .filter_map(|line| extract_comment_line_content(line, &["//!"])) + .collect(); + if comment_lines.is_empty() { + return String::new(); + } + dedent_comment_block(&comment_lines) } /// Gets the module level comments of certain file. 
/// Extracts the content of a doc-comment line.
///
/// Indentation in front of the comment marker is ignored. If the remaining text starts with one
/// of `comment_markers`, everything after that marker is returned unchanged, so whitespace
/// following the marker is preserved and can be normalized later for the block as a whole.
///
/// Returns `None` if the line does not start with any of the markers, or if the first matching
/// marker is immediately followed by another slash (e.g. `////`).
fn extract_comment_line_content<'a>(
    line: &'a str,
    comment_markers: &[&'static str],
) -> Option<&'a str> {
    // Remove indentation before the comment marker.
    let dedent = line.trim_start();
    comment_markers
        .iter()
        // Only the first matching marker is considered.
        .find_map(|marker| dedent.strip_prefix(*marker))
        // A slash right after the marker means this is not a doc comment.
        .filter(|content| !content.starts_with('/'))
}

/// Removes the common leading indentation from a block of comment lines.
///
/// The common indentation is the smallest amount of leading whitespace (measured in bytes) found
/// on any line that is not blank. That many bytes are removed from the start of every line that
/// is long enough; shorter lines (e.g. empty ones) are kept as they are. The resulting lines are
/// joined with `\n`. An empty input yields an empty string.
///
/// If the lines mix whitespace characters of different byte widths, the common offset can fall
/// inside a multi-byte character of some line. Such a line is left untouched instead of being
/// cut in the middle of a character.
fn dedent_comment_block(lines: &[&str]) -> String {
    /// Byte length of the leading whitespace of `line`.
    fn indent_len(line: &str) -> usize {
        line.len() - line.trim_start().len()
    }

    // Blank lines carry no indentation information, so they do not take part in the minimum.
    let min_indent = lines
        .iter()
        .filter(|line| !line.trim().is_empty())
        .map(|&line| indent_len(line))
        .min()
        .unwrap_or(0);

    lines
        .iter()
        // `get` is `None` both when the line is shorter than `min_indent` and when the offset is
        // not a character boundary; in either case the line is kept unchanged.
        .map(|&line| line.get(min_indent..).unwrap_or(line))
        .collect::<Vec<&str>>()
        .join("\n")
}
+ let line_indents: Vec = documentation_comment + .lines() + .map(|line| line.len() - line.trim_start().len()) + .collect(); + let mut tokens = Vec::new(); let mut current_link: Option> = None; let mut is_indented_code_block = false; @@ -139,7 +146,88 @@ impl<'db> DocumentationCommentParser<'db> { if is_indented_code_block { format!(" {text}") } else { - text.to_string() + // Process text line by line to restore indentation + let text_str = text.as_ref(); + let lines: Vec<&str> = text_str.split_inclusive('\n').collect(); + + let mut result = String::new(); + for (line_idx, line) in lines.iter().enumerate() { + let trimmed_line = line.trim(); + + // Check if this is the start of a new line in the original text + let is_new_line = line_idx == 0 + && (tokens.is_empty() + || tokens + .last() + .and_then(|last| { + if let DocumentationCommentToken::Content( + content, + ) = last + { + Some(content.ends_with('\n')) + } else { + None + } + }) + .unwrap_or(true)); + + // For each non-empty line, try to find matching line in + // original text + if !trimmed_line.is_empty() && (is_new_line || line_idx > 0) { + // Find the line in original text that matches this content + // Try exact match first, then partial match + let mut found_line_num = None; + for (i, orig_line) in + documentation_comment.lines().enumerate() + { + let trimmed_orig = orig_line.trim(); + // Exact match (most reliable) + if trimmed_orig == trimmed_line { + found_line_num = Some(i); + break; + } + } + + // If no exact match, try partial match + if found_line_num.is_none() { + for (i, orig_line) in + documentation_comment.lines().enumerate() + { + let trimmed_orig = orig_line.trim(); + // Check if one is a prefix of the other (for cases + // where markdown splits text) + if (trimmed_line.len() >= 5 + && trimmed_orig.starts_with( + &trimmed_line[..trimmed_line + .len() + .min(trimmed_orig.len())], + )) + || (trimmed_orig.len() >= 5 + && trimmed_line.starts_with( + &trimmed_orig[..trimmed_orig + .len() + 
.min(trimmed_line.len())], + )) + { + found_line_num = Some(i); + break; + } + } + } + + if let Some(line_num) = found_line_num + && line_num < line_indents.len() + { + let indent = line_indents[line_num]; + if indent > 0 { + result.push_str(&" ".repeat(indent)); + } + } + } + result.push_str(line); + } + + if result.is_empty() { text.to_string() } else { result } } }; tokens.push(DocumentationCommentToken::Content(text)); diff --git a/crates/cairo-lang-doc/src/tests/test-data/indentation.txt b/crates/cairo-lang-doc/src/tests/test-data/indentation.txt new file mode 100644 index 00000000000..1daafa43c99 --- /dev/null +++ b/crates/cairo-lang-doc/src/tests/test-data/indentation.txt @@ -0,0 +1,119 @@ +//! > Documentation + +//! > test_runner_name +documentation_test_runner + +//! > cairo_project.toml +[crate_roots] +hello = "src" + +//! > cairo_code +/// Function with multi-line doc comment. +/// This line has extra indentation. +/// This line has less indentation. +/// Another indented line. +fn test_function() {} + +/// Another function with varying indentation. +/// First line with some indentation. +/// Second line with more indentation. +/// Third line with no extra indentation. +/// Fourth line with some indentation. +fn another_function() {} + +/// Function demonstrating that minimum indentation is removed uniformly. +/// All lines have at least 2 spaces. +/// Some lines have 4 spaces. +/// But minimum (2 spaces) is removed from all. +fn indented_function() {} + +/// This test case would have failed before the fix. +/// Line with 2 spaces indentation. +/// Line with 4 spaces indentation. +/// Another line with 2 spaces. +/// Line with 6 spaces indentation. +fn test_case_that_failed_before() {} + +//! > Item signature #1 + +//! > Item documentation #1 + +//! > Item documentation tokens #1 + +//! > Item signature #2 +fn test_function() + +//! > Item documentation #2 +Function with multi-line doc comment. + This line has extra indentation. 
+ This line has less indentation. + Another indented line. + +//! > Item documentation tokens #2 +Content("Function with multi-line doc comment.") +Content("\n") +Content(" This line has extra indentation.") +Content("\n") +Content(" This line has less indentation.") +Content("\n") +Content(" Another indented line.") + +//! > Item signature #3 +fn another_function() + +//! > Item documentation #3 +Another function with varying indentation. + First line with some indentation. + Second line with more indentation. +Third line with no extra indentation. + Fourth line with some indentation. + +//! > Item documentation tokens #3 +Content("Another function with varying indentation.") +Content("\n") +Content(" First line with some indentation.") +Content("\n") +Content(" Second line with more indentation.") +Content("\n") +Content("Third line with no extra indentation.") +Content("\n") +Content(" Fourth line with some indentation.") + +//! > Item signature #4 +fn indented_function() + +//! > Item documentation #4 +Function demonstrating that minimum indentation is removed uniformly. + All lines have at least 2 spaces. + Some lines have 4 spaces. + But minimum (2 spaces) is removed from all. + +//! > Item documentation tokens #4 +Content("Function demonstrating that minimum indentation is removed uniformly.") +Content("\n") +Content(" All lines have at least 2 spaces.") +Content("\n") +Content(" Some lines have 4 spaces.") +Content("\n") +Content(" But minimum (2 spaces) is removed from all.") + +//! > Item signature #5 +fn test_case_that_failed_before() + +//! > Item documentation #5 +This test case would have failed before the fix. + Line with 2 spaces indentation. + Line with 4 spaces indentation. + Another line with 2 spaces. + Line with 6 spaces indentation. + +//! 
> Item documentation tokens #5 +Content("This test case would have failed before the fix.") +Content("\n") +Content(" Line with 2 spaces indentation.") +Content("\n") +Content(" Line with 4 spaces indentation.") +Content("\n") +Content(" Another line with 2 spaces.") +Content("\n") +Content(" Line with 6 spaces indentation.") diff --git a/crates/cairo-lang-doc/src/tests/test.rs b/crates/cairo-lang-doc/src/tests/test.rs index d10ceba857f..6f6a63d4e0a 100644 --- a/crates/cairo-lang-doc/src/tests/test.rs +++ b/crates/cairo-lang-doc/src/tests/test.rs @@ -30,6 +30,7 @@ cairo_lang_test_utils::test_file_test!( tables_formatting: "tables_formatting.txt", rules_formatting: "rules_formatting.txt", font_formatting: "font_formatting.txt", + indentation: "indentation.txt", }, documentation_test_runner );