diff --git a/crates/oxide/Cargo.toml b/crates/oxide/Cargo.toml
index 9b800b04994c..c1de8e528dbb 100644
--- a/crates/oxide/Cargo.toml
+++ b/crates/oxide/Cargo.toml
@@ -3,13 +3,14 @@ name = "tailwindcss-oxide"
 version = "0.1.0"
 edition = "2021"
 
+[lib]
+crate-type = ["lib", "cdylib"]
+
 [dependencies]
 bstr = "1.11.3"
 globwalk = "0.9.1"
 log = "0.4.22"
-rayon = "1.10.0"
 fxhash = { package = "rustc-hash", version = "2.1.1" }
-crossbeam = "0.8.4"
 tracing = { version = "0.1.40", features = [] }
 tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }
 walkdir = "2.5.0"
@@ -20,6 +21,16 @@ classification-macros = { path = "../classification-macros" }
 ignore = { path = "../ignore" }
 regex = "1.11.1"
 
+# Threading dependencies - not available on wasm32-unknown-unknown
+[target.'cfg(not(all(target_arch = "wasm32", target_os = "unknown")))'.dependencies]
+rayon = "1.10.0"
+crossbeam = "0.8.4"
+
+# WASM-specific dependencies
+[target.'cfg(all(target_arch = "wasm32", target_os = "unknown"))'.dependencies]
+wasm-bindgen = "0.2"
+console_error_panic_hook = "0.1"
+
 [dev-dependencies]
 tempfile = "3.13.0"
 pretty_assertions = "1.4.1"
diff --git a/crates/oxide/src/lib.rs b/crates/oxide/src/lib.rs
index d7f0321af16a..88940ccf7a0c 100644
--- a/crates/oxide/src/lib.rs
+++ b/crates/oxide/src/lib.rs
@@ -10,3 +10,10 @@ pub use glob::GlobEntry;
 pub use scanner::sources::PublicSourceEntry;
 pub use scanner::ChangedContent;
 pub use scanner::Scanner;
+
+// WASM bindings
+#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+pub mod wasm;
+
+#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+pub use wasm::*;
diff --git a/crates/oxide/src/scanner/mod.rs b/crates/oxide/src/scanner/mod.rs
index 0d2fceba60bb..9887b5f3f788 100644
--- a/crates/oxide/src/scanner/mod.rs
+++ b/crates/oxide/src/scanner/mod.rs
@@ -14,7 +14,137 @@ use bstr::ByteSlice;
 use fast_glob::glob_match;
 use fxhash::{FxHashMap, FxHashSet};
 use ignore::{gitignore::GitignoreBuilder, WalkBuilder};
+#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
 use rayon::prelude::*;
+
+// Conditional parallel processing helpers
+#[cfg(not(all(target_arch = "wasm32", target_os = "unknown")))]
+mod parallel {
+    use rayon::prelude::*;
+    use fxhash::FxHashSet;
+    use bstr::ByteSlice;
+
+    pub fn sort_candidates(candidates: &mut Vec<String>) {
+        candidates.par_sort_unstable();
+    }
+
+    pub fn filter_new_candidates(candidates: Vec<String>, existing: &FxHashSet<String>) -> Vec<String> {
+        candidates.into_par_iter().filter(|c| !existing.contains(c)).collect()
+    }
+
+    pub fn extend_candidates(target: &mut FxHashSet<String>, new: Vec<String>) {
+        target.par_extend(new);
+    }
+
+    pub fn map_files(files: &[std::path::PathBuf]) -> Vec<String> {
+        files.par_iter().filter_map(|x| x.clone().into_os_string().into_string().ok()).collect()
+    }
+
+    pub fn process_changed_content(content: Vec<crate::ChangedContent>) -> Vec<Vec<u8>> {
+        content.into_par_iter().filter_map(super::read_changed_content).collect()
+    }
+
+    pub fn process_extraction_blobs<H>(blobs: Vec<Vec<u8>>, handle: H) -> Vec<String>
+    where
+        H: Fn(crate::extractor::Extractor) -> Vec<crate::extractor::Extracted> + std::marker::Sync,
+    {
+        let mut result: Vec<_> = blobs
+            .par_iter()
+            .flat_map(|blob| blob.par_split(|x| *x == b'\n'))
+            .filter_map(|blob| {
+                if blob.is_empty() { return None; }
+                let extracted = handle(crate::extractor::Extractor::new(blob));
+                if extracted.is_empty() { return None; }
+                Some(fxhash::FxHashSet::from_iter(extracted.into_iter().map(|x| match x {
+                    crate::extractor::Extracted::Candidate(bytes) => bytes,
+                    crate::extractor::Extracted::CssVariable(bytes) => bytes,
+                })))
+            })
+            .reduce(Default::default, |mut a, b| { a.extend(b); a })
+            .into_iter()
+            .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
+            .collect();
+        result.par_sort_unstable();
+        result
+    }
+
+    pub fn extract_with_positions(extracted: Vec<crate::extractor::Extracted>, offset: usize, original_content: &[u8]) -> Vec<(String, usize)> {
+        extracted.into_par_iter().flat_map(|extracted| match extracted {
+            crate::extractor::Extracted::Candidate(s) => {
+                let i = s.as_ptr() as usize - offset;
+                let original = &original_content[i..i + s.len()];
+                if original.contains_str("-[]") {
+                    return Some(unsafe { (String::from_utf8_unchecked(original.to_vec()), i) });
+                }
+                Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) })
+            }
+            _ => None,
+        }).collect()
+    }
+}
+
+#[cfg(all(target_arch = "wasm32", target_os = "unknown"))]
+mod parallel {
+    use fxhash::FxHashSet;
+    use bstr::ByteSlice;
+
+    pub fn sort_candidates(candidates: &mut Vec<String>) {
+        candidates.sort_unstable();
+    }
+
+    pub fn filter_new_candidates(candidates: Vec<String>, existing: &FxHashSet<String>) -> Vec<String> {
+        candidates.into_iter().filter(|c| !existing.contains(c)).collect()
+    }
+
+    pub fn extend_candidates(target: &mut FxHashSet<String>, new: Vec<String>) {
+        target.extend(new);
+    }
+
+    pub fn map_files(files: &[std::path::PathBuf]) -> Vec<String> {
+        files.iter().filter_map(|x| x.clone().into_os_string().into_string().ok()).collect()
+    }
+
+    pub fn process_changed_content(content: Vec<crate::ChangedContent>) -> Vec<Vec<u8>> {
+        content.into_iter().filter_map(super::read_changed_content).collect()
+    }
+
+    pub fn process_extraction_blobs<H>(blobs: Vec<Vec<u8>>, handle: H) -> Vec<String>
+    where
+        H: Fn(crate::extractor::Extractor) -> Vec<crate::extractor::Extracted>,
+    {
+        let mut result: Vec<_> = blobs
+            .iter()
+            .flat_map(|blob| blob.split(|x| *x == b'\n'))
+            .filter_map(|blob| {
+                if blob.is_empty() { return None; }
+                let extracted = handle(crate::extractor::Extractor::new(blob));
+                if extracted.is_empty() { return None; }
+                Some(fxhash::FxHashSet::from_iter(extracted.into_iter().map(|x| match x {
+                    crate::extractor::Extracted::Candidate(bytes) => bytes,
+                    crate::extractor::Extracted::CssVariable(bytes) => bytes,
+                })))
+            })
+            .fold(fxhash::FxHashSet::default(), |mut a, b| { a.extend(b); a })
+            .into_iter()
+            .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
+            .collect();
+        result.sort_unstable();
+        result
+    }
+
+    pub fn extract_with_positions(extracted: Vec<crate::extractor::Extracted>, offset: usize, original_content: &[u8]) -> Vec<(String, usize)> {
+        extracted.into_iter().flat_map(|extracted| match extracted {
+            crate::extractor::Extracted::Candidate(s) => {
+                let i = s.as_ptr() as usize - offset;
+                let original = &original_content[i..i + s.len()];
+                if original.contains_str("-[]") {
+                    return Some(unsafe { (String::from_utf8_unchecked(original.to_vec()), i) });
+                }
+                Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) })
+            }
+            _ => None,
+        }).collect()
+    }
+}
 use std::collections::{BTreeMap, BTreeSet};
 use std::fs::OpenOptions;
 use std::io::{self, Write};
@@ -190,7 +320,7 @@ impl Scanner {
         // Make sure we have a sorted list of candidates
         let mut candidates = self.candidates.iter().cloned().collect::<Vec<_>>();
-        candidates.par_sort_unstable();
+        parallel::sort_candidates(&mut candidates);
 
         // Return all candidates instead of only the new ones
         candidates
@@ -299,15 +429,12 @@ impl Scanner {
         // Only compute the new candidates and ignore the ones we already have. This is for
         // subsequent calls to prevent serializing the entire set of candidates every time.
-        let mut new_candidates = new_candidates
-            .into_par_iter()
-            .filter(|candidate| !self.candidates.contains(candidate))
-            .collect::<Vec<_>>();
+        let mut new_candidates = parallel::filter_new_candidates(new_candidates, &self.candidates);
 
-        new_candidates.par_sort_unstable();
+        parallel::sort_candidates(&mut new_candidates);
 
         // Track new candidates for subsequent calls
-        self.candidates.par_extend(new_candidates.clone());
+        parallel::extend_candidates(&mut self.candidates, new_candidates.clone());
 
         new_candidates
     }
@@ -355,10 +482,7 @@ impl Scanner {
     pub fn get_files(&mut self) -> Vec<String> {
         self.scan_sources();
 
-        self.files
-            .par_iter()
-            .filter_map(|x| x.clone().into_os_string().into_string().ok())
-            .collect()
+        parallel::map_files(&self.files)
     }
 
     #[tracing::instrument(skip_all)]
@@ -433,28 +557,7 @@ impl Scanner {
         let mut extractor = Extractor::new(&content[..]);
 
-        extractor
-            .extract()
-            .into_par_iter()
-            .flat_map(|extracted| match extracted {
-                Extracted::Candidate(s) => {
-                    let i = s.as_ptr() as usize - offset;
-                    let original = &original_content[i..i + s.len()];
-                    if original.contains_str("-[]") {
-                        return Some(unsafe {
-                            (String::from_utf8_unchecked(original.to_vec()), i)
-                        });
-                    }
-
-                    // SAFETY: When we parsed the candidates, we already guaranteed that the byte
-                    // slices are valid, therefore we don't have to re-check here when we want to
-                    // convert it back to a string.
-                    Some(unsafe { (String::from_utf8_unchecked(s.to_vec()), i) })
-                }
-
-                _ => None,
-            })
-            .collect()
+        parallel::extract_with_positions(extractor.extract(), offset, original_content)
     }
 }
 
@@ -503,10 +606,7 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
         changed_content.len()
     );
 
-    changed_content
-        .into_par_iter()
-        .filter_map(read_changed_content)
-        .collect()
+    parallel::process_changed_content(changed_content)
 }
 
 #[tracing::instrument(skip_all)]
@@ -526,39 +626,7 @@ fn extract<H>(blobs: Vec<Vec<u8>>, handle: H) -> Vec<String>
 where
     H: Fn(Extractor) -> Vec<Extracted> + std::marker::Sync,
 {
-    let mut result: Vec<_> = blobs
-        .par_iter()
-        .flat_map(|blob| blob.par_split(|x| *x == b'\n'))
-        .filter_map(|blob| {
-            if blob.is_empty() {
-                return None;
-            }
-
-            let extracted = handle(crate::extractor::Extractor::new(blob));
-            if extracted.is_empty() {
-                return None;
-            }
-
-            Some(FxHashSet::from_iter(extracted.into_iter().map(
-                |x| match x {
-                    Extracted::Candidate(bytes) => bytes,
-                    Extracted::CssVariable(bytes) => bytes,
-                },
-            )))
-        })
-        .reduce(Default::default, |mut a, b| {
-            a.extend(b);
-            a
-        })
-        .into_iter()
-        .map(|s| unsafe { String::from_utf8_unchecked(s.to_vec()) })
-        .collect();
-
-    // SAFETY: Unstable sort is faster and in this scenario it's also safe because we are
-    // guaranteed to have unique candidates.
-    result.par_sort_unstable();
-
-    result
+    parallel::process_extraction_blobs(blobs, handle)
 }
 
 /// Create a walker for the given sources to detect all the files that we have to scan.
diff --git a/crates/oxide/src/wasm.rs b/crates/oxide/src/wasm.rs
new file mode 100644
index 000000000000..5364771de619
--- /dev/null
+++ b/crates/oxide/src/wasm.rs
@@ -0,0 +1,90 @@
+use wasm_bindgen::prelude::*;
+use crate::{ChangedContent, Scanner};
+
+// Set panic hook for better error messages
+#[wasm_bindgen(start)]
+pub fn wasm_init() {
+    console_error_panic_hook::set_once();
+}
+
+#[wasm_bindgen]
+#[derive(Clone)]
+pub struct WasmChangedContent {
+    content: Option<String>,
+    extension: String,
+}
+
+#[wasm_bindgen]
+impl WasmChangedContent {
+    #[wasm_bindgen(constructor)]
+    pub fn new(content: Option<String>, extension: String) -> WasmChangedContent {
+        WasmChangedContent { content, extension }
+    }
+
+    #[wasm_bindgen(getter)]
+    pub fn content(&self) -> Option<String> {
+        self.content.clone()
+    }
+
+    #[wasm_bindgen(getter)]
+    pub fn extension(&self) -> String {
+        self.extension.clone()
+    }
+}
+
+#[wasm_bindgen]
+#[derive(Clone)]
+pub struct WasmCandidateWithPosition {
+    candidate: String,
+    position: usize,
+}
+
+#[wasm_bindgen]
+impl WasmCandidateWithPosition {
+    #[wasm_bindgen(getter)]
+    pub fn candidate(&self) -> String {
+        self.candidate.clone()
+    }
+
+    #[wasm_bindgen(getter)]
+    pub fn position(&self) -> usize {
+        self.position
+    }
+}
+
+impl From<WasmChangedContent> for ChangedContent {
+    fn from(wasm_content: WasmChangedContent) -> Self {
+        match wasm_content.content {
+            Some(content) => ChangedContent::Content(content, wasm_content.extension),
+            None => panic!("File-based content not supported in browser WASM"),
+        }
+    }
+}
+
+#[wasm_bindgen]
+pub struct WasmScanner {
+    scanner: Scanner,
+}
+
+#[wasm_bindgen]
+impl WasmScanner {
+    #[wasm_bindgen(constructor)]
+    pub fn new() -> WasmScanner {
+        WasmScanner {
+            scanner: Scanner::new(vec![]),
+        }
+    }
+
+    #[wasm_bindgen(js_name = getCandidatesWithPositions)]
+    pub fn get_candidates_with_positions(
+        &mut self,
+        content: WasmChangedContent,
+    ) -> Vec<WasmCandidateWithPosition> {
+        let changed_content: ChangedContent = content.into();
+        self.scanner
+            .get_candidates_with_positions(changed_content)
+            .into_iter()
+            .map(|(candidate, position)| WasmCandidateWithPosition { candidate, position })
+            .collect()
+    }
+}
\ No newline at end of file