From b6a6ace8fde666e0755b978fd0ce1c333db4435a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9my=20Rakic?=
Date: Thu, 17 Jul 2025 13:00:29 +0000
Subject: [PATCH 1/3] wip

---
 compiler/rustc_middle/src/mir/basic_blocks.rs | 112 ++++++++++++++++++
 .../rustc_mir_dataflow/src/framework/mod.rs   | 109 ++++++++++++++++-
 2 files changed, 220 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_middle/src/mir/basic_blocks.rs b/compiler/rustc_middle/src/mir/basic_blocks.rs
index 0d2e23609ce35..f4df9d289f8c6 100644
--- a/compiler/rustc_middle/src/mir/basic_blocks.rs
+++ b/compiler/rustc_middle/src/mir/basic_blocks.rs
@@ -33,6 +33,93 @@ struct Cache {
     predecessors: OnceLock<Predecessors>,
     reverse_postorder: OnceLock<Vec<BasicBlock>>,
     dominators: OnceLock<Dominators<BasicBlock>>,
+    is_cyclic: OnceLock<bool>,
+    sccs: OnceLock<SccData>,
 }
 
+#[derive(Clone, Default, Debug)]
+pub struct SccData {
+    pub component_count: usize,
+
+    /// The SCC of each block.
+    pub components: IndexVec<BasicBlock, u32>,
+
+    /// The contents of each SCC: its blocks, in RPO.
+    pub sccs: Vec<smallvec::SmallVec<[BasicBlock; 2]>>,
+}
+
+use std::collections::VecDeque;
+
+struct PearceRecursive {
+    r_index: IndexVec<BasicBlock, u32>,
+    stack: VecDeque<BasicBlock>,
+    index: u32,
+    c: u32,
+}
+
+impl PearceRecursive {
+    fn new(node_count: usize) -> Self {
+        assert!(node_count > 0); // only a non-empty graph is supported
+        // TODO: assert that `node_count` is within overflow limits.
+        Self {
+            r_index: IndexVec::from_elem_n(0, node_count),
+            stack: VecDeque::new(),
+            index: 1,
+            c: node_count.try_into().unwrap(),
+            // c: node_count - 1,
+        }
+    }
+
+    fn compute_sccs(&mut self, blocks: &IndexVec<BasicBlock, BasicBlockData<'_>>) {
+        for v in blocks.indices() {
+            if self.r_index[v] == 0 {
+                self.visit(v, blocks);
+            }
+        }
+
+        // The SCC labels are assigned from N - 1 down to zero; remap them to the
+        // 0..component_count range, to match their position in an array of SCCs.
+        let node_count: u32 = blocks.len().try_into().unwrap();
+        for scc_index in self.r_index.iter_mut() {
+            *scc_index = node_count - *scc_index - 1;
+        }
+
+        // Adjust the component index counter to the component count.
+        self.c = node_count - self.c;
+    }
+
+    fn visit(&mut self, v: BasicBlock, blocks: &IndexVec<BasicBlock, BasicBlockData<'_>>) {
+        let mut root = true;
+        self.r_index[v] = self.index;
+        self.index += 1;
+
+        for w in blocks[v].terminator().successors() {
+            if self.r_index[w] == 0 {
+                self.visit(w, blocks);
+            }
+            if self.r_index[w] < self.r_index[v] {
+                self.r_index[v] = self.r_index[w];
+                root = false;
+            }
+        }
+
+        if root {
+            self.index -= 1;
+            self.c -= 1;
+
+            while let Some(&w) = self.stack.front()
+                && self.r_index[v] <= self.r_index[w]
+            {
+                self.stack.pop_front();
+                self.r_index[w] = self.c;
+                self.index -= 1;
+            }
+
+            self.r_index[v] = self.c;
+        } else {
+            self.stack.push_front(v);
+        }
+    }
+}
 
 impl<'tcx> BasicBlocks<'tcx> {
@@ -41,10 +128,35 @@ impl<'tcx> BasicBlocks<'tcx> {
         BasicBlocks { basic_blocks, cache: Cache::default() }
     }
 
+    /// Returns true if the control-flow graph contains a cycle reachable from `START_BLOCK`.
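+    ///
+    /// The result is computed once and cached alongside the other CFG analyses in this
+    /// cache, so repeated queries are cheap.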
+    #[inline]
+    pub fn is_cfg_cyclic(&self) -> bool {
+        *self.cache.is_cyclic.get_or_init(|| graph::is_cyclic(self))
+    }
+
+    #[inline]
     pub fn dominators(&self) -> &Dominators<BasicBlock> {
         self.cache.dominators.get_or_init(|| dominators(self))
     }
 
+    #[inline]
+    pub fn sccs(&self) -> &SccData {
+        self.cache.sccs.get_or_init(|| {
+            let block_count = self.basic_blocks.len();
+
+            let mut pearce = PearceRecursive::new(block_count);
+            pearce.compute_sccs(&self.basic_blocks);
+            let component_count = pearce.c as usize;
+
+            let mut sccs = vec![smallvec::SmallVec::new(); component_count];
+            for &block in self.reverse_postorder().iter() {
+                let scc = pearce.r_index[block] as usize;
+                sccs[scc].push(block);
+            }
+            SccData { component_count, components: pearce.r_index, sccs }
+        })
+    }
+
     /// Returns predecessors for each basic block.
     #[inline]
     pub fn predecessors(&self) -> &Predecessors {
diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs
index b6a5603601959..75b7076e03708 100644
--- a/compiler/rustc_mir_dataflow/src/framework/mod.rs
+++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs
@@ -229,6 +229,27 @@ pub trait Analysis<'tcx> {
         unreachable!();
     }
 
+    #[inline]
+    fn iterate_to_fixpoint<'mir>(
+        self,
+        tcx: TyCtxt<'tcx>,
+        body: &'mir mir::Body<'tcx>,
+        pass_name: Option<&'static str>,
+    ) -> AnalysisAndResults<'tcx, Self>
+    where
+        Self: Sized,
+        Self::Domain: DebugWithContext<Self>,
+    {
+        // Computing dataflow over the SCCs is only supported in forward analyses. It's also
+        // unnecessary on acyclic graphs, as the condensation graph is of course the same as
+        // the CFG itself.
+        if Self::Direction::IS_BACKWARD || !body.basic_blocks.is_cfg_cyclic() {
+            self.iterate_to_fixpoint_per_block(tcx, body, pass_name)
+        } else {
+            self.iterate_to_fixpoint_per_scc(tcx, body, pass_name)
+        }
+    }
+
     /* Extension methods */
 
     /// Finds the fixpoint for this dataflow problem.
@@ -244,7 +265,7 @@ pub trait Analysis<'tcx> {
     /// dataflow analysis. Some analyses are run multiple times in the compilation pipeline.
     /// Without a `pass_name` to differentiate them, only the results for the latest run will be
     /// saved.
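+    ///
+    /// Note: this is the per-block worklist driver; `iterate_to_fixpoint` above now dispatches
+    /// between it and `iterate_to_fixpoint_per_scc`, depending on the analysis direction and on
+    /// whether the CFG is cyclic.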
-    fn iterate_to_fixpoint<'mir>(
+    fn iterate_to_fixpoint_per_block<'mir>(
         mut self,
         tcx: TyCtxt<'tcx>,
         body: &'mir mir::Body<'tcx>,
@@ -308,6 +329,92 @@ pub trait Analysis<'tcx> {
 
         AnalysisAndResults { analysis: self, results }
     }
+
+    fn iterate_to_fixpoint_per_scc<'mir>(
+        mut self,
+        _tcx: TyCtxt<'tcx>,
+        body: &'mir mir::Body<'tcx>,
+        _pass_name: Option<&'static str>,
+    ) -> AnalysisAndResults<'tcx, Self>
+    where
+        Self: Sized,
+        Self::Domain: DebugWithContext<Self>,
+    {
+        assert!(Self::Direction::IS_FORWARD);
+
+        let sccs = body.basic_blocks.sccs();
+
+        struct VecQueue<T: Idx> {
+            queue: Vec<T>,
+            set: DenseBitSet<T>,
+        }
+
+        impl<T: Idx> VecQueue<T> {
+            #[inline]
+            fn with_none(len: usize) -> Self {
+                VecQueue { queue: Vec::with_capacity(len), set: DenseBitSet::new_empty(len) }
+            }
+
+            #[inline]
+            fn insert(&mut self, element: T) {
+                if self.set.insert(element) {
+                    self.queue.push(element);
+                }
+            }
+        }
+
+        let mut scc_queue = VecQueue::with_none(sccs.component_count);
+        for &bb in body.basic_blocks.reverse_postorder().iter() {
+            // let scc = sccs.components[bb.as_usize()];
+            let scc = sccs.components[bb];
+            scc_queue.insert(scc);
+        }
+        // assert_eq!(scc_queue.queue, sccs.queue);
+
+        let mut results = IndexVec::from_fn_n(|_| self.bottom_value(body), body.basic_blocks.len());
+        self.initialize_start_block(body, &mut results[mir::START_BLOCK]);
+
+        // Worklist for the per-SCC iterations.
+        let mut dirty_queue: WorkQueue<BasicBlock> = WorkQueue::with_none(body.basic_blocks.len());
+
+        let mut state = self.bottom_value(body);
+
+        for &scc in &scc_queue.queue {
+            // Blocks must be added to the worklist in RPO.
+            // for block in sccs.blocks_in_rpo(scc as usize) {
+            for block in sccs.sccs[scc as usize].iter().copied() {
+                dirty_queue.insert(block);
+            }
+
+            while let Some(bb) = dirty_queue.pop() {
+                // Set the state to the entry state of the block. This is equivalent to `state =
+                // results[bb].clone()`, but it saves an allocation, thus improving compile times.
+                state.clone_from(&results[bb]);
+
+                Self::Direction::apply_effects_in_block(
+                    &mut self,
+                    body,
+                    &mut state,
+                    bb,
+                    &body[bb],
+                    |target: BasicBlock, state: &Self::Domain| {
+                        let set_changed = results[target].join(state);
+                        // let target_scc = sccs.components[target.as_usize()];
+                        let target_scc = sccs.components[target];
+                        if set_changed && target_scc == scc {
+                            // The target block is in the SCC we're currently processing, and we
+                            // want to process this block until fixpoint. Otherwise, the target
+                            // block is in a successor SCC and it will be processed when that SCC
+                            // is encountered later.
+                            dirty_queue.insert(target);
+                        }
+                    },
+                );
+            }
+        }
+
+        AnalysisAndResults { analysis: self, results }
+    }
 }
 
 /// The legal operations for a transfer function in a gen/kill problem.
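Note on the strategy above: here is the same per-SCC fixpoint structure as a minimal,
self-contained sketch. The toy CFG, the saturating-counter domain, and every name in it are
invented for illustration; this is not the rustc code, only the control structure carries over.

    // Per-SCC forward dataflow on a toy CFG: 0 -> 1, 1 -> 2, 2 -> 1 (back edge), 2 -> 3.
    use std::collections::VecDeque;

    fn main() {
        let successors: Vec<Vec<usize>> = vec![vec![1], vec![2], vec![1, 3], vec![]];
        // Precomputed SCCs, listed in RPO of the condensation graph: {0}, {1, 2}, {3}.
        let component = [0usize, 1, 1, 2];
        let sccs: Vec<Vec<usize>> = vec![vec![0], vec![1, 2], vec![3]];

        // Entry state per block: a counter saturating at 10, joined by `max`.
        let mut results = vec![0u32; successors.len()];
        results[0] = 1;

        for (scc, blocks) in sccs.iter().enumerate() {
            // Seed the worklist with this SCC's blocks, in RPO.
            let mut dirty: VecDeque<usize> = blocks.iter().copied().collect();
            while let Some(bb) = dirty.pop_front() {
                // Transfer function: "add one", saturating at 10.
                let state = (results[bb] + 1).min(10);
                for &target in &successors[bb] {
                    let changed = state > results[target];
                    results[target] = results[target].max(state);
                    // Only re-queue targets inside the current SCC: successors in later
                    // SCCs are processed once, when their own SCC comes up.
                    if changed && component[target] == scc && !dirty.contains(&target) {
                        dirty.push_back(target);
                    }
                }
            }
        }

        // The {1, 2} loop saturates before block 3 is ever processed.
        assert_eq!(results, [1, 10, 10, 10]);
    }

In the patch itself the worklist is the framework's deduplicating `WorkQueue` and the SCC order
comes from Pearce's algorithm, but the requeue-only-within-the-current-SCC rule shown here is
the key idea.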
From 6b01883a04102846cf7392b92dba185a25b5fe15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9my=20Rakic?= Date: Thu, 24 Jul 2025 09:36:43 +0000 Subject: [PATCH 2/3] 1% local win: single pass dataflow --- compiler/rustc_borrowck/src/dataflow.rs | 41 +++-- compiler/rustc_borrowck/src/lib.rs | 202 ++++++++++++++++++++++-- 2 files changed, 217 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_borrowck/src/dataflow.rs b/compiler/rustc_borrowck/src/dataflow.rs index 57db2e9fb574d..d9b1e3f1f2df1 100644 --- a/compiler/rustc_borrowck/src/dataflow.rs +++ b/compiler/rustc_borrowck/src/dataflow.rs @@ -39,9 +39,10 @@ impl<'a, 'tcx> Analysis<'tcx> for Borrowck<'a, 'tcx> { } } - fn initialize_start_block(&self, _body: &mir::Body<'tcx>, _state: &mut Self::Domain) { - // This is only reachable from `iterate_to_fixpoint`, which this analysis doesn't use. - unreachable!(); + fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain) { + self.borrows.initialize_start_block(body, &mut state.borrows); + self.uninits.initialize_start_block(body, &mut state.uninits); + self.ever_inits.initialize_start_block(body, &mut state.ever_inits); } fn apply_early_statement_effect( @@ -83,30 +84,36 @@ impl<'a, 'tcx> Analysis<'tcx> for Borrowck<'a, 'tcx> { term: &'mir mir::Terminator<'tcx>, loc: Location, ) -> TerminatorEdges<'mir, 'tcx> { - self.borrows.apply_primary_terminator_effect(&mut state.borrows, term, loc); - self.uninits.apply_primary_terminator_effect(&mut state.uninits, term, loc); - self.ever_inits.apply_primary_terminator_effect(&mut state.ever_inits, term, loc); + let _edges1 = self.borrows.apply_primary_terminator_effect(&mut state.borrows, term, loc); + let _edges2 = self.uninits.apply_primary_terminator_effect(&mut state.uninits, term, loc); + let edges3 = + self.ever_inits.apply_primary_terminator_effect(&mut state.ever_inits, term, loc); - // This return value doesn't matter. It's only used by `iterate_to_fixpoint`, which this - // analysis doesn't use. - TerminatorEdges::None + // assert_eq!(_edges1, _edges2); + // assert_eq!(_edges2, edges3); + + edges3 } fn apply_call_return_effect( &mut self, - _state: &mut Self::Domain, - _block: BasicBlock, - _return_places: CallReturnPlaces<'_, 'tcx>, + state: &mut Self::Domain, + block: BasicBlock, + return_places: CallReturnPlaces<'_, 'tcx>, ) { - // This is only reachable from `iterate_to_fixpoint`, which this analysis doesn't use. - unreachable!(); + self.borrows.apply_call_return_effect(&mut state.borrows, block, return_places); + self.uninits.apply_call_return_effect(&mut state.uninits, block, return_places); + self.ever_inits.apply_call_return_effect(&mut state.ever_inits, block, return_places); } } impl JoinSemiLattice for BorrowckDomain { - fn join(&mut self, _other: &Self) -> bool { - // This is only reachable from `iterate_to_fixpoint`, which this analysis doesn't use. - unreachable!(); + fn join(&mut self, other: &Self) -> bool { + let mut changed = false; + changed |= self.borrows.join(&other.borrows); + changed |= self.uninits.join(&other.uninits); + changed |= self.ever_inits.join(&other.ever_inits); + changed } } diff --git a/compiler/rustc_borrowck/src/lib.rs b/compiler/rustc_borrowck/src/lib.rs index 321b18c9b78b2..36a09f7f56f07 100644 --- a/compiler/rustc_borrowck/src/lib.rs +++ b/compiler/rustc_borrowck/src/lib.rs @@ -436,15 +436,19 @@ fn do_mir_borrowck<'tcx>( // Compute and report region errors, if any. 
     mbcx.report_region_errors(nll_errors);
 
-    let (mut flow_analysis, flow_entry_states) =
-        get_flow_results(tcx, body, &move_data, &borrow_set, &regioncx);
-    visit_results(
-        body,
-        traversal::reverse_postorder(body).map(|(bb, _)| bb),
-        &mut flow_analysis,
-        &flow_entry_states,
-        &mut mbcx,
-    );
+    if body.basic_blocks.is_cfg_cyclic() {
+        let (mut flow_analysis, flow_entry_states) =
+            get_flow_results(tcx, body, &move_data, &borrow_set, &regioncx);
+        visit_results(
+            body,
+            traversal::reverse_postorder(body).map(|(bb, _)| bb),
+            &mut flow_analysis,
+            &flow_entry_states,
+            &mut mbcx,
+        );
+    } else {
+        compute_dataflow(tcx, body, &move_data, &borrow_set, &regioncx, &mut mbcx);
+    }
 
     mbcx.report_move_errors();
@@ -497,6 +501,186 @@ fn do_mir_borrowck<'tcx>(
     result
 }
 
+fn compute_dataflow<'a, 'tcx>(
+    tcx: TyCtxt<'tcx>,
+    body: &'a Body<'tcx>,
+    move_data: &'a MoveData<'tcx>,
+    borrow_set: &'a BorrowSet<'tcx>,
+    regioncx: &RegionInferenceContext<'tcx>,
+    vis: &mut MirBorrowckCtxt<'a, '_, 'tcx>,
+) {
+    let borrows = Borrows::new(tcx, body, regioncx, borrow_set);
+    let uninits = MaybeUninitializedPlaces::new(tcx, body, move_data);
+    let ever_inits = EverInitializedPlaces::new(body, move_data);
+
+    let mut analysis = Borrowck { borrows, uninits, ever_inits };
+
+    // Set up lazily-populated entry states for the CFG.
+    use rustc_middle::mir;
+    use rustc_mir_dataflow::JoinSemiLattice;
+
+    let mut results: IndexVec<BasicBlock, Option<BorrowckDomain>> =
+        IndexVec::from_elem_n(None, body.basic_blocks.len());
+
+    // Ensure the start block has some state in it.
+    results[mir::START_BLOCK] = Some(analysis.bottom_value(body));
+    analysis.initialize_start_block(body, results[mir::START_BLOCK].as_mut().unwrap());
+
+    for (_idx, (block, block_data)) in traversal::reverse_postorder(body).enumerate() {
+        // Apply the block's effects to its entry state.
+        let mut block_state = results[block].take().unwrap_or_else(|| analysis.bottom_value(body));
+
+        vis.visit_block_start(&mut block_state);
+
+        for (statement_index, statement) in block_data.statements.iter().enumerate() {
+            let location = Location { block, statement_index };
+            analysis.apply_early_statement_effect(&mut block_state, statement, location);
+            vis.visit_after_early_statement_effect(
+                &mut analysis,
+                &block_state,
+                statement,
+                location,
+            );
+
+            analysis.apply_primary_statement_effect(&mut block_state, statement, location);
+            vis.visit_after_primary_statement_effect(
+                &mut analysis,
+                &block_state,
+                statement,
+                location,
+            );
+        }
+        let terminator = block_data.terminator();
+        let location = Location { block, statement_index: block_data.statements.len() };
+        analysis.apply_early_terminator_effect(&mut block_state, terminator, location);
+        vis.visit_after_early_terminator_effect(&mut analysis, &block_state, terminator, location);
+
+        let edges =
+            analysis.apply_primary_terminator_effect(&mut block_state, terminator, location);
+        vis.visit_after_primary_terminator_effect(
+            &mut analysis,
+            &block_state,
+            terminator,
+            location,
+        );
+
+        // Notify the visitor that the block is done.
+        vis.visit_block_end(&mut block_state);
+
+        match edges {
+            TerminatorEdges::None => {}
+            TerminatorEdges::Single(target) => match results[target].as_mut() {
+                None => {
+                    results[target] = Some(block_state);
+                }
+                Some(existing_state) => {
+                    existing_state.join(&block_state);
+                }
+            },
+            TerminatorEdges::Double(target, unwind) if target == unwind => {
+                // Both edges lead to the same block: handle this case separately, as
+                // `pick2_mut` below requires two distinct indices.
+                match results[target].as_mut() {
+                    None => {
+                        results[target] = Some(block_state);
+                    }
+                    Some(existing_state) => {
+                        existing_state.join(&block_state);
+                    }
+                }
+            }
+            TerminatorEdges::Double(target, unwind) => match results.pick2_mut(target, unwind) {
+                (None, None) => {
+                    results[target] = Some(block_state.clone());
+                    results[unwind] = Some(block_state);
+                }
+                (None, Some(unwind_state)) => {
+                    unwind_state.join(&block_state);
+                    results[target] = Some(block_state);
+                }
+                (Some(target_state), None) => {
+                    target_state.join(&block_state);
+                    results[unwind] = Some(block_state);
+                }
+                (Some(target_state), Some(unwind_state)) => {
+                    target_state.join(&block_state);
+                    unwind_state.join(&block_state);
+                }
+            },
+            TerminatorEdges::AssignOnReturn { return_, cleanup, place } => {
+                // This must be done *first*, otherwise the unwind path will see the assignments.
+                if let Some(cleanup) = cleanup {
+                    match results[cleanup].as_mut() {
+                        None => {
+                            results[cleanup] = Some(block_state.clone());
+                        }
+                        Some(existing_state) => {
+                            existing_state.join(&block_state);
+                        }
+                    }
+                }
+
+                if !return_.is_empty() {
+                    analysis.apply_call_return_effect(&mut block_state, block, place);
+
+                    // FIXME: optimize: if we joined into the previous target states instead of
+                    // moving, this clone could be avoided.
+                    let target_count = return_.len();
+                    for &target in return_.iter().take(target_count - 1) {
+                        match results[target].as_mut() {
+                            None => {
+                                results[target] = Some(block_state.clone());
+                            }
+                            Some(existing_state) => {
+                                existing_state.join(&block_state);
+                            }
+                        }
+                    }
+
+                    let target = *return_.last().unwrap();
+                    match results[target].as_mut() {
+                        None => {
+                            results[target] = Some(block_state.clone());
+                        }
+                        Some(existing_state) => {
+                            existing_state.join(&block_state);
+                        }
+                    }
+                }
+            }
+            TerminatorEdges::SwitchInt { targets, discr } => {
+                if let Some(_data) = analysis.get_switch_int_data(block, discr) {
+                    todo!("switch-int edge effects are not implemented; not exercised by tests");
+                } else {
+                    let target_count = targets.all_targets().len();
+                    for &target in targets.all_targets().iter().take(target_count - 1) {
+                        match results[target].as_mut() {
+                            None => {
+                                results[target] = Some(block_state.clone());
+                            }
+                            Some(existing_state) => {
+                                existing_state.join(&block_state);
+                            }
+                        }
+                    }
+
+                    let target = *targets.all_targets().last().unwrap();
+                    match results[target].as_mut() {
+                        None => {
+                            results[target] = Some(block_state.clone());
+                        }
+                        Some(existing_state) => {
+                            existing_state.join(&block_state);
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 fn get_flow_results<'a, 'tcx>(
     tcx: TyCtxt<'tcx>,
     body: &'a Body<'tcx>,

From 907949da56afc73031521d9833d48b28264c3112 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9my=20Rakic?=
Date: Fri, 1 Aug 2025 10:15:12 +0000
Subject: [PATCH 3/3] what a mess

---
 compiler/rustc_borrowck/src/lib.rs            | 3673 ++++++++++++++++-
 .../src/graph/scc/mod.rs                      |    2 +-
 compiler/rustc_middle/src/mir/basic_blocks.rs |  715 +++-
 compiler/rustc_middle/src/mir/mod.rs          |    2 +-
 compiler/rustc_middle/src/mir/terminator.rs   |    4 +-
 .../rustc_mir_dataflow/src/framework/mod.rs   |  364 +-
 .../src/dataflow_const_prop.rs                |   14 +-
 compiler/rustc_mir_transform/src/dest_prop.rs |    7 +-
 8 files changed, 4663 insertions(+), 118 deletions(-)

diff --git a/compiler/rustc_borrowck/src/lib.rs b/compiler/rustc_borrowck/src/lib.rs
index 36a09f7f56f07..a022965168e63 100644
--- a/compiler/rustc_borrowck/src/lib.rs
+++ b/compiler/rustc_borrowck/src/lib.rs
@@ -1,5 +1,6 @@
 //! This query borrow-checks the MIR to (further) ensure it is not broken.
+// ignore-tidy-filelength // tidy-alphabetical-start #![allow(internal_features)] #![doc(rust_logo)] @@ -44,7 +45,8 @@ use rustc_mir_dataflow::impls::{EverInitializedPlaces, MaybeUninitializedPlaces} use rustc_mir_dataflow::move_paths::{ InitIndex, InitLocation, LookupResult, MoveData, MovePathIndex, }; -use rustc_mir_dataflow::{Analysis, Results, ResultsVisitor, visit_results}; +use rustc_mir_dataflow::{Analysis, ResultsVisitor}; +// use rustc_mir_dataflow::{Analysis, Results, ResultsVisitor, visit_results}; use rustc_session::lint::builtin::{TAIL_EXPR_DROP_ORDER, UNUSED_MUT}; use rustc_span::{ErrorGuaranteed, Span, Symbol}; use smallvec::SmallVec; @@ -340,6 +342,125 @@ fn do_mir_borrowck<'tcx>( &borrow_set, ); + // use rustc_data_structures::unify; + + // #[derive(Debug, PartialEq, Copy, Clone)] + // struct Yo(RegionVid); + + // impl unify::UnifyKey for Yo { + // type Value = (); + + // fn index(&self) -> u32 { + // self.0.as_u32() + // } + + // fn from_index(u: u32) -> Self { + // Self(RegionVid::from_u32(u)) + // } + + // fn tag() -> &'static str { + // "Yo!" + // } + // } + + // let timer = std::time::Instant::now(); + // let mut ena = unify::UnificationTable::>::new(); + // let mut keys = Vec::new(); + // for _ in regioncx.definitions.indices() { + // keys.push(ena.new_key(())); + // } + // for c in regioncx.outlives_constraints() { + // // let sup = Yo(c.sup); + // // let sub = Yo(c.sub); + // ena.union(keys[c.sup.as_usize()], keys[c.sub.as_usize()]); + // // table.unify_var_var(sup, sub).unwrap(); + // // table.unify_var_var(a_id, b_id) + // } + // let elapsed_2 = timer.elapsed(); + + // if borrow_set.len() > 0 { + // // if elapsed_1 < elapsed_2 { + // // eprintln!( + // // "table wins: by {} ns", + // // elapsed_2.as_nanos() - elapsed_1.as_nanos() + // // ); + // // } else { + // // eprintln!( + // // "ena wins: by {} ns", + // // elapsed_1.as_nanos() - elapsed_2.as_nanos() + // // ); + // // } + + // // FIXME: check how much it takes if we use this in loans in scope pre-check + // // eprintln!("ena unification took: {} ns", elapsed_2.as_nanos()); + + // // eprintln!( + // // "region union find: {} sets in {} ns, region count: {:?}, scc count: {}, loan count: {}, {:?}", + // // table.count(), + // // elapsed.as_nanos(), + // // regioncx.definitions.len(), + // // regioncx.constraint_sccs().num_sccs(), + // // borrow_set.len(), + // // body.span + // // ); + + // // eprintln!( + // // "region union find: {:5} sets in {} ns, region count: {:?}, scc count: {}, loan count: {}, {:?}", + // // table.count(), + // // elapsed_1.as_nanos(), + // // regioncx.definitions.len(), + // // regioncx.constraint_sccs().num_sccs(), + // // borrow_set.len(), + // // body.span + // // ); + + // // eprintln!( + // // "ena union find: {:5} sets in {} ns, region count: {:?}, scc count: {}, loan count: {}, {:?}", + // // ena.len(), + // // elapsed_2.as_nanos(), + // // regioncx.definitions.len(), + // // regioncx.constraint_sccs().num_sccs(), + // // borrow_set.len(), + // // body.span + // // ); + + // // for (idx, loan) in borrow_set.iter_enumerated() { + // // let borrow_region = loan.region; + // // let same_set: Vec<_> = regioncx + // // .definitions + // // .indices() + // // .filter(|r| table.is_same_set(&borrow_region, &r)) + // // .collect(); + // // // let different_set = regioncx.outlives_constraints().count() - same_set.len(); + // // // eprint!( + // // // "loan {} from region {} involves {} regions in the set, and {} unrelated regions", + // // // idx.as_usize(), + // // // 
borrow_region.as_usize(), + // // // same_set.len(), + // // // different_set, + // // // ); + // // // if same_set.len() < 15 { + // // // eprintln!(", same: {:?}", same_set); + // // // } else { + // // // eprintln!(); + // // // } + + // // let ena_same_set: Vec<_> = regioncx + // // .definitions + // // .indices() + // // .filter(|r| ena.unioned(keys[borrow_region.as_usize()], keys[r.as_usize()])) + // // .collect(); + + // // assert_eq!(same_set, ena_same_set); + + // // // eprintln!( + // // // "loan involves {} regions in the set, ena found {} regions in the set", + // // // same_set.len(), + // // // ena_same_set.len() + // // // ); + // // } + // } + // Dump MIR results into a file, if that is enabled. This lets us // write unit-tests, as well as helping with debugging. nll::dump_nll_mir(&infcx, body, ®ioncx, &opt_closure_req, &borrow_set); @@ -392,6 +513,18 @@ fn do_mir_borrowck<'tcx>( move_errors: Vec::new(), diags_buffer, polonius_diagnostics: polonius_diagnostics.as_ref(), + #[cfg(test)] + nuutila: None, + #[cfg(test)] + duration: 0, + #[cfg(test)] + duration2: 0, + #[cfg(test)] + duration3: 0, + #[cfg(test)] + transitive_predecessors: None, + #[cfg(test)] + locals_checked_for_initialization: FxHashMap::default(), }; struct MoveVisitor<'a, 'b, 'infcx, 'tcx> { ctxt: &'a mut MirBorrowckCtxt<'b, 'infcx, 'tcx>, @@ -431,76 +564,3051 @@ fn do_mir_borrowck<'tcx>( diags_buffer, polonius_output: polonius_output.as_deref(), polonius_diagnostics: polonius_diagnostics.as_ref(), + #[cfg(test)] + nuutila: None, + #[cfg(test)] + duration: 0, + #[cfg(test)] + duration2: 0, + #[cfg(test)] + duration3: 0, + #[cfg(test)] + transitive_predecessors: None, + #[cfg(test)] + locals_checked_for_initialization: FxHashMap::default(), + }; + + // Compute and report region errors, if any. 
+ mbcx.report_region_errors(nll_errors); + + // if body.basic_blocks.len() > 5000 { + // let stmts: usize = + // body.basic_blocks.iter_enumerated().map(|(_idx, block)| block.statements.len()).sum(); + // eprintln!( + // "\nCFG stats, blocks: {}, statements: {}, is cyclic: {}, {:?}", + // body.basic_blocks.len(), + // stmts, + // rustc_data_structures::graph::is_cyclic(&body.basic_blocks), + // body.span, + // ); + // } + + if body.basic_blocks.is_cfg_cyclic() { + // let (mut flow_analysis, flow_entry_states) = + // get_flow_results(tcx, body, &move_data, &borrow_set, ®ioncx); + // visit_results( + // body, + // traversal::reverse_postorder(body).map(|(bb, _)| bb), + // &mut flow_analysis, + // &flow_entry_states, + // &mut mbcx, + // ); + + // let sccs = body.basic_blocks.sccs(); + // let mut single_block = 0; let mut single_successor = 0; let mut single_predecessor = 0; + // for &scc in &sccs.queue { + // if sccs.sccs[scc as usize].len() == 1 { + // single_block += 1; + // } + + // for block in sccs.sccs[scc as usize].iter().copied() { + // if body[block].terminator().successors().count() == 1 { + // single_successor += 1; + // } + + // if body.basic_blocks.predecessors()[block].len() == 1 { + // single_predecessor += 1; + // } + // } + // } + + // eprintln!( + // "CFG, {} blocks, SCCs: {}, single-block SCCs: {}, single-successor blocks: {}, single-predecessor blocks: {}, {:?}", + // body.basic_blocks.len(), + // sccs.component_count, + // single_block, + // single_successor, + // single_predecessor, + // body.span, + // ); + + let borrows = Borrows::new(tcx, body, ®ioncx, &borrow_set); + let uninits = MaybeUninitializedPlaces::new(tcx, body, &move_data); + let ever_inits = EverInitializedPlaces::new(body, &move_data); + compute_cyclic_dataflow(body, borrows, uninits, ever_inits, &mut mbcx); + + // let (_, flow_entry_states) = + // get_flow_results(tcx, body, &move_data, &borrow_set, ®ioncx); + // compute_cyclic_dataflow(body, borrows, uninits, ever_inits, &mut mbcx, &flow_entry_states); + } else { + // compute_dataflow(tcx, body, &move_data, &borrow_set, ®ioncx, &mut mbcx); + + let borrows = Borrows::new(tcx, body, ®ioncx, &borrow_set); + let uninits = MaybeUninitializedPlaces::new(tcx, body, &move_data); + let ever_inits = EverInitializedPlaces::new(body, &move_data); + let mut analysis = Borrowck { borrows, uninits, ever_inits }; + compute_rpo_dataflow(body, &mut analysis, &mut mbcx); + } + + mbcx.report_move_errors(); + + // For each non-user used mutable variable, check if it's been assigned from + // a user-declared local. If so, then put that local into the used_mut set. + // Note that this set is expected to be small - only upvars from closures + // would have a chance of erroneously adding non-user-defined mutable vars + // to the set. + let temporary_used_locals: FxIndexSet = mbcx + .used_mut + .iter() + .filter(|&local| !mbcx.body.local_decls[*local].is_user_variable()) + .cloned() + .collect(); + // For the remaining unused locals that are marked as mutable, we avoid linting any that + // were never initialized. These locals may have been removed as unreachable code; or will be + // linted as unused variables. 
+ let unused_mut_locals = + mbcx.body.mut_vars_iter().filter(|local| !mbcx.used_mut.contains(local)).collect(); + mbcx.gather_used_muts(temporary_used_locals, unused_mut_locals); + + debug!("mbcx.used_mut: {:?}", mbcx.used_mut); + mbcx.lint_unused_mut(); + if let Some(guar) = mbcx.emit_errors() { + mbcx.root_cx.set_tainted_by_errors(guar); + } + + let result = PropagatedBorrowCheckResults { + closure_requirements: opt_closure_req, + used_mut_upvars: mbcx.used_mut_upvars, }; - // Compute and report region errors, if any. - mbcx.report_region_errors(nll_errors); + #[cfg(test)] + if body.basic_blocks.len() > 5000 { + eprintln!("borrow stats, locals: {}, loans: {}", body.local_decls.len(), borrow_set.len()); + eprintln!("nuutila duration: {} ns", mbcx.duration); + eprintln!("predecessor duration: {} ns", mbcx.duration2); + eprintln!("NLL scopes duration: {} ns", mbcx.duration3); + + use std::collections::VecDeque; + + use rustc_data_structures::graph::scc::*; + + // + { + eprint!("SCC tests - {:>30}", "rustc"); + let timer = std::time::Instant::now(); + + type CfgScc = Sccs; + let sccs = CfgScc::new(&body.basic_blocks); + + let elapsed = timer.elapsed(); + // eprintln!(", computed {} SCCs in {} ns", sccs.num_sccs(), elapsed.as_nanos()); + eprint!(", computed {} SCCs in {} ns", sccs.num_sccs(), elapsed.as_nanos()); + + use rustc_index::interval::IntervalSet; + + let timer = std::time::Instant::now(); + let mut components = vec![IntervalSet::new(body.basic_blocks.len()); sccs.num_sccs()]; + for block in body.basic_blocks.indices() { + let scc = sccs.scc(block); + components[scc].insert(block); + } + let elapsed = timer.elapsed(); + + eprintln!(" and SCCs contents in {} ns (intervals)", elapsed.as_nanos(),); + } + + // + { + eprint!("SCC tests - {:>30}", "tarjan SCCs (dense/usize)"); + + struct Scc { + candidate_component_roots: Vec, + components: Vec, + component_count: usize, + dfs_numbers: Vec, + d: u32, + stack: VecDeque, + visited: DenseBitSet, + } + + impl Scc { + fn new(node_count: usize) -> Self { + Self { + candidate_component_roots: vec![0; node_count], + components: vec![-1; node_count], + component_count: 0, + dfs_numbers: vec![0; node_count], + d: 0, + stack: VecDeque::new(), + visited: DenseBitSet::new_empty(node_count), + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = idx.as_usize(); + if !self.visited.contains(idx) { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = -1; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.visited.insert(v); + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = BasicBlock::from_usize(v); + for succ in blocks[idx].terminator().successors() { + let w = succ.as_usize(); + + // if w == v { + // panic!("a dang self loop ?!"); + // } + + if !self.visited.contains(w) { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // 
self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + if self.components[w] == -1 + && self.dfs_numbers[self.candidate_component_roots[w]] + < self.dfs_numbers[self.candidate_component_roots[v]] + { + self.candidate_component_roots[v] = self.candidate_component_roots[w]; + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v] == v { + self.components[v] = self.component_count as isize; + self.component_count += 1; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + while self.stack.front().is_some() + && self.dfs_numbers[*self.stack.front().expect("peek front failed")] + > self.dfs_numbers[v] + { + let w = self.stack.pop_front().expect("pop front failed"); + self.components[w] = self.components[v]; + // println!( + // "v = {v} - popping w = {w} off the stack (contents: {:?}) / C[w] = {}, C[v] = {}", + // self.stack, + // self.components[w], + // self.components[v], + // v = v, + // w = w, + // ); + } + } else { + // println!( + // "v = {v}: pushing v on the stack (contents: {:?})", + // self.stack, + // v = v, + // ); + self.stack.push_front(v); + } + } + } + + let timer = std::time::Instant::now(); + let mut sccs = Scc::new(body.basic_blocks.len()); + sccs.compute_sccs(&body.basic_blocks); + + let elapsed = timer.elapsed(); + // eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos()); + eprint!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos()); + + // sanity checks + // eprintln!("-----"); + // eprintln!("blocks: {}", body.basic_blocks.len()); + // for block in body.basic_blocks.indices() { + // let successors: Vec<_> = + // body.basic_blocks[block].terminator().successors().collect(); + // eprintln!("block: {:?}, successors: {:?}", block, successors); + // } + + let rustc_sccs = Sccs::::new(&body.basic_blocks); + + for block in body.basic_blocks.indices() { + let rustc_scc = rustc_sccs.scc(block); + let scc = sccs.components[block.as_usize()] as usize; + assert_eq!( + rustc_scc, scc, + "sccs differ for {block:?} between tarjan sccs: {scc}, and rustc's scc: {rustc_scc}" + ); + } + + // eprintln!("-----"); + // eprintln!( + // "rustc SCCs: {} (min: {:?}, max: {:?})", + // rustc_sccs.num_sccs(), + // rustc_sccs.all_sccs().min().unwrap(), + // rustc_sccs.all_sccs().max().unwrap() + // ); + // for scc in rustc_sccs.all_sccs() { + // let mut blocks = Vec::new(); + // for block in body.basic_blocks.indices() { + // if rustc_sccs.scc(block) == scc { + // blocks.push(block); + // } + // } + // blocks.sort(); + + // let mut successors: Vec<_> = rustc_sccs.successors(scc).into_iter().collect(); + // successors.sort(); + + // eprintln!("scc: {:?}, contains: {:?}, successors: {:?}", scc, blocks, successors); + // } + + // eprintln!("-----"); + // eprintln!( + // "tarjan SCCs: {} (min: {:?}, max: {:?})", + // sccs.component_count, + // sccs.components.iter().min().unwrap(), + // sccs.components.iter().max().unwrap() + // ); + // for scc in 0..sccs.component_count { + // let mut blocks = Vec::new(); + // for block in body.basic_blocks.indices() { + // if sccs.components[block.as_usize()] == scc { + // blocks.push(block); + // } + // } + // blocks.sort(); + + // let mut successors = Vec::new(); + // for &block in &blocks { + // let block_successors = body.basic_blocks[block].terminator().successors(); + // 
let scc_successors = block_successors + // .map(|block| sccs.components[block.as_usize()]) + // .filter(|&succ_scc| succ_scc != scc); + // successors.extend(scc_successors); + // } + + // successors.sort(); + // successors.dedup(); + + // eprintln!("scc: {:?}, contains: {:?}, successors: {:?}", scc, blocks, successors); + // } + + // let timer = std::time::Instant::now(); + + // use rustc_index::interval::IntervalSet; + // let mut successors = vec![IntervalSet::new(sccs.component_count); sccs.component_count]; + // for block in body.basic_blocks.indices() { + // let scc = sccs.components[block.as_usize()] as usize; + // let scc_successors = body.basic_blocks[block] + // .terminator() + // .successors() + // .map(|block| sccs.components[block.as_usize()] as usize) + // .filter(|&succ_scc| succ_scc != scc); + // for succ in scc_successors { + // successors[scc].insert(succ); + // } + // } + // let elapsed = timer.elapsed(); + + // let timer = std::time::Instant::now(); + // let mut components = + // vec![IntervalSet::new(body.basic_blocks.len()); sccs.component_count]; + // for block in body.basic_blocks.indices() { + // let scc = sccs.components[block.as_usize()] as usize; + // components[scc].insert(block.as_usize()); + // } + // let elapsed2 = timer.elapsed(); + + use rustc_index::interval::IntervalSet; + + let timer = std::time::Instant::now(); + + let mut components = + vec![IntervalSet::new(body.basic_blocks.len()); sccs.component_count]; + let mut successors = vec![IntervalSet::new(sccs.component_count); sccs.component_count]; + for block in body.basic_blocks.indices() { + let scc = sccs.components[block.as_usize()] as usize; + let scc_successors = body.basic_blocks[block] + .terminator() + .successors() + .map(|block| sccs.components[block.as_usize()] as usize) + .filter(|&succ_scc| succ_scc != scc); + for succ in scc_successors { + successors[scc].insert(succ); + } + components[scc].insert(block.as_usize()); + } + let elapsed2 = timer.elapsed(); + + eprintln!(" and SCCs successors/contents in {} ns (intervals)", elapsed2.as_nanos(),); + } + + // + { + eprint!("SCC tests - {:>30}", "tarjan SCCs (mixed/usize)"); + + struct Scc { + candidate_component_roots: Vec, + components: Vec, + component_count: usize, + dfs_numbers: Vec, + d: u32, + stack: VecDeque, + visited: MixedBitSet, + } + + impl Scc { + fn new(node_count: usize) -> Self { + Self { + candidate_component_roots: vec![0; node_count], + components: vec![-1; node_count], + component_count: 0, + dfs_numbers: vec![0; node_count], + d: 0, + stack: VecDeque::new(), + visited: MixedBitSet::new_empty(node_count), + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = idx.as_usize(); + if !self.visited.contains(idx) { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = -1; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.visited.insert(v); + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = BasicBlock::from_usize(v); + for succ in blocks[idx].terminator().successors() { + let w = succ.as_usize(); + + // if w == v { + // panic!("a dang self loop ?!"); + // } + + if !self.visited.contains(w) { + self.dfs_visit(w, 
blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + if self.components[w] == -1 + && self.dfs_numbers[self.candidate_component_roots[w]] + < self.dfs_numbers[self.candidate_component_roots[v]] + { + self.candidate_component_roots[v] = self.candidate_component_roots[w]; + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v] == v { + self.component_count += 1; + self.components[v] = self.component_count as isize; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + while self.stack.front().is_some() + && self.dfs_numbers[*self.stack.front().expect("peek front failed")] + > self.dfs_numbers[v] + { + let w = self.stack.pop_front().expect("pop front failed"); + self.components[w] = self.components[v]; + // println!( + // "v = {v} - popping w = {w} off the stack (contents: {:?}) / C[w] = {}, C[v] = {}", + // self.stack, + // self.components[w], + // self.components[v], + // v = v, + // w = w, + // ); + } + } else { + // println!( + // "v = {v}: pushing v on the stack (contents: {:?})", + // self.stack, + // v = v, + // ); + self.stack.push_front(v); + } + } + } + + let timer = std::time::Instant::now(); + let mut sccs = Scc::new(body.basic_blocks.len()); + sccs.compute_sccs(&body.basic_blocks); + + let elapsed = timer.elapsed(); + eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos()); + } + + // + { + eprint!("SCC tests - {:>30}", "tarjan SCCs (mixed/u32)"); + + struct Scc { + candidate_component_roots: Vec, + components: Vec, + component_count: usize, + dfs_numbers: Vec, + d: u32, + stack: VecDeque, + visited: MixedBitSet, + } + + impl Scc { + fn new(node_count: usize) -> Self { + Self { + candidate_component_roots: vec![0; node_count], + components: vec![-1; node_count], + component_count: 0, + dfs_numbers: vec![0; node_count], + d: 0, + stack: VecDeque::new(), + visited: MixedBitSet::new_empty(node_count), + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = idx.as_u32(); + if !self.visited.contains(idx) { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: u32, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v as usize] = v; + self.components[v as usize] = -1; + + self.d += 1; + self.dfs_numbers[v as usize] = self.d; + + self.visited.insert(v); + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = unsafe { BasicBlock::from_u32_unchecked(v) }; + for succ in blocks[idx].terminator().successors() { + let w = succ.as_u32(); + + // if w == v { + // panic!("a dang self loop ?!"); + // } + + if !self.visited.contains(w) { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = 
{}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + if self.components[w as usize] == -1 + && self.dfs_numbers[self.candidate_component_roots[w as usize] as usize] + < self.dfs_numbers + [self.candidate_component_roots[v as usize] as usize] + { + self.candidate_component_roots[v as usize] = + self.candidate_component_roots[w as usize]; + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v as usize] == v { + self.component_count += 1; + self.components[v as usize] = self.component_count as i32; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + while self.stack.front().is_some() + && self.dfs_numbers + [*self.stack.front().expect("peek front failed") as usize] + > self.dfs_numbers[v as usize] + { + let w = self.stack.pop_front().expect("pop front failed"); + self.components[w as usize] = self.components[v as usize]; + // println!( + // "v = {v} - popping w = {w} off the stack (contents: {:?}) / C[w] = {}, C[v] = {}", + // self.stack, + // self.components[w], + // self.components[v], + // v = v, + // w = w, + // ); + } + } else { + // println!( + // "v = {v}: pushing v on the stack (contents: {:?})", + // self.stack, + // v = v, + // ); + self.stack.push_front(v); + } + } + } + + let timer = std::time::Instant::now(); + let mut sccs = Scc::new(body.basic_blocks.len()); + sccs.compute_sccs(&body.basic_blocks); + + let elapsed = timer.elapsed(); + eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos()); + } + + // + { + eprint!("SCC tests - {:>30}", "tarjan SCCs (mixed/idx)"); + + struct Scc { + candidate_component_roots: IndexVec, + components: IndexVec>, + component_count: u32, + dfs_numbers: IndexVec, + d: u32, + stack: VecDeque, + visited: MixedBitSet, + } + + impl Scc { + fn new(node_count: usize) -> Self { + Self { + candidate_component_roots: IndexVec::from_raw(vec![ + unsafe { + BasicBlock::from_u32_unchecked(0) + }; + node_count + ]), + components: IndexVec::from_raw(vec![None; node_count]), + component_count: 0, + dfs_numbers: IndexVec::from_raw(vec![0; node_count]), + d: 0, + stack: VecDeque::new(), + visited: MixedBitSet::new_empty(node_count), + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + if !self.visited.contains(idx) { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: BasicBlock, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = None; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.visited.insert(v); + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + for w in blocks[v].terminator().successors() { + // if w == v { + // panic!("a dang self loop ?!"); + // } + + if !self.visited.contains(w) { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", 
+                    //     self.components[w],
+                    //     self.components[v],
+                    //     self.dfs_numbers[self.candidate_component_roots[w]],
+                    //     self.dfs_numbers[self.candidate_component_roots[v]],
+                    //     self.candidate_component_roots[v],
+                    //     self.candidate_component_roots[w],
+                    //     w = w,
+                    //     v = v,
+                    // );
+
+                    if self.components[w].is_none()
+                        && self.dfs_numbers[self.candidate_component_roots[w]]
+                            < self.dfs_numbers[self.candidate_component_roots[v]]
+                    {
+                        self.candidate_component_roots[v] = self.candidate_component_roots[w];
+                    }
+                }
+
+                // println!(
+                //     "v = {v} - CCR[v] = {}",
+                //     self.candidate_component_roots[v],
+                //     v = v,
+                // );
+
+                if self.candidate_component_roots[v] == v {
+                    self.component_count += 1;
+                    self.components[v] =
+                        Some(unsafe { BasicBlock::from_u32_unchecked(self.component_count) });
+
+                    // println!(
+                    //     "v = {v} - creating component {} / C[v] = {}",
+                    //     self.component_count,
+                    //     self.components[v],
+                    //     v = v,
+                    // );
+
+                    while self.stack.front().is_some()
+                        && self.dfs_numbers[*self.stack.front().expect("peek front failed")]
+                            > self.dfs_numbers[v]
+                    {
+                        let w = self.stack.pop_front().expect("pop front failed");
+                        self.components[w] = self.components[v];
+                        // println!(
+                        //     "v = {v} - popping w = {w} off the stack (contents: {:?}) / C[w] = {}, C[v] = {}",
+                        //     self.stack,
+                        //     self.components[w],
+                        //     self.components[v],
+                        //     v = v,
+                        //     w = w,
+                        // );
+                    }
+                } else {
+                    // println!(
+                    //     "v = {v}: pushing v on the stack (contents: {:?})",
+                    //     self.stack,
+                    //     v = v,
+                    // );
+                    self.stack.push_front(v);
+                }
+            }
+        }
+
+        let timer = std::time::Instant::now();
+        let mut sccs = Scc::new(body.basic_blocks.len());
+        sccs.compute_sccs(&body.basic_blocks);
+
+        let elapsed = timer.elapsed();
+        eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos());
+    }
+
+    // ---
+
+    {
+        eprint!("SCC tests - {:>30}", "nuutila (interval/vec/usize)");
+
+        use rustc_index::interval::IntervalSet;
+
+        struct Nuutila {
+            candidate_component_roots: Vec<usize>,
+            components: Vec<isize>,
+            component_count: usize,
+            dfs_numbers: Vec<u32>,
+            d: u32,
+            visited: Vec<bool>,
+            stack_vertex: VecDeque<usize>,
+            stack_component: VecDeque<usize>,
+            reachability: Vec<IntervalSet<usize>>,
+            // reachabilly: Vec<HybridBitSet<usize>>,
+        }
+
+        impl Nuutila {
+            fn new(node_count: usize) -> Self {
+                Self {
+                    candidate_component_roots: vec![0; node_count],
+                    components: vec![-1; node_count],
+                    component_count: 0,
+                    dfs_numbers: vec![0; node_count],
+                    d: 0,
+                    visited: vec![false; node_count],
+                    stack_vertex: VecDeque::new(),
+                    stack_component: VecDeque::new(),
+                    reachability: vec![IntervalSet::new(node_count); node_count + 1],
+                    // ^--- reachability is only tracked per component, so fewer than
+                    // `node_count` entries would suffice whenever at least one SCC has
+                    // more than one node.
+                    // reachabilly: vec![HybridBitSet::new_empty(node_count); node_count],
+                }
+            }
+
+            fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) {
+                for (idx, block) in blocks.iter_enumerated() {
+                    let edges = block.terminator().edges();
+                    if matches!(edges, TerminatorEdges::None) {
+                        continue;
+                    }
+
+                    let idx = idx.as_usize();
+                    if !self.visited[idx] {
+                        self.dfs_visit(idx, blocks);
+                    }
+                }
+            }
+
+            // Compute SCCs and reachability only starting where loans appear.
+            // We still have the unused blocks in our domain, but won't traverse them.
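+            // Unvisited blocks keep their initial `-1` component, which lets callers
+            // distinguish "not reachable from any loan" from a real component index.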
+ fn compute_for_loans( + &mut self, + borrow_set: &BorrowSet<'_>, + blocks: &BasicBlocks<'_>, + ) { + for (_loan_idx, loan) in borrow_set.iter_enumerated() { + let block_idx = loan.reserve_location.block; + let block = &blocks[block_idx]; + + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = block_idx.as_usize(); + if !self.visited[idx] { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = -1; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.stack_vertex.push_front(v); + let stack_component_height = self.stack_component.len(); + + self.visited[v] = true; + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = BasicBlock::from_usize(v); + for succ in blocks[idx].terminator().successors() { + let w = succ.as_usize(); + + if w == v { + panic!("a dang self loop ?! at {}", w); + } + + if !self.visited[w] { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + let component_w = self.components[w]; + if component_w == -1 { + if self.dfs_numbers[self.candidate_component_roots[w]] + < self.dfs_numbers[self.candidate_component_roots[v]] + { + self.candidate_component_roots[v] = + self.candidate_component_roots[w]; + } + } else { + assert!(component_w >= 0); + + // FIXME: check if v -> w is actually a forward edge or not, to avoid unnecessary work if it is + self.stack_component.push_front(self.components[w] as usize); + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v] == v { + self.component_count += 1; + self.components[v] = self.component_count as isize; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + // Reachability of C[v] + assert!(self.reachability[self.component_count].is_empty()); + // assert!(self.reachabilly[self.component_count].is_empty()); + + if let Some(&top) = self.stack_vertex.front() { + if top != v { + // we're adding new component, initialize its reachability: self-loop, + // the component can reach itself + // self.reachability[self.component_count] = + // (self.component_count, self.component_count).to_interval_set(); + self.reachability[self.component_count] + .insert(self.component_count); + } else { + // R[C[v]] should be empty here already, do nothing + // if we don't always initialize the reachability of C by default, it would need to be + // initialized to "empty" here. 
+ } + } + + // process adjacent components + while self.stack_component.len() != stack_component_height { + let x = self + .stack_component + .pop_front() + .expect("Sc can't be empty at this point"); + // prevent performing duplicate operations + if !self.reachability[self.component_count].contains(x) { + // merge reachability information + // let r_c_v = self.reachability[self.component_count] + // .union(&self.reachability[x]) + // .union(&(x, x).to_interval_set()); + // self.reachability[self.component_count] = r_c_v; + assert_ne!(x, self.component_count); + + // self.reachability[self.component_count].union(&self.reachability[x]); + // self.reachability[self.component_count].insert(x); + + // let mut r_c_v = self.reachability[self.component_count].clone(); + // r_c_v.union(&self.reachability[x]); + // r_c_v.insert(x); + // self.reachability[self.component_count] = r_c_v; + + let zzz = unsafe { + self.reachability.get_unchecked(x) as *const IntervalSet + }; + let r_c_v = unsafe { + self.reachability.get_unchecked_mut(self.component_count) + }; + // r_c_v.union(&self.reachability[x]); + r_c_v.union(unsafe { &*zzz }); + r_c_v.insert(x); + } + + // // prevent performing duplicate operations + // if !self.reachabilly[self.component_count].contains(x) { + // // merge reachability information + // assert!(x != self.component_count); + + // self.reachabilly[self.component_count].insert(x); + + // // split the array into two slices, starting at the lowest of the 2 + // // the lowest will be the first in the array and the highest the last + // let low = self.component_count.min(x); + // let high = self.component_count.max(x); + // let interval = &mut self.reachabilly[low..=high]; + // let (a, b) = interval.split_at_mut(1); + // let (component_reachabilly, x_reachabilly) = if self.component_count == low { + // (&mut a[0], &mut b[b.len() - 1]) + // } else { + // (&mut b[0], &mut a[a.len() - 1]) + // }; + + // component_reachabilly.union(x_reachabilly); + // } + } + + while let Some(w) = self.stack_vertex.pop_front() { + self.components[w] = self.components[v]; + + if w == v { + break; + } + } + } + } + } + + let timer = std::time::Instant::now(); + let mut sccs = Nuutila::new(body.basic_blocks.len()); + sccs.compute_sccs(&body.basic_blocks); + // sccs.compute_for_loans(&borrow_set, &body.basic_blocks); + + let elapsed = timer.elapsed(); + eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos()); + } + + { + eprint!("SCC tests - {:>30}", "nuutila' (interval/vec/usize)"); + + use rustc_index::interval::IntervalSet; + + struct Nuutila { + candidate_component_roots: Vec, + components: Vec, + component_count: usize, + dfs_numbers: Vec, + d: u32, + visited: Vec, + stack_vertex: VecDeque, + stack_component: VecDeque, + reachability: Vec>, + // reachabilly: Vec>, + } + + impl Nuutila { + fn new(node_count: usize) -> Self { + Self { + candidate_component_roots: vec![0; node_count], + components: vec![-1; node_count], + component_count: 0, + dfs_numbers: vec![0; node_count], + d: 0, + visited: vec![false; node_count], + stack_vertex: VecDeque::new(), + stack_component: VecDeque::new(), + reachability: vec![IntervalSet::new(node_count); node_count + 1], + // ^--- la reachability c'est celle que des composants donc il en faut moins que `node_count` s'il y a au moins un SCC avec > 1 nodes + // reachabilly: vec![HybridBitSet::new_empty(node_count); node_count], + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let 
edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = idx.as_usize(); + if !self.visited[idx] { + self.dfs_visit(idx, blocks); + } + } + } + + // Compute SCCs and reachability only starting where loans appear. + // We still have the unused blocks in our domain, but won't traverse them. + fn compute_for_loans( + &mut self, + borrow_set: &BorrowSet<'_>, + blocks: &BasicBlocks<'_>, + ) { + for (_loan_idx, loan) in borrow_set.iter_enumerated() { + let block_idx = loan.reserve_location.block; + let block = &blocks[block_idx]; + + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = block_idx.as_usize(); + if !self.visited[idx] { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = -1; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.stack_vertex.push_front(v); + let stack_component_height = self.stack_component.len(); + + self.visited[v] = true; + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = BasicBlock::from_usize(v); + for succ in blocks[idx].terminator().successors() { + let w = succ.as_usize(); + + if w == v { + panic!("a dang self loop ?! at {}", w); + } + + if !self.visited[w] { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + let component_w = self.components[w]; + if component_w == -1 { + if self.dfs_numbers[self.candidate_component_roots[w]] + < self.dfs_numbers[self.candidate_component_roots[v]] + { + self.candidate_component_roots[v] = + self.candidate_component_roots[w]; + } + } else { + assert!(component_w >= 0); + + // FIXME: check if v -> w is actually a forward edge or not, to avoid unnecessary work if it is + self.stack_component.push_front(self.components[w] as usize); + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v] == v { + self.component_count += 1; + self.components[v] = self.component_count as isize; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + // Reachability of C[v] + assert!(self.reachability[self.component_count].is_empty()); + // assert!(self.reachabilly[self.component_count].is_empty()); + + if let Some(&top) = self.stack_vertex.front() { + if top != v { + // we're adding new component, initialize its reachability: self-loop, + // the component can reach itself + // self.reachability[self.component_count] = + // (self.component_count, self.component_count).to_interval_set(); + self.reachability[self.component_count] + .insert(self.component_count); + } else { + // R[C[v]] should be empty here already, do nothing + // if we don't always initialize the reachability of C by default, it would need to be + // initialized to "empty" here. 
+                    }
+                }
+
+                // process adjacent components
+                while self.stack_component.len() != stack_component_height {
+                    let x = self
+                        .stack_component
+                        .pop_front()
+                        .expect("stack_component can't be empty at this point");
+                    // prevent performing duplicate operations
+                    if !self.reachability[self.component_count].contains(x) {
+                        // merge reachability information
+                        // let r_c_v = self.reachability[self.component_count]
+                        //     .union(&self.reachability[x])
+                        //     .union(&(x, x).to_interval_set());
+                        // self.reachability[self.component_count] = r_c_v;
+                        assert_ne!(x, self.component_count);
+
+                        // self.reachability[self.component_count].union(&self.reachability[x]);
+                        // self.reachability[self.component_count].insert(x);
+
+                        // let mut r_c_v = self.reachability[self.component_count].clone();
+                        // r_c_v.union(&self.reachability[x]);
+                        // r_c_v.insert(x);
+                        // self.reachability[self.component_count] = r_c_v;
+
+                        // SAFETY: `x != self.component_count` is asserted above, so the
+                        // shared borrow of `reachability[x]` and the mutable borrow of
+                        // `reachability[self.component_count]` don't alias, and both
+                        // indices are in bounds of the `node_count + 1` entries.
+                        let x_reachability = unsafe {
+                            self.reachability.get_unchecked(x) as *const IntervalSet<usize>
+                        };
+                        let r_c_v = unsafe {
+                            self.reachability.get_unchecked_mut(self.component_count)
+                        };
+                        // r_c_v.union(&self.reachability[x]);
+                        r_c_v.union(unsafe { &*x_reachability });
+                        r_c_v.insert(x);
+                    }
+
+                    // // prevent performing duplicate operations
+                    // if !self.reachabilly[self.component_count].contains(x) {
+                    //     // merge reachability information
+                    //     assert!(x != self.component_count);
+
+                    //     self.reachabilly[self.component_count].insert(x);
+
+                    //     // split the array into two slices, starting at the lowest of the 2
+                    //     // the lowest will be the first in the array and the highest the last
+                    //     let low = self.component_count.min(x);
+                    //     let high = self.component_count.max(x);
+                    //     let interval = &mut self.reachabilly[low..=high];
+                    //     let (a, b) = interval.split_at_mut(1);
+                    //     let (component_reachabilly, x_reachabilly) = if self.component_count == low {
+                    //         (&mut a[0], &mut b[b.len() - 1])
+                    //     } else {
+                    //         (&mut b[0], &mut a[a.len() - 1])
+                    //     };
+
+                    //     component_reachabilly.union(x_reachabilly);
+                    // }
+                }
+
+                while let Some(w) = self.stack_vertex.pop_front() {
+                    self.components[w] = self.components[v];
+
+                    if w == v {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    let timer = std::time::Instant::now();
+    let mut sccs = Nuutila::new(body.basic_blocks.len());
+    // sccs.compute_sccs(&body.basic_blocks);
+    sccs.compute_for_loans(&borrow_set, &body.basic_blocks);
+
+    let elapsed = timer.elapsed();
+    eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos());
+}
+
+{
+    eprint!("SCC tests - {:>30}", "nuutila (interval/dense/usize)");
+
+    use rustc_index::interval::IntervalSet;
+
+    struct Nuutila {
+        candidate_component_roots: Vec<usize>,
+        components: Vec<isize>,
+        component_count: usize,
+        dfs_numbers: Vec<u32>,
+        d: u32,
+        visited: DenseBitSet<usize>,
+        stack_vertex: VecDeque<usize>,
+        stack_component: VecDeque<usize>,
+        reachability: Vec<IntervalSet<usize>>,
+        // reachabilly: Vec<HybridBitSet<usize>>,
+    }
+
+    impl Nuutila {
+        fn new(node_count: usize) -> Self {
+            Self {
+                candidate_component_roots: vec![0; node_count],
+                components: vec![-1; node_count],
+                component_count: 0,
+                dfs_numbers: vec![0; node_count],
+                d: 0,
+                visited: DenseBitSet::new_empty(node_count),
+                stack_vertex: VecDeque::new(),
+                stack_component: VecDeque::new(),
+                reachability: vec![IntervalSet::new(node_count); node_count + 1],
+                // ^--- reachability is only stored per *component*, so fewer than
+                // `node_count` entries would suffice whenever at least one SCC has
+                // more than one node.
+                // reachabilly: vec![HybridBitSet::new_empty(node_count); node_count],
+            }
+        }
+
+        // Compute SCCs and reachability only starting where loans appear.
+ // We still have the unused blocks in our domain, but won't traverse them. + fn compute_for_loans( + &mut self, + borrow_set: &BorrowSet<'_>, + blocks: &BasicBlocks<'_>, + ) { + for (_loan_idx, loan) in borrow_set.iter_enumerated() { + let block_idx = loan.reserve_location.block; + let block = &blocks[block_idx]; + + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = block_idx.as_usize(); + if !self.visited.contains(idx) { + self.dfs_visit(idx, blocks); + } + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = idx.as_usize(); + if !self.visited.contains(idx) { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = -1; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.stack_vertex.push_front(v); + let stack_component_height = self.stack_component.len(); + + self.visited.insert(v); + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = BasicBlock::from_usize(v); + for succ in blocks[idx].terminator().successors() { + let w = succ.as_usize(); + + if w == v { + panic!("a dang self loop ?! at {}", w); + } + + if !self.visited.contains(w) { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + let component_w = self.components[w]; + if component_w == -1 { + if self.dfs_numbers[self.candidate_component_roots[w]] + < self.dfs_numbers[self.candidate_component_roots[v]] + { + self.candidate_component_roots[v] = + self.candidate_component_roots[w]; + } + } else { + assert!(component_w >= 0); + + // FIXME: check if v -> w is actually a forward edge or not, to avoid unnecessary work if it is + self.stack_component.push_front(self.components[w] as usize); + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v] == v { + self.component_count += 1; + self.components[v] = self.component_count as isize; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + // Reachability of C[v] + assert!(self.reachability[self.component_count].is_empty()); + // assert!(self.reachabilly[self.component_count].is_empty()); + + if let Some(&top) = self.stack_vertex.front() { + if top != v { + // we're adding new component, initialize its reachability: self-loop, + // the component can reach itself + // self.reachability[self.component_count] = + // (self.component_count, self.component_count).to_interval_set(); + self.reachability[self.component_count] + .insert(self.component_count); + } else { + // R[C[v]] should be empty here already, do nothing + // if we don't always initialize the reachability of C by default, it would need to be + // initialized to "empty" 
here.
+                    }
+                }
+
+                // process adjacent components
+                while self.stack_component.len() != stack_component_height {
+                    let x = self
+                        .stack_component
+                        .pop_front()
+                        .expect("stack_component can't be empty at this point");
+                    // prevent performing duplicate operations
+                    if !self.reachability[self.component_count].contains(x) {
+                        // merge reachability information
+                        // let r_c_v = self.reachability[self.component_count]
+                        //     .union(&self.reachability[x])
+                        //     .union(&(x, x).to_interval_set());
+                        // self.reachability[self.component_count] = r_c_v;
+                        assert_ne!(x, self.component_count);
+
+                        // self.reachability[self.component_count].union(&self.reachability[x]);
+                        // self.reachability[self.component_count].insert(x);
+
+                        // let mut r_c_v = self.reachability[self.component_count].clone();
+                        // r_c_v.union(&self.reachability[x]);
+                        // r_c_v.insert(x);
+                        // self.reachability[self.component_count] = r_c_v;
+
+                        // SAFETY: `x != self.component_count` is asserted above, so the
+                        // shared borrow of `reachability[x]` and the mutable borrow of
+                        // `reachability[self.component_count]` don't alias, and both
+                        // indices are in bounds of the `node_count + 1` entries.
+                        let x_reachability = unsafe {
+                            self.reachability.get_unchecked(x) as *const IntervalSet<usize>
+                        };
+                        let r_c_v = unsafe {
+                            self.reachability.get_unchecked_mut(self.component_count)
+                        };
+                        // r_c_v.union(&self.reachability[x]);
+                        r_c_v.union(unsafe { &*x_reachability });
+                        r_c_v.insert(x);
+                    }
+
+                    // // prevent performing duplicate operations
+                    // if !self.reachabilly[self.component_count].contains(x) {
+                    //     // merge reachability information
+                    //     assert!(x != self.component_count);
+
+                    //     self.reachabilly[self.component_count].insert(x);
+
+                    //     // split the array into two slices, starting at the lowest of the 2
+                    //     // the lowest will be the first in the array and the highest the last
+                    //     let low = self.component_count.min(x);
+                    //     let high = self.component_count.max(x);
+                    //     let interval = &mut self.reachabilly[low..=high];
+                    //     let (a, b) = interval.split_at_mut(1);
+                    //     let (component_reachabilly, x_reachabilly) = if self.component_count == low {
+                    //         (&mut a[0], &mut b[b.len() - 1])
+                    //     } else {
+                    //         (&mut b[0], &mut a[a.len() - 1])
+                    //     };
+
+                    //     component_reachabilly.union(x_reachabilly);
+                    // }
+                }
+
+                while let Some(w) = self.stack_vertex.pop_front() {
+                    self.components[w] = self.components[v];
+
+                    if w == v {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    let timer = std::time::Instant::now();
+    let mut sccs = Nuutila::new(body.basic_blocks.len());
+    // sccs.compute_sccs(&body.basic_blocks);
+    sccs.compute_for_loans(&borrow_set, &body.basic_blocks);
+
+    let elapsed = timer.elapsed();
+    eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos());
+}
+
+{
+    eprint!("SCC tests - {:>30}", "nuutila (mixed/vec/usize)");
+
+    struct Nuutila {
+        candidate_component_roots: Vec<usize>,
+        components: Vec<isize>,
+        component_count: usize,
+        dfs_numbers: Vec<u32>,
+        d: u32,
+        visited: Vec<bool>,
+        stack_vertex: VecDeque<usize>,
+        stack_component: VecDeque<usize>,
+        // reachability: Vec<IntervalSet<usize>>,
+        reachability: Vec<MixedBitSet<usize>>,
+    }
+
+    impl Nuutila {
+        fn new(node_count: usize) -> Self {
+            Self {
+                candidate_component_roots: vec![0; node_count],
+                components: vec![-1; node_count],
+                component_count: 0,
+                dfs_numbers: vec![0; node_count],
+                d: 0,
+                visited: vec![false; node_count],
+                stack_vertex: VecDeque::new(),
+                stack_component: VecDeque::new(),
+                reachability: vec![MixedBitSet::new_empty(node_count); node_count + 1],
+                // ^--- reachability is only stored per *component*, so fewer than
+                // `node_count` entries would suffice whenever at least one SCC has
+                // more than one node.
+                // reachabilly: vec![HybridBitSet::new_empty(node_count); node_count],
+            }
+        }
+
+        // Compute SCCs and reachability only starting where loans appear.
+        // We still have the unused blocks in our domain, but won't traverse them.
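+        // Blocks never reached from any loan keep the `-1` sentinel written into
+        // `components` by `new`, so component lookups have to guard on it. As a
+        // minimal sketch (hypothetical helper, not part of this patch):
+        //
+        //     fn component_of(&self, block: usize) -> Option<usize> {
+        //         match self.components[block] {
+        //             -1 => None,
+        //             c => Some(c as usize),
+        //         }
+        //     }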
+ fn compute_for_loans( + &mut self, + borrow_set: &BorrowSet<'_>, + blocks: &BasicBlocks<'_>, + ) { + for (_loan_idx, loan) in borrow_set.iter_enumerated() { + let block_idx = loan.reserve_location.block; + let block = &blocks[block_idx]; + + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = block_idx.as_usize(); + if !self.visited[idx] { + self.dfs_visit(idx, blocks); + } + } + } + + fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) { + for (idx, block) in blocks.iter_enumerated() { + let edges = block.terminator().edges(); + if matches!(edges, TerminatorEdges::None) { + continue; + } + + let idx = idx.as_usize(); + if !self.visited[idx] { + self.dfs_visit(idx, blocks); + } + } + } + + fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) { + self.candidate_component_roots[v] = v; + self.components[v] = -1; + + self.d += 1; + self.dfs_numbers[v] = self.d; + + self.stack_vertex.push_front(v); + let stack_component_height = self.stack_component.len(); + + self.visited[v] = true; + + // println!( + // "dfs_visit, v = {v}, CCR[{v}] = {}, D[{v}] = {}", + // self.candidate_component_roots[v], + // self.dfs_numbers[v], + // v = v + // ); + + let idx = BasicBlock::from_usize(v); + for succ in blocks[idx].terminator().successors() { + let w = succ.as_usize(); + + if w == v { + panic!("a dang self loop ?! at {}", w); + } + + if !self.visited[w] { + self.dfs_visit(w, blocks); + } + + // println!( + // "v = {v} - adjacent vertex w = {w}: C[w] = {}, C[v] = {} / D[CCR[w]] = {}, D[CCR[v]] = {} / CCR[v] = {}, CCR[w] = {}", + // self.components[w], + // self.components[v], + // self.dfs_numbers[self.candidate_component_roots[w]], + // self.dfs_numbers[self.candidate_component_roots[v]], + // self.candidate_component_roots[v], + // self.candidate_component_roots[w], + // w = w, + // v = v, + // ); + + let component_w = self.components[w]; + if component_w == -1 { + if self.dfs_numbers[self.candidate_component_roots[w]] + < self.dfs_numbers[self.candidate_component_roots[v]] + { + self.candidate_component_roots[v] = + self.candidate_component_roots[w]; + } + } else { + assert!(component_w >= 0); + + // FIXME: check if v -> w is actually a forward edge or not, to avoid unnecessary work if it is + self.stack_component.push_front(self.components[w] as usize); + } + } + + // println!( + // "v = {v} - CCR[v] = {}", + // self.candidate_component_roots[v], + // v = v, + // ); + + if self.candidate_component_roots[v] == v { + self.component_count += 1; + self.components[v] = self.component_count as isize; + + // println!( + // "v = {v} - creating component {} / C[v] = {}", + // self.component_count, + // self.components[v], + // v = v, + // ); + + // Reachability of C[v] + assert!(self.reachability[self.component_count].is_empty()); + // assert!(self.reachabilly[self.component_count].is_empty()); + + if let Some(&top) = self.stack_vertex.front() { + if top != v { + // we're adding new component, initialize its reachability: self-loop, + // the component can reach itself + // self.reachability[self.component_count] = + // (self.component_count, self.component_count).to_interval_set(); + self.reachability[self.component_count] + .insert(self.component_count); + } else { + // R[C[v]] should be empty here already, do nothing + // if we don't always initialize the reachability of C by default, it would need to be + // initialized to "empty" here. 
+                    }
+                }
+
+                // process adjacent components
+                while self.stack_component.len() != stack_component_height {
+                    let x = self
+                        .stack_component
+                        .pop_front()
+                        .expect("stack_component can't be empty at this point");
+                    // prevent performing duplicate operations
+                    if !self.reachability[self.component_count].contains(x) {
+                        // merge reachability information
+                        // let r_c_v = self.reachability[self.component_count]
+                        //     .union(&self.reachability[x])
+                        //     .union(&(x, x).to_interval_set());
+                        // self.reachability[self.component_count] = r_c_v;
+                        assert_ne!(x, self.component_count);
+
+                        // self.reachability[self.component_count].union(&self.reachability[x]);
+                        // self.reachability[self.component_count].insert(x);
+
+                        // let mut r_c_v = self.reachability[self.component_count].clone();
+                        // r_c_v.union(&self.reachability[x]);
+                        // r_c_v.insert(x);
+                        // self.reachability[self.component_count] = r_c_v;
+
+                        // SAFETY: `x != self.component_count` is asserted above, so the
+                        // shared borrow of `reachability[x]` and the mutable borrow of
+                        // `reachability[self.component_count]` don't alias, and both
+                        // indices are in bounds of the `node_count + 1` entries.
+                        let x_reachability = unsafe { self.reachability.get_unchecked(x) as *const _ };
+                        let r_c_v = unsafe {
+                            self.reachability.get_unchecked_mut(self.component_count)
+                        };
+                        // r_c_v.union(&self.reachability[x]);
+                        r_c_v.union(unsafe { &*x_reachability });
+                        r_c_v.insert(x);
+                    }
+
+                    // // prevent performing duplicate operations
+                    // if !self.reachabilly[self.component_count].contains(x) {
+                    //     // merge reachability information
+                    //     assert!(x != self.component_count);
+
+                    //     self.reachabilly[self.component_count].insert(x);
+
+                    //     // split the array into two slices, starting at the lowest of the 2
+                    //     // the lowest will be the first in the array and the highest the last
+                    //     let low = self.component_count.min(x);
+                    //     let high = self.component_count.max(x);
+                    //     let interval = &mut self.reachabilly[low..=high];
+                    //     let (a, b) = interval.split_at_mut(1);
+                    //     let (component_reachabilly, x_reachabilly) = if self.component_count == low {
+                    //         (&mut a[0], &mut b[b.len() - 1])
+                    //     } else {
+                    //         (&mut b[0], &mut a[a.len() - 1])
+                    //     };
+
+                    //     component_reachabilly.union(x_reachabilly);
+                    // }
+                }
+
+                while let Some(w) = self.stack_vertex.pop_front() {
+                    self.components[w] = self.components[v];
+
+                    if w == v {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    let timer = std::time::Instant::now();
+    let mut sccs = Nuutila::new(body.basic_blocks.len());
+    // sccs.compute_sccs(&body.basic_blocks);
+    sccs.compute_for_loans(&borrow_set, &body.basic_blocks);
+
+    let elapsed = timer.elapsed();
+    eprintln!(", computed {} SCCs in {} ns", sccs.component_count, elapsed.as_nanos());
+}
+
+{
+    // // CFG stats, blocks: 18073, statements: 67351
+    // // borrow stats, locals: 21513, loans: 6232
+    // if body.basic_blocks.len() == 18073
+    //     && body.local_decls.len() == 21513
+    //     && borrow_set.len() == 6232
+    {
+        eprint!("SCC tests - {:>30}", "tagebility");
+
+        let timer = std::time::Instant::now();
+
+        // Compute `transitive_predecessors` and `adjacent_predecessors`.
+        let mut transitive_predecessors = IndexVec::from_elem_n(
+            DenseBitSet::new_empty(body.basic_blocks.len()),
+            body.basic_blocks.len(),
+        );
+        let mut adjacent_predecessors = transitive_predecessors.clone();
+        // The stack is initially a reversed postorder traversal of the CFG. However, we might
+        // add blocks to the stack again if we have loops.
+        let mut stack =
+            body.basic_blocks.reverse_postorder().iter().rev().copied().collect::<Vec<_>>();
+        // We keep track of all blocks that are currently not in the stack.
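+        // The loop below is a standard bitset worklist: pop a block, union its
+        // predecessor sets into every successor, and re-push a successor whose
+        // set grew. As an illustration (not extra behavior): for a diamond
+        //
+        //     bb0 -> { bb1, bb2 } -> bb3
+        //
+        // the pass ends with `adjacent_predecessors[bb3] == { bb1, bb2 }` and
+        // `transitive_predecessors[bb3] == { bb0, bb1, bb2 }`; a back edge such
+        // as bb3 -> bb1 keeps re-enqueueing bb1 until both sets stop growing.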
+ let mut not_in_stack = DenseBitSet::new_empty(body.basic_blocks.len()); + while let Some(block) = stack.pop() { + not_in_stack.insert(block); + + // Loop over all successors to the block and add `block` to their predecessors. + for succ_block in body.basic_blocks[block].terminator().successors() { + // Keep track of whether the transitive predecessors of `succ_block` has changed. + let mut changed = false; + + // Insert `block` in `succ_block`s predecessors. + if adjacent_predecessors[succ_block].insert(block) { + // Remember that `adjacent_predecessors` is a subset of + // `transitive_predecessors`. + changed |= transitive_predecessors[succ_block].insert(block); + } + + // Add all transitive predecessors of `block` to the transitive predecessors of + // `succ_block`. + if block != succ_block { + let (blocks_predecessors, succ_blocks_predecessors) = + transitive_predecessors.pick2_mut(block, succ_block); + changed |= succ_blocks_predecessors.union(blocks_predecessors); + + // Check if the `succ_block`s transitive predecessors changed. If so, we may + // need to add it to the stack again. + if changed && not_in_stack.remove(succ_block) { + stack.push(succ_block); + } + } + } + + // debug_assert!( + // transitive_predecessors[block].superset(&adjacent_predecessors[block]) + // ); + } + + let elapsed = timer.elapsed(); + + let w = 2 + Sccs::::new(&body.basic_blocks).num_sccs().ilog10() + as usize; + // computed 2757 SCCs in 1535259 ns + // computed 11600 SCCs in 6472961 ns + // 2 + ilog + eprintln!(", {:w$} predecessors in {} ns", " ", elapsed.as_nanos()); + } + } + } + + // if std::env::var("LETSGO").is_ok() { + // eprintln!( + // "initialization checks: {} locals out of {} total, at {} statements out of {} total, in {} blocks out of {} total, in {} transitive blocks out {} total, cyclic cfg: {}", + // mbcx.locals_checked_for_initialization.len(), + // body.local_decls.len(), + // mbcx.locals_checked_for_initialization + // .iter() + // .flat_map(|(_, locations)| locations.iter()) + // .collect::>() + // .len(), + // body.basic_blocks + // .iter_enumerated() + // .map(|(_, bb)| bb.statements.len() + 1) + // .sum::(), + // mbcx.locals_checked_for_initialization + // .iter() + // .flat_map(|(_, locations)| locations.iter().map(|l| l.block)) + // .collect::>() + // .len(), + // body.basic_blocks.len(), + // mbcx.locals_checked_for_initialization + // .iter() + // .flat_map(|(_, locations)| locations.iter().map(|l| l.block)) + // .flat_map(|block| std::iter::once(block) + // .chain(body.basic_blocks.predecessors()[block].iter().copied())) + // .collect::>() + // .len(), + // body.basic_blocks.len(), + // body.basic_blocks.is_cfg_cyclic(), + // ); + + // // eprintln!( + // // "initialization checks: {} move paths out of {} total, at {} statements out of {} total", + // // mbcx.locals_checked_for_initialization.len(), + // // move_data.init_path_map.len(), + // // mbcx.locals_checked_for_initialization + // // .iter() + // // .flat_map(|(_, locations)| locations.iter()) + // // .collect::>() + // // .len(), + // // body.basic_blocks + // // .iter_enumerated() + // // .map(|(_, bb)| bb.statements.len() + 1) + // // .sum::(), + // // ); + // } + + if let Some(consumer) = &mut root_cx.consumer { + consumer.insert_body( + def, + BodyWithBorrowckFacts { + body: body_owned, + promoted, + borrow_set, + region_inference_context: regioncx, + location_table: polonius_input.as_ref().map(|_| location_table), + input_facts: polonius_input, + output_facts: polonius_output, + }, + ); + } + + 
 debug!("do_mir_borrowck: result = {:#?}", result);
+
+    result
+}
+
+fn compute_cyclic_dataflow<'mir, 'tcx>(
+    body: &Body<'tcx>,
+    borrows: Borrows<'mir, 'tcx>,
+    uninits: MaybeUninitializedPlaces<'mir, 'tcx>,
+    ever_inits: EverInitializedPlaces<'mir, 'tcx>,
+    vis: &mut MirBorrowckCtxt<'mir, '_, 'tcx>,
+    // flow_entry_states: &IndexVec<BasicBlock, BorrowckDomain>,
+) {
+    use rustc_data_structures::work_queue::WorkQueue;
+    use rustc_middle::mir;
+    use rustc_mir_dataflow::{Direction, Forward, JoinSemiLattice};
+
+    struct AnalysisHolder<'tcx, T: Analysis<'tcx>> {
+        // results: IndexVec<BasicBlock, T::Domain>,
+        lazy_results: IndexVec<BasicBlock, Option<T::Domain>>,
+        dirty_queue: WorkQueue<BasicBlock>,
+    }
+
+    impl<'tcx, T: Analysis<'tcx>> AnalysisHolder<'tcx, T> {
+        fn new(body: &Body<'tcx>, analysis: &T) -> Self {
+            // let mut results =
+            //     IndexVec::from_fn_n(|_| analysis.bottom_value(body), body.basic_blocks.len());
+            // analysis.initialize_start_block(body, &mut results[mir::START_BLOCK]);
+
+            let mut lazy_results = IndexVec::from_elem_n(None, body.basic_blocks.len());
+            lazy_results[mir::START_BLOCK] = Some(analysis.bottom_value(body));
+            analysis.initialize_start_block(body, lazy_results[mir::START_BLOCK].as_mut().unwrap());
+
+            Self {
+                // results,
+                lazy_results,
+                dirty_queue: WorkQueue::with_none(body.basic_blocks.len()),
+            }
+        }
+    }
+
+    // FIXME: lazify this
+    // let mut results = IndexVec::from_fn_n(|_| analysis.bottom_value(body), body.basic_blocks.len());
+    // analysis.initialize_start_block(body, &mut results[mir::START_BLOCK]);
+
+    let mut borrows_holder = AnalysisHolder::new(body, &borrows);
+    let mut uninits_holder = AnalysisHolder::new(body, &uninits);
+    let mut ever_inits_holder = AnalysisHolder::new(body, &ever_inits);
+
+    // let mut results: IndexVec<BasicBlock, Option<BorrowckDomain>> =
+    //     IndexVec::from_elem_n(None, body.basic_blocks.len());
+    // // Ensure the start block has some state in it.
+    // results[mir::START_BLOCK] = Some(analysis.bottom_value(body));
+    // analysis.initialize_start_block(body, results[mir::START_BLOCK].as_mut().unwrap());
+
+    // We'll compute dataflow over the SCCs.
+    let sccs = body.basic_blocks.sccs();
+
+    // Worklist for per-SCC iterations.
+    // let mut dirty_queue: WorkQueue<BasicBlock> = WorkQueue::with_none(body.basic_blocks.len());
+
+    // `state` is not actually used between iterations; this is just an optimization to avoid
+    // reallocating every iteration.
+    // let mut state = BorrowckDomain {
+    //     borrows: borrows.bottom_value(body),
+    //     uninits: uninits.bottom_value(body),
+    //     ever_inits: ever_inits.bottom_value(body),
+    // };
+
+    let mut analysis = Borrowck { borrows, uninits, ever_inits };
+    let mut state = analysis.bottom_value(body);
+
+    for &scc in &sccs.queue {
+        let blocks_in_scc = sccs.sccs[scc as usize].len();
+        // eprintln!(
+        //     "X - entering scc {} out of {}, there are {} blocks in there: {:?}",
+        //     scc, sccs.component_count, blocks_in_scc, sccs.sccs[scc as usize]
+        // );
+
+        #[inline(always)]
+        fn propagate<'tcx, A: Analysis<'tcx>>(
+            body: &Body<'tcx>,
+            block: BasicBlock,
+            holder: &mut AnalysisHolder<'tcx, A>,
+            state: &mut A::Domain,
+            analysis: &mut A,
+            mut propagate: impl FnMut(&mut AnalysisHolder<'tcx, A>, BasicBlock, &A::Domain),
+        ) {
+            // Apply the block's effects without visiting.
+            Forward::apply_effects_in_block(
+                analysis,
+                body,
+                state,
+                block,
+                &body[block],
+                |target: BasicBlock, state: &A::Domain| {
+                    propagate(holder, target, state);
+                },
+            );
+        }
+
+        // Fast path: the overwhelmingly common case, where this SCC has a single block.
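+        // In outline, each SCC is handled in one of two regimes (a sketch of the
+        // control flow below, not additional behavior):
+        //
+        //     if blocks_in_scc == 1 {
+        //         // acyclic component: compute and visit the single block in one
+        //         // pass, then move/join its exit state into its successors
+        //     } else {
+        //         // cyclic component: run a worklist per analysis until the entry
+        //         // states reach a fixpoint, then visit each block exactly once
+        //     }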
+ if blocks_in_scc == 1 { + let block = sccs.sccs[scc as usize][0]; + let block_data = &body[block]; + + // eprintln!("A1 - entering scc {scc}'s block: {:?}", block); + // eprintln!( + // "A2 - is {block:?} state ready, borrows: {}, uninits: {}, ever_inits: {}", + // borrows_holder.lazy_results[block].is_some(), + // uninits_holder.lazy_results[block].is_some(), + // ever_inits_holder.lazy_results[block].is_some(), + // ); + + // eprintln!( + // "1b - scc {scc} is a single block, computing and visiting {block:?} at the same time" + // ); + + // // tmp: verifying contents on entry + // if let Some(borrows) = &borrows_holder.lazy_results[block] { + // assert_eq!( + // &flow_entry_states[block].borrows, borrows, + // "borrows of block {block:?} differ" + // ); + // } + // if let Some(ever_inits) = &ever_inits_holder.lazy_results[block] { + // assert_eq!( + // &flow_entry_states[block].ever_inits, ever_inits, + // "ever_inits of block {block:?} differ" + // ); + // } + // if let Some(uninits) = &uninits_holder.lazy_results[block] { + // assert_eq!( + // &flow_entry_states[block].uninits, uninits, + // "uninits of block {block:?} differ" + // ); + // } + + // Apply effects in the block's statements. + let analysis = &mut analysis; + let Some(borrows) = borrows_holder.lazy_results[block].take() else { + continue; + }; + let Some(uninits) = uninits_holder.lazy_results[block].take() else { + continue; + }; + let Some(ever_inits) = ever_inits_holder.lazy_results[block].take() else { + continue; + }; + let mut block_state = BorrowckDomain { borrows, uninits, ever_inits }; + + // // tmp: verifying the contents on entry + // assert_eq!( + // flow_entry_states[block].borrows, block_state.borrows, + // "borrows of block {block:?} differ" + // ); + // assert_eq!( + // flow_entry_states[block].ever_inits, block_state.ever_inits, + // "ever_inits of block {block:?} differ" + // ); + // assert_eq!( + // flow_entry_states[block].uninits, block_state.uninits, + // "uninits of block {block:?} differ" + // ); + + // eprintln!("1c1 - {block:?} uninits start state: \n{:?}", block_state.uninits); + + vis.visit_block_start(&mut block_state); + + for (statement_index, statement) in block_data.statements.iter().enumerate() { + let location = Location { block, statement_index }; + analysis.apply_early_statement_effect(&mut block_state, statement, location); + vis.visit_after_early_statement_effect(analysis, &block_state, statement, location); + + analysis.apply_primary_statement_effect(&mut block_state, statement, location); + vis.visit_after_primary_statement_effect( + analysis, + &block_state, + statement, + location, + ); + } + + // eprintln!("1c2 - {block:?} uninits post statements state: \n{:?}", block_state.uninits); + + // Apply effects in the block terminator. 
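+            // The terminator mirrors the statement loop above: early effect plus
+            // visitor callback, then primary effect plus visitor callback. The
+            // primary terminator effect also yields the `TerminatorEdges` used
+            // further down to route `block_state` to the successors.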
+ let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.apply_early_terminator_effect(&mut block_state, terminator, location); + vis.visit_after_early_terminator_effect(analysis, &block_state, terminator, location); + + // eprintln!("1c3 - {block:?} uninits early terminator state: \n{:?}", block_state.uninits); + + let edges = + analysis.apply_primary_terminator_effect(&mut block_state, terminator, location); + vis.visit_after_primary_terminator_effect(analysis, &block_state, terminator, location); + + // eprintln!("1c4 - {block:?} uninits post terminator state: \n{:?}", block_state.uninits); + + vis.visit_block_end(&mut block_state); + + #[inline(always)] + fn propagate_single_edge<'tcx, A: Analysis<'tcx>>( + holder: &mut AnalysisHolder<'tcx, A>, + state: A::Domain, + _block: BasicBlock, + target: BasicBlock, + _tag: &str, + _kind: &str, + _edge: &str, + ) { + // eprintln!( + // "{tag} - propagating {kind} state from {block:?} to {edge} edge: {target:?} (init: {})", + // holder.lazy_results[target].is_some() + // ); + match holder.lazy_results[target].as_mut() { + None => { + holder.lazy_results[target] = Some(state); + } + Some(existing_state) => { + existing_state.join(&state); + } + } + } + + #[inline(always)] + fn propagate_optional_single_edge<'tcx, A: Analysis<'tcx>>( + holder: &mut AnalysisHolder<'tcx, A>, + state: &A::Domain, + _block: BasicBlock, + target: BasicBlock, + _tag: &str, + _kind: &str, + _edge: &str, + ) { + // eprintln!( + // "{tag} - propagating {kind} state from {block:?} to {edge} edge: {target:?} (init: {})", + // holder.lazy_results[target].is_some() + // ); + match holder.lazy_results[target].as_mut() { + None => { + holder.lazy_results[target] = Some(state.clone()); + } + Some(existing_state) => { + existing_state.join(state); + } + } + } + + #[inline(always)] + fn propagate_double_edge<'tcx, A: Analysis<'tcx>>( + holder: &mut AnalysisHolder<'tcx, A>, + state: A::Domain, + _block: BasicBlock, + target: BasicBlock, + unwind: BasicBlock, + _tag: &str, + _kind: &str, + _edge: &str, + ) { + // eprintln!( + // "{tag} - propagating {kind} state from {block:?} to {edge} edge: {target:?} (init: {}), {unwind:?} (init: {})", + // holder.lazy_results[target].is_some(), + // holder.lazy_results[unwind].is_some(), + // ); + // We have two *distinct* successors. + // + // We could use an `_unchecked` version of `pick2_mut` if it existed: we know the + // indices are disjoint and in-bounds. + match holder.lazy_results.pick2_mut(target, unwind) { + (None, None) => { + // We need to initialize both successors with our own block state, we need a + // clone. + holder.lazy_results[target] = Some(state.clone()); + holder.lazy_results[unwind] = Some(state); + } + (None, Some(unwind_state)) => { + // No need to clone, only one successor is not initialized yet. + unwind_state.join(&state); + holder.lazy_results[target] = Some(state); + } + (Some(target_state), None) => { + // No need to clone, only one successor is not initialized yet. + target_state.join(&state); + holder.lazy_results[unwind] = Some(state); + } + (Some(target_state), Some(unwind_state)) => { + // The successors have already been initialized by their other parents, we + // merge our block state there. + target_state.join(&state); + unwind_state.join(&state); + } + } + } + + // The current block is done, and the visitor was notified at every step. We now take care + // of the successors' state. 
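+            // All the propagate_* helpers above apply the same "move-or-join" rule
+            // to the lazily allocated successor states (sketch, mirroring
+            // `propagate_single_edge`):
+            //
+            //     match holder.lazy_results[target].as_mut() {
+            //         None => holder.lazy_results[target] = Some(state), // first writer moves
+            //         Some(existing) => { existing.join(&state); }       // later writers join
+            //     }
+            //
+            // so a clone is only paid when one state must flow into two or more
+            // still-uninitialized successors.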
+ match edges { + TerminatorEdges::None => {} + TerminatorEdges::Single(target) => { + // We have a single successor, our own state can either be moved to it, or dropped. + // propagate(target, block_state); + propagate_single_edge( + &mut borrows_holder, + block_state.borrows, + block, + target, + "A3a", + "borrows", + "single", + ); + propagate_single_edge( + &mut uninits_holder, + block_state.uninits, + block, + target, + "A3b", + "uninits", + "single", + ); + propagate_single_edge( + &mut ever_inits_holder, + block_state.ever_inits, + block, + target, + "A3c", + "ever_inits", + "single", + ); + } + TerminatorEdges::Double(target, unwind) if target == unwind => { + // Why are we generating this shape in MIR building :thinking: ? Either way, we also + // have a single successor here. + // propagate(target, block_state); + propagate_single_edge( + &mut borrows_holder, + block_state.borrows, + block, + target, + "A4a", + "borrows", + "single double", + ); + propagate_single_edge( + &mut uninits_holder, + block_state.uninits, + block, + target, + "A4b", + "uninits", + "single double", + ); + propagate_single_edge( + &mut ever_inits_holder, + block_state.ever_inits, + block, + target, + "A4c", + "ever_inits", + "single double", + ); + } + TerminatorEdges::Double(target, unwind) => { + propagate_double_edge( + &mut borrows_holder, + block_state.borrows, + block, + target, + unwind, + "A5a", + "borrows", + "double", + ); + propagate_double_edge( + &mut uninits_holder, + block_state.uninits, + block, + target, + unwind, + "A5b", + "uninits", + "double", + ); + propagate_double_edge( + &mut ever_inits_holder, + block_state.ever_inits, + block, + target, + unwind, + "A5c", + "ever_inits", + "double", + ); + } + TerminatorEdges::AssignOnReturn { return_, cleanup, place } => { + // FIXME: we could optimize the move/clones here: + // - we only need to clone if there's >1 non-initialized block in the return and + // cleanup blocks + // - if the cleanup block has been initialized, we don't need to pass clone to + // propagate (until polonius is stabilized, not using propagate would also be a + // compile error) + // FIXME: check if the return blocks are actually disjoint. + + // This must be done *first*, otherwise the unwind path will see the assignments. + if let Some(cleanup) = cleanup { + // We don't `propagate`: we'd have to clone the block state, but that's only + // necessary if the cleanup state wasn't already initialized. + // + // FIXME: we wouldn't need to clone either if the cleanup block is one of the + // return blocks, similarly to `TerminatorEdges::Double` which can be 2 edges to + // the same block. 
+ // propagate(cleanup, block_state.clone()); + + propagate_optional_single_edge( + &mut borrows_holder, + &block_state.borrows, + block, + cleanup, + "A6a", + "borrows", + "assign on return cleanup", + ); + propagate_optional_single_edge( + &mut uninits_holder, + &block_state.uninits, + block, + cleanup, + "A6b", + "uninits", + "assign on return cleanup", + ); + propagate_optional_single_edge( + &mut ever_inits_holder, + &block_state.ever_inits, + block, + cleanup, + "A6c", + "ever_inits", + "assign on return cleanup", + ); + } + + if !return_.is_empty() { + analysis.apply_call_return_effect(&mut block_state, block, place); + + let target_count = return_.len(); + for &target in return_.iter().take(target_count - 1) { + propagate_optional_single_edge( + &mut borrows_holder, + &block_state.borrows, + block, + target, + "A7a", + "borrows", + "return target", + ); + propagate_optional_single_edge( + &mut uninits_holder, + &block_state.uninits, + block, + target, + "A7b", + "uninits", + "return target", + ); + propagate_optional_single_edge( + &mut ever_inits_holder, + &block_state.ever_inits, + block, + target, + "A7c", + "ever_inits", + "return target", + ); + } + + let target = *return_.last().unwrap(); + propagate_single_edge( + &mut borrows_holder, + block_state.borrows, + block, + target, + "A7d", + "borrows", + "return target", + ); + propagate_single_edge( + &mut uninits_holder, + block_state.uninits, + block, + target, + "A7e", + "uninits", + "return target", + ); + propagate_single_edge( + &mut ever_inits_holder, + block_state.ever_inits, + block, + target, + "A7f", + "ever_inits", + "return target", + ); + } + } + TerminatorEdges::SwitchInt { targets, discr } => { + if let Some(_data) = analysis.get_switch_int_data(block, discr) { + todo!("wat. 
this is unused in tests"); + } else { + let target_count = targets.all_targets().len(); + for &target in targets.all_targets().iter().take(target_count - 1) { + propagate_optional_single_edge( + &mut borrows_holder, + &block_state.borrows, + block, + target, + "A8a", + "borrows", + "switchint", + ); + propagate_optional_single_edge( + &mut uninits_holder, + &block_state.uninits, + block, + target, + "A8b", + "uninits", + "switchint", + ); + propagate_optional_single_edge( + &mut ever_inits_holder, + &block_state.ever_inits, + block, + target, + "A8c", + "ever_inits", + "switchint", + ); + } + + let target = *targets.all_targets().last().unwrap(); + propagate_single_edge( + &mut borrows_holder, + block_state.borrows, + block, + target, + "A8d", + "borrows", + "switchint", + ); + propagate_single_edge( + &mut uninits_holder, + block_state.uninits, + block, + target, + "A8e", + "uninits", + "switchint", + ); + propagate_single_edge( + &mut ever_inits_holder, + block_state.ever_inits, + block, + target, + "A8f", + "ever_inits", + "switchint", + ); + } + } + } + } else { + for block in sccs.sccs[scc as usize].iter().copied() { + borrows_holder.dirty_queue.insert(block); + uninits_holder.dirty_queue.insert(block); + ever_inits_holder.dirty_queue.insert(block); + } + + while let Some(block) = borrows_holder.dirty_queue.pop() { + // eprintln!("B1 - entering scc {scc}'s block: {:?}", block); + + // eprintln!( + // "B2 - is {block:?} state ready, borrows: {}, uninits: {}, ever_inits: {}", + // borrows_holder.lazy_results[block].is_some(), + // uninits_holder.lazy_results[block].is_some(), + // ever_inits_holder.lazy_results[block].is_some(), + // ); + + // We're in an SCC: + // - we need to retain our entry state and can't move it to our children: we need to + // reach a fixpoint, and *then* to visit the blocks starting from their entry + // state. + // - our parent have initialized our state, so we use this to set the domain cursor + state.borrows.clone_from( + borrows_holder.lazy_results[block].as_ref().unwrap_or_else(|| { + panic!("the parents of {block:?} haven't initialized its state!"); + }), + ); + propagate( + body, + block, + &mut borrows_holder, + &mut state.borrows, + &mut analysis.borrows, + |holder, target, state| { + let set_changed = match holder.lazy_results[target].as_mut() { + None => { + holder.lazy_results[target] = Some(state.clone()); + true + } + Some(existing_state) => existing_state.join(&state), + }; + + // let set_changed = holder.results[target].join(&state); + let target_scc = sccs.components[target]; + // eprintln!( + // "B3a - propagating borrows from {block:?} to target {target:?} (init: {}) of scc {target_scc}", + // holder.lazy_results[target].is_some() + // ); + if set_changed && target_scc == scc { + // The target block is in the SCC we're currently processing, and we + // want to process this block until fixpoint. Otherwise, the target + // block is in a successor SCC and it will be processed when that SCC is + // encountered later. 
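+                        // Illustration: for a two-block loop { bb1, bb2 } forming one
+                        // SCC with an exit edge bb2 -> bb3, a grown entry state keeps
+                        // re-enqueueing bb1 and bb2 here, while bb3 is only
+                        // initialized/joined and gets processed once its own SCC comes
+                        // up in `sccs.queue` order.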
+ holder.dirty_queue.insert(target); + } + }, + ); + } + while let Some(block) = uninits_holder.dirty_queue.pop() { + // eprintln!("C1 - entering scc {scc}'s block: {:?}", block); + + // eprintln!( + // "C2 - is {block:?} state ready, borrows: {}, uninits: {}, ever_inits: {}", + // borrows_holder.lazy_results[block].is_some(), + // uninits_holder.lazy_results[block].is_some(), + // ever_inits_holder.lazy_results[block].is_some(), + // ); + + state.uninits.clone_from( + uninits_holder.lazy_results[block].as_ref().unwrap_or_else(|| { + panic!("the parents of {block:?} haven't initialized its state!"); + }), + ); + propagate( + body, + block, + &mut uninits_holder, + &mut state.uninits, + &mut analysis.uninits, + |holder, target, state| { + let set_changed = match holder.lazy_results[target].as_mut() { + None => { + holder.lazy_results[target] = Some(state.clone()); + true + } + Some(existing_state) => existing_state.join(&state), + }; + + // let set_changed = holder.results[target].join(&state); + let target_scc = sccs.components[target]; + // eprintln!( + // "C3a - propagating uninits from {block:?} to target {target:?} (init: {}) of scc {target_scc}", + // holder.lazy_results[target].is_some() + // ); + if set_changed && target_scc == scc { + // The target block is in the SCC we're currently processing, and we + // want to process this block until fixpoint. Otherwise, the target + // block is in a successor SCC and it will be processed when that SCC is + // encountered later. + holder.dirty_queue.insert(target); + } + }, + ); + } + while let Some(block) = ever_inits_holder.dirty_queue.pop() { + // eprintln!("D1 - entering scc {scc}'s block: {:?}", block); + + // eprintln!( + // "D2 - is {block:?} state ready, ever_inits: {}", + // ever_inits_holder.lazy_results[block].is_some(), + // ); + + state.ever_inits.clone_from( + ever_inits_holder.lazy_results[block].as_ref().unwrap_or_else(|| { + panic!("the parents of {block:?} haven't initialized its state!"); + }), + ); + propagate( + body, + block, + &mut ever_inits_holder, + &mut state.ever_inits, + &mut analysis.ever_inits, + |holder, target, state| { + let set_changed = match holder.lazy_results[target].as_mut() { + None => { + holder.lazy_results[target] = Some(state.clone()); + true + } + Some(existing_state) => existing_state.join(&state), + }; + + // let set_changed = holder.results[target].join(&state); + let target_scc = sccs.components[target]; + // eprintln!( + // "D3a - propagating ever_inits from {block:?} to target {target:?} (init: {}) of scc {target_scc}", + // holder.lazy_results[target].is_some() + // ); + if set_changed && target_scc == scc { + // The target block is in the SCC we're currently processing, and we + // want to process this block until fixpoint. Otherwise, the target + // block is in a successor SCC and it will be processed when that SCC is + // encountered later. + holder.dirty_queue.insert(target); + } + }, + ); + } + + // eprintln!( + // "1b - scc {scc} has reached fixpoint, visiting it again from its entry states" + // ); + + // let state = &mut state; + let analysis = &mut analysis; + + // The SCC has reached fixpoint, we can now visit it. 
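+            // At this point every block of the SCC holds its fixpoint *entry* state,
+            // so the pass below can take each state out, replay the statement and
+            // terminator effects once for the visitor, and then drop it.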
+ for block in sccs.sccs[scc as usize].iter().copied() { + // eprintln!("2 - re-entering scc {scc}'s block: {:?}", block); + + // // tmp: verifying contents on entry + // assert_eq!( + // &flow_entry_states[block].borrows, + // borrows_holder.lazy_results[block].as_ref().unwrap(), + // "borrows of block {block:?} differ" + // ); + // assert_eq!( + // &flow_entry_states[block].ever_inits, + // ever_inits_holder.lazy_results[block].as_ref().unwrap(), + // "ever_inits of block {block:?} differ" + // ); + // assert_eq!( + // &flow_entry_states[block].uninits, + // uninits_holder.lazy_results[block].as_ref().unwrap(), + // "uninits of block {block:?} differ" + // ); + + // state.borrows.clone_from(&borrows_holder.lazy_results[block].as_ref().unwrap()); + // state + // .ever_inits + // .clone_from(&ever_inits_holder.lazy_results[block].as_ref().unwrap()); + // state.uninits.clone_from(&uninits_holder.lazy_results[block].as_ref().unwrap()); + + let Some(borrows) = borrows_holder.lazy_results[block].take() else { + continue; + }; + let Some(uninits) = uninits_holder.lazy_results[block].take() else { + continue; + }; + let Some(ever_inits) = ever_inits_holder.lazy_results[block].take() else { + continue; + }; + let mut state = BorrowckDomain { borrows, uninits, ever_inits }; + let state = &mut state; + + // // tmp: verifying contents on entry + // assert_eq!( + // flow_entry_states[block].borrows, state.borrows, + // "borrows of block {block:?} differ" + // ); + // assert_eq!( + // flow_entry_states[block].ever_inits, state.ever_inits, + // "ever_inits of block {block:?} differ" + // ); + // assert_eq!( + // flow_entry_states[block].uninits, state.uninits, + // "uninits of block {block:?} differ" + // ); + + let block_data = &body[block]; + + vis.visit_block_start(state); + + for (statement_index, statement) in block_data.statements.iter().enumerate() { + let location = Location { block, statement_index }; + analysis.apply_early_statement_effect(state, statement, location); + vis.visit_after_early_statement_effect(analysis, state, statement, location); + analysis.apply_primary_statement_effect(state, statement, location); + vis.visit_after_primary_statement_effect(analysis, state, statement, location); + } - if body.basic_blocks.is_cfg_cyclic() { - let (mut flow_analysis, flow_entry_states) = - get_flow_results(tcx, body, &move_data, &borrow_set, ®ioncx); - visit_results( - body, - traversal::reverse_postorder(body).map(|(bb, _)| bb), - &mut flow_analysis, - &flow_entry_states, - &mut mbcx, - ); - } else { - compute_dataflow(tcx, body, &move_data, &borrow_set, ®ioncx, &mut mbcx); + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.apply_early_terminator_effect(state, terminator, location); + vis.visit_after_early_terminator_effect(analysis, state, terminator, location); + analysis.apply_primary_terminator_effect(state, terminator, location); + vis.visit_after_primary_terminator_effect(analysis, state, terminator, location); + + vis.visit_block_end(state); + } + } + + // eprintln!(); } +} - mbcx.report_move_errors(); +// When a CFG is acyclic, reaching fixpoint is a single iteration over the blocks in RPO order. If +// we do the computation at the same time as we're visiting results, we can avoid computing +// per-block state in `iterate_to_fixpoint` and then per-statement state (again, since we have to do +// it in `iterate_to_fixpoint` to compute the per-block exit state) in `visit_results`. 
+// +// The callers need to ensure that the CFG is acyclic, e.g. via `body.basic_blocks.is_cfg_cyclic()`, +// and that the analysis is a forward analysis. +fn compute_rpo_dataflow<'mir, 'tcx, A>( + body: &'mir Body<'tcx>, + analysis: &mut A, + vis: &mut impl ResultsVisitor<'tcx, A>, +) where + A: Analysis<'tcx, Direction = rustc_mir_dataflow::Forward>, +{ + use rustc_middle::mir; + use rustc_mir_dataflow::JoinSemiLattice; - // For each non-user used mutable variable, check if it's been assigned from - // a user-declared local. If so, then put that local into the used_mut set. - // Note that this set is expected to be small - only upvars from closures - // would have a chance of erroneously adding non-user-defined mutable vars - // to the set. - let temporary_used_locals: FxIndexSet = mbcx - .used_mut - .iter() - .filter(|&local| !mbcx.body.local_decls[*local].is_user_variable()) - .cloned() - .collect(); - // For the remaining unused locals that are marked as mutable, we avoid linting any that - // were never initialized. These locals may have been removed as unreachable code; or will be - // linted as unused variables. - let unused_mut_locals = - mbcx.body.mut_vars_iter().filter(|local| !mbcx.used_mut.contains(local)).collect(); - mbcx.gather_used_muts(temporary_used_locals, unused_mut_locals); + // Instead of storing a domain state per-block, we only do it lazily. This means that we can + // re-use that state when for example a block has a single successor. The visitor will be + // notified that the entire block is complete, before we mutate the same piece of state, and + // thus avoid creating it or cloning it in many cases. - debug!("mbcx.used_mut: {:?}", mbcx.used_mut); - mbcx.lint_unused_mut(); - if let Some(guar) = mbcx.emit_errors() { - mbcx.root_cx.set_tainted_by_errors(guar); - } + // Set up lazy state for the CFG + let mut results: IndexVec> = + IndexVec::from_elem_n(None, body.basic_blocks.len()); - let result = PropagatedBorrowCheckResults { - closure_requirements: opt_closure_req, - used_mut_upvars: mbcx.used_mut_upvars, - }; + // Ensure the start block has some state in it; + results[mir::START_BLOCK] = Some(analysis.bottom_value(body)); + analysis.initialize_start_block(body, results[mir::START_BLOCK].as_mut().unwrap()); - if let Some(consumer) = &mut root_cx.consumer { - consumer.insert_body( - def, - BodyWithBorrowckFacts { - body: body_owned, - promoted, - borrow_set, - region_inference_context: regioncx, - location_table: polonius_input.as_ref().map(|_| location_table), - input_facts: polonius_input, - output_facts: polonius_output, + // eprintln!("CFG, {} blocks, {:?}, {:#?}", body.basic_blocks.len(), body.span, body); + // for (block, bb) in body.basic_blocks.iter_enumerated() { + // let terminator = bb.terminator(); + // let successors: Vec<_> = terminator.successors().collect(); + // eprintln!( + // "block: {block:?}, {} successors: {:?}, edges: {:?}, terminator: {:?}", + // successors.len(), + // successors, + // terminator.edges(), + // terminator.kind, + // ); + + // match terminator.kind { + // TerminatorKind::Drop { place: _, target, unwind, replace: _, drop, async_fut: _ } => { + // eprintln!( + // "Drop terminator, target: {target:?}, unwind: {unwind:?}, drop: {drop:?}" + // ); + // } + // _ => {} + // } + // } + + // Visit this *acyclic* CFG in RPO. + for (block, block_data) in traversal::reverse_postorder(body) { + // `reverse_postorder` doesn't yield unreachable blocks, so every block we visit will have + // at least one of its parents visited first. 
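+        // As a small illustration: in a diamond bb0 -> { bb1, bb2 } -> bb3, any
+        // reverse postorder lists bb0 before bb1 and bb2, and bb3 last, so by the
+        // time bb3 is visited both predecessors have already moved or joined their
+        // exit states into `results[bb3]`.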
+ // + // Therefore, we get our per-block state: + // - from one of our predecessors initializing it + // - or from the analysis' initial value, for the START_BLOCK + // + // That is true in general, except for some bug/issue with async drops today: we can visit + // successors of a block that are not present in the data used to propagate dataflow to + // successor blocks, `TerminatorEdges`. + // + // We temporarily ignore these unreachable-in-practice blocks for now: they are ignored by + // the dataflow engine, and wouldn't have any state computed or propagated other than the + // bottom value of the analysis. + let Some(mut block_state) = results[block].take() else { + continue; + }; + + // FIXME(async_drop): we assert here to fail when this issue is fixed, just expect() above + // and remove the assertion below when traversal and dataflow agree. + assert!( + { + let terminator = block_data.terminator(); + if matches!(terminator.kind, TerminatorKind::Drop { drop: Some(_), .. }) { + terminator.successors().count() == 3 + && matches!(terminator.edges(), TerminatorEdges::Double(_, _)) + } else { + true + } }, + "dataflow mismatch between async_drop TerminatorKind successors() and edges()" ); - } - debug!("do_mir_borrowck: result = {:#?}", result); + // Apply effects in the block's statements. + vis.visit_block_start(&mut block_state); - result + for (statement_index, statement) in block_data.statements.iter().enumerate() { + let location = Location { block, statement_index }; + analysis.apply_early_statement_effect(&mut block_state, statement, location); + vis.visit_after_early_statement_effect(analysis, &block_state, statement, location); + + analysis.apply_primary_statement_effect(&mut block_state, statement, location); + vis.visit_after_primary_statement_effect(analysis, &block_state, statement, location); + } + + // Apply effects in the block terminator. + let terminator = block_data.terminator(); + let location = Location { block, statement_index: block_data.statements.len() }; + analysis.apply_early_terminator_effect(&mut block_state, terminator, location); + vis.visit_after_early_terminator_effect(analysis, &block_state, terminator, location); + + let edges = + analysis.apply_primary_terminator_effect(&mut block_state, terminator, location); + vis.visit_after_primary_terminator_effect(analysis, &block_state, terminator, location); + + vis.visit_block_end(&mut block_state); + + // The current block is done, and the visitor was notified at every step. We now take care + // of the successor's state. + + // let mut propagate = |target: BasicBlock, state: A::Domain| { + // // Look at the target block state holder: + // // - either it's empty, and we initialize it by moving the state there + // // - or it's been initialized, and we merge it with the given state + // match results[target].as_mut() { + // None => { + // results[target] = Some(state); + // } + // Some(existing_state) => { + // existing_state.join(&state); + // } + // } + // }; + + match edges { + TerminatorEdges::None => {} + TerminatorEdges::Single(target) => { + // eprintln!( + // "Propagating state from {block:?} to single edge: {target:?} (init: {})", + // results[target].is_some() + // ); + match results[target].as_mut() { + None => { + results[target] = Some(block_state); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + + // We have a single successor, our own state can either be moved to it, or dropped. 
+ // propagate(target, block_state); + } + TerminatorEdges::Double(target, unwind) if target == unwind => { + // eprintln!( + // "Propagating state from {block:?} to single double edge: {target:?} (init: {})", + // results[target].is_some() + // ); + + // Why are we generating this shape in MIR building :thinking: ? Either way, we also + // have a single successor here. + // propagate(target, block_state); + match results[target].as_mut() { + None => { + results[target] = Some(block_state); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + } + TerminatorEdges::Double(target, unwind) => { + // eprintln!( + // "Propagating state from {block:?} to double edge: {target:?} (init: {}), {unwind:?} (init: {})", + // results[target].is_some(), + // results[unwind].is_some() + // ); + + // We have two *distinct* successors. + // + // We could use an `_unchecked` version of `pick2_mut` if it existed: we know the + // indices are disjoint and in-bounds. + match results.pick2_mut(target, unwind) { + (None, None) => { + // We need to initialize both successors with our own block state, we need a + // clone. + results[target] = Some(block_state.clone()); + results[unwind] = Some(block_state); + } + (None, Some(unwind_state)) => { + // No need to clone, only one successor is not initialized yet. + unwind_state.join(&block_state); + results[target] = Some(block_state); + } + (Some(target_state), None) => { + // No need to clone, only one successor is not initialized yet. + target_state.join(&block_state); + results[unwind] = Some(block_state); + } + (Some(target_state), Some(unwind_state)) => { + // The successors have already been initialized by their other parents, we + // merge our block state there. + target_state.join(&block_state); + unwind_state.join(&block_state); + } + } + } + TerminatorEdges::AssignOnReturn { return_, cleanup, place } => { + // FIXME: we could optimize the move/clones here: + // - we only need to clone if there's >1 non-initialized block in the return and + // cleanup blocks + // - if the cleanup block has been initialized, we don't need to pass clone to + // propagate (until polonius is stabilized, not using propagate would also be a + // compile error) + // FIXME: check if the return blocks are actually disjoint. + + // This must be done *first*, otherwise the unwind path will see the assignments. + if let Some(cleanup) = cleanup { + // We don't `propagate`: we'd have to clone the block state, but that's only + // necessary if the cleanup state wasn't already initialized. + // + // FIXME: we wouldn't need to clone either if the cleanup block is one of the + // return blocks, similarly to `TerminatorEdges::Double` which can be 2 edges to + // the same block. 
+ // propagate(cleanup, block_state.clone()); + + match results[cleanup].as_mut() { + None => { + results[cleanup] = Some(block_state.clone()); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + } + + if !return_.is_empty() { + analysis.apply_call_return_effect(&mut block_state, block, place); + + let target_count = return_.len(); + for &target in return_.iter().take(target_count - 1) { + // propagate(target, block_state.clone()); + match results[target].as_mut() { + None => { + results[target] = Some(block_state.clone()); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + } + + let target = *return_.last().unwrap(); + // propagate(target, block_state); + match results[target].as_mut() { + None => { + results[target] = Some(block_state); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + } + } + TerminatorEdges::SwitchInt { targets, discr } => { + if let Some(_data) = analysis.get_switch_int_data(block, discr) { + todo!("wat. this is unused in tests"); + } else { + let target_count = targets.all_targets().len(); + for &target in targets.all_targets().iter().take(target_count - 1) { + match results[target].as_mut() { + None => { + results[target] = Some(block_state.clone()); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + } + + let target = *targets.all_targets().last().unwrap(); + match results[target].as_mut() { + None => { + results[target] = Some(block_state); + } + Some(existing_state) => { + existing_state.join(&block_state); + } + } + } + } + } + } } +#[cfg(test)] fn compute_dataflow<'a, 'tcx>( tcx: TyCtxt<'tcx>, body: &'a Body<'tcx>, @@ -681,6 +3789,7 @@ fn compute_dataflow<'a, 'tcx>( } } +#[cfg(test)] fn get_flow_results<'a, 'tcx>( tcx: TyCtxt<'tcx>, body: &'a Body<'tcx>, @@ -690,21 +3799,96 @@ fn get_flow_results<'a, 'tcx>( ) -> (Borrowck<'a, 'tcx>, Results) { // We compute these three analyses individually, but them combine them into // a single results so that `mbcx` can visit them all together. 
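+    // (Shape of what follows, using only names from this function: each of the
+    // three `iterate_to_fixpoint` calls below produces an `analysis` half and
+    // per-block entry states, and the halves are recombined into
+    // `Borrowck { borrows: borrows.analysis, .. }` so `mbcx` can walk all three
+    // domains in lockstep.)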
+ // let timer = std::time::Instant::now(); let borrows = Borrows::new(tcx, body, regioncx, borrow_set).iterate_to_fixpoint( tcx, body, Some("borrowck"), ); + // let elapsed = timer.elapsed(); + // if body.basic_blocks.len() > 5000 && elapsed.as_millis() > 1 { + // eprintln!("dataflow {}, took {} ns, {:?}", "borrows", elapsed.as_nanos(), body.span); + // } + + // --- + // if rustc_data_structures::graph::is_cyclic(&body.basic_blocks) { + // // let timer = std::time::Instant::now(); + // let borrowz = Borrows::new(tcx, body, regioncx, borrow_set).iterate_to_fixpoint_per_scc( + // tcx, + // body, + // Some("borrowz"), + // ); + // // let elapsed = timer.elapsed(); + // // if body.basic_blocks.len() > 5000 && elapsed.as_millis() > 1 { + // // eprintln!("dataflow {}, took {} ns, {:?}", "borrowz", elapsed.as_nanos(), body.span); + // // } + // assert_eq!( + // borrows.results, borrowz.results, + // "oh noes, borrows dataflow results are different" + // ); + // } + + // --- + + // let timer = std::time::Instant::now(); let uninits = MaybeUninitializedPlaces::new(tcx, body, move_data).iterate_to_fixpoint( tcx, body, Some("borrowck"), ); + // let elapsed = timer.elapsed(); + // if body.basic_blocks.len() > 5000 && elapsed.as_millis() > 1 { + // eprintln!("dataflow {}, took {} ns, {:?}", "uninits", elapsed.as_nanos(), body.span); + // } + + // if rustc_data_structures::graph::is_cyclic(&body.basic_blocks) { + // let uninitz = MaybeUninitializedPlaces::new(tcx, body, move_data) + // .iterate_to_fixpoint_per_scc(tcx, body, Some("borrowck")); + // assert_eq!( + // uninits.results, uninitz.results, + // "oh noes, uninits dataflow results are different" + // ); + // } + + // --- + // let timer = std::time::Instant::now(); let ever_inits = EverInitializedPlaces::new(body, move_data).iterate_to_fixpoint( tcx, body, Some("borrowck"), ); + // let elapsed = timer.elapsed(); + // if body.basic_blocks.len() > 5000 && elapsed.as_millis() > 1 { + // eprintln!("dataflow {}, took {} ns, {:?}", "e_inits", elapsed.as_nanos(), body.span); + // } + + // let timer = std::time::Instant::now(); + // let _ever_initz = EverInitializedPlaces2::new(body, move_data).iterate_to_fixpoint( + // tcx, + // body, + // Some("borrowck"), + // ); + // let elapsed = timer.elapsed(); + // if body.basic_blocks.len() > 5000 && elapsed.as_millis() > 1 { + // eprintln!("dataflow {}, took {} ns, {:?}", "e_initz", elapsed.as_nanos(), body.span); + // } + + // if rustc_data_structures::graph::is_cyclic(&body.basic_blocks) { + // // let timer = std::time::Instant::now(); + // let ever_initz = EverInitializedPlaces::new(body, move_data).iterate_to_fixpoint_per_scc( + // tcx, + // body, + // Some("e_initz"), + // ); + // // let elapsed = timer.elapsed(); + // // if body.basic_blocks.len() > 5000 && elapsed.as_millis() > 1 { + // // eprintln!("dataflow {}, took {} ns, {:?}", "e_initz", elapsed.as_nanos(), body.span); + // // } + // assert_eq!( + // ever_inits.results, ever_initz.results, + // "oh noes, ever_inits dataflow results are different" + // ); + // } let analysis = Borrowck { borrows: borrows.analysis, @@ -857,6 +4041,21 @@ struct MirBorrowckCtxt<'a, 'infcx, 'tcx> { polonius_output: Option<&'a PoloniusOutput>, /// When using `-Zpolonius=next`: the data used to compute errors and diagnostics. 
 polonius_diagnostics: Option<&'a PoloniusDiagnosticsContext>,
+
+    #[cfg(test)]
+    nuutila: Option<nuutila::Nuutila>,
+    #[cfg(test)]
+    duration: u128,
+    #[cfg(test)]
+    duration2: u128,
+    #[cfg(test)]
+    duration3: u128,
+    #[cfg(test)]
+    transitive_predecessors: Option<IndexVec<BasicBlock, DenseBitSet<BasicBlock>>>,
+
+    // locals_checked_for_initialization: FxHashMap<MovePathIndex, FxHashSet<Location>>,
+    #[cfg(test)]
+    locals_checked_for_initialization: FxHashMap<MovePathIndex, FxHashSet<Location>>,
 }
 
 // Check that:
@@ -1247,6 +4446,173 @@ impl InitializationRequiringAction {
     }
 }
 
+#[cfg(test)]
+mod nuutila {
+    use std::collections::VecDeque;
+
+    use rustc_index::interval::IntervalSet;
+    use rustc_middle::mir::{BasicBlock, BasicBlocks, TerminatorEdges};
+
+    use crate::consumers::BorrowSet;
+
+    pub(super) struct Nuutila {
+        candidate_component_roots: Vec<usize>,
+        components: Vec<isize>,
+        pub component_count: usize,
+        dfs_numbers: Vec<u32>,
+        d: u32,
+        visited: Vec<bool>,
+        stack_vertex: VecDeque<usize>,
+        stack_component: VecDeque<usize>,
+        pub reachability: Vec<IntervalSet<usize>>,
+    }
+
+    impl Nuutila {
+        pub(crate) fn new(node_count: usize) -> Self {
+            Self {
+                candidate_component_roots: vec![0; node_count],
+                components: vec![-1; node_count],
+                component_count: 0,
+                dfs_numbers: vec![0; node_count],
+                d: 0,
+                visited: vec![false; node_count],
+                stack_vertex: VecDeque::new(),
+                stack_component: VecDeque::new(),
+                reachability: vec![IntervalSet::new(node_count); node_count + 1],
+                // ^--- reachability is per component, so fewer than `node_count` entries
+                // would suffice whenever at least one SCC has more than one node.
+                // reachabilly: vec![HybridBitSet::new_empty(node_count); node_count],
+            }
+        }
+
+        // fn compute_sccs(&mut self, blocks: &BasicBlocks<'_>) {
+        //     for (idx, block) in blocks.iter_enumerated() {
+        //         let edges = block.terminator().edges();
+        //         if matches!(edges, TerminatorEdges::None) {
+        //             continue;
+        //         }
+        //
+        //         let idx = idx.as_usize();
+        //         if !self.visited[idx] {
+        //             self.dfs_visit(idx, blocks);
+        //         }
+        //     }
+        // }
+
+        // Compute SCCs and reachability only starting where loans appear.
+        // We still have the unused blocks in our domain, but won't traverse them.
+        pub(crate) fn compute_for_loans(
+            &mut self,
+            borrow_set: &BorrowSet<'_>,
+            blocks: &BasicBlocks<'_>,
+        ) {
+            for (_loan_idx, loan) in borrow_set.iter_enumerated() {
+                let block_idx = loan.reserve_location.block;
+                let block = &blocks[block_idx];
+
+                let edges = block.terminator().edges();
+                if matches!(edges, TerminatorEdges::None) {
+                    continue;
+                }
+
+                let idx = block_idx.as_usize();
+                if !self.visited[idx] {
+                    self.dfs_visit(idx, blocks);
+                }
+            }
+        }
+
+        fn dfs_visit(&mut self, v: usize, blocks: &BasicBlocks<'_>) {
+            self.candidate_component_roots[v] = v;
+            self.components[v] = -1;
+
+            self.d += 1;
+            self.dfs_numbers[v] = self.d;
+
+            self.stack_vertex.push_front(v);
+            let stack_component_height = self.stack_component.len();
+
+            self.visited[v] = true;
+
+            let idx = BasicBlock::from_usize(v);
+            for succ in blocks[idx].terminator().successors() {
+                let w = succ.as_usize();
+
+                if w == v {
+                    panic!("unexpected self-loop at block {}", w);
+                }
+
+                if !self.visited[w] {
+                    self.dfs_visit(w, blocks);
+                }
+
+                let component_w = self.components[w];
+                if component_w == -1 {
+                    if self.dfs_numbers[self.candidate_component_roots[w]]
+                        < self.dfs_numbers[self.candidate_component_roots[v]]
+                    {
+                        self.candidate_component_roots[v] = self.candidate_component_roots[w];
+                    }
+                } else {
+                    assert!(component_w >= 0);
+
+                    // FIXME: check if v -> w is actually a forward edge or not, to avoid
+                    // unnecessary work if it is.
+                    self.stack_component.push_front(self.components[w] as usize);
+                }
+            }
+
+            if self.candidate_component_roots[v] == v {
+                self.component_count += 1;
+                self.components[v] = self.component_count as isize;
+
+                // Reachability of C[v].
+                assert!(self.reachability[self.component_count].is_empty());
+                // assert!(self.reachabilly[self.component_count].is_empty());
+
+                if let Some(&top) = self.stack_vertex.front() {
+                    if top != v {
+                        // We're adding a new component; initialize its reachability with a
+                        // self-loop: the component can reach itself.
+                        // self.reachability[self.component_count] =
+                        //     (self.component_count, self.component_count).to_interval_set();
+                        self.reachability[self.component_count].insert(self.component_count);
+                    } else {
+                        // R[C[v]] should already be empty here, so do nothing. If we didn't
+                        // always initialize the reachability of C by default, it would need
+                        // to be initialized to "empty" here.
+                    }
+                }
+
+                // Process adjacent components.
+                while self.stack_component.len() != stack_component_height {
+                    let x =
+                        self.stack_component.pop_front().expect("Sc can't be empty at this point");
+                    // Prevent performing duplicate operations.
+                    if !self.reachability[self.component_count].contains(x) {
+                        // Merge reachability information.
+                        assert_ne!(x, self.component_count);
+
+                        let zzz = unsafe {
+                            self.reachability.get_unchecked(x) as *const IntervalSet<usize>
+                        };
+                        let r_c_v =
+                            unsafe { self.reachability.get_unchecked_mut(self.component_count) };
+                        r_c_v.union(unsafe { &*zzz });
+                        r_c_v.insert(x);
+                    }
+                }
+
+                while let Some(w) = self.stack_vertex.pop_front() {
+                    self.components[w] = self.components[v];
+
+                    if w == v {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+}
+
 impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> {
     fn body(&self) -> &'a Body<'tcx> {
         self.body
@@ -1324,6 +4690,76 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> {
         }
     }
 
+    #[cfg(test)]
+    fn compute_nuutila(&mut self) {
+        use nuutila::Nuutila;
+
+        let timer = std::time::Instant::now();
+        let mut sccs = Nuutila::new(self.body.basic_blocks.len());
+        sccs.compute_for_loans(&self.borrow_set, &self.body.basic_blocks);
+
+        let elapsed = timer.elapsed();
+        eprintln!(
+            "compute_nuutila, found {} SCCs in {} ns",
+            sccs.component_count,
+            elapsed.as_nanos(),
+        );
+
+        self.nuutila = Some(sccs);
+    }
+
+    #[cfg(test)]
+    fn compute_transitive_predecessors(&mut self) {
+        let block_count = self.body.basic_blocks.len();
+
+        // Compute `transitive_predecessors` and `adjacent_predecessors`.
+        let mut transitive_predecessors =
+            IndexVec::from_elem_n(DenseBitSet::new_empty(block_count), block_count);
+        let mut adjacent_predecessors = transitive_predecessors.clone();
+        // The stack is initially a reversed postorder traversal of the CFG. However, we might
+        // add blocks to the stack again if we have loops.
+        let mut stack =
+            self.body.basic_blocks.reverse_postorder().iter().rev().copied().collect::<Vec<_>>();
+        // We keep track of all blocks that are currently not in the stack.
+        let mut not_in_stack = DenseBitSet::new_empty(block_count);
+        while let Some(block) = stack.pop() {
+            not_in_stack.insert(block);
+
+            // Loop over all successors of the block and add `block` to their predecessors.
+            for succ_block in self.body.basic_blocks[block].terminator().successors() {
+                // Keep track of whether the transitive predecessors of `succ_block` have
+                // changed.
+                let mut changed = false;
+
+                // Insert `block` in `succ_block`'s predecessors.
+                if adjacent_predecessors[succ_block].insert(block) {
+                    // Remember that `adjacent_predecessors` is a subset of
+                    // `transitive_predecessors`.
+                    changed |= transitive_predecessors[succ_block].insert(block);
+                }
+
+                // Add all transitive predecessors of `block` to the transitive predecessors of
+                // `succ_block`.
+                if block != succ_block {
+                    let (blocks_predecessors, succ_blocks_predecessors) =
+                        transitive_predecessors.pick2_mut(block, succ_block);
+                    changed |= succ_blocks_predecessors.union(blocks_predecessors);
+
+                    // Check if `succ_block`'s transitive predecessors changed. If so, we may
+                    // need to add it to the stack again.
+                    if changed && not_in_stack.remove(succ_block) {
+                        stack.push(succ_block);
+                    }
+                }
+            }
+
+            // debug_assert!(
+            //     transitive_predecessors[block].superset(&adjacent_predecessors[block])
+            // );
+        }
+
+        self.transitive_predecessors = Some(transitive_predecessors);
+    }
+
     #[instrument(level = "debug", skip(self, state))]
     fn check_access_for_conflict(
         &mut self,
@@ -1335,8 +4771,105 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> {
     ) -> bool {
         let mut error_reported = false;
 
+        // For each region that is live at this location: does it unify with the loan's
+        // introduction region? If so, the loan may be live. If a live region doesn't unify
+        // with any borrow region, we don't need to check it.
         let borrows_in_scope = self.borrows_in_scope(location, state);
 
+        // if self.borrow_set.len() > 6230 {
+        //     if let Some(borrows_for_place_base) = self.borrow_set.local_map.get(&place_span.0.local)
+        //     {
+        //         if self.nuutila.is_none() {
+        //             eprintln!(
+        //                 "we have {} borrows for the place local on the entire function, computing sccs",
+        //                 borrows_for_place_base.len(),
+        //             );
+        //             self.compute_nuutila();
+        //         }
+
+        //         let timer = std::time::Instant::now();
+
+        //         let nuutila = self.nuutila.as_ref().unwrap();
+
+        //         let current_block = location.block;
+        //         let mut reachable_loans = 0;
+        //         for &idx in borrows_for_place_base {
+        //             let loan = &self.borrow_set[idx];
+        //             let loan_introduction_block = loan.reserve_location.block;
+
+        //             let loan_block_reachability =
+        //                 &nuutila.reachability[loan_introduction_block.as_usize()];
+        //             if loan_block_reachability.contains(current_block.as_usize()) {
+        //                 reachable_loans += 1;
+        //             }
+        //         }
+
+        //         let elapsed = timer.elapsed();
+        //         if reachable_loans > 0 {
+        //             self.duration += elapsed.as_nanos();
+
+        //             // eprintln!(
+        //             //     "{} invalidations are reachable from their loan introduction, took {} ns, {:?}",
+        //             //     reachable_loans,
+        //             //     elapsed.as_nanos(),
+        //             //     self.body.span
+        //             // );
+        //         }
+
+        //         // ---
+        //         let timer = std::time::Instant::now();
+
+        //         let mut reachable_loans = 0;
+        //         for &idx in borrows_for_place_base {
+        //             if borrows_in_scope.contains(idx) {
+        //                 reachable_loans += 1;
+        //             }
+        //         }
+
+        //         let elapsed = timer.elapsed();
+        //         if reachable_loans > 0 {
+        //             self.duration3 += elapsed.as_nanos();
+        //         }
+
+        //         // ---
+
+        //         if self.transitive_predecessors.is_none() {
+        //             self.compute_transitive_predecessors();
+        //         }
+
+        //         let timer = 
std::time::Instant::now(); + + // let transitive_predecessors = self.transitive_predecessors.as_ref().unwrap(); + + // let mut reachable_loans = 0; + // for &idx in borrows_for_place_base { + // let loan = &self.borrow_set[idx]; + // let source = loan.reserve_location; + + // #[inline(always)] + // fn is_predecessor( + // transitive_predecessors: &IndexVec>, + // a: Location, + // b: Location, + // ) -> bool { + // a.block == b.block && a.statement_index < b.statement_index + // || transitive_predecessors[b.block].contains(a.block) + // } + + // if is_predecessor(transitive_predecessors, source, location) { + // reachable_loans += 1; + // } + // } + + // let elapsed = timer.elapsed(); + // if reachable_loans > 0 { + // self.duration2 += elapsed.as_nanos(); + // } + // } + // } + each_borrow_involving_path( self, self.infcx.tcx, @@ -2125,6 +5658,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { debug!("check_if_full_path_is_moved place: {:?}", place_span.0); let (prefix, mpi) = self.move_path_closest_to(place_span.0); + // self.locals_checked_for_initialization.entry(mpi).or_default().insert(location); if maybe_uninits.contains(mpi) { self.report_use_of_moved_or_uninitialized( location, @@ -2155,6 +5689,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { to: u64, ) { if let Some(mpi) = self.move_path_for_place(place_span.0) { + // self.locals_checked_for_initialization.entry(mpi).or_default().insert(location); let move_paths = &self.move_data.move_paths; let root_path = &move_paths[mpi]; @@ -2164,6 +5699,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { debug_assert!(!from_end, "Array constant indexing shouldn't be `from_end`."); if (from..to).contains(offset) { + // self.locals_checked_for_initialization.entry(child_mpi).or_default().insert(location); let uninit_child = self.move_data.find_in_move_path_or_its_descendants(child_mpi, |mpi| { maybe_uninits.contains(mpi) @@ -2242,6 +5778,8 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { .move_data .find_in_move_path_or_its_descendants(mpi, |mpi| maybe_uninits.contains(mpi)); + // self.locals_checked_for_initialization.entry(mpi).or_default().insert(location); + if let Some(uninit_mpi) = uninit_mpi { self.report_use_of_moved_or_uninitialized( location, @@ -2398,6 +5936,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { let mut shortest_uninit_seen = None; for prefix in this.prefixes(base, PrefixSet::Shallow) { let Some(mpi) = this.move_path_for_place(prefix) else { continue }; + // this.locals_checked_for_initialization.entry(mpi).or_default().insert(location); if maybe_uninits.contains(mpi) { debug!( @@ -2474,7 +6013,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { }; match self.is_mutable(place.as_ref(), is_local_mutation_allowed) { Ok(root_place) => { - self.add_used_mut(root_place, state); + self.add_used_mut(root_place, state, location); return false; } Err(place_err) => { @@ -2486,7 +6025,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { Reservation(WriteKind::Mutate) | Write(WriteKind::Mutate) => { match self.is_mutable(place.as_ref(), is_local_mutation_allowed) { Ok(root_place) => { - self.add_used_mut(root_place, state); + self.add_used_mut(root_place, state, location); return false; } Err(place_err) => { @@ -2544,6 +6083,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> { // partial initialization, do not complain about mutability // errors except for actual mutation (as opposed to an attempt // to do a partial initialization). 
+        // self.locals_checked_for_initialization.entry(place.local).or_default().insert(location);
         let previously_initialized = self.is_local_ever_initialized(place.local, state);
 
         // at this point, we have set up the error reporting state.
@@ -2570,12 +6110,27 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> {
     }
 
     /// Adds the place into the used mutable variables set
-    fn add_used_mut(&mut self, root_place: RootPlace<'tcx>, state: &BorrowckDomain) {
+    fn add_used_mut(
+        &mut self,
+        root_place: RootPlace<'tcx>,
+        state: &BorrowckDomain,
+        _location: Location,
+    ) {
         match root_place {
             RootPlace { place_local: local, place_projection: [], is_local_mutation_allowed } => {
                 // If the local may have been initialized, and it is now currently being
                 // mutated, then it is justified to be annotated with the `mut`
                 // keyword, since the mutation may be a possible reassignment.
+
+                // FIXME: this unused-mut use case isn't very important; we could ignore it,
+                // or handle it differently.
+                // if is_local_mutation_allowed != LocalMutationIsAllowed::Yes {
+                //     self.locals_checked_for_initialization
+                //         .entry(local)
+                //         .or_default()
+                //         .insert(_location);
+                // }
+
                 if is_local_mutation_allowed != LocalMutationIsAllowed::Yes
                     && self.is_local_ever_initialized(local, state).is_some()
                 {
diff --git a/compiler/rustc_data_structures/src/graph/scc/mod.rs b/compiler/rustc_data_structures/src/graph/scc/mod.rs
index 518817ea0f53a..852ec9f817c7a 100644
--- a/compiler/rustc_data_structures/src/graph/scc/mod.rs
+++ b/compiler/rustc_data_structures/src/graph/scc/mod.rs
@@ -85,7 +85,7 @@ impl Annotation for () {
 pub struct Sccs {
     /// For each node, what is the SCC index of the SCC to which it
     /// belongs.
-    scc_indices: IndexVec<N, S>,
+    pub scc_indices: IndexVec<N, S>,
 
     /// Data about all the SCCs.
     scc_data: SccData,
 
diff --git a/compiler/rustc_middle/src/mir/basic_blocks.rs b/compiler/rustc_middle/src/mir/basic_blocks.rs
index f4df9d289f8c6..68ef6c77e1a76 100644
--- a/compiler/rustc_middle/src/mir/basic_blocks.rs
+++ b/compiler/rustc_middle/src/mir/basic_blocks.rs
@@ -40,21 +40,112 @@ struct Cache {
 #[derive(Clone, Default, Debug)]
 pub struct SccData {
     pub component_count: usize,
-
+    pub biggest_scc: usize,
     /// The SCC of each block.
     pub components: IndexVec<BasicBlock, u32>,
-
+    // pub components: Vec,
+    // pub components: Vec,
     /// The contents of each SCC: its blocks, in RPO.
     pub sccs: Vec<SmallVec<[BasicBlock; 2]>>,
+    // pub sccs: Vec>,
+    // pub queue: Vec,
+    pub queue: Vec<u32>,
 }
 
+// impl SccData {
+//     /// Returns the SCC index for a given `block`.
+//     #[inline(always)]
+//     pub fn scc(&self, block: BasicBlock) -> u32 {
+//         self.components[block]
+//     }
+
+//     /// An iterator of the blocks that belong to the given SCC. Crucially, the blocks
+//     /// within the SCC are ordered in RPO.
+// #[inline(always)] +// pub fn blocks_in_rpo(&self, scc: usize) -> impl Iterator { +// self.sccs[scc].iter().copied() +// } +// } + use std::collections::VecDeque; +use rustc_index::Idx; +use rustc_index::bit_set::DenseBitSet; + +// struct PearceRecursive { +// r_index: Vec, +// stack: VecDeque, +// index: usize, +// c: usize, +// } + +// impl PearceRecursive { +// fn new(node_count: usize) -> Self { +// assert!(node_count > 0); // only a non-empty graph is supported +// // todo: assert node_count is within overflow limits +// Self { r_index: vec![0; node_count], stack: VecDeque::new(), index: 1, c: node_count - 1 } +// } + +// fn compute_sccs(&mut self, blocks: &IndexVec>) { +// for (block_idx, _) in blocks.iter_enumerated() { +// let v = block_idx.as_usize(); +// if self.r_index[v] == 0 { +// // v is unvisited +// self.visit(v, blocks); +// } +// } + +// // The SCC labels are from N - 1 to zero, remap them from 0 to the component count, to match +// // their position in an array of SCCs. +// let node_count = blocks.len() - 1; +// for scc_index in self.r_index.iter_mut() { +// *scc_index = node_count - *scc_index; +// } +// } + +// fn visit(&mut self, v: usize, blocks: &IndexVec>) { +// let mut root = true; +// self.r_index[v] = self.index; +// self.index += 1; // todo: overflow + +// for succ in blocks[BasicBlock::from_usize(v)].terminator().successors() { +// let w = succ.as_usize(); +// if self.r_index[w] == 0 { +// self.visit(w, blocks); +// } +// if self.r_index[w] < self.r_index[v] { +// self.r_index[v] = self.r_index[w]; +// root = false; +// } +// } + +// if root { +// self.index -= 1; // todo: underflow + +// while let Some(&w) = self.stack.front() +// && self.r_index[v] <= self.r_index[w] +// { +// self.stack.pop_front(); +// self.r_index[w] = self.c; +// self.index -= 1; // todo: underflow +// } + +// self.r_index[v] = self.c; +// self.c -= 1; // todo: underflow +// } else { +// self.stack.push_front(v); +// } +// } +// } + struct PearceRecursive { r_index: IndexVec, stack: VecDeque, index: u32, c: u32, + // duplicates_count: usize, + // in_stack: FxHashSet, + // youpi: Vec>, } impl PearceRecursive { @@ -67,6 +158,10 @@ impl PearceRecursive { index: 1, c: node_count.try_into().unwrap(), // c: node_count - 1, + + // duplicates_count: 0, + // in_stack: FxHashSet::default(), + // youpi: Vec::new(), } } @@ -107,21 +202,330 @@ impl PearceRecursive { self.index -= 1; self.c -= 1; + // let c = self.r_index.len() - self.c; + // let mut contents = SmallVec::new(); + // contents.push(v); + while let Some(&w) = self.stack.front() && self.r_index[v] <= self.r_index[w] { self.stack.pop_front(); + // self.in_stack.remove(&w); self.r_index[w] = self.c; self.index -= 1; + // contents.push(w); // maybe should also sort contents by r_index? } self.r_index[v] = self.c; + + // self.youpi.push(contents); } else { + // // FIXME: check if tracking what's already inside the stack to avoid duplicate work is + // // worth it. + // if self.stack.contains(&v) { + // self.duplicates_count += 1; + // } + + // Note: tracking what's in the stack wasn't worth it on cranelift-codegen (there are no + // dupes), maybe for other benchmarks? 
+ // FIXME: try + // if self.in_stack.insert(v) { + // self.stack.push_front(v); + // } + self.stack.push_front(v); } } } +// struct PearceIterative { +// r_index: Vec, +// index: usize, +// c: usize, +// stack_vertex: VecDeque, +// stack_iteration: VecDeque, +// root: Vec, +// // root: DenseBitSet, +// } + +// impl PearceIterative { +// fn new(node_count: usize) -> Self { +// // assert!(node_count > 0); // only a non-empty graph is supported +// // todo: assert node_count is within overflow limits +// Self { +// r_index: vec![0; node_count], +// // stack_vertex: VecDeque::new(), +// // stack_iteration: VecDeque::new(), +// stack_vertex: VecDeque::with_capacity(node_count), +// stack_iteration: VecDeque::with_capacity(node_count), +// index: 1, +// c: node_count - 1, +// root: vec![false; node_count], +// // root: DenseBitSet::new_empty(node_count), +// } +// } + +// fn compute_sccs(&mut self, blocks: &IndexVec>) { +// for (block_idx, _) in blocks.iter_enumerated() { +// let v = block_idx.as_usize(); +// if self.r_index[v] == 0 { +// self.visit(v, blocks); +// } +// } + +// // The computed SCC labels are from N - 1 to zero, remap them from 0 to the component count, +// // to match their position in an array of SCCs. +// let node_count = blocks.len() - 1; +// for scc_index in self.r_index.iter_mut() { +// *scc_index = node_count - *scc_index; +// } + +// // Adjust the component index counter to the component count +// self.c = node_count - self.c; +// } + +// fn visit(&mut self, v: usize, blocks: &IndexVec>) { +// // procedure visit(v) +// // 6: beginVisiting(v) +// // 7: while vS != ∅ do +// // 8: visitLoop() +// self.begin_visiting(v, blocks); +// while !self.stack_vertex.is_empty() { +// self.visit_loop(blocks); +// } +// } + +// // inline this +// fn visit_loop(&mut self, blocks: &IndexVec>) { +// // procedure visitLoop() +// // 9: v = top(vS) ; i = top(iS) +// // 10: while i ≤ |E(v)| do +// // 11: if i > 0 then finishEdge(v, i − 1) +// // 12: if i < |E(v)| ∧ beginEdge(v, i) then return +// // 13: i = i + 1 +// // 14: finishVisiting(v) +// let v = *self.stack_vertex.front().unwrap(); +// let mut i = *self.stack_iteration.front().unwrap(); + +// // todo: match on edges() to get the count +// let v_successor_count = blocks[BasicBlock::from_usize(v)].terminator().successors().count(); +// // assert!(i <= v_successor_count, "i: {i}, v_successor_count: {v_successor_count}"); + +// while i <= v_successor_count { +// if i > 0 { +// self.finish_edge(v, i - 1, blocks); +// } +// if i < v_successor_count && self.begin_edge(v, i, blocks) { +// return; +// } +// i += 1; +// } +// self.finish_visiting(v, blocks); +// } + +// fn begin_visiting(&mut self, v: usize, _blocks: &IndexVec>) { +// // procedure beginVisiting(v) +// // 15: push(vS, v) ; push(iS, 0) +// // 16: root[v] = true ; rindex[v] = index ; index = index + 1 +// self.stack_vertex.push_front(v); +// self.stack_iteration.push_front(0); +// self.root[v] = true; +// // self.root.insert(v); +// self.r_index[v] = self.index; +// self.index += 1; +// } + +// // inline this +// fn finish_visiting(&mut self, v: usize, _blocks: &IndexVec>) { +// // procedure finishVisiting(v) +// // 17: pop(vS) ; pop(iS) +// // 18: if root[v] then +// // 19: index = index − 1 +// // 20: while vS != ∅ ∧ rindex[v] ≤ rindex[top(vS)] do +// // 21: w = pop(vS) +// // 22: rindex[w] = c +// // 23: index = index − 1 +// // 24: rindex[v] = c +// // 25: c = c − 1 +// // 26: else +// // 27: push(vS, v) +// self.stack_vertex.pop_front(); +// self.stack_iteration.pop_front(); 
+// if self.root[v] { +// // if self.root.contains(v) { +// self.index -= 1; +// while let Some(&w) = self.stack_vertex.back() +// && self.r_index[v] <= self.r_index[w] +// { +// self.stack_vertex.pop_back(); +// self.r_index[w] = self.c; +// self.index -= 1; +// } +// self.r_index[v] = self.c; +// self.c -= 1; +// } else { +// self.stack_vertex.push_back(v); +// } +// } + +// // finish this +// fn begin_edge( +// &mut self, +// v: usize, +// k: usize, +// blocks: &IndexVec>, +// ) -> bool { +// // procedure beginEdge(v, k) +// // 28: w = E(v)[k] +// // 29: if rindex[w] == 0 then +// // 30: pop(iS) ; push(iS, k + 1) +// // 31: beginVisiting(w) +// // 32: return true +// // 33: else +// // 34: return false + +// let w = blocks[BasicBlock::from_usize(v)].terminator().successors().nth(k).unwrap(); +// let w = w.as_usize(); +// if self.r_index[w] == 0 { +// self.stack_iteration.pop_front(); +// self.stack_iteration.push_front(k + 1); +// self.begin_visiting(w, blocks); +// return true; +// } else { +// return false; +// } +// } + +// // inline this +// fn finish_edge( +// &mut self, +// v: usize, +// k: usize, +// blocks: &IndexVec>, +// ) { +// // procedure finishEdge(v, k) +// // 35: w = E(v)[k] +// // 36: if rindex[w] < rindex[v] then rindex[v] = rindex[w] ; root[v] = false +// let w = blocks[BasicBlock::from_usize(v)].terminator().successors().nth(k).unwrap(); +// let w = w.as_usize(); +// if self.r_index[w] < self.r_index[v] { +// self.r_index[v] = self.r_index[w]; +// self.root[v] = false; +// // self.root.remove(v); +// } +// } +// } + +// struct Scc { +// candidate_component_roots: Vec, +// components: Vec, +// component_count: usize, +// dfs_numbers: Vec, +// d: u32, +// stack: VecDeque, +// visited: DenseBitSet, +// // queue: Vec, // can compute this while discovering components?!,?! +// } + +// impl Scc { +// fn new(node_count: usize) -> Self { +// Self { +// candidate_component_roots: vec![0; node_count], +// components: vec![-1; node_count], +// component_count: 0, +// dfs_numbers: vec![0; node_count], +// d: 0, +// stack: VecDeque::new(), +// visited: DenseBitSet::new_empty(node_count), +// // queue: Vec::new(), +// } +// } + +// fn compute_sccs(&mut self, blocks: &IndexVec>) { +// for (idx, block) in blocks.iter_enumerated() { +// let edges = block.terminator().edges(); +// if matches!(edges, rustc_middle::mir::TerminatorEdges::None) { +// continue; +// } + +// let idx = idx.as_usize(); +// if !self.visited.contains(idx) { +// self.dfs_visit(idx, blocks); +// } +// } +// } + +// fn dfs_visit(&mut self, v: usize, blocks: &IndexVec>) { +// self.candidate_component_roots[v] = v; +// self.components[v] = -1; + +// self.d += 1; +// self.dfs_numbers[v] = self.d; + +// self.visited.insert(v); + +// let idx = BasicBlock::from_usize(v); +// for succ in blocks[idx].terminator().successors() { +// let w = succ.as_usize(); + +// if !self.visited.contains(w) { +// self.dfs_visit(w, blocks); +// } + +// if self.components[w] == -1 +// && self.dfs_numbers[self.candidate_component_roots[w]] +// < self.dfs_numbers[self.candidate_component_roots[v]] +// { +// self.candidate_component_roots[v] = self.candidate_component_roots[w]; +// } +// } + +// if self.candidate_component_roots[v] == v { +// self.components[v] = self.component_count as isize; +// self.component_count += 1; + +// // We have discovered a component. 
+// // self.queue.push(self.component_count); + +// while self.stack.front().is_some() +// && self.dfs_numbers[*self.stack.front().expect("peek front failed")] +// > self.dfs_numbers[v] +// { +// let w = self.stack.pop_front().expect("pop front failed"); +// self.components[w] = self.components[v]; +// } +// } else { +// self.stack.push_front(v); +// } +// } +// } + +#[derive(Clone, Debug)] +struct VecQueue { + queue: Vec, + set: DenseBitSet, +} + +// impl Default for VecQueue { +// fn default() -> Self { +// Self { queue: Default::default(), set: DenseBitSet::new_empty(0) } +// } +// } + +impl VecQueue { + #[inline] + fn with_none(len: usize) -> Self { + VecQueue { queue: Vec::with_capacity(len), set: DenseBitSet::new_empty(len) } + } + + #[inline] + fn insert(&mut self, element: T) { + if self.set.insert(element) { + self.queue.push(element); + } + } +} + impl<'tcx> BasicBlocks<'tcx> { #[inline] pub fn new(basic_blocks: IndexVec>) -> Self { @@ -139,21 +543,324 @@ impl<'tcx> BasicBlocks<'tcx> { self.cache.dominators.get_or_init(|| dominators(self)) } + // #[inline] + // pub fn sccs(&self) -> &SccData { + // self.cache.sccs.get_or_init(|| { + // let block_count = self.basic_blocks.len(); + + // let mut pearce = PearceRecursive::new(block_count); + // pearce.compute_sccs(&self.basic_blocks); + // let component_count = pearce.c as usize; + + // let mut sccs = vec![smallvec::SmallVec::new(); component_count]; + // for &block in self.reverse_postorder().iter() { + // let scc = pearce.r_index[block] as usize; + // sccs[scc].push(block); + // } + // SccData { component_count, components: pearce.r_index, sccs } + // }) + // } + #[inline] pub fn sccs(&self) -> &SccData { self.cache.sccs.get_or_init(|| { + // let mut sccs = Scc::new(self.basic_blocks.len()); + // sccs.compute_sccs(&self.basic_blocks); + + // let component_count = sccs.component_count; + + // let mut components= + // vec![smallvec::SmallVec::<[BasicBlock; 2]>::new(); component_count]; + // // let mut components = vec![Vec::new(); component_count]; + // let mut scc_queue = VecQueue::with_none(component_count); + + // // Reuse block bitset as scc bitset + // // sccs.visited.clear(); + + // for &block in self.reverse_postorder().iter() { + // let scc = sccs.components[block.as_usize()] as usize; + // components[scc].push(block); + + // // if sccs.visited.insert(scc) { + // // scc_queue.push(scc); + // // } + // scc_queue.insert(scc); + // } + + // SccData { + // components: sccs.components, + // component_count, + // sccs: components, + // queue: scc_queue.queue, + // } + + // // --- + + // let mut sccs = Scc::new(self.basic_blocks.len()); + // sccs.compute_sccs(&self.basic_blocks); + + // let component_count = sccs.component_count; + + // let mut components = vec![Vec::new(); component_count]; + // for &block in self.reverse_postorder().iter() { + // let scc = sccs.components[block.as_usize()] as usize; + // components[scc].push(block); + // } + + // // let mut scc_queue = VecQueue::with_none(component_count); + // // for scc in + // // self.reverse_postorder().iter().map(|bb| sccs.components[bb.as_usize()] as usize) + // // { + // // scc_queue.insert(scc); + // // } + + // // SccData { components: sccs.components, sccs: components, queue: scc_queue.queue } + // SccData { component_count, components: sccs.components, sccs: components } + + // --- + // use rustc_data_structures::graph::scc::*; + + // struct Wrapper<'a, 'tcx> { + // x: &'a IndexVec>, + // } + // impl graph::DirectedGraph for Wrapper<'_, '_> { + // type Node = 
BasicBlock; + + // #[inline] + // fn num_nodes(&self) -> usize { + // self.x.len() + // } + // } + // impl graph::Successors for Wrapper<'_, '_> { + // #[inline] + // fn successors(&self, node: Self::Node) -> impl Iterator { + // self.x[node].terminator().successors() + // } + // } + // let wrapper = Wrapper { x: &self.basic_blocks }; + // let rustc_sccs = Sccs::::new(&wrapper); + + // let component_count = rustc_sccs.num_sccs(); + + // // let mut sccs = vec![Vec::new(); component_count]; + // let mut sccs = vec![smallvec::SmallVec::new(); component_count]; + // for &block in self.reverse_postorder().iter() { + // let scc = rustc_sccs.scc(block) as usize; + // sccs[scc].push(block); + // } + + // let mut scc_queue = VecQueue::with_none(component_count); + // for scc in + // // self.reverse_postorder().iter().map(|&bb| pearce.r_index[bb]) + // self.reverse_postorder().iter().map(|&bb| rustc_sccs.scc(bb) as u32) + // { + // scc_queue.insert(scc); + // } + + // return SccData { + // component_count, + // sccs, + // components: rustc_sccs.scc_indices, + // biggest_scc: 0, + // queue: scc_queue.queue, + // }; + + // --- + + // let mut edges = Vec::new(); + + // for (bb, block) in self.basic_blocks.iter_enumerated() { + // for succ in block.terminator().successors() { + // edges.push((bb.as_usize(), succ.as_usize())); + // } + // } + + // let graph: petgraph::Graph = petgraph::Graph::from_edges(edges); + // let sccz = petgraph::algo::tarjan_scc(&graph); + // let component_count = sccz.len(); + + // let mut components = vec![0_isize; self.basic_blocks.len()]; + // for (scc_idx, scc_contents) in sccz.iter().enumerate() { + // for block_idx in scc_contents { + // components[block_idx.index()] = scc_idx as isize; + // } + // } + + // let mut sccs = vec![Vec::new(); component_count]; + // for &block in self.reverse_postorder().iter() { + // let scc = components[block.as_usize()] as usize; + // sccs[scc].push(block); + // } + + // SccData { component_count, sccs, components } + + // --- let block_count = self.basic_blocks.len(); + // tarjan is incorrect on some of the tests/ui/async-await/async-drop/ tests + // let mut tarjan = Scc::new(block_count); + // tarjan.compute_sccs(&self.basic_blocks); + let mut pearce = PearceRecursive::new(block_count); pearce.compute_sccs(&self.basic_blocks); let component_count = pearce.c as usize; - + // eprintln!("pearce recursive duplicate count: {}", pearce.duplicates_count); + + // let mut pearce_i = PearceIterative::new(block_count); + // pearce_i.compute_sccs(&self.basic_blocks); + // let component_count = pearce_i.c as usize; + + // // // assert_eq!(rustc_sccs.num_sccs(), tarjan.component_count); + // assert_eq!(rustc_sccs.num_sccs(), pearce.c as usize); + // assert_eq!(rustc_sccs.num_sccs(), pearce_i.c); + + // for block in self.basic_blocks.indices() { + // assert_eq!( + // rustc_sccs.scc(block), + // tarjan.components[block.as_usize()] as usize + // ); + // } + + // assert_eq!(rustc_sccs.num_sccs(), pearce.c as usize); + // for block in self.basic_blocks.indices() { + // assert_eq!(rustc_sccs.scc(block), pearce.r_index[block] as usize); + // } + + // assert_eq!(rustc_sccs.num_sccs(), pearce_i.c); + // for block in self.basic_blocks.indices() { + // assert_eq!(rustc_sccs.scc(block), pearce_i.r_index[block.as_usize()] as usize); + // } + + // for &block in self.reverse_postorder().iter() { + // assert_eq!(rustc_sccs.scc(block), pearce.r_index[block] as usize); + // assert_eq!(rustc_sccs.scc(block), pearce_i.r_index[block.as_usize()] as usize); + // 
assert!(rustc_sccs.scc(block) < rustc_sccs.num_sccs()); + // assert!(pearce.r_index[block] < pearce.c); + // assert!(pearce_i.r_index[block.as_usize()] < pearce_i.c); + // } + + // assert_eq!( + // tarjan.components, + // pearce.r_index.iter().map(|&c| c as isize).collect::>(), + // "blocks: {}, scc components: {}; pearce index: {}, c: {}", + // block_count, + // tarjan.component_count, + // pearce.index, + // pearce.c, + // ); + + // assert_eq!( + // pearce.r_index, + // pearce_i.r_index, + // "blocks: {}, scc components: {}; pearce_r index: {}, c: {}; pearce_i index: {}, c: {}", + // block_count, + // tarjan.component_count, + // pearce.index, + // pearce.c, + // pearce_i.index, + // pearce_i.c, + // ); + + // let component_count = tarjan.component_count; + + // let mut sccs = vec![Vec::new(); component_count]; + // for &block in self.reverse_postorder().iter() { + // let scc = tarjan.components[block.as_usize()] as usize; + // sccs[scc].push(block); + // } + + // let component_count = block_count - pearce.c - 1; + // let mut sccs = vec![Vec::new(); component_count]; + // for &block in self.reverse_postorder().iter() { + // let scc = pearce.r_index[block.as_usize()] as usize; + // sccs[scc].push(block); + // } + + // for block in self.basic_blocks.indices() { + // assert_eq!(rustc_sccs.scc(block), pearce.r_index[block] as usize); + // } + + // assert_eq!( + // self.basic_blocks + // .indices() + // .map(|bb| rustc_sccs.scc(bb) as u32) + // .collect::>(), + // pearce.r_index, + // "blocks: {}, scc components: {}; pearce index: {}, c: {}", + // block_count, + // rustc_sccs.num_sccs(), + // pearce.index, + // pearce.c, + // ); + // assert_eq!( + // self.basic_blocks + // .indices() + // .map(|bb| rustc_sccs.scc(bb) as u32) + // .collect::>(), + // pearce.r_index, + // "blocks: {}, scc components: {}; pearce_i index: {}, c: {}", + // block_count, + // rustc_sccs.num_sccs(), + // pearce_i.index, + // pearce_i.c, + // ); + + // assert_eq!( + // rustc_sccs.num_sccs(), + // component_count, + // "blocks: {}, scc components: {}; pearce index: {}, c: {}", + // block_count, + // rustc_sccs.num_sccs(), + // pearce.index, + // pearce.c, + // ); + // assert!(component_count > 0); + // assert!( + // component_count <= block_count, + // "component count: {}, block count: {}, pearse c: {}", + // component_count, + // block_count, + // pearce.c + // ); + + // let mut sccs = vec![Vec::new(); component_count]; let mut sccs = vec![smallvec::SmallVec::new(); component_count]; for &block in self.reverse_postorder().iter() { let scc = pearce.r_index[block] as usize; + // let scc = pearce_i.r_index[block.as_usize()] as usize; sccs[scc].push(block); } - SccData { component_count, components: pearce.r_index, sccs } + + // if self.basic_blocks.len() < 200 { + // for scc in 0..component_count { + // if pearce.youpi[scc][..] != sccs[scc][..] { + // eprintln!( + // "scc: {scc}, pearce: {:?}, post: {:?}", + // pearce.youpi[scc], sccs[scc] + // ); + // } + // } + // } + + let biggest_scc = 0; //sccs.iter().map(|scc| scc.len()).max().unwrap(); + + let mut scc_queue = VecQueue::with_none(component_count); + for scc in self.reverse_postorder().iter().map(|&bb| pearce.r_index[bb]) + // self.reverse_postorder().iter().map(|&bb| pearce_i.r_index[bb.as_usize()] as u32) + { + scc_queue.insert(scc); + } + + // SccData { component_count, components: tarjan.components, sccs } + SccData { + component_count, + components: pearce.r_index, + // sccs: pearce.youpi, // 1351402 invalidations -- FIXME: which is fastest? 
FIXME: try sorting within youpi vecs, per r_index? + sccs, // 1327320 invalidations + biggest_scc, + queue: scc_queue.queue, + } + // SccData { component_count, components: pearce_i.r_index, sccs, biggest_scc, queue: scc_queue.queue } }) } diff --git a/compiler/rustc_middle/src/mir/mod.rs b/compiler/rustc_middle/src/mir/mod.rs index e819aa2d8f815..ba64fd7c612fc 100644 --- a/compiler/rustc_middle/src/mir/mod.rs +++ b/compiler/rustc_middle/src/mir/mod.rs @@ -7,7 +7,7 @@ use std::fmt::{self, Debug, Formatter}; use std::iter; use std::ops::{Index, IndexMut}; -pub use basic_blocks::{BasicBlocks, SwitchTargetValue}; +pub use basic_blocks::{BasicBlocks, SccData, SwitchTargetValue}; use either::Either; use polonius_engine::Atom; use rustc_abi::{FieldIdx, VariantIdx}; diff --git a/compiler/rustc_middle/src/mir/terminator.rs b/compiler/rustc_middle/src/mir/terminator.rs index 4034a3a06e943..5c9eff837c555 100644 --- a/compiler/rustc_middle/src/mir/terminator.rs +++ b/compiler/rustc_middle/src/mir/terminator.rs @@ -690,7 +690,7 @@ impl<'tcx> TerminatorKind<'tcx> { } } -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum TerminatorEdges<'mir, 'tcx> { /// For terminators that have no successor, like `return`. None, @@ -713,7 +713,7 @@ pub enum TerminatorEdges<'mir, 'tcx> { /// List of places that are written to after a successful (non-unwind) return /// from a `Call`, `Yield` or `InlineAsm`. -#[derive(Copy, Clone, Debug)] +#[derive(Copy, Clone, Debug, PartialEq)] pub enum CallReturnPlaces<'a, 'tcx> { Call(Place<'tcx>), Yield(Place<'tcx>), diff --git a/compiler/rustc_mir_dataflow/src/framework/mod.rs b/compiler/rustc_mir_dataflow/src/framework/mod.rs index 75b7076e03708..602ba659b0677 100644 --- a/compiler/rustc_mir_dataflow/src/framework/mod.rs +++ b/compiler/rustc_mir_dataflow/src/framework/mod.rs @@ -33,8 +33,10 @@ //! [gen-kill]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems use std::cmp::Ordering; +use std::collections::VecDeque; -use rustc_data_structures::work_queue::WorkQueue; +// use rustc_data_structures::fx::FxHashSet; +// use rustc_data_structures::work_queue::WorkQueue; use rustc_index::bit_set::{DenseBitSet, MixedBitSet}; use rustc_index::{Idx, IndexVec}; use rustc_middle::bug; @@ -231,7 +233,7 @@ pub trait Analysis<'tcx> { #[inline] fn iterate_to_fixpoint<'mir>( - self, + mut self, tcx: TyCtxt<'tcx>, body: &'mir mir::Body<'tcx>, pass_name: Option<&'static str>, @@ -240,14 +242,109 @@ pub trait Analysis<'tcx> { Self: Sized, Self::Domain: DebugWithContext, { - // Computing dataflow over the SCCs is only supported in forward analyses. It's also - // unnecessary to use it on acyclic graphs, as the condensation graph is of course the same - // as the CFG itself. 
- if Self::Direction::IS_BACKWARD || !body.basic_blocks.is_cfg_cyclic() { - self.iterate_to_fixpoint_per_block(tcx, body, pass_name) - } else { - self.iterate_to_fixpoint_per_scc(tcx, body, pass_name) - } + // if Self::Direction::IS_BACKWARD { + // let (results, _) = self.iterate_to_fixpoint_per_block(tcx, body, pass_name); + // return AnalysisAndResults { analysis: self, results }; + // } + + // // let timer = std::time::Instant::now(); + // // let _ = body.basic_blocks.is_cfg_cyclic(); + // // let elapsed_cyclic = timer.elapsed(); + // // eprintln!("is cyclic: {}, elapsed: {} ns, {:?}", body.basic_blocks.is_cfg_cyclic(), elapsed_cyclic.as_nanos(), body.span); + + // // let timer = std::time::Instant::now(); + // // let sccs = body.basic_blocks.sccs(); + // // let elapsed_sccs = timer.elapsed(); + // // eprintln!("scc count: {}, elapsed: {} ns, {:?}", sccs.component_count, elapsed_sccs.as_nanos(), body.span); + + // // Computing dataflow over the SCCs is only supported in forward analyses. It's also + // // unnecessary to use it on acyclic graphs, as the condensation graph is of course the same + // // as the CFG itself. + // if Self::Direction::IS_BACKWARD || !body.basic_blocks.is_cfg_cyclic() { + // let (results, _) = self.iterate_to_fixpoint_per_block(tcx, body, pass_name); + // return AnalysisAndResults { analysis: self, results }; + // } + + // const CAN_USE_SCCS: std::cell::OnceCell = std::cell::OnceCell::new(); + // let letsgo = *CAN_USE_SCCS + // .get_or_init(|| matches!(std::env::var("LETSGO").ok().as_deref(), Some("1"))); + // let can_use_sccs = letsgo; + // let can_use_sccs = true; + + // if can_use_sccs { + // let (results, _) = self.iterate_to_fixpoint_per_scc(tcx, body, pass_name); + // return AnalysisAndResults { analysis: self, results }; + // } else { + let (results, _) = self.iterate_to_fixpoint_per_block(tcx, body, pass_name); + return AnalysisAndResults { analysis: self, results }; + // } + + // let (results_per_block, debug_per_block) = + // self.iterate_to_fixpoint_per_block(tcx, body, pass_name); + + // let (results_per_scc, debug_per_scc) = + // self.iterate_to_fixpoint_per_scc(tcx, body, pass_name); + + // assert_eq!(results_per_block, results_per_scc); + // assert!(debug_per_scc.block_invalidations <= debug_per_block.block_invalidations); + + // // if std::env::var("LETSGO").is_ok() + // // && debug_per_scc.elapsed.as_secs_f64() / debug_per_block.elapsed.as_secs_f64() < 0.75 + // // { + // // eprintln!( + // // "iterate_to_fixpoint - {} blocks took {} ns, invalidations per block: {} - {} sccs took {} ns, invalidations per scc: {}, factor: {}, {:?}", + // // debug_per_block.unit_count, + // // debug_per_block.elapsed.as_nanos(), + // // debug_per_block.block_invalidations, + // // debug_per_scc.unit_count, + // // debug_per_scc.elapsed.as_nanos(), + // // debug_per_scc.block_invalidations, + // // debug_per_scc.elapsed.as_secs_f64() / debug_per_block.elapsed.as_secs_f64(), + // // body.span, + // // ); + // // eprintln!( + // // "per scc - sccs: {} ns, queue: {:5} ns, loop setup: {:7} ns, loop: {:10} ns", + // // debug_per_scc.elapsed_sccs.as_nanos(), + // // debug_per_scc.elapsed_queue.as_nanos(), + // // debug_per_scc.elapsed_loop_setup.as_nanos(), + // // debug_per_scc.elapsed_loop.as_nanos(), + // // ); + // // eprintln!( + // // "per block - queue: {:5} ns, loop setup: {:7} ns, loop: {:10} ns", + // // debug_per_block.elapsed_queue.as_nanos(), + // // debug_per_block.elapsed_loop_setup.as_nanos(), + // // debug_per_block.elapsed_loop.as_nanos(), + // // ); + 
// // } + + // if letsgo { + // // debug_per_scc.elapsed += elapsed_cyclic; + + // // if debug_per_scc.elapsed < debug_per_block.elapsed { + // // eprintln!( + // // "per scc wins: by {} ns", + // // debug_per_block.elapsed.as_nanos() - debug_per_scc.elapsed.as_nanos() + // // ); + // // } else { + // // eprintln!( + // // "per block wins: by {} ns", + // // debug_per_scc.elapsed.as_nanos() - debug_per_block.elapsed.as_nanos() + // // ); + // // } + + // if debug_per_scc.block_invalidations < debug_per_block.block_invalidations { + // eprintln!( + // "per scc wins: by {} invalidations, scc: {} (took {} ns), block: {} (took {} ns)", + // debug_per_block.block_invalidations - debug_per_scc.block_invalidations, + // debug_per_scc.block_invalidations, + // debug_per_scc.elapsed.as_nanos(), + // debug_per_block.block_invalidations, + // debug_per_block.elapsed.as_nanos(), + // ); + // } + // } + + // AnalysisAndResults { analysis: self, results: results_per_scc } } /* Extension methods */ @@ -265,24 +362,33 @@ pub trait Analysis<'tcx> { /// dataflow analysis. Some analyses are run multiple times in the compilation pipeline. /// Without a `pass_name` to differentiates them, only the results for the latest run will be /// saved. + #[inline(always)] fn iterate_to_fixpoint_per_block<'mir>( - mut self, + &mut self, tcx: TyCtxt<'tcx>, body: &'mir mir::Body<'tcx>, pass_name: Option<&'static str>, - ) -> AnalysisAndResults<'tcx, Self> + ) -> (IndexVec, DebugTrash) where Self: Sized, Self::Domain: DebugWithContext, { + let timer = std::time::Instant::now(); + + let mut debug = DebugTrash::default(); + // debug.unit_count = body.basic_blocks.len(); + + // let _timer = std::time::Instant::now(); let mut results = IndexVec::from_fn_n(|_| self.bottom_value(body), body.basic_blocks.len()); self.initialize_start_block(body, &mut results[mir::START_BLOCK]); + let mut state = self.bottom_value(body); if Self::Direction::IS_BACKWARD && results[mir::START_BLOCK] != self.bottom_value(body) { bug!("`initialize_start_block` is not yet supported for backward dataflow analyses"); } - let mut dirty_queue: WorkQueue = WorkQueue::with_none(body.basic_blocks.len()); + let mut dirty_queue: WorkQueue = + WorkQueue::with_none(body.basic_blocks.len(), body.basic_blocks.len()); if Self::Direction::IS_FORWARD { for (bb, _) in traversal::reverse_postorder(body) { @@ -296,17 +402,23 @@ pub trait Analysis<'tcx> { } } + // debug.elapsed_loop_setup = _timer.elapsed(); + // `state` is not actually used between iterations; // this is just an optimization to avoid reallocating // every iteration. - let mut state = self.bottom_value(body); + // let mut state = self.bottom_value(body); + // let _timer = std::time::Instant::now(); + while let Some(bb) = dirty_queue.pop() { // Set the state to the entry state of the block. This is equivalent to `state = // results[bb].clone()`, but it saves an allocation, thus improving compile times. 
             state.clone_from(&results[bb]);
 
+            debug.block_invalidations += 1;
+
             Self::Direction::apply_effects_in_block(
-                &mut self,
+                self,
                 body,
                 &mut state,
                 bb,
@@ -319,80 +431,135 @@ pub trait Analysis<'tcx> {
                 },
             );
         }
+        // debug.elapsed_loop = _timer.elapsed();
+
+        debug.elapsed = timer.elapsed();
 
         if tcx.sess.opts.unstable_opts.dump_mir_dataflow {
-            let res = write_graphviz_results(tcx, body, &mut self, &results, pass_name);
+            let res = write_graphviz_results(tcx, body, self, &results, pass_name);
             if let Err(e) = res {
                 error!("Failed to write graphviz dataflow results: {}", e);
             }
         }
 
-        AnalysisAndResults { analysis: self, results }
+        // AnalysisAndResults { analysis: self, results }
+        (results, debug)
     }
 
+    #[inline(always)]
     fn iterate_to_fixpoint_per_scc<'mir>(
-        mut self,
+        &mut self,
         _tcx: TyCtxt<'tcx>,
         body: &'mir mir::Body<'tcx>,
         _pass_name: Option<&'static str>,
-    ) -> AnalysisAndResults<'tcx, Self>
+    ) -> (IndexVec<BasicBlock, Self::Domain>, DebugTrash)
     where
         Self: Sized,
         Self::Domain: DebugWithContext,
     {
-        assert!(Self::Direction::IS_FORWARD);
-
-        let sccs = body.basic_blocks.sccs();
+        // let timer = std::time::Instant::now();
 
-        struct VecQueue<T: Idx> {
-            queue: Vec<T>,
-            set: DenseBitSet<T>,
-        }
-
-        impl<T: Idx> VecQueue<T> {
-            #[inline]
-            fn with_none(len: usize) -> Self {
-                VecQueue { queue: Vec::with_capacity(len), set: DenseBitSet::new_empty(len) }
-            }
+        let mut debug = DebugTrash::default();
 
-            #[inline]
-            fn insert(&mut self, element: T) {
-                if self.set.insert(element) {
-                    self.queue.push(element);
-                }
-            }
-        }
+        // assert!(Self::Direction::IS_FORWARD);
 
-        let mut scc_queue = VecQueue::with_none(sccs.component_count);
-        for &bb in body.basic_blocks.reverse_postorder().iter() {
-            // let scc = sccs.components[bb.as_usize()];
-            let scc = sccs.components[bb];
-            scc_queue.insert(scc);
-        }
+        // let _timer = std::time::Instant::now();
+        let sccs = body.basic_blocks.sccs();
+        // debug.elapsed_sccs = _timer.elapsed();
+
+        // debug.unit_count = sccs.component_count;
+
+        // struct VecQueue<T: Idx> {
+        //     queue: Vec<T>,
+        //     set: DenseBitSet<T>,
+        // }
+
+        // impl<T: Idx> VecQueue<T> {
+        //     #[inline]
+        //     fn with_none(len: usize) -> Self {
+        //         VecQueue { queue: Vec::with_capacity(len), set: DenseBitSet::new_empty(len) }
+        //     }
+
+        //     #[inline]
+        //     fn insert(&mut self, element: T) {
+        //         if self.set.insert(element) {
+        //             self.queue.push(element);
+        //         }
+        //     }
+        // }
+
+        // let _timer = std::time::Instant::now();
+        // let mut scc_queue = VecQueue::with_none(sccs.component_count);
+        // for &bb in body.basic_blocks.reverse_postorder().iter() {
+        //     // let scc = sccs.components[bb.as_usize()];
+        //     let scc = sccs.components[bb];
+        //     scc_queue.insert(scc);
+        // }
         // assert_eq!(scc_queue.queue, sccs.queue);
+        // debug.elapsed_queue = _timer.elapsed();
 
+        // let _timer = std::time::Instant::now();
         let mut results = IndexVec::from_fn_n(|_| self.bottom_value(body), body.basic_blocks.len());
         self.initialize_start_block(body, &mut results[mir::START_BLOCK]);
 
         // Worklist for per-SCC iterations
-        let mut dirty_queue: WorkQueue<BasicBlock> = WorkQueue::with_none(body.basic_blocks.len());
+        let mut dirty_queue: WorkQueue<BasicBlock> =
+            WorkQueue::with_none(body.basic_blocks.len(), body.basic_blocks.len());
 
         let mut state = self.bottom_value(body);
+        // debug.elapsed_loop_setup = _timer.elapsed();
+
+        // let mut sccs_seen = FxHashSet::default();
+        // let mut blocks_seen = FxHashSet::default();
+
+        // let _timer = std::time::Instant::now();
+        // for &scc in &scc_queue.queue {
+        for &scc in &sccs.queue {
+            // sccs_seen.insert(scc);
+
+            // Blocks must be added in RPO.
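+            // ---------------------------------------------------------------
+            // [editorial note] Why RPO matters here: within an SCC, seeding the
+            // worklist in reverse postorder means each block is processed after
+            // as many of its intra-SCC predecessors as possible, so states are
+            // re-propagated only along back edges. E.g. for a loop
+            // `header -> body -> latch -> header`, RPO seeding processes header,
+            // body, and latch once, then revisits only the header via the
+            // latch's back edge, instead of rippling partial states around the
+            // cycle repeatedly.
+            // ---------------------------------------------------------------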
+            // for block in sccs.blocks_in_rpo(scc as usize) {
+            for block in sccs.sccs[scc as usize].iter().copied() {
+                dirty_queue.insert(block);
+            }
+            // assert!(dirty_queue.deque.len() <= sccs.biggest_scc);
+
+            // for block in sccs.sccs[scc as usize].iter().copied() {
+            //     state.clone_from(&results[block]);
+            //     Self::Direction::apply_effects_in_block(
+            //         self,
+            //         body,
+            //         &mut state,
+            //         block,
+            //         &body[block],
+            //         |target: BasicBlock, state: &Self::Domain| {
+            //             let set_changed = results[target].join(state);
+            //             // let target_scc = sccs.components[target.as_usize()];
+            //             let target_scc = sccs.components[target];
+            //             if set_changed && target_scc == scc {
+            //                 // The target block is in the SCC we're currently processing, and we
+            //                 // want to process this block until fixpoint. Otherwise, the target
+            //                 // block is in a successor SCC and it will be processed when that SCC is
+            //                 // encountered later.
+            //                 dirty_queue.insert(target);
+            //             }
+            //         },
+            //     );
+            // }
 
             while let Some(bb) = dirty_queue.pop() {
+                // blocks_seen.insert(bb);
+
                 // Set the state to the entry state of the block. This is equivalent to `state =
                 // results[bb].clone()`, but it saves an allocation, thus improving compile times.
                 state.clone_from(&results[bb]);
 
+                debug.block_invalidations += 1;
+
                 Self::Direction::apply_effects_in_block(
-                    &mut self,
+                    self,
                     body,
                     &mut state,
                     bb,
@@ -412,9 +579,108 @@ pub trait Analysis<'tcx> {
                 );
             }
         }
+        // debug.elapsed_loop = _timer.elapsed();
+
+        // debug.elapsed = timer.elapsed();
+
+        // if sccs_seen.len() != sccs.component_count {
+        //     // Uh oh, an SCC we haven't visited?!
+
+        //     let mut ok = true;
+        //     let reachable_blocks = rustc_middle::mir::traversal::reachable_as_bitset(&body);
+        //     for scc in 0..sccs.component_count {
+        //         if sccs_seen.contains(&(scc as u32)) {
+        //             continue;
+        //         }
+
+        //         // The SCC must contain only unreachable blocks!
+        //         for block in sccs.sccs[scc].iter().copied() {
+        //             if reachable_blocks.contains(block) {
+        //                 ok = false;
+        //                 break;
+        //             }
+        //         }
+        //     }
+
+        //     if !ok {
+        //         panic!("UHOH");
+        //     }
+        // }
+
+        // if blocks_seen.len() != body.basic_blocks.len() {
+        //     // Uh oh, a block we haven't visited?!
+
+        //     let mut ok = true;
+        //     let reachable_blocks = rustc_middle::mir::traversal::reachable_as_bitset(&body);
+        //     for block in body.basic_blocks.indices() {
+        //         if blocks_seen.contains(&block) {
+        //             continue;
+        //         }
+
+        //         // It has to be unreachable!
+        //         if reachable_blocks.contains(block) {
+        //             ok = false;
+        //             break;
+        //         }
+        //     }
+
+        //     if !ok {
+        //         panic!("UHOH2");
+        //     }
+        // }
+
+        (results, debug)
+    }
+}
+
+struct WorkQueue<T: Idx> {
+    pub deque: VecDeque<T>,
+    set: DenseBitSet<T>,
+}
 
-        AnalysisAndResults { analysis: self, results }
+impl<T: Idx> WorkQueue<T> {
+    /// Creates a new work queue that starts empty, where elements range over `0..domain_len`.
+    #[inline]
+    fn with_none(queue_len: usize, domain_len: usize) -> Self {
+        WorkQueue {
+            deque: VecDeque::with_capacity(queue_len),
+            set: DenseBitSet::new_empty(domain_len),
+        }
+    }
+
+    /// Attempt to enqueue `element` in the work queue. Returns false if it was already present.
+    #[inline]
+    fn insert(&mut self, element: T) -> bool {
+        if self.set.insert(element) {
+            self.deque.push_back(element);
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Attempt to pop an element from the work queue.
+    #[inline]
+    fn pop(&mut self) -> Option<T> {
+        if let Some(element) = self.deque.pop_front() {
+            self.set.remove(element);
+            Some(element)
+        } else {
+            None
+        }
+    }
+}
+
+#[derive(Default)]
+pub struct DebugTrash {
+    pub block_invalidations: usize,
+    pub elapsed: std::time::Duration,
+    pub elapsed_sccs: std::time::Duration,
+    pub elapsed_queue: std::time::Duration,
+    pub elapsed_loop_setup: std::time::Duration,
+    pub elapsed_loop: std::time::Duration,
+    // Block count or SCC count.
+    pub unit_count: usize,
+}
 
 /// The legal operations for a transfer function in a gen/kill problem.
diff --git a/compiler/rustc_mir_transform/src/dataflow_const_prop.rs b/compiler/rustc_mir_transform/src/dataflow_const_prop.rs
index fe53de31f7583..ecfc44595def7 100644
--- a/compiler/rustc_mir_transform/src/dataflow_const_prop.rs
+++ b/compiler/rustc_mir_transform/src/dataflow_const_prop.rs
@@ -43,6 +43,12 @@ impl<'tcx> crate::MirPass<'tcx> for DataflowConstProp {
         debug!(def_id = ?body.source.def_id());
         if tcx.sess.mir_opt_level() < 4 && body.basic_blocks.len() > BLOCK_LIMIT {
             debug!("aborted dataflow const prop due too many basic blocks");
+            eprintln!(
+                "aborted dataflow const prop due to too many basic blocks {} (limit: {}), mir opt-level={} (limit: 4)",
+                body.basic_blocks.len(),
+                BLOCK_LIMIT,
+                tcx.sess.mir_opt_level()
+            );
             return;
         }
 
@@ -60,8 +66,14 @@ impl<'tcx> crate::MirPass<'tcx> for DataflowConstProp {
         let map = Map::new(tcx, body, place_limit);
 
         // Perform the actual dataflow analysis.
+        // let timer = std::time::Instant::now();
         let mut const_ = debug_span!("analyze")
-            .in_scope(|| ConstAnalysis::new(tcx, body, map).iterate_to_fixpoint(tcx, body, None));
+            .in_scope(|| ConstAnalysis::new(tcx, body, map).iterate_to_fixpoint(tcx, body, None)); // FIXME: optimize
+        // let elapsed = timer.elapsed();
+        // // if elapsed.as_millis() > 1
+        // {
+        //     eprintln!("ConstAnalysis took {} ns on {:?}", elapsed.as_nanos(), body.span);
+        // }
 
         // Collect results and patch the body afterwards.
         let mut visitor = Collector::new(tcx, &body.local_decls);
diff --git a/compiler/rustc_mir_transform/src/dest_prop.rs b/compiler/rustc_mir_transform/src/dest_prop.rs
index 4c94a6c524e00..cf0ff17737cc7 100644
--- a/compiler/rustc_mir_transform/src/dest_prop.rs
+++ b/compiler/rustc_mir_transform/src/dest_prop.rs
@@ -169,7 +169,12 @@ impl<'tcx> crate::MirPass<'tcx> for DestinationPropagation {
 
         let borrowed = rustc_mir_dataflow::impls::borrowed_locals(body);
 
-        let live = MaybeLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp"));
+        // let timer = std::time::Instant::now();
+        let live = MaybeLiveLocals.iterate_to_fixpoint(tcx, body, Some("MaybeLiveLocals-DestProp")); // FIXME: optimize, though this analysis is backward
+        // let elapsed = timer.elapsed();
+        // // if elapsed.as_millis() > 1 {
+        //     eprintln!("MaybeLiveLocals took {} ns on {:?}", elapsed.as_nanos(), body.span);
+        // // }
 
         let points = DenseLocationMap::new(body);
         let mut live = save_as_intervals(&points, body, live.analysis, live.results);
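// ---------------------------------------------------------------------------
// [editorial appendix] The heart of this patch is Pearce's memory-efficient
// recursive SCC algorithm (`PearceRecursive` above). Below is a self-contained
// sketch of the same algorithm, decoupled from MIR and using plain `usize`
// node indices over an adjacency list; all names here are illustrative and not
// part of the patch.
// ---------------------------------------------------------------------------

use std::collections::VecDeque;

/// Pearce's single-array SCC computation. `r_index[v] == 0` marks an unvisited
/// node, so visitation indices start at 1; component labels are assigned
/// downwards from `node_count` and remapped to `0..component_count` afterwards.
struct Pearce {
    r_index: Vec<u32>,
    stack: VecDeque<usize>,
    index: u32,
    c: u32,
}

impl Pearce {
    fn new(node_count: usize) -> Self {
        assert!(node_count > 0); // only non-empty graphs are supported
        Pearce { r_index: vec![0; node_count], stack: VecDeque::new(), index: 1, c: node_count as u32 }
    }

    fn compute(&mut self, succ: &[Vec<usize>]) {
        for v in 0..succ.len() {
            if self.r_index[v] == 0 {
                self.visit(v, succ);
            }
        }
        // Labels were assigned from `node_count - 1` downwards; remap them to
        // `0..component_count` so they can index an array of SCCs.
        let n = succ.len() as u32;
        for scc_index in self.r_index.iter_mut() {
            *scc_index = n - *scc_index - 1;
        }
        // Adjust the counter so that `c` is now the component count.
        self.c = n - self.c;
    }

    fn visit(&mut self, v: usize, succ: &[Vec<usize>]) {
        let mut root = true;
        self.r_index[v] = self.index;
        self.index += 1;

        for &w in &succ[v] {
            if self.r_index[w] == 0 {
                self.visit(w, succ);
            }
            // In-progress nodes hold visitation indices; completed ones hold
            // component labels, which are never smaller than any live index, so
            // this test only triggers within the current SCC.
            if self.r_index[w] < self.r_index[v] {
                self.r_index[v] = self.r_index[w];
                root = false;
            }
        }

        if root {
            self.index -= 1;
            self.c -= 1;
            // Pop the rest of the SCC rooted at `v` off the stack and label it.
            while let Some(&w) = self.stack.front() {
                if self.r_index[v] > self.r_index[w] {
                    break;
                }
                self.stack.pop_front();
                self.r_index[w] = self.c;
                self.index -= 1;
            }
            self.r_index[v] = self.c;
        } else {
            self.stack.push_front(v);
        }
    }
}

fn main() {
    // 0 -> 1 -> 2 -> 0 form a cycle; 2 -> 3 leaves it.
    let graph = vec![vec![1], vec![2], vec![0, 3], vec![]];
    let mut pearce = Pearce::new(graph.len());
    pearce.compute(&graph);
    assert_eq!(pearce.c, 2); // two SCCs: {0, 1, 2} and {3}
    assert_eq!(pearce.r_index[0], pearce.r_index[1]);
    assert_eq!(pearce.r_index[1], pearce.r_index[2]);
    assert_ne!(pearce.r_index[0], pearce.r_index[3]);
    println!("components: {:?}", pearce.r_index);
}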