diff --git a/compiler/rustc_middle/src/query/plumbing.rs b/compiler/rustc_middle/src/query/plumbing.rs index 9652be2551629..3840886fcd3b3 100644 --- a/compiler/rustc_middle/src/query/plumbing.rs +++ b/compiler/rustc_middle/src/query/plumbing.rs @@ -87,6 +87,19 @@ pub struct CycleError { } impl<'tcx> CycleError> { + pub fn is_similar_to(&self, other: &Self) -> bool { + (match (&self.usage, &other.usage) { + (None, None) => true, + (None, Some(_)) | (Some(_), None) => false, + (Some((s1, f1)), Some((s2, f2))) => s1 == s2 && f1.is_similar_to(&f2), + }) && self.cycle.len() == other.cycle.len() + && self + .cycle + .iter() + .zip(&other.cycle) + .all(|(q1, q2)| q1.span == q2.span && q1.frame.is_similar_to(&q2.frame)) + } + pub fn lift(&self) -> CycleError { CycleError { usage: self.usage.as_ref().map(|(span, frame)| (*span, frame.lift())), diff --git a/compiler/rustc_middle/src/query/stack.rs b/compiler/rustc_middle/src/query/stack.rs index fd80c7edd602d..feabe50af29ca 100644 --- a/compiler/rustc_middle/src/query/stack.rs +++ b/compiler/rustc_middle/src/query/stack.rs @@ -46,6 +46,12 @@ impl<'tcx> QueryStackFrame> { def_id_for_ty_in_cycle: self.def_id_for_ty_in_cycle, } } + + pub fn is_similar_to(&self, other: &Self) -> bool { + self.dep_kind == other.dep_kind + && self.def_id == other.def_id + && self.def_id_for_ty_in_cycle == other.def_id_for_ty_in_cycle + } } #[derive(Clone, Debug)] diff --git a/compiler/rustc_query_impl/src/execution.rs b/compiler/rustc_query_impl/src/execution.rs index 53afcacb63a6c..e3aca300e638c 100644 --- a/compiler/rustc_query_impl/src/execution.rs +++ b/compiler/rustc_query_impl/src/execution.rs @@ -220,7 +220,8 @@ fn cycle_error<'tcx, C: QueryCache>( .ok() .expect("failed to collect active queries"); - let error = find_cycle_in_stack(try_execute, job_map, ¤t_query_job(tcx), span); + let error = find_cycle_in_stack(try_execute, &job_map, current_query_job(tcx), span) + .expect("did not find a cycle"); (mk_cycle(query, tcx, error.lift()), None) } diff --git a/compiler/rustc_query_impl/src/job.rs b/compiler/rustc_query_impl/src/job.rs index 2d9824a783ea5..11494f6c5fd79 100644 --- a/compiler/rustc_query_impl/src/job.rs +++ b/compiler/rustc_query_impl/src/job.rs @@ -1,14 +1,11 @@ use std::io::Write; -use std::iter; -use std::ops::ControlFlow; -use std::sync::Arc; -use rustc_data_structures::fx::{FxHashMap, FxHashSet}; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::indexmap::{self, IndexMap}; use rustc_errors::{Diag, DiagCtxtHandle}; use rustc_hir::def::DefKind; use rustc_middle::query::{ - CycleError, QueryInfo, QueryJob, QueryJobId, QueryLatch, QueryStackDeferred, QueryStackFrame, - QueryWaiter, + CycleError, QueryInfo, QueryJob, QueryJobId, QueryStackDeferred, QueryStackFrame, }; use rustc_middle::ty::TyCtxt; use rustc_session::Session; @@ -34,18 +31,6 @@ impl<'tcx> QueryJobMap<'tcx> { fn frame_of(&self, id: QueryJobId) -> &QueryStackFrame> { &self.map[&id].frame } - - fn span_of(&self, id: QueryJobId) -> Span { - self.map[&id].job.span - } - - fn parent_of(&self, id: QueryJobId) -> Option { - self.map[&id].job.parent - } - - fn latch_of(&self, id: QueryJobId) -> Option<&QueryLatch<'tcx>> { - self.map[&id].job.latch.as_ref() - } } #[derive(Clone, Debug)] @@ -56,13 +41,12 @@ pub(crate) struct QueryJobInfo<'tcx> { pub(crate) fn find_cycle_in_stack<'tcx>( id: QueryJobId, - job_map: QueryJobMap<'tcx>, - current_job: &Option, + job_map: &QueryJobMap<'tcx>, + mut current_job: Option, span: Span, -) -> CycleError> { +) -> Option>> { // Find the waitee amongst `current_job` parents let mut cycle = Vec::new(); - let mut current_job = Option::clone(current_job); while let Some(job) = current_job { let info = &job_map.map[&job]; @@ -77,17 +61,15 @@ pub(crate) fn find_cycle_in_stack<'tcx>( // Replace it with the span which caused the cycle to form cycle[0].span = span; // Find out why the cycle itself was used - let usage = try { - let parent = info.job.parent?; - (info.job.span, job_map.frame_of(parent).clone()) - }; - return CycleError { usage, cycle }; + let usage = + info.job.parent.map(|parent| (info.job.span, job_map.frame_of(parent).clone())); + return Some(CycleError { usage, cycle }); } current_job = info.job.parent; } - panic!("did not find a cycle") + None } #[cold] @@ -113,262 +95,148 @@ pub(crate) fn find_dep_kind_root<'tcx>( last_layout } -/// A resumable waiter of a query. The usize is the index into waiters in the query's latch -type Waiter = (QueryJobId, usize); - -/// Visits all the non-resumable and resumable waiters of a query. -/// Only waiters in a query are visited. -/// `visit` is called for every waiter and is passed a query waiting on `query` -/// and a span indicating the reason the query waited on `query`. -/// If `visit` returns `Break`, this function also returns `Break`, -/// and if all `visit` calls returns `Continue` it also returns `Continue`. -/// For visits of non-resumable waiters it returns the return value of `visit`. -/// For visits of resumable waiters it returns information required to resume that waiter. -fn visit_waiters<'tcx>( - job_map: &QueryJobMap<'tcx>, - query: QueryJobId, - mut visit: impl FnMut(Span, QueryJobId) -> ControlFlow>, -) -> ControlFlow> { - // Visit the parent query which is a non-resumable waiter since it's on the same stack - if let Some(parent) = job_map.parent_of(query) { - visit(job_map.span_of(query), parent)?; +/// Finds a query to break cycle on. +/// +/// This function doesn't distinguish between a query wait and a query execution, so both are just +/// query calls. +/// As such some queries may have two or more parent query calls too. +/// It uses depth-first search from a single root query down to the first duplicate query, +/// establishing a cycle. +#[allow(rustc::potential_query_instability)] +fn find_cycle_in_graph<'tcx>( + query_map: &QueryJobMap<'tcx>, +) -> (QueryJobId, usize, CycleError>) { + // We pick any root query we find + let (&root_query, _) = query_map + .map + .iter() + .find(|(_, info)| info.job.parent.is_none()) + .expect("no root query was found"); + + #[derive(Clone, Copy)] + struct Subquery { + id: QueryJobId, + span: Span, + /// Waiter index or `usize::MAX` if subquery was executed + waiter_idx: usize, } - // Visit the explicit waiters which use condvars and are resumable - if let Some(latch) = job_map.latch_of(query) { - for (i, waiter) in latch.info.lock().waiters.iter().enumerate() { - if let Some(waiter_query) = waiter.query { - // Return a value which indicates that this waiter can be resumed - visit(waiter.span, waiter_query).map_break(|_| Some((query, i)))?; - } - } + // We are allowed to keep track of just one subquery since each query has at least one subquery. + // + // If we would assume the opposite then thread of query with no subqueries cannot wait on any + // subquery. That thread neither can wait on a running parallel task in functions like + // `par_join`, `par_slice` as the thread executing this parallel task must be blocked too since + // we are in a deadlock. Rustc only tracks these two cases of blocking code to trigger a + // deadlock so our assumption has to be false. + let mut subqueries = FxHashMap::default(); + for query in query_map.map.values() { + let Some(parent) = query.job.parent else { + continue; + }; + // We are safe to only track a single subquery due to the statement above + subqueries.entry(parent).or_insert(Subquery { + id: query.job.id, + span: query.job.span, + waiter_idx: usize::MAX, + }); } - ControlFlow::Continue(()) -} - -/// Look for query cycles by doing a depth first search starting at `query`. -/// `span` is the reason for the `query` to execute. This is initially DUMMY_SP. -/// If a cycle is detected, this initial value is replaced with the span causing -/// the cycle. -fn cycle_check<'tcx>( - job_map: &QueryJobMap<'tcx>, - query: QueryJobId, - span: Span, - stack: &mut Vec<(Span, QueryJobId)>, - visited: &mut FxHashSet, -) -> ControlFlow> { - if !visited.insert(query) { - return if let Some(p) = stack.iter().position(|q| q.1 == query) { - // We detected a query cycle, fix up the initial span and return Some - - // Remove previous stack entries - stack.drain(0..p); - // Replace the span for the first query with the cycle cause - stack[0].0 = span; - ControlFlow::Break(None) - } else { - ControlFlow::Continue(()) + for query in query_map.map.values() { + let Some(latch) = &query.job.latch else { + continue; }; + // Latch mutexes should be at least about to unlock as we do not hold it anywhere too long + let lock = latch.info.lock(); + assert!(!lock.complete); + for (waiter_idx, waiter) in lock.waiters.iter().enumerate() { + let waited_on_query = waiter.query.expect("cannot wait on a root query"); + // We are safe to only track a single subquery due to the statement above + subqueries.entry(waited_on_query).or_insert(Subquery { + id: query.job.id, + span: waiter.span, + waiter_idx, + }); + } } - // Query marked as visited is added it to the stack - stack.push((span, query)); - - // Visit all the waiters - let r = visit_waiters(job_map, query, |span, successor| { - cycle_check(job_map, successor, span, stack, visited) - }); - - // Remove the entry in our stack if we didn't find a cycle - if r.is_continue() { - stack.pop(); + // Debug check the statement above + if cfg!(debug_assertions) { + for query in query_map.map.values() { + assert!(subqueries.contains_key(&query.job.id)); + } } - r -} - -/// Finds out if there's a path to the compiler root (aka. code which isn't in a query) -/// from `query` without going through any of the queries in `visited`. -/// This is achieved with a depth first search. -fn connected_to_root<'tcx>( - job_map: &QueryJobMap<'tcx>, - query: QueryJobId, - visited: &mut FxHashSet, -) -> ControlFlow> { - // We already visited this or we're deliberately ignoring it - if !visited.insert(query) { - return ControlFlow::Continue(()); + // At least one thread waits on the first duplicate query in depth-first search stack. + // Consider this stack of subqueries: + // + // ```text + // a() -> b() -> c() -> b() + // ``` + // + // In order for this statement to be false, both occurrences of `b()` only be query executions. + // Only a single query executes a subquery, so parents of these occurrences of `b()` have to be + // the same query, aka `a()` and `c()` are equal. + // However that means `b()` is not the first duplicate query in the stack, + // so the original statement must be true. + let mut visited = IndexMap::new(); + let mut last_parent = None; + let mut last = Subquery { id: root_query, span: DUMMY_SP, waiter_idx: usize::MAX }; + while let indexmap::map::Entry::Vacant(entry) = visited.entry(last.id) { + entry.insert((last_parent, last)); + last_parent = Some(last.id); + last = subqueries[&last.id]; } - // This query is connected to the root (it has no query parent), return true - if job_map.parent_of(query).is_none() { - return ControlFlow::Break(None); + let parent = visited[&last.id].0; + let mut iter = visited.values(); + let mut cycle = Vec::new(); + loop { + let (_, subquery) = iter.next_back().unwrap(); + let frame = query_map.map[&subquery.id].frame.clone(); + cycle.push(QueryInfo { span: subquery.span, frame }); + if subquery.id == last.id { + break; + } } + cycle.reverse(); + cycle[0].span = last.span; + let usage = parent.map(|parent| (last.span, query_map.map[&parent].frame.clone())); + let cycle_error = CycleError { usage, cycle }; - visit_waiters(job_map, query, |_, successor| connected_to_root(job_map, successor, visited)) -} - -/// Looks for query cycles starting from the last query in `jobs`. -/// If a cycle is found, all queries in the cycle is removed from `jobs` and -/// the function return true. -/// If a cycle was not found, the starting query is removed from `jobs` and -/// the function returns false. -fn remove_cycle<'tcx>( - job_map: &QueryJobMap<'tcx>, - jobs: &mut Vec, - wakelist: &mut Vec>>, -) -> bool { - let mut visited = FxHashSet::default(); - let mut stack = Vec::new(); - // Look for a cycle starting with the last query in `jobs` - if let ControlFlow::Break(waiter) = - cycle_check(job_map, jobs.pop().unwrap(), DUMMY_SP, &mut stack, &mut visited) + if cfg!(debug_assertions) + && let Some(expected) = find_cycle_in_stack(last.id, query_map, last_parent, last.span) { - // The stack is a vector of pairs of spans and queries; reverse it so that - // the earlier entries require later entries - let (mut spans, queries): (Vec<_>, Vec<_>) = stack.into_iter().rev().unzip(); - - // Shift the spans so that queries are matched with the span for their waitee - spans.rotate_right(1); - - // Zip them back together - let mut stack: Vec<_> = iter::zip(spans, queries).collect(); - - // Remove the queries in our cycle from the list of jobs to look at - for r in &stack { - if let Some(pos) = jobs.iter().position(|j| j == &r.1) { - jobs.remove(pos); - } - } - - struct EntryPoint { - query_in_cycle: QueryJobId, - waiter: Option<(Span, QueryJobId)>, - } - - // Find the queries in the cycle which are - // connected to queries outside the cycle - let entry_points = stack - .iter() - .filter_map(|&(_, query_in_cycle)| { - if job_map.parent_of(query_in_cycle).is_none() { - // This query is connected to the root (it has no query parent) - Some(EntryPoint { query_in_cycle, waiter: None }) - } else { - let mut waiter_on_cycle = None; - // Find a direct waiter who leads to the root - let _ = visit_waiters(job_map, query_in_cycle, |span, waiter| { - // Mark all the other queries in the cycle as already visited - let mut visited = FxHashSet::from_iter(stack.iter().map(|q| q.1)); - - if connected_to_root(job_map, waiter, &mut visited).is_break() { - waiter_on_cycle = Some((span, waiter)); - ControlFlow::Break(None) - } else { - ControlFlow::Continue(()) - } - }); - - waiter_on_cycle.map(|waiter_on_cycle| EntryPoint { - query_in_cycle, - waiter: Some(waiter_on_cycle), - }) - } - }) - .collect::>(); - - // Pick an entry point, preferring ones with waiters - let entry_point = entry_points - .iter() - .find(|entry_point| entry_point.waiter.is_some()) - .unwrap_or(&entry_points[0]); - - // Shift the stack so that our entry point is first - let entry_point_pos = - stack.iter().position(|(_, query)| *query == entry_point.query_in_cycle); - if let Some(pos) = entry_point_pos { - stack.rotate_left(pos); - } - - let usage = entry_point.waiter.map(|(span, job)| (span, job_map.frame_of(job).clone())); - - // Create the cycle error - let error = CycleError { - usage, - cycle: stack - .iter() - .map(|&(span, job)| QueryInfo { span, frame: job_map.frame_of(job).clone() }) - .collect(), - }; - - // We unwrap `waiter` here since there must always be one - // edge which is resumable / waited using a query latch - let (waitee_query, waiter_idx) = waiter.unwrap(); - - // Extract the waiter we want to resume - let waiter = job_map.latch_of(waitee_query).unwrap().extract_waiter(waiter_idx); - - // Set the cycle error so it will be picked up when resumed - *waiter.cycle.lock() = Some(error); - - // Put the waiter on the list of things to resume - wakelist.push(waiter); + assert!(cycle_error.is_similar_to(&expected)); + } - true + // Per statement above we should have wait at either of two occurrences of the duplicate query + let waiter_idx = if last.waiter_idx != usize::MAX { + last.waiter_idx } else { - false - } + visited.get(&last.id).unwrap().1.waiter_idx + }; + + (last.id, waiter_idx, cycle_error) } -/// Detects query cycles by using depth first search over all active query jobs. -/// If a query cycle is found it will break the cycle by finding an edge which -/// uses a query latch and then resuming that waiter. -/// There may be multiple cycles involved in a deadlock, so this searches -/// all active queries for cycles before finally resuming all the waiters at once. pub fn break_query_cycles<'tcx>( - job_map: QueryJobMap<'tcx>, + query_map: QueryJobMap<'tcx>, registry: &rustc_thread_pool::Registry, ) { - let mut wakelist = Vec::new(); - // It is OK per the comments: - // - https://github.com/rust-lang/rust/pull/131200#issuecomment-2798854932 - // - https://github.com/rust-lang/rust/pull/131200#issuecomment-2798866392 - #[allow(rustc::potential_query_instability)] - let mut jobs: Vec = job_map.map.keys().copied().collect(); - - let mut found_cycle = false; - - while jobs.len() > 0 { - if remove_cycle(&job_map, &mut jobs, &mut wakelist) { - found_cycle = true; - } - } + let (waited_on, waiter_idx, cycle_error) = find_cycle_in_graph(&query_map); - // Check that a cycle was found. It is possible for a deadlock to occur without - // a query cycle if a query which can be waited on uses Rayon to do multithreading - // internally. Such a query (X) may be executing on 2 threads (A and B) and A may - // wait using Rayon on B. Rayon may then switch to executing another query (Y) - // which in turn will wait on X causing a deadlock. We have a false dependency from - // X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here - // only considers the true dependency and won't detect a cycle. - if !found_cycle { - panic!( - "deadlock detected as we're unable to find a query cycle to break\n\ - current query map:\n{job_map:#?}", - ); - } + let waited_on = &query_map.map[&waited_on]; + let latch = waited_on.job.latch.as_ref().unwrap(); + let mut latch_info_lock = latch.info.try_lock().unwrap(); - // Mark all the thread we're about to wake up as unblocked. This needs to be done before - // we wake the threads up as otherwise Rayon could detect a deadlock if a thread we - // resumed fell asleep and this thread had yet to mark the remaining threads as unblocked. - for _ in 0..wakelist.len() { - rustc_thread_pool::mark_unblocked(registry); - } + // And so this `Vec::remove` shouldn't cause a panic + let waiter = latch_info_lock.waiters.remove(waiter_idx); - for waiter in wakelist.into_iter() { - waiter.condvar.notify_one(); - } + let mut cycle_lock = waiter.cycle.try_lock().unwrap(); + assert!(cycle_lock.is_none()); + *cycle_lock = Some(cycle_error); + rustc_thread_pool::mark_unblocked(registry); + waiter.condvar.notify_one(); } pub fn print_query_stack<'tcx>(