diff --git a/sqlx-core/src/pool/inner.rs b/sqlx-core/src/pool/inner.rs index b698dc9df0..732de11521 100644 --- a/sqlx-core/src/pool/inner.rs +++ b/sqlx-core/src/pool/inner.rs @@ -33,6 +33,9 @@ pub(crate) struct PoolInner { pub(super) options: PoolOptions, pub(crate) acquire_time_level: Option, pub(crate) acquire_slow_level: Option, + /// Counter for phantom-permit retries, used by tests to verify backoff behavior. + #[cfg(test)] + pub(super) phantom_retries: AtomicU32, } impl PoolInner { @@ -62,6 +65,8 @@ impl PoolInner { acquire_time_level: private_level_filter_to_trace_level(options.acquire_time_level), acquire_slow_level: private_level_filter_to_trace_level(options.acquire_slow_level), options, + #[cfg(test)] + phantom_retries: AtomicU32::new(0), }; let pool = Arc::new(pool); @@ -254,6 +259,8 @@ impl PoolInner { let acquired = crate::rt::timeout( self.options.acquire_timeout, async { + let mut backoff_count: u32 = 0; + loop { // Handles the close-event internally let permit = self.acquire_permit().await?; @@ -279,11 +286,23 @@ impl PoolInner { // This can happen for a child pool that's at its connection limit, // or if the pool was closed between `acquire_permit()` and // `try_increment_size()`. + // + // It can also happen due to "phantom permits" — the semaphore + // has permits available but no idle connections exist and the + // pool is at max_connections. This occurs when a connection is + // discarded (expired/errored) while checked out: the + // DecrementSizeGuard releases a permit but the connection was + // never returned to the idle queue. + // + // Use exponential backoff to avoid spinning at 100% CPU on + // repeated phantom permit acquisitions. The backoff also gives + // time for checked-out connections to be returned to the pool. tracing::debug!("woke but was unable to acquire idle connection or open new one; retrying"); - // If so, we're likely in the current-thread runtime if it's Tokio, - // and so we should yield to let any spawned return_to_pool() tasks - // execute. - crate::rt::yield_now().await; + #[cfg(test)] + self.phantom_retries.fetch_add(1, Ordering::Relaxed); + let backoff = Duration::from_millis(1 << cmp::min(backoff_count, 8)); + crate::rt::sleep(backoff).await; + backoff_count += 1; continue; } }; @@ -621,3 +640,21 @@ impl Drop for DecrementSizeGuard { } } } + +#[cfg(test)] +impl PoolInner { + /// Simulate the "phantom permit" condition that can occur on ARM/aarch64 + /// due to weak memory ordering between `idle_conns.push()` and `semaphore.release()` + /// in the `release()` path. + /// + /// Sets `size` to `max_connections` and injects extra semaphore permits that + /// don't correspond to any idle connection, reproducing the state where + /// `acquire()` would spin in a tight loop with the old `yield_now()` code. + pub(super) fn inject_phantom_permits(&self, count: usize) { + // Set pool size to max so try_increment_size() will fail. + self.size + .store(self.options.max_connections, Ordering::Release); + // Add permits that don't correspond to idle connections. + self.semaphore.release(count); + } +} diff --git a/sqlx-core/src/pool/mod.rs b/sqlx-core/src/pool/mod.rs index f11ff1d76a..f5429a28ac 100644 --- a/sqlx-core/src/pool/mod.rs +++ b/sqlx-core/src/pool/mod.rs @@ -666,3 +666,100 @@ fn assert_pool_traits() { assert_clone::>(); } } + +#[cfg(test)] +#[cfg(all(feature = "any", feature = "_rt-tokio"))] +mod phantom_permit_tests { + use super::*; + use std::sync::atomic::Ordering as AtomicOrdering; + use std::time::{Duration, Instant}; + + /// Reproduces the phantom permit spin loop that occurs on ARM/aarch64. + /// + /// On ARM, weak memory ordering can cause `semaphore.release()` in the + /// `release()` path to become visible to other cores before + /// `idle_conns.push()`, creating "phantom permits" — semaphore permits + /// with no corresponding idle connection. + /// + /// When the pool is at `max_connections`, this leads to a tight loop: + /// 1. acquire permit (succeeds — phantom) + /// 2. pop_idle() → None + /// 3. try_increment_size() → fails (at max) + /// 4. drop permit (re-released to semaphore) + /// 5. goto 1 + /// + /// With the old `yield_now()` code, this loop would execute millions of + /// times per second, consuming 100% CPU. With exponential backoff + /// (1ms, 2ms, 4ms, ..., 256ms cap), only ~15 retries occur in 2 seconds. + /// + /// This test synthetically injects phantom permits and asserts on the + /// retry count to verify that backoff is working. + /// + /// Run with: cargo test -p sqlx-core --features _rt-tokio,any phantom_permit + #[test] + fn test_phantom_permit_backoff_limits_retries() { + use crate::any::{Any, AnyConnectOptions}; + + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + + rt.block_on(async { + let conn_options: AnyConnectOptions = "sqlite::memory:".parse().unwrap(); + + let acquire_timeout = Duration::from_secs(2); + let pool = PoolOptions::::new() + .max_connections(4) + .acquire_timeout(acquire_timeout) + .connect_lazy_with(conn_options); + + // Inject 4 phantom permits: semaphore has permits, idle queue is + // empty, and size == max_connections. This is the exact state + // observed on aarch64 under heavy contention. + pool.0.inject_phantom_permits(4); + + // Verify the phantom state is set up correctly. + assert_eq!(pool.0.size(), pool.0.options.max_connections); + assert_eq!(pool.0.num_idle(), 0); + assert!(pool.0.semaphore.permits() >= 4); + + let start = Instant::now(); + let result = pool.0.acquire().await; + let elapsed = start.elapsed(); + + // Must return PoolTimedOut, not spin forever. + assert!( + matches!(result, Err(Error::PoolTimedOut)), + "expected PoolTimedOut, got: {:?}", + result.err() + ); + + // The acquire should have taken roughly `acquire_timeout` wall time. + assert!( + elapsed >= acquire_timeout - Duration::from_millis(100), + "acquire returned too quickly ({elapsed:?}), expected ~{acquire_timeout:?}" + ); + + // KEY ASSERTION: With exponential backoff (1+2+4+8+16+32+64+128+256+256+...ms), + // roughly 12-16 retries fit in 2 seconds. With the old yield_now() code, + // this would be millions of retries (yield_now returns in ~nanoseconds). + // + // We use 100 as a generous upper bound that still catches a spin loop. + let retries = pool.0.phantom_retries.load(AtomicOrdering::Relaxed); + assert!( + retries < 100, + "too many phantom permit retries ({retries}): \ + acquire loop is spinning instead of backing off. \ + With yield_now() this would be millions; with backoff, ~15." + ); + + // Sanity check: we did actually retry (not just timeout immediately). + assert!( + retries >= 5, + "too few retries ({retries}): backoff may be too aggressive \ + or the test setup is wrong" + ); + }); + } +}