From a2dd9132d1cbeae88f4e434552e8a340fb3c8612 Mon Sep 17 00:00:00 2001
From: Sulkta
Date: Wed, 13 May 2026 10:56:22 -0700
Subject: [PATCH] =?UTF-8?q?serve:=20graceful-shutdown=20bug=20=E2=80=94=20?=
 =?UTF-8?q?was=20exit-looping=20every=2015s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The serve loop wrapped axum::serve in tokio::time::timeout(15s), which
caps the WHOLE serve future, not just the shutdown drain. Net effect:
skald-serve cleanly returned Ok after 15 seconds every time, docker
restart picked it up, container went through the exit-loop.

Made any long-running docker exec (like summarize, with opus calls that
take 60-180s) racy at best, dead at worst — the embedded postgres got
'database system was not properly shut down' every 15s on the dot.

Fix: drop the timeout around the serve future and bound only the drain.
axum starts its graceful shutdown when the future handed to
with_graceful_shutdown resolves, so that future resolves right on
SIGTERM/SIGINT and flags a oneshot; run() then races the draining server
against a 15s sleep that starts at the signal. (Sleeping inside the
shutdown future would only delay the start of the drain by 15s and leave
the drain itself unbounded.) Container only goes down on a real signal.

Same bug exists in cwho-panel (copy-pasted from there). Fixing there in
a separate commit.
---
 skald/src/serve.rs | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/skald/src/serve.rs b/skald/src/serve.rs
index 145accf..0af07cd 100644
--- a/skald/src/serve.rs
+++ b/skald/src/serve.rs
@@ -39,14 +39,30 @@ pub async fn run(database_url: &str, listen: &str) -> anyhow::Result<()> {
     let listener = tokio::net::TcpListener::bind(listen).await?;
     tracing::info!(listen, "api listening");
 
-    let serve = axum::serve(listener, router).with_graceful_shutdown(shutdown());
-    match tokio::time::timeout(Duration::from_secs(15), serve).await {
-        Ok(r) => r?,
-        Err(_) => tracing::warn!("graceful shutdown timed out after 15s — exiting anyway"),
-    }
+    // Run until SIGTERM/SIGINT. axum starts draining as soon as the
+    // graceful-shutdown future resolves, so that future resolves right on
+    // the signal and the 15s cap is raced against the drain below.
+    let (drain_tx, drain_rx) = tokio::sync::oneshot::channel::<()>();
+    let serve = axum::serve(listener, router).with_graceful_shutdown(async move {
+        shutdown().await;
+        tracing::info!("graceful drain starting (15s deadline)");
+        // Start the deadline clock; Err only means nobody is waiting on it.
+        let _ = drain_tx.send(());
+    });
+    let deadline = async move {
+        if drain_rx.await.is_err() {
+            // Sender dropped without a signal: there is no drain to bound.
+            std::future::pending::<()>().await;
+        }
+        tokio::time::sleep(Duration::from_secs(15)).await;
+    };
+    tokio::select! {
+        served = async move { serve.await } => served?,
+        () = deadline => tracing::warn!("graceful drain deadline elapsed — forcing exit"),
+    }
     Ok(())
 }
 
 async fn shutdown() {
     let ctrl_c = async {
         let _ = tokio::signal::ctrl_c().await;