web: chapter audio player + render button

Chapter view now shows a narration card between title and prose
with three states:
  - succeeded → HTML5 <audio> + voice + duration + download link
  - running   → 'rendering…' banner with relative start time
  - none/failed → 'Render audio' POST button (spawns background
                  tokio task calling narrate::run)

ServeDir mounted at /audio serves WAVs from the f5-tts bind-mount
read-only. Range requests work, so 16-min chapters seek cleanly.

Deploy needs: compose mount /mnt/cache/appdata/f5-tts/audio:/audio:ro
on skald (already staged in /mnt/cache/appdata/skald/compose.yml on
Lucy).
This commit is contained in:
Kayos 2026-05-13 17:08:43 -07:00
parent c2bb12fdd0
commit 75a609d507
3 changed files with 217 additions and 6 deletions

15
Cargo.lock generated
View file

@ -803,6 +803,12 @@ dependencies = [
"pin-project-lite",
]
[[package]]
name = "http-range-header"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9171a2ea8a68358193d15dd5d70c1c10a2afc3e7e4c5bc92bc9f025cebd7359c"
[[package]]
name = "httparse"
version = "1.10.1"
@ -1909,6 +1915,7 @@ dependencies = [
"chrono",
"clawdforge",
"regex",
"reqwest",
"serde",
"serde_json",
"sqlx",
@ -2393,11 +2400,19 @@ checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51"
dependencies = [
"bitflags",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"http-range-header",
"httpdate",
"mime",
"mime_guess",
"percent-encoding",
"pin-project-lite",
"tokio",
"tokio-util",
"tower",
"tower-layer",
"tower-service",

View file

@ -13,7 +13,7 @@ repository = "http://192.168.0.5:3001/cobb/skald"
tokio = { version = "1", features = ["full"] }
axum = "0.8"
tower = "0.5"
tower-http = { version = "0.6", features = ["trace", "limit"] }
tower-http = { version = "0.6", features = ["trace", "limit", "fs"] }
sqlx = { version = "0.8", default-features = false, features = [
"postgres", "runtime-tokio", "tls-rustls",
"chrono", "uuid", "macros", "migrate",

View file

@ -14,12 +14,13 @@ use axum::Router;
use axum::extract::{Path, State};
use axum::http::StatusCode;
use axum::response::{Html, Redirect};
use axum::routing::get;
use axum::routing::{get, post};
use axum::Form;
use chrono::{DateTime, Utc};
use maud::{DOCTYPE, Markup, html};
use serde::Deserialize;
use sqlx::PgPool;
use tower_http::services::ServeDir;
use uuid::Uuid;
#[derive(Clone)]
@ -28,13 +29,24 @@ pub struct WebState {
}
/// Builds the web UI router: the story/chapter pages, the narrate POST
/// endpoint, and a static file service for rendered audio under `/audio`.
///
/// The audio directory is read from `SKALD_AUDIO_DIR` and falls back to
/// `/audio` when the variable is unset, not valid Unicode, or blank.
pub fn router(state: WebState) -> Router {
    // Audio dir is the f5-tts bind mount, exposed read-only inside
    // skald via compose. ServeDir handles Range requests + correct
    // MIME for .wav — important for HTML5 <audio> seeking on 16-min
    // chapter renders.
    //
    // A blank value (e.g. `SKALD_AUDIO_DIR=` produced by compose variable
    // substitution) is treated like an unset one: handing an empty base path
    // to ServeDir would resolve requests relative to the process working
    // directory instead of the audio mount.
    let audio_dir = std::env::var("SKALD_AUDIO_DIR")
        .ok()
        .filter(|dir| !dir.trim().is_empty())
        .unwrap_or_else(|| "/audio".to_owned());

    Router::new()
        .route("/", get(index))
        .route("/stories/new", get(new_story_form).post(new_story_create))
        .route("/stories/{id}", get(story_detail))
        .route("/stories/{id}/continue", get(continue_form).post(continue_create))
        .route("/stories/{id}/chapters/{n}", get(chapter_view))
        .route(
            "/stories/{id}/chapters/{n}/narrate",
            post(chapter_narrate_fire),
        )
        .route("/stories/{id}/runs", get(runs_view))
        .nest_service("/audio", ServeDir::new(audio_dir))
        .with_state(Arc::new(state))
}
@ -400,9 +412,9 @@ async fn chapter_view(
) -> Result<Html<String>, StatusCode> {
let stories = fetch_stories(&state.pool).await;
let row: Option<(Option<String>, String, i32, Option<String>)> = sqlx::query_as(
let row: Option<(Uuid, Option<String>, String, i32, Option<String>)> = sqlx::query_as(
r#"
SELECT c.title, c.body_md, c.word_count, cs.body
SELECT c.id, c.title, c.body_md, c.word_count, cs.body
FROM chapters c
LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id
WHERE c.story_id = $1 AND c.n = $2
@ -415,14 +427,92 @@ async fn chapter_view(
.ok()
.flatten();
let Some((title, body_md, word_count, summary)) = row else {
let Some((chapter_id, title, body_md, word_count, summary)) = row else {
return Err(StatusCode::NOT_FOUND);
};
let panel = chapter_panel(id, n, title.as_deref(), &body_md, word_count, summary.as_deref());
// Most-recent narration_run for the chapter — drives whether we
// show the audio player, the "rendering..." state, or the
// "Render audio" form.
let narration: Option<NarrationCard> = sqlx::query_as::<_, (String, Option<String>, Option<f32>, Option<String>, DateTime<Utc>, Option<DateTime<Utc>>, Option<String>)>(
r#"
SELECT nr.status, nr.output_path, nr.duration_seconds, v.display_name,
nr.started_at, nr.ended_at, nr.error
FROM narration_runs nr
JOIN voices v ON v.id = nr.voice_id
WHERE nr.chapter_id = $1
ORDER BY nr.started_at DESC
LIMIT 1
"#
)
.bind(chapter_id)
.fetch_optional(&state.pool)
.await
.ok()
.flatten()
.map(|(status, output_path, duration_seconds, voice_display, started_at, ended_at, error)| {
NarrationCard {
status,
output_path,
duration_seconds,
voice_display,
started_at,
ended_at,
error,
}
});
let panel = chapter_panel(
id,
chapter_id,
n,
title.as_deref(),
&body_md,
word_count,
summary.as_deref(),
narration.as_ref(),
);
Ok(Html(render_shell(&stories, Some(id), panel).into_string()))
}
/// Display data for a chapter's most recent narration run, as consumed by
/// `narration_card` when it renders the audio strip.
#[derive(Debug, Clone)]
struct NarrationCard {
// Run state string; the renderer distinguishes "succeeded", "running"
// and "failed", and treats any other value like "no run yet".
status: String,
// Path of the rendered audio file, if one was recorded. Only the final
// path segment is used to build the `/audio/...` URL.
output_path: Option<String>,
// Length of the rendered audio in seconds, if known.
duration_seconds: Option<f32>,
// Human-readable voice name shown next to the duration.
voice_display: Option<String>,
// When the run started; shown as a relative time while it is running.
started_at: DateTime<Utc>,
// When the run ended, if it has.
ended_at: Option<DateTime<Utc>>,
// Error text recorded for the run; shown verbatim when the run failed.
error: Option<String>,
}
async fn chapter_narrate_fire(
State(state): State<Arc<WebState>>,
Path((id, n)): Path<(Uuid, i32)>,
) -> Result<Redirect, (StatusCode, String)> {
let chapter_id: Option<Uuid> =
sqlx::query_scalar("SELECT id FROM chapters WHERE story_id = $1 AND n = $2")
.bind(id)
.bind(n)
.fetch_optional(&state.pool)
.await
.map_err(|e| (StatusCode::INTERNAL_SERVER_ERROR, e.to_string()))?;
let chapter_id =
chapter_id.ok_or((StatusCode::NOT_FOUND, "chapter not found".into()))?;
let database_url = std::env::var("DATABASE_URL")
.unwrap_or_else(|_| "postgresql://skald:skald@localhost:5432/skald".into());
tokio::spawn(async move {
if let Err(e) = crate::narrate::run(&database_url, chapter_id, None, 1.0).await {
tracing::error!(chapter_id = %chapter_id, error = %e, "background narrate failed");
} else {
tracing::info!(chapter_id = %chapter_id, "background narrate succeeded");
}
});
Ok(Redirect::to(&format!("/stories/{id}/chapters/{n}")))
}
async fn runs_view(
State(state): State<Arc<WebState>>,
Path(id): Path<Uuid>,
@ -714,11 +804,13 @@ fn story_panel(
fn chapter_panel(
story_id: Uuid,
chapter_id: Uuid,
n: i32,
title: Option<&str>,
body_md: &str,
word_count: i32,
summary: Option<&str>,
narration: Option<&NarrationCard>,
) -> Markup {
let display_title = title.map(|t| strip_chapter_prefix(t, n).to_string());
html! {
@ -733,6 +825,7 @@ fn chapter_panel(
div.metabar {
span.meta-item { (kfmt(word_count)) " words" }
}
(narration_card(story_id, chapter_id, n, narration))
@if let Some(s) = summary {
aside.summary-box {
h3 { "Summary" }
@ -759,6 +852,91 @@ fn chapter_panel(
}
}
/// Renders the narration card shown between the chapter title and the prose.
///
/// The markup depends on the most recent narration run, if any:
/// - `succeeded` with an output path: `<audio>` player, duration, voice,
///   download link, and a "Re-render" button
/// - `running`: a pending banner with the elapsed time; there is no
///   auto-refresh, so the reader reloads the page to see fresh state
/// - `failed`: the stored error text (if any) and a "Retry render" button
/// - anything else, including no run at all or a `succeeded` run without an
///   output path: the "Render audio" button
///
/// Every button POSTs to `/stories/{story_id}/chapters/{n}/narrate`.
/// `_chapter_id` is accepted but not used.
fn narration_card(
    story_id: Uuid,
    _chapter_id: Uuid,
    n: i32,
    narration: Option<&NarrationCard>,
) -> Markup {
    let post_url = format!("/stories/{story_id}/chapters/{n}/narrate");
    // Pair the card with its output path up front, so the player branch binds
    // the path directly instead of unwrapping an Option checked in a guard.
    let playable = narration
        .filter(|card| card.status == "succeeded")
        .and_then(|card| Some((card, card.output_path.as_deref()?)));

    html! {
        aside.narration {
            @if let Some((card, path)) = playable {
                // Only the final path segment is used for the URL; the
                // directory part of the stored path is dropped.
                @let basename = path.rsplit('/').next().unwrap_or(path);
                @let audio_url = format!("/audio/{}", basename);
                h3 { "Audio" }
                audio controls preload="metadata" src=(audio_url) {}
                p.muted {
                    @if let Some(d) = card.duration_seconds {
                        (fmt_duration(d as f64))
                    }
                    @if let Some(v) = &card.voice_display {
                        " · " (v)
                    }
                    " · "
                    a href=(audio_url) download=(basename) { "download" }
                }
                form method="post" action=(post_url) {
                    button type="submit" .ghost { "Re-render" }
                }
            } @else {
                @match narration {
                    Some(card) if card.status == "running" => {
                        h3 { "Audio — rendering…" }
                        p.muted {
                            "Started " (relative_time(card.started_at)) " ago. "
                            "Reload the page to check progress."
                        }
                    }
                    Some(card) if card.status == "failed" => {
                        h3.warn { "Audio render failed" }
                        @if let Some(e) = &card.error {
                            pre.error-pre { (e) }
                        }
                        form method="post" action=(post_url) {
                            button type="submit" { "Retry render" }
                        }
                    }
                    _ => {
                        h3 { "Audio" }
                        p.muted {
                            "Not rendered yet. F5-TTS with the system default voice. "
                            "Expect ~68 min wall clock for a 3,000-word chapter on the GPU."
                        }
                        form method="post" action=(post_url) {
                            button type="submit" { "Render audio" }
                        }
                    }
                }
            }
        }
    }
}
/// Formats a duration given in seconds as `"{minutes}m {seconds}s"`, with the
/// seconds zero-padded to two digits (e.g. `65.9` becomes `"1m 05s"`).
///
/// Fractional seconds are truncated, not rounded, and minutes are not folded
/// into hours.
fn fmt_duration(s: f64) -> String {
    let whole_seconds = s as i64;
    let (minutes, seconds) = (whole_seconds / 60, whole_seconds % 60);
    format!("{}m {:02}s", minutes, seconds)
}
/// Describes how long ago `t` was, relative to the current UTC time, using
/// the single coarsest unit that fits: `"{n}s"` under a minute, `"{n}m"`
/// under an hour, `"{n}h"` otherwise.
///
/// A timestamp in the future is clamped and reported as `"0s"`.
fn relative_time(t: DateTime<Utc>) -> String {
    let age_secs = (Utc::now() - t).num_seconds().max(0);
    match age_secs {
        0..=59 => format!("{age_secs}s"),
        60..=3599 => format!("{}m", age_secs / 60),
        _ => format!("{}h", age_secs / 3600),
    }
}
fn runs_panel(story_id: Uuid, runs: &[(Uuid, String, String, DateTime<Utc>, Option<DateTime<Utc>>, Option<String>)]) -> Markup {
html! {
article.runs {
@ -1099,6 +1277,24 @@ code { font-family: var(--mono); font-size: 0.9em; background: var(--surface-2);
margin: 0 0 10px 0; font-weight: 700;
}
.summary-box p { margin: 0; color: var(--ink-muted); font-size: 14px; line-height: 1.7; font-family: var(--serif); }
.narration {
background: var(--surface); border-left: 2px solid var(--accent);
padding: 16px 22px; margin: 24px 0; max-width: 75ch;
}
.narration h3 {
font-family: var(--display); font-size: 11px;
text-transform: uppercase; letter-spacing: 2.5px; color: var(--accent);
margin: 0 0 10px 0; font-weight: 700;
}
.narration h3.warn { color: var(--accent); }
.narration audio { width: 100%; margin: 4px 0 8px 0; }
.narration p.muted { margin: 0 0 10px 0; color: var(--ink-muted); font-size: 13px; font-family: var(--sans, var(--serif)); }
.narration p.muted a { color: var(--bronze); }
.narration form { margin: 0; }
.narration button { background: transparent; border: 1px solid var(--bronze); color: var(--bronze); padding: 6px 14px; font-family: var(--display); font-size: 11px; letter-spacing: 2px; text-transform: uppercase; cursor: pointer; }
.narration button:hover { background: var(--bronze); color: var(--bg); }
.narration button.ghost { border-color: var(--surface-2); color: var(--ink-muted); }
.narration .error-pre { background: var(--surface-2); padding: 8px 12px; font-size: 12px; overflow-x: auto; }
.prose {
font-family: var(--serif); font-size: 18px; line-height: 1.8;
max-width: 68ch; color: var(--ink);