narrate: body_md_tts column + narrate_prep pass + Kokoro routing
Four changes working together: 1. Migration 0005 adds chapters.body_md_tts (nullable). The narrate path prefers it over body_md when present — that's the annotated-for-audiobook variant — and falls back to body_md if it is not set. 2. New Forge::narrate_prep pass: the author (or House) annotates prose with [breath] / [pause:Xs] / [scene] beat markers AND occasional humanizing narrator stumbles (em-dash repetition, self-correction, hesitation — sparingly, 1-3 per chapter). Apart from stumbles, the prose is verbatim. Author voice threads through. 3. New CLI: 'skald prepare-narration --chapter <uuid> [--author slug] [--overwrite]'. Records a generation_runs row with kind=narrate_prep. 4. skald narrate now routes by voice.source — kokoro_* voices hit KOKORO_URL (Apache 2.0 stack, audiobook-tuned with the v0.2 render-and-stitch server), everything else hits F5_TTS_URL (voice-cloning path). The voice DB row carries source as the dispatch key. Why no new tag for narrator stumbles: em-dash repetition and self-correction are just prose patterns that Kokoro reads correctly because of its punctuation cues. No new server-side machinery.
This commit is contained in:
parent
aece970b50
commit
89c35fd9d3
6 changed files with 413 additions and 88 deletions
15
migrations/0005_chapter_body_md_tts.sql
Normal file
15
migrations/0005_chapter_body_md_tts.sql
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
-- Annotated TTS variant of each chapter's prose. NULL = not yet
|
||||
-- generated; renderers fall back to body_md when this is NULL.
|
||||
-- Otherwise this column carries the same prose with control tags
|
||||
-- inline: [pause:Xs] / [breath] / [scene] — the kokoro server
|
||||
-- interprets these into silence beats during synth.
|
||||
--
|
||||
-- Why a separate column instead of mutating body_md: the human-
|
||||
-- readable version stays clean for web inspection + future plain-
|
||||
-- text export. The TTS version is production output, regeneratable
|
||||
-- whenever the author's beat-placement taste shifts.
|
||||
ALTER TABLE chapters
|
||||
ADD COLUMN body_md_tts text;
|
||||
|
||||
-- generation_runs.kind already accepts any string; no check
|
||||
-- constraint to update. The new pass kind is 'narrate_prep'.
|
||||
|
|
@ -59,7 +59,7 @@ pub struct PassOutput {
|
|||
|
||||
/// What a given pass over the model is for.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "lowercase")]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum PassKind {
|
||||
/// First-pass long-form draft from prompt + context.
|
||||
Gen,
|
||||
|
|
@ -69,6 +69,11 @@ pub enum PassKind {
|
|||
Audit,
|
||||
/// Chapter summary for cheap context loading on long series.
|
||||
Summary,
|
||||
/// Annotate prose with narration control tags ([pause:Xs],
/// [breath], [scene]) for the TTS render path. Apart from the
/// tag insertions and the rare narrator stumbles the prompts
/// allow, the prose should come back unchanged.
|
||||
NarratePrep,
|
||||
}
|
||||
|
||||
impl PassKind {
|
||||
|
|
@ -78,6 +83,7 @@ impl PassKind {
|
|||
Self::Cleanup => "cleanup",
|
||||
Self::Audit => "audit",
|
||||
Self::Summary => "summary",
|
||||
Self::NarratePrep => "narrate_prep",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -179,6 +185,52 @@ impl Forge {
|
|||
})
|
||||
}
|
||||
|
||||
/// Annotate prose with narration control tags. The model
|
||||
/// receives the full chapter prose and returns the SAME prose
|
||||
/// with `[pause:Xs]`, `[breath]`, `[scene]` markers inserted
|
||||
/// at natural beats. The author voice DOES thread through —
|
||||
/// Orson Black places beats differently than another author
|
||||
/// would. Replace-mode if author is set; Append otherwise.
|
||||
///
|
||||
/// Hard rule the system prompt enforces: do not change a word
|
||||
/// of prose. Tags are additive only.
|
||||
pub async fn narrate_prep(
|
||||
&self,
|
||||
prose: &str,
|
||||
author: Option<&AuthorWithRevision>,
|
||||
) -> anyhow::Result<PassOutput> {
|
||||
let user_prompt = narrate_prep_user_prompt(prose);
|
||||
let (system, mode) = match author {
|
||||
Some(a) => {
|
||||
let scaffold = a
|
||||
.revision
|
||||
.system_template
|
||||
.as_deref()
|
||||
.unwrap_or(DEFAULT_AUTHOR_SCAFFOLD);
|
||||
let composed = scaffold
|
||||
.replace("{{display_name}}", &a.author.display_name)
|
||||
.replace("{{pass_directive}}", NARRATE_PREP_DIRECTIVE)
|
||||
.replace("{{soul}}", &a.revision.soul);
|
||||
(composed, SystemMode::Replace)
|
||||
}
|
||||
None => (HOUSE_NARRATE_PREP_SYSTEM.to_string(), SystemMode::Append),
|
||||
};
|
||||
let body = RunRequest {
|
||||
prompt: user_prompt,
|
||||
model: Some(self.model.clone()),
|
||||
system: Some(system),
|
||||
system_mode: Some(mode),
|
||||
// Tag placement IS a craft choice; max effort buys
|
||||
// better beat sense. Same posture as gen/cleanup.
|
||||
effort: Some(Effort::Max),
|
||||
timeout_secs: Some(1800),
|
||||
..Default::default()
|
||||
};
|
||||
let r = self.client.run(body).await?;
|
||||
let duration_ms = r.duration_ms;
|
||||
Ok(PassOutput { kind: PassKind::NarratePrep, result: r, duration_ms })
|
||||
}
|
||||
|
||||
/// Summarize one chapter to ~250 words. The summary feeds into
|
||||
/// the continuation context for older chapters so the token
|
||||
/// budget stays sane on long series (book 12 doesn't carry book 1
|
||||
|
|
@ -287,6 +339,10 @@ const HOUSE_CLEANUP_SYSTEM: &str = "You are a copy editor polishing a draft chap
|
|||
|
||||
const SYSTEM_AUDIT: &str = "You are a canon auditor for long-form fiction. You compare a parent story and a new chapter against the bible. You flag continuity drift, character voice shift, retconned facts, dropped threads, timeline contradictions. You return STRUCTURED JSON ONLY — no commentary, no preamble. The exact shape: { \"findings\": [ { \"severity\": \"info\"|\"warn\"|\"crit\", \"area\": \"character\"|\"continuity\"|\"tone\"|\"fact\"|\"timeline\"|\"other\", \"body\": \"...\" } ] }. If no findings, return { \"findings\": [] }.";
|
||||
|
||||
const NARRATE_PREP_DIRECTIVE: &str = "This is a NARRATION-ANNOTATION pass. You receive your own prose and prepare it for an audiobook reading. Two kinds of inserts are allowed:\n\n1. BEAT MARKERS (additive, not prose): `[breath]` (~400ms), `[pause:1.2s]` (explicit silence in seconds, e.g. 0.5s, 1.2s, 2s), `[scene]` (~1500ms scene break). Place where the prose's rhythm asks for them — after a hard one-line beat, before a turn in dialogue, on a paragraph that lands with weight.\n\n2. NARRATOR STUMBLES (humanizing prose-level inserts): a real narrator occasionally stumbles on a hard word, catches themselves, repeats. You may add these *sparingly* where the prose's pacing makes them feel right. Patterns: em-dash repetition (`Prip— Pripyat`), self-correction (`she — no, the wife — had been told`), hesitation (`the dose, the dose was`). USE SPARINGLY. Maybe 1-3 per chapter. Pick proper nouns, technical terms, or moments where the narrator might genuinely catch herself. Avoid stumbling on emotional climaxes — those should land clean.\n\nApart from stumbles, do NOT change a word of the original prose. Return the prose with beat markers and stumbles inline. No preamble. No commentary about your choices.";
|
||||
|
||||
const HOUSE_NARRATE_PREP_SYSTEM: &str = "You are a senior audiobook director annotating prose for narration. You insert (a) beat markers — `[breath]`, `[pause:Xs]`, `[scene]` — where a skilled narrator would breathe or pause, and (b) occasional humanizing narrator stumbles using em-dash repetition or self-correction (sparingly — maybe 1-3 per chapter, on proper nouns or hard words). Apart from those stumbles you do NOT change a word of the prose. Return the prose verbatim plus beat markers and (rare) stumbles inline. No preamble, no commentary.";
|
||||
|
||||
// ─── User-prompt builders ───────────────────────────────────────
|
||||
|
||||
fn gen_user_prompt(
|
||||
|
|
@ -320,6 +376,18 @@ fn gen_user_prompt(
|
|||
out
|
||||
}
|
||||
|
||||
/// Build the user message for the narrate-prep pass: a heading, the
/// chapter prose as given, then the task instructions.
fn narrate_prep_user_prompt(prose: &str) -> String {
    const HEADING: &str = "# Prose to annotate\n\n";
    const TASK: &str = "\n\n# Task\n\nReturn the prose above with `[breath]`, `[pause:Xs]`, and \
                        `[scene]` markers inserted at natural narration beats. Do not change \
                        any word. Do not skip any sentence. Return only the annotated prose.\n";

    // Reserve room for the prose plus the fixed framing text up front.
    let mut prompt = String::with_capacity(prose.len() + 512);
    for piece in [HEADING, prose, TASK] {
        prompt.push_str(piece);
    }
    prompt
}
|
||||
|
||||
fn cleanup_user_prompt(draft: &str, context: &str, chapter_n: Option<i32>) -> String {
|
||||
let mut out = String::with_capacity(context.len() + draft.len() + 512);
|
||||
out.push_str("# Story canon (for reference — do not retcon)\n\n");
|
||||
|
|
|
|||
|
|
@ -114,73 +114,69 @@ pub struct Voice {
|
|||
pub id: Uuid,
|
||||
pub name: String,
|
||||
pub display_name: String,
|
||||
/// Source label = engine bucket. "lj_speech" + similar → f5-tts
|
||||
/// engine. "kokoro_*" → kokoro engine. Used to pick the HTTP
|
||||
/// target. Future: a dedicated voices.engine column.
|
||||
pub source: String,
|
||||
pub reference_path: Option<String>,
|
||||
pub reference_text: Option<String>,
|
||||
pub license: String,
|
||||
pub is_default: bool,
|
||||
}
|
||||
|
||||
const VOICE_COLUMNS: &str =
|
||||
"id, name, display_name, source, reference_path, reference_text, license, is_default";
|
||||
|
||||
type VoiceTuple = (
|
||||
Uuid,
|
||||
String,
|
||||
String,
|
||||
String,
|
||||
Option<String>,
|
||||
Option<String>,
|
||||
String,
|
||||
bool,
|
||||
);
|
||||
|
||||
fn voice_from_tuple(t: VoiceTuple) -> Voice {
|
||||
let (id, name, display_name, source, reference_path, reference_text, license, is_default) = t;
|
||||
Voice {
|
||||
id,
|
||||
name,
|
||||
display_name,
|
||||
source,
|
||||
reference_path,
|
||||
reference_text,
|
||||
license,
|
||||
is_default,
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_voice_by_name(pool: &PgPool, name: &str) -> anyhow::Result<Option<Voice>> {
|
||||
let row: Option<(Uuid, String, String, Option<String>, Option<String>, String, bool)> =
|
||||
sqlx::query_as(
|
||||
"SELECT id, name, display_name, reference_path, reference_text, license, is_default
|
||||
FROM voices WHERE name = $1",
|
||||
)
|
||||
let row: Option<VoiceTuple> = sqlx::query_as(&format!(
|
||||
"SELECT {VOICE_COLUMNS} FROM voices WHERE name = $1"
|
||||
))
|
||||
.bind(name)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row.map(|(id, name, display_name, reference_path, reference_text, license, is_default)| {
|
||||
Voice {
|
||||
id,
|
||||
name,
|
||||
display_name,
|
||||
reference_path,
|
||||
reference_text,
|
||||
license,
|
||||
is_default,
|
||||
}
|
||||
}))
|
||||
Ok(row.map(voice_from_tuple))
|
||||
}
|
||||
|
||||
pub async fn get_default_voice(pool: &PgPool) -> anyhow::Result<Option<Voice>> {
|
||||
let row: Option<(Uuid, String, String, Option<String>, Option<String>, String, bool)> =
|
||||
sqlx::query_as(
|
||||
"SELECT id, name, display_name, reference_path, reference_text, license, is_default
|
||||
FROM voices WHERE is_default = true LIMIT 1",
|
||||
)
|
||||
let row: Option<VoiceTuple> = sqlx::query_as(&format!(
|
||||
"SELECT {VOICE_COLUMNS} FROM voices WHERE is_default = true LIMIT 1"
|
||||
))
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row.map(|(id, name, display_name, reference_path, reference_text, license, is_default)| {
|
||||
Voice {
|
||||
id,
|
||||
name,
|
||||
display_name,
|
||||
reference_path,
|
||||
reference_text,
|
||||
license,
|
||||
is_default,
|
||||
}
|
||||
}))
|
||||
Ok(row.map(voice_from_tuple))
|
||||
}
|
||||
|
||||
pub async fn get_voice_by_id(pool: &PgPool, id: Uuid) -> anyhow::Result<Option<Voice>> {
|
||||
let row: Option<(Uuid, String, String, Option<String>, Option<String>, String, bool)> =
|
||||
sqlx::query_as(
|
||||
"SELECT id, name, display_name, reference_path, reference_text, license, is_default
|
||||
FROM voices WHERE id = $1",
|
||||
)
|
||||
let row: Option<VoiceTuple> = sqlx::query_as(&format!(
|
||||
"SELECT {VOICE_COLUMNS} FROM voices WHERE id = $1"
|
||||
))
|
||||
.bind(id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
Ok(row.map(|(id, name, display_name, reference_path, reference_text, license, is_default)| {
|
||||
Voice {
|
||||
id,
|
||||
name,
|
||||
display_name,
|
||||
reference_path,
|
||||
reference_text,
|
||||
license,
|
||||
is_default,
|
||||
}
|
||||
}))
|
||||
Ok(row.map(voice_from_tuple))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ mod authors_seed;
|
|||
mod continue_story;
|
||||
mod import;
|
||||
mod narrate;
|
||||
mod narrate_prep;
|
||||
mod serve;
|
||||
mod show_context;
|
||||
mod summarize;
|
||||
|
|
@ -140,6 +141,21 @@ enum Cmd {
|
|||
#[arg(long, default_value = "1.0")]
|
||||
speed: f32,
|
||||
},
|
||||
/// Annotate one chapter's prose with audiobook beat markers
|
||||
/// + occasional narrator stumbles. Writes to chapters.body_md_tts.
|
||||
/// The narrate path prefers body_md_tts when present.
|
||||
PrepareNarration {
|
||||
/// Chapter UUID to annotate.
|
||||
#[arg(long)]
|
||||
chapter: Uuid,
|
||||
/// Override the chapter's story-bound author with this slug.
|
||||
#[arg(long)]
|
||||
author: Option<String>,
|
||||
/// Replace an existing body_md_tts. Otherwise the command
|
||||
/// errors out to avoid clobbering a hand-tuned version.
|
||||
#[arg(long)]
|
||||
overwrite: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -210,6 +226,13 @@ async fn run() -> anyhow::Result<()> {
|
|||
voice,
|
||||
speed,
|
||||
} => narrate::run(&cli.database_url, chapter, voice.as_deref(), speed).await,
|
||||
Cmd::PrepareNarration {
|
||||
chapter,
|
||||
author,
|
||||
overwrite,
|
||||
} => {
|
||||
narrate_prep::run(&cli.database_url, chapter, author.as_deref(), overwrite).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,18 @@
|
|||
//! `skald narrate` — render chapter prose to audio via F5-TTS.
|
||||
//! `skald narrate` — render chapter prose to audio via F5-TTS or
|
||||
//! Kokoro depending on the chosen voice's `source`.
|
||||
//!
|
||||
//! Resolution order for the voice:
|
||||
//! 1. --voice <slug> flag (explicit override)
|
||||
//! 2. story.preferred_voice_id (per-story pin)
|
||||
//! 3. voices.is_default = true (the system default)
|
||||
//!
|
||||
//! Output filename layout: <story-slug-or-id>/<n>-<run-uuid>.wav.
|
||||
//! Story-slug isn't yet on the schema, so v0.1 uses the bare
|
||||
//! story-uuid prefix.
|
||||
//! Engine routing: voices with source matching `kokoro*` use the
|
||||
//! KOKORO_URL endpoint (Apache 2.0 stack, audiobook-tuned); everything
|
||||
//! else routes to F5_TTS_URL (voice-cloning path).
|
||||
//!
|
||||
//! Output filename layout: <story-uuid>-<n>-<run-uuid>.wav.
|
||||
|
||||
use std::time::Instant;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use chrono::Utc;
|
||||
|
|
@ -24,26 +27,32 @@ pub async fn run(
|
|||
voice_slug: Option<&str>,
|
||||
speed: f32,
|
||||
) -> anyhow::Result<()> {
|
||||
let cfg = load_f5_config()?;
|
||||
tracing::info!(base_url = %cfg.base_url, "f5-tts configured");
|
||||
|
||||
let pool = db::connect_and_migrate(database_url).await?;
|
||||
let narrator = Narrator::new(cfg)?;
|
||||
|
||||
// Quick health probe before we burn time loading rows.
|
||||
let h = narrator.healthz().await.context("f5-tts healthz failed")?;
|
||||
if !h.loaded {
|
||||
bail!("f5-tts /healthz says model is not loaded yet — retry shortly");
|
||||
}
|
||||
tracing::info!(device = %h.device, model = %h.model, "f5-tts ready");
|
||||
|
||||
let chapter = load_chapter(&pool, chapter_id).await?;
|
||||
let voice = resolve_voice(&pool, &chapter, voice_slug).await?;
|
||||
|
||||
let base_url = engine_url_for(&voice.source)?;
|
||||
let cfg = F5Config {
|
||||
base_url: base_url.clone(),
|
||||
timeout: Duration::from_secs(1800),
|
||||
};
|
||||
let narrator = Narrator::new(cfg)?;
|
||||
tracing::info!(base_url = %base_url, source = %voice.source, "tts engine selected");
|
||||
|
||||
// Quick health probe before we burn time on the synth call.
|
||||
let h = narrator.healthz().await.context("tts healthz failed")?;
|
||||
if !h.loaded {
|
||||
bail!("tts /healthz says model is not loaded yet — retry shortly");
|
||||
}
|
||||
|
||||
tracing::info!(device = %h.device, model = %h.model, "tts ready");
|
||||
|
||||
tracing::info!(
|
||||
voice = %voice.name,
|
||||
voice_license = %voice.license,
|
||||
chapter_n = chapter.n,
|
||||
word_count = chapter.word_count,
|
||||
used_tts_variant = chapter.used_tts_variant,
|
||||
"narrating",
|
||||
);
|
||||
|
||||
|
|
@ -56,19 +65,30 @@ pub async fn run(
|
|||
let run_id = Uuid::new_v4();
|
||||
let output_filename = format!("{}-{}-{}.wav", chapter.story_id, chapter.n, run_id);
|
||||
|
||||
// Engine + version threaded from the voice row's source/license
|
||||
// pair. lj_speech-style PD voices live behind f5-tts; kokoro_*
|
||||
// voices live behind kokoro. Future: a dedicated voices.engine
|
||||
// column to make this explicit.
|
||||
let (engine, engine_version) = if voice.source.starts_with("kokoro") {
|
||||
("kokoro-82m", "0.9")
|
||||
} else {
|
||||
("f5-tts", "1.1.20")
|
||||
};
|
||||
let run_row_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO narration_runs (id, chapter_id, voice_id, engine, engine_version, status)
|
||||
VALUES ($1, $2, $3, 'f5-tts', '1.1.20', 'running') RETURNING id",
|
||||
VALUES ($1, $2, $3, $4, $5, 'running') RETURNING id",
|
||||
)
|
||||
.bind(run_id)
|
||||
.bind(chapter_id)
|
||||
.bind(voice.id)
|
||||
.bind(engine)
|
||||
.bind(engine_version)
|
||||
.fetch_one(&pool)
|
||||
.await?;
|
||||
|
||||
let started = Instant::now();
|
||||
let req = SynthesizeRequest {
|
||||
gen_text: chapter.body_md.clone(),
|
||||
gen_text: chapter.body_for_tts.clone(),
|
||||
ref_audio_path,
|
||||
ref_text: voice.reference_text.clone(),
|
||||
output_filename,
|
||||
|
|
@ -121,23 +141,33 @@ pub async fn run(
|
|||
struct ChapterRow {
|
||||
story_id: Uuid,
|
||||
n: i32,
|
||||
body_md: String,
|
||||
/// The prose to actually narrate. If body_md_tts is set we use
|
||||
/// that (annotated with [breath]/[pause:Xs]/[scene] beats);
|
||||
/// otherwise the plain body_md.
|
||||
body_for_tts: String,
|
||||
/// Whether body_md_tts was the source (for logging).
|
||||
used_tts_variant: bool,
|
||||
word_count: i32,
|
||||
}
|
||||
|
||||
async fn load_chapter(pool: &PgPool, id: Uuid) -> anyhow::Result<ChapterRow> {
|
||||
let row: Option<(Uuid, i32, String, i32)> = sqlx::query_as(
|
||||
"SELECT story_id, n, body_md, word_count FROM chapters WHERE id = $1",
|
||||
let row: Option<(Uuid, i32, String, Option<String>, i32)> = sqlx::query_as(
|
||||
"SELECT story_id, n, body_md, body_md_tts, word_count FROM chapters WHERE id = $1",
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
let (story_id, n, body_md, word_count) =
|
||||
let (story_id, n, body_md, body_md_tts, word_count) =
|
||||
row.with_context(|| format!("chapter {id} not found"))?;
|
||||
let (body_for_tts, used_tts_variant) = match body_md_tts {
|
||||
Some(tts) if !tts.trim().is_empty() => (tts, true),
|
||||
_ => (body_md, false),
|
||||
};
|
||||
Ok(ChapterRow {
|
||||
story_id,
|
||||
n,
|
||||
body_md,
|
||||
body_for_tts,
|
||||
used_tts_variant,
|
||||
word_count,
|
||||
})
|
||||
}
|
||||
|
|
@ -170,11 +200,15 @@ async fn resolve_voice(
|
|||
.ok_or_else(|| anyhow::anyhow!("no default voice set; create one or use --voice <slug>"))
|
||||
}
|
||||
|
||||
fn load_f5_config() -> anyhow::Result<F5Config> {
|
||||
let base_url = std::env::var("F5_TTS_URL")
|
||||
.unwrap_or_else(|_| "http://192.168.0.5:7792".into());
|
||||
Ok(F5Config {
|
||||
base_url,
|
||||
timeout: std::time::Duration::from_secs(1800),
|
||||
})
|
||||
/// Pick the engine base URL for a given voice.source. Voices whose
|
||||
/// source starts with "kokoro" route to KOKORO_URL; everything else
|
||||
/// routes to F5_TTS_URL. Each env var has a LAN-default for Lucy.
|
||||
fn engine_url_for(source: &str) -> anyhow::Result<String> {
|
||||
if source.starts_with("kokoro") {
|
||||
Ok(std::env::var("KOKORO_URL")
|
||||
.unwrap_or_else(|_| "http://192.168.0.5:7794".into()))
|
||||
} else {
|
||||
Ok(std::env::var("F5_TTS_URL")
|
||||
.unwrap_or_else(|_| "http://192.168.0.5:7792".into()))
|
||||
}
|
||||
}
|
||||
|
|
|
|||
189
skald/src/narrate_prep.rs
Normal file
189
skald/src/narrate_prep.rs
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
//! `skald prepare-narration` — annotate one chapter's prose with
|
||||
//! audiobook beat markers + occasional narrator stumbles. Output
|
||||
//! lands in chapters.body_md_tts; if NULL, the narrate path falls
|
||||
//! back to chapters.body_md.
|
||||
//!
|
||||
//! Author voice threads through: an Orson Black chapter gets beats
|
||||
//! placed how Orson would direct, a different author would place
|
||||
//! them differently. Default = the author bound to the chapter's
|
||||
//! story; --author overrides.
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use chrono::Utc;
|
||||
use skald_core::authors::{self, AuthorWithRevision};
|
||||
use skald_core::config::ForgeConfig;
|
||||
use skald_core::db;
|
||||
use skald_core::forge::{Forge, PassKind, PassOutput};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub async fn run(
|
||||
database_url: &str,
|
||||
chapter_id: Uuid,
|
||||
author_slug: Option<&str>,
|
||||
overwrite: bool,
|
||||
) -> anyhow::Result<()> {
|
||||
let cfg = load_forge_config()?;
|
||||
tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
|
||||
|
||||
let pool = db::connect_and_migrate(database_url).await?;
|
||||
let forge = Forge::new(&cfg)?;
|
||||
|
||||
let chapter = load_chapter(&pool, chapter_id).await?;
|
||||
if chapter.body_md_tts.is_some() && !overwrite {
|
||||
bail!(
|
||||
"chapter {chapter_id} already has body_md_tts set ({} chars). \
|
||||
Pass --overwrite to regenerate.",
|
||||
chapter.body_md_tts.as_ref().unwrap().len()
|
||||
);
|
||||
}
|
||||
|
||||
let author = resolve_author(&pool, &chapter, author_slug).await?;
|
||||
if let Some(a) = &author {
|
||||
tracing::info!(
|
||||
slug = %a.author.slug,
|
||||
revision_n = a.revision.n,
|
||||
chapter_n = chapter.n,
|
||||
"annotating with author voice",
|
||||
);
|
||||
} else {
|
||||
tracing::info!(chapter_n = chapter.n, "annotating with house voice");
|
||||
}
|
||||
|
||||
let run_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
|
||||
)
|
||||
.bind(chapter.story_id)
|
||||
.bind(PassKind::NarratePrep.as_str())
|
||||
.fetch_one(&pool)
|
||||
.await?;
|
||||
|
||||
let started = Instant::now();
|
||||
let out_res = forge.narrate_prep(&chapter.body_md, author.as_ref()).await;
|
||||
let elapsed = started.elapsed();
|
||||
|
||||
let out: PassOutput = match out_res {
|
||||
Ok(o) => o,
|
||||
Err(e) => {
|
||||
sqlx::query(
|
||||
"UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
|
||||
)
|
||||
.bind(format!("{e:#}"))
|
||||
.bind(Utc::now())
|
||||
.bind(run_id)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
return Err(e);
|
||||
}
|
||||
};
|
||||
|
||||
let annotated = pass_text(&out)?;
|
||||
sqlx::query("UPDATE chapters SET body_md_tts = $1 WHERE id = $2")
|
||||
.bind(&annotated)
|
||||
.bind(chapter_id)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
|
||||
.bind(Utc::now())
|
||||
.bind(run_id)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
|
||||
let in_chars = chapter.body_md.len();
|
||||
let out_chars = annotated.len();
|
||||
let tag_count = count_beats(&annotated);
|
||||
println!(
|
||||
"annotated chapter {} of story {} ({}c → {}c, {} beat markers) in {:.1}s",
|
||||
chapter.n,
|
||||
chapter.story_id,
|
||||
in_chars,
|
||||
out_chars,
|
||||
tag_count,
|
||||
elapsed.as_secs_f32(),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct ChapterRow {
|
||||
story_id: Uuid,
|
||||
n: i32,
|
||||
body_md: String,
|
||||
body_md_tts: Option<String>,
|
||||
story_author_id: Option<Uuid>,
|
||||
}
|
||||
|
||||
async fn load_chapter(pool: &PgPool, id: Uuid) -> anyhow::Result<ChapterRow> {
|
||||
let row: Option<(Uuid, i32, String, Option<String>, Option<Uuid>)> = sqlx::query_as(
|
||||
"SELECT c.story_id, c.n, c.body_md, c.body_md_tts, s.author_id
|
||||
FROM chapters c JOIN stories s ON s.id = c.story_id
|
||||
WHERE c.id = $1",
|
||||
)
|
||||
.bind(id)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
let (story_id, n, body_md, body_md_tts, story_author_id) =
|
||||
row.with_context(|| format!("chapter {id} not found"))?;
|
||||
Ok(ChapterRow {
|
||||
story_id,
|
||||
n,
|
||||
body_md,
|
||||
body_md_tts,
|
||||
story_author_id,
|
||||
})
|
||||
}
|
||||
|
||||
async fn resolve_author(
|
||||
pool: &PgPool,
|
||||
chapter: &ChapterRow,
|
||||
flag_slug: Option<&str>,
|
||||
) -> anyhow::Result<Option<AuthorWithRevision>> {
|
||||
if let Some(slug) = flag_slug {
|
||||
return authors::get_with_current_revision(pool, slug)
|
||||
.await?
|
||||
.map(Some)
|
||||
.with_context(|| format!("author '{slug}' not found"));
|
||||
}
|
||||
if let Some(aid) = chapter.story_author_id {
|
||||
let row: Option<(String,)> = sqlx::query_as("SELECT slug FROM authors WHERE id = $1")
|
||||
.bind(aid)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
if let Some((slug,)) = row {
|
||||
return Ok(authors::get_with_current_revision(pool, &slug).await?);
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn pass_text(out: &PassOutput) -> anyhow::Result<String> {
|
||||
let text = out
|
||||
.result
|
||||
.as_text()
|
||||
.map(|s| s.to_string())
|
||||
.or_else(|| out.result.result.as_str().map(|s| s.to_string()))
|
||||
.unwrap_or_else(|| out.result.result.to_string());
|
||||
if text.trim().is_empty() {
|
||||
bail!("narrate_prep pass returned empty");
|
||||
}
|
||||
Ok(text)
|
||||
}
|
||||
|
||||
/// Count beat markers in annotated prose: every `[breath]`, every
/// `[scene]`, and every occurrence of the `[pause:` opener.
fn count_beats(s: &str) -> usize {
    ["[breath]", "[pause:", "[scene]"]
        .iter()
        .map(|marker| s.matches(*marker).count())
        .sum()
}
|
||||
|
||||
fn load_forge_config() -> anyhow::Result<ForgeConfig> {
|
||||
let base_url = std::env::var("CLAWDFORGE_URL")
|
||||
.context("CLAWDFORGE_URL not set")?;
|
||||
let app_token = std::env::var("CLAWDFORGE_TOKEN")
|
||||
.context("CLAWDFORGE_TOKEN not set")?;
|
||||
let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
|
||||
Ok(ForgeConfig { base_url, app_token, model })
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue