diff --git a/skald/src/dedup.rs b/skald/src/dedup.rs index 9573b8c..f7867fd 100644 --- a/skald/src/dedup.rs +++ b/skald/src/dedup.rs @@ -22,6 +22,7 @@ pub async fn run( database_url: &str, story_id: Uuid, author_slug: Option<&str>, + chapter_filter: Option<i32>, ) -> anyhow::Result<()> { let cfg = load_forge_config()?; tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured"); @@ -62,7 +63,7 @@ pub async fn run( "dedup starting", ); - let chapters: Vec<(Uuid, i32)> = sqlx::query_as( + let mut chapters: Vec<(Uuid, i32)> = sqlx::query_as( "SELECT id, n FROM chapters WHERE story_id = $1 ORDER BY n", ) .bind(story_id) @@ -71,7 +72,16 @@ pub async fn run( if chapters.is_empty() { bail!("story {story_id} has no chapters"); } + // --chapter narrows the run to one chapter — used to retry a + // chapter the length guard skipped on an earlier run. + if let Some(target) = chapter_filter { + chapters.retain(|(_, n)| *n == target); + if chapters.is_empty() { + bail!("chapter {target} not found in story {story_id}"); + } + } + let mut skipped = 0usize; for (chapter_id, n) in &chapters { let body_md: String = sqlx::query_scalar("SELECT body_md FROM chapters WHERE id = $1") @@ -107,6 +117,34 @@ pub async fn run( }; let deduped = pass_text(&out)?; + + // Sanity guard: a surgical dedup nudges a chapter's length by + // a little. An output wildly off the input means the model + // duplicated or ballooned the chapter — reject it, leave the + // chapter untouched, move on. A re-run with --chapter retries + // just the skipped one. 
+ let before = body_md.len(); + let after = deduped.len(); + if after > before * 3 / 2 || after < before * 3 / 5 { + sqlx::query( + "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3", + ) + .bind(format!( + "rejected: dedup output {after}c is wildly off input {before}c \ + — likely a duplicated or ballooned output" + )) + .bind(Utc::now()) + .bind(run_id) + .execute(&pool) + .await?; + skipped += 1; + println!( + "SKIPPED chapter {n}: dedup returned {after}c from {before}c \ + — chapter left untouched (retry with --chapter {n})" + ); + continue; + } + // Overwrite body_md and clear body_md_tts — the chapter must be // re-prepped before it is narrated again. body_md_original is // left untouched (it belongs to the rewrite pass). @@ -121,19 +159,25 @@ pub async fn run( .execute(&pool) .await?; - let before = body_md.len(); - let after = deduped.len(); println!( "deduped chapter {n} ({before}c -> {after}c) in {:.1}s", elapsed.as_secs_f32(), ); } - println!( - "dedup complete: \"{title}\" — {} chapter(s), {} finding(s) applied", - chapters.len(), - findings.len(), - ); + if skipped > 0 { + println!( + "dedup complete: \"{title}\" — {} chapter(s) processed, {skipped} SKIPPED \ + (retry each with --chapter)", + chapters.len(), + ); + } else { + println!( + "dedup complete: \"{title}\" — {} chapter(s) deduped against {} finding(s)", + chapters.len(), + findings.len(), + ); + } Ok(()) } diff --git a/skald/src/main.rs b/skald/src/main.rs index b802d8a..eb24342 100644 --- a/skald/src/main.rs +++ b/skald/src/main.rs @@ -199,6 +199,10 @@ enum Cmd { /// bound author if omitted. #[arg(long)] author: Option<String>, + /// Restrict the run to a single chapter number — used to + /// retry a chapter the length guard skipped. 
+ #[arg(long)] + chapter: Option<i32>, }, } @@ -289,8 +293,8 @@ async fn run() -> anyhow::Result<()> { rewrite::run(&cli.database_url, chapter, author.as_deref()).await } Cmd::Audit { story } => audit::run(&cli.database_url, story).await, - Cmd::Dedup { story, author } => { - dedup::run(&cli.database_url, story, author.as_deref()).await + Cmd::Dedup { story, author, chapter } => { + dedup::run(&cli.database_url, story, author.as_deref(), chapter).await } } }