forge: prose-quality audit pass + anti-repetition directives

Adds `skald audit --story <id>`: a whole-story QC pass that reads every chapter end to end and flags repetition, template tics, self-restatement and continuity drift — the gate before a story goes to narration, where repetition a silent reader skims is glaring read aloud. Runs at max effort (real reasoning work, worth the spend); findings land in audit_findings and print. Also hardens the gen + cleanup directives to hunt repetition at the source: re-phrase recurring motifs fresh, no stacked template anaphora, dialogue echoed verbatim at most once. Migration 0010: 'prose_audit' generation_runs.kind, 'repetition' audit_findings.area.
2026-05-15 11:19:04 -07:00 · 2026-05-15 11:19:04 -07:00 · fd7a34ac1d
commit fd7a34ac1d
parent 575749b774
4 changed files with 214 additions and 2 deletions
--- a/migrations/0010_prose_audit.sql
+++ b/migrations/0010_prose_audit.sql
@ -0,0 +1,20 @@
+-- The prose-quality audit pass: a QC gate that reads a finished
+-- story end to end and flags repetition, template tics, self-
+-- restatement and continuity drift before it goes to narration.
+-- Repetition a silent reader skims is glaring once narrated aloud.
+--
+-- Both CHECK constraints below are replaced wholesale: the old one
+-- is dropped and a new one is added under the same name with the
+-- extended value list. The DROPs carry no IF EXISTS, so this
+-- migration fails loudly if a constraint is not named as expected.
+-- NOTE(review): ADD CONSTRAINT re-validates existing rows; this
+-- assumes no stored row holds a value missing from the new lists.
+
+-- Allow 'prose_audit' as a generation_runs.kind.
+-- The value must match what PassKind::ProseAudit.as_str() returns.
+ALTER TABLE generation_runs DROP CONSTRAINT generation_runs_kind_check;
+ALTER TABLE generation_runs ADD CONSTRAINT generation_runs_kind_check
+    CHECK (kind = ANY (ARRAY[
+        'gen', 'cleanup', 'audit', 'summary', 'embed',
+        'narrate_prep', 'rewrite', 'prose_audit'
+    ]));
+
+-- 'repetition' is a first-class audit finding area.
+-- The prose-audit system prompt asks the model for 'repetition',
+-- 'continuity' or 'other'; any other area string fails this CHECK
+-- at insert time.
+ALTER TABLE audit_findings DROP CONSTRAINT audit_findings_area_check;
+ALTER TABLE audit_findings ADD CONSTRAINT audit_findings_area_check
+    CHECK (area = ANY (ARRAY[
+        'character', 'continuity', 'tone', 'fact',
+        'timeline', 'repetition', 'other'
+    ]));
--- a/skald-core/src/forge.rs
+++ b/skald-core/src/forge.rs
@ -81,6 +81,10 @@ pub enum PassKind {
    /// (names, dates, events, places, facts) is preserved exactly;
    /// the prose itself is rewritten. Not editing — re-authoring.
    Rewrite,
+    /// Whole-story prose-quality audit — reads a finished story end
+    /// to end and flags repetition, template tics, self-restatement
+    /// and continuity drift. The QC gate before narration.
+    ProseAudit,
 }

 impl PassKind {
@ -92,6 +96,7 @@ impl PassKind {
            Self::Summary => "summary",
            Self::NarratePrep => "narrate_prep",
            Self::Rewrite => "rewrite",
+            Self::ProseAudit => "prose_audit",
        }
    }
 }
@ -193,6 +198,35 @@ impl Forge {
        })
    }

+    /// Run the whole-story prose-quality audit pass.
+    ///
+    /// `full_story` is the complete story text (the caller supplies
+    /// every chapter, concatenated in order). It is appended to a
+    /// fixed instruction and sent with the `SYSTEM_PROSE_AUDIT`
+    /// system prompt, which asks the model for findings JSON covering
+    /// repetition, template tics, self-restatement and continuity
+    /// errors. The response is returned unparsed inside the
+    /// [`PassOutput`]; decoding the findings is the caller's job.
+    ///
+    /// The request is sent at `Effort::Max` with an 1800-second
+    /// timeout: spotting near-duplicates across a whole book is
+    /// reasoning-heavy work, so this pass deliberately spends more
+    /// than the prose-craft passes.
+    ///
+    /// # Errors
+    ///
+    /// Propagates whatever error the forge client's `run` returns.
+    pub async fn prose_audit(&self, full_story: &str) -> anyhow::Result<PassOutput> {
+        let request = RunRequest {
+            prompt: format!(
+                "Audit the complete story below for repetition, template tics, \
+                 self-restatement and continuity errors. Read every chapter. \
+                 Return JSON only, matching the schema in the system prompt.\n\n\
+                 {full_story}"
+            ),
+            model: Some(self.model.clone()),
+            system: Some(SYSTEM_PROSE_AUDIT.to_string()),
+            effort: Some(Effort::Max),
+            timeout_secs: Some(1800),
+            ..Default::default()
+        };
+
+        let response = self.client.run(request).await?;
+        Ok(PassOutput {
+            kind: PassKind::ProseAudit,
+            // Read the duration before `response` is moved into `result`.
+            duration_ms: response.duration_ms,
+            result: response,
+        })
+    }
+
+
    /// Annotate prose with narration control tags. The model
    /// receives the full chapter prose and returns the SAME prose
    /// with `[pause:Xs]`, `[breath]`, `[scene]` markers inserted
@ -403,14 +437,16 @@ Hard rules:
 {{soul}}
 "#;

-const GEN_DIRECTIVE: &str = "This is a GENERATION pass. Write the next chapter from scratch. Honor canon. Begin with a chapter heading on the first line.";
+const GEN_DIRECTIVE: &str = "This is a GENERATION pass. Write the next chapter from scratch. Honor canon. Begin with a chapter heading on the first line.\n\nGuard against repetition. If a recurring image, motif or descriptive beat appeared in an earlier chapter, render it in fresh words here — never reuse the same sentence shape or verb sequence for it. Do not stack consecutive sentences on one template (\"He thought… He thought…\", \"She felt… She felt…\"). Vary how you land beats. This will be read aloud by a single narrator, so phrasing a silent reader skims is glaring to a listener.";

-const CLEANUP_DIRECTIVE: &str = "This is a CLEANUP pass. The user prompt contains a draft you wrote. Polish for prose quality — tighten dialogue, fix pacing dead spots, hold your voice steady. Do NOT add new plot, do NOT retcon canon. Return ONLY the polished chapter.";
+const CLEANUP_DIRECTIVE: &str = "This is a CLEANUP pass. The user prompt contains a draft you wrote. Polish for prose quality — tighten dialogue, fix pacing dead spots, hold your voice steady. Do NOT add new plot, do NOT retcon canon.\n\nHunt repetition hard — this prose will be narrated aloud, where repetition is far more glaring than on the page. If a recurring image or motif appears more than once, re-phrase every occurrence so no two share a sentence shape or verb sequence. Do not stack more than two consecutive sentences on the same opening stem (\"He thought… He thought…\"). Echo a line of dialogue back verbatim at most once in the whole chapter. If a sentence merely restates something already said, cut it. Return ONLY the polished chapter.";

 const HOUSE_GEN_SYSTEM: &str = "You are a long-form fiction author writing the next chapter of a series. Honor the canon (characters, setting, established facts) exactly. Return only the chapter prose, starting with a heading line. No preamble.";

 const HOUSE_CLEANUP_SYSTEM: &str = "You are a copy editor polishing a draft chapter. Tighten dialogue, fix pacing, keep voice consistent. Do not add new plot. Return only the polished chapter.";

+// System prompt for the whole-story prose audit pass. It requests the
+// same `{ "findings": [ { severity, area, body } ] }` envelope as
+// SYSTEM_AUDIT, restricted to the areas 'repetition', 'continuity'
+// and 'other'.
+// NOTE(review): the prompt names four hunt categories but offers only
+// three `area` values and never says where template tics or
+// self-restatement belong. Confirm the model does not invent an area
+// string that the audit_findings area CHECK would reject.
+const SYSTEM_PROSE_AUDIT: &str = "You are a ruthless prose-quality auditor for long-form fiction destined for single-voice audiobook narration. You receive a complete story — every chapter, in order — and you read all of it. You return STRUCTURED JSON ONLY: no commentary, no preamble. You hunt four things:\n\n1. REPETITION across the whole book: a recurring image, motif or descriptive beat rendered in near-identical wording more than once; a sentence (or near-identical sentence) reused in different chapters; a line of dialogue echoed verbatim.\n2. TEMPLATE TICS: the same sentence template stacked or over-reused — e.g. clusters of consecutive sentences all opening 'He thought:' / 'She felt:' — anywhere it becomes a noticeable pattern.\n3. SELF-RESTATEMENT: a sentence or paragraph that says again, slightly reworded, something the text already said.\n4. CONTINUITY ERRORS: a fact, name, age, date or detail that contradicts an earlier one.\n\nThis prose will be read ALOUD by one narrator, so repetition a silent reader skims is glaring to a listener — be exhaustive, do not let near-duplicates pass. Return EXACTLY this shape: { \"findings\": [ { \"severity\": \"info\"|\"warn\"|\"crit\", \"area\": \"repetition\"|\"continuity\"|\"other\", \"body\": \"...\" } ] }. In each finding's body, quote the offending text exactly and name the chapter number(s). Use 'crit' for anything a listener will plainly hear as a mistake, 'warn' for noticeable repetition, 'info' for minor. If the story is clean, return { \"findings\": [] }.";
+
 const SYSTEM_AUDIT: &str = "You are a canon auditor for long-form fiction. You compare a parent story and a new chapter against the bible. You flag continuity drift, character voice shift, retconned facts, dropped threads, timeline contradictions. You return STRUCTURED JSON ONLY — no commentary, no preamble. The exact shape: { \"findings\": [ { \"severity\": \"info\"|\"warn\"|\"crit\", \"area\": \"character\"|\"continuity\"|\"tone\"|\"fact\"|\"timeline\"|\"other\", \"body\": \"...\" } ] }. If no findings, return { \"findings\": [] }.";

 const NARRATE_PREP_DIRECTIVE: &str = "This is a NARRATION-ANNOTATION pass. You receive your own prose and prepare it for an audiobook reading. Three kinds of inserts are allowed:\n\n1. BEAT MARKERS (additive, not prose): `[breath]` (~400ms), `[pause:1.2s]` (explicit silence in seconds, e.g. 0.5s, 1.2s, 2s), `[scene]` (~1500ms scene break). Place where the prose's rhythm asks for them — after a hard one-line beat, before a turn in dialogue, on a paragraph that lands with weight.\n\n2. SPEAKER VOICE TAGS (multi-voice dialogue): wrap dialogue lines in `[voice:<slug>]\"...\"[/voice]` based on who is speaking. The roster of available speaker slugs is given in the user prompt. The dialogue itself stays verbatim — only the wrapper is added. If a line of dialogue is not clearly attributable to a roster speaker, leave it unwrapped (the narrator voice will read it). Quoted thoughts (italicized interior monologue) stay unwrapped — only spoken aloud dialogue gets a voice tag.\n\n3. NARRATOR STUMBLES (humanizing prose-level inserts): a real narrator occasionally stumbles on a hard word, catches themselves, repeats. You may add these *sparingly* where the prose's pacing makes them feel right. Patterns: em-dash repetition (`Prip— Pripyat`), self-correction (`she — no, the wife — had been told`), hesitation (`the dose, the dose was`). USE SPARINGLY. Maybe 1-3 per chapter. Pick proper nouns, technical terms, or moments where the narrator might genuinely catch herself. Avoid stumbling on emotional climaxes — those should land clean.\n\nApart from stumbles, do NOT change a word of the original prose. Return the prose with beat markers, voice tags, and stumbles inline. No preamble. No commentary about your choices.";
--- a/skald/src/audit.rs
+++ b/skald/src/audit.rs
@ -0,0 +1,144 @@
+//! `skald audit` — whole-story prose-quality audit. Reads every
+//! chapter of a story end to end and flags repetition, template
+//! tics, self-restatement and continuity drift. The QC gate before
+//! a story goes to narration, where repetition a silent reader
+//! skims becomes glaring once read aloud.
+//!
+//! Findings land in the `audit_findings` table (area 'repetition' /
+//! 'continuity' / 'other') and are printed to stdout. v1 flags
+//! only — acting on the findings (a fix pass) is a separate step.
+
+use std::time::Instant;
+
+use anyhow::{Context, bail};
+use chrono::Utc;
+use skald_core::config::ForgeConfig;
+use skald_core::db;
+use skald_core::forge::{AuditFinding, AuditResponse, Forge, PassKind, PassOutput};
+use uuid::Uuid;
+
+/// Run the whole-story prose audit for `story_id` and report the findings.
+///
+/// Steps, in order:
+/// 1. Load the forge configuration from the environment and connect to
+///    the database through `db::connect_and_migrate`.
+/// 2. Fetch the story title and every chapter ordered by `n`, then
+///    concatenate them under `## Chapter N — title` headings.
+/// 3. Insert a `generation_runs` row with status `running`.
+/// 4. Call [`Forge::prose_audit`], parse the response and insert one
+///    `audit_findings` row per finding, then mark the run `succeeded`.
+/// 5. Print a severity summary followed by each finding to stdout.
+///
+/// A response that cannot be parsed is treated as zero findings (see
+/// `parse_findings`), so the run is still marked `succeeded`.
+///
+/// # Errors
+///
+/// Fails if the environment is incomplete, the story does not exist or
+/// has no chapters, the forge call fails, or any database statement
+/// fails. Once the run row exists, every failure marks that row
+/// `failed` before the original error is returned, so a run is never
+/// left in `running`. Findings inserted before a mid-loop failure are
+/// not rolled back.
+pub async fn run(database_url: &str, story_id: Uuid) -> anyhow::Result<()> {
+    let cfg = load_forge_config()?;
+    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
+
+    let pool = db::connect_and_migrate(database_url).await?;
+    let forge = Forge::new(&cfg)?;
+
+    let title: String = sqlx::query_scalar("SELECT title FROM stories WHERE id = $1")
+        .bind(story_id)
+        .fetch_optional(&pool)
+        .await?
+        .with_context(|| format!("story {story_id} not found"))?;
+
+    let chapters: Vec<(i32, Option<String>, String)> = sqlx::query_as(
+        "SELECT n, title, body_md FROM chapters WHERE story_id = $1 ORDER BY n",
+    )
+    .bind(story_id)
+    .fetch_all(&pool)
+    .await?;
+    if chapters.is_empty() {
+        bail!("story {story_id} has no chapters to audit");
+    }
+
+    // Concatenate the whole story in chapter order — the audit needs
+    // to see across chapter boundaries to catch cross-chapter reuse.
+    let mut full = String::new();
+    for (n, ct, body) in &chapters {
+        full.push_str(&format!(
+            "## Chapter {n} — {}\n\n",
+            ct.as_deref().unwrap_or("")
+        ));
+        full.push_str(body);
+        full.push_str("\n\n");
+    }
+    tracing::info!(
+        story = %title,
+        chapters = chapters.len(),
+        chars = full.len(),
+        "prose audit starting",
+    );
+
+    let run_id: Uuid = sqlx::query_scalar(
+        "INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
+    )
+    .bind(story_id)
+    .bind(PassKind::ProseAudit.as_str())
+    .fetch_one(&pool)
+    .await?;
+
+    // From here on the run row exists, so every fallible step lives in
+    // one block: any error funnels into the single "mark failed" arm
+    // below instead of escaping through `?` with the row still
+    // 'running' (e.g. when a finding is rejected by a CHECK constraint).
+    let started = Instant::now();
+    let outcome = async {
+        let out: PassOutput = forge.prose_audit(&full).await?;
+        // Measure the forge call only, not the database writes.
+        let elapsed = started.elapsed();
+
+        let findings = parse_findings(&out);
+        for f in &findings {
+            sqlx::query(
+                "INSERT INTO audit_findings (story_id, run_id, severity, area, body) VALUES ($1, $2, $3, $4, $5)",
+            )
+            .bind(story_id)
+            .bind(run_id)
+            .bind(&f.severity)
+            .bind(&f.area)
+            .bind(&f.body)
+            .execute(&pool)
+            .await?;
+        }
+        sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
+            .bind(Utc::now())
+            .bind(run_id)
+            .execute(&pool)
+            .await?;
+
+        Ok::<_, anyhow::Error>((findings, elapsed))
+    }
+    .await;
+
+    let (findings, elapsed) = match outcome {
+        Ok(done) => done,
+        Err(e) => {
+            // Best effort: if the bookkeeping write fails too, log it and
+            // still return the original error — that is the one the
+            // operator needs to see.
+            let marked = sqlx::query(
+                "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
+            )
+            .bind(format!("{e:#}"))
+            .bind(Utc::now())
+            .bind(run_id)
+            .execute(&pool)
+            .await;
+            if let Err(db_err) = marked {
+                tracing::error!(
+                    run = %run_id,
+                    error = %db_err,
+                    "could not mark prose audit run as failed",
+                );
+            }
+            return Err(e);
+        }
+    };
+
+    let crit = findings.iter().filter(|f| f.severity == "crit").count();
+    let warn = findings.iter().filter(|f| f.severity == "warn").count();
+    let info = findings.iter().filter(|f| f.severity == "info").count();
+    println!(
+        "prose audit: \"{title}\" — {} finding(s): {crit} crit, {warn} warn, {info} info ({:.1}s)",
+        findings.len(),
+        elapsed.as_secs_f32(),
+    );
+    for f in &findings {
+        println!("\n[{} · {}]\n{}", f.severity.to_uppercase(), f.area, f.body);
+    }
+    Ok(())
+}
+
+/// Decode the audit findings from a forge response.
+///
+/// Tries, in order:
+/// 1. the response's own typed JSON accessor (`as_json`);
+/// 2. the response text parsed as JSON verbatim;
+/// 3. the span from the first `{` to the last `}` of the text, which
+///    recovers a valid object wrapped in a Markdown code fence or
+///    preceded/followed by stray commentary.
+///
+/// If none succeed a warning is logged and an empty list is returned;
+/// the caller cannot distinguish this from a genuinely clean story.
+fn parse_findings(out: &PassOutput) -> Vec<AuditFinding> {
+    if let Ok(typed) = out.result.as_json::<AuditResponse>() {
+        return typed.findings;
+    }
+    if let Some(text) = out.result.as_text() {
+        if let Ok(typed) = serde_json::from_str::<AuditResponse>(text) {
+            return typed.findings;
+        }
+        // NOTE(review): `as_json` may already tolerate fenced output;
+        // this retry is harmless if so.
+        let recovered = json_object_span(text)
+            .and_then(|candidate| serde_json::from_str::<AuditResponse>(candidate).ok());
+        if let Some(typed) = recovered {
+            return typed.findings;
+        }
+    }
+    tracing::warn!("prose audit output did not parse as AuditResponse — no findings recorded");
+    Vec::new()
+}
+
+/// Return the slice of `text` running from its first `{` to its last
+/// `}` inclusive, or `None` when no such ordered pair exists.
+fn json_object_span(text: &str) -> Option<&str> {
+    let start = text.find('{')?;
+    let end = text.rfind('}')?;
+    // Both delimiters are single-byte ASCII, so the inclusive range
+    // always falls on char boundaries.
+    (start < end).then(|| &text[start..=end])
+}
+
+/// Build the [`ForgeConfig`] from process environment variables.
+///
+/// * `CLAWDFORGE_URL` — required, becomes `base_url`.
+/// * `CLAWDFORGE_TOKEN` — required, becomes `app_token`.
+/// * `SKALD_MODEL` — optional, becomes `model`; falls back to `"opus"`
+///   when the variable cannot be read.
+///
+/// # Errors
+///
+/// Returns an error naming the variable when either required one is
+/// not set.
+fn load_forge_config() -> anyhow::Result<ForgeConfig> {
+    // Fields are evaluated top to bottom, so a missing URL is reported
+    // before a missing token.
+    Ok(ForgeConfig {
+        base_url: std::env::var("CLAWDFORGE_URL").context("CLAWDFORGE_URL not set")?,
+        app_token: std::env::var("CLAWDFORGE_TOKEN").context("CLAWDFORGE_TOKEN not set")?,
+        model: match std::env::var("SKALD_MODEL") {
+            Ok(chosen) => chosen,
+            Err(_) => "opus".into(),
+        },
+    })
+}
--- a/skald/src/main.rs
+++ b/skald/src/main.rs
@ -4,6 +4,7 @@
 //!   skald serve              — boot the http server (v0.1 = /health + migrations)
 //!   skald import-markdown    — ingest a story markdown file into the DB

+mod audit;
 mod authors_seed;
 mod continue_story;
 mod import;
@ -174,6 +175,16 @@ enum Cmd {
        #[arg(long)]
        author: Option<String>,
    },
+    /// Whole-story prose-quality audit. Reads every chapter end to
+    /// end and flags repetition, template tics, self-restatement
+    /// and continuity drift. The QC gate before narration. Findings
+    /// land in audit_findings and print to stdout. Requires
+    /// CLAWDFORGE_URL + CLAWDFORGE_TOKEN.
+    Audit {
+        /// Story to audit.
+        #[arg(long)]
+        story: Uuid,
+    },
 }

 #[tokio::main]
@ -262,6 +273,7 @@ async fn run() -> anyhow::Result<()> {
        Cmd::Rewrite { chapter, author } => {
            rewrite::run(&cli.database_url, chapter, author.as_deref()).await
        }
+        Cmd::Audit { story } => audit::run(&cli.database_url, story).await,
    }
 }