forge: dedup pass — the fix half of the audit loop
Adds `skald dedup --story <id>`: reads a story's most recent prose-audit findings and walks every chapter, handing the author the chapter prose + the findings with instructions to rephrase ONLY the flagged repetitions (each recurrence made distinct) and fix flagged continuity errors — everything else stays verbatim. A surgical dedup, not a rewrite. Overwrites body_md, clears body_md_tts so the chapter is re-prepped before narration. High effort (prose-craft). Migration 0011 adds the 'dedup' run kind. Completes the QC loop: audit (find) -> dedup (fix) -> re-audit.
This commit is contained in:
parent
4de484cd35
commit
2820d173e8
4 changed files with 317 additions and 0 deletions
9
migrations/0011_dedup_pass.sql
Normal file
9
migrations/0011_dedup_pass.sql
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
-- The dedup pass: the fix half of the audit loop. Takes a chapter
|
||||
-- plus the story's prose-audit findings and rephrases only the
|
||||
-- flagged repetitions, leaving everything else verbatim.
|
||||
ALTER TABLE generation_runs DROP CONSTRAINT generation_runs_kind_check;
|
||||
ALTER TABLE generation_runs ADD CONSTRAINT generation_runs_kind_check
|
||||
CHECK (kind = ANY (ARRAY[
|
||||
'gen', 'cleanup', 'audit', 'summary', 'embed',
|
||||
'narrate_prep', 'rewrite', 'prose_audit', 'dedup'
|
||||
]));
|
||||
|
|
@ -85,6 +85,10 @@ pub enum PassKind {
|
|||
/// to end and flags repetition, template tics, self-restatement
|
||||
/// and continuity drift. The QC gate before narration.
|
||||
ProseAudit,
|
||||
/// Surgical dedup — takes a chapter plus the story's audit
|
||||
/// findings and rephrases only the flagged repetitions, leaving
|
||||
/// everything else verbatim. The fix half of the audit loop.
|
||||
Dedup,
|
||||
}
|
||||
|
||||
impl PassKind {
|
||||
|
|
@ -97,6 +101,7 @@ impl PassKind {
|
|||
Self::NarratePrep => "narrate_prep",
|
||||
Self::Rewrite => "rewrite",
|
||||
Self::ProseAudit => "prose_audit",
|
||||
Self::Dedup => "dedup",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -232,6 +237,43 @@ impl Forge {
|
|||
Ok(PassOutput { kind: PassKind::ProseAudit, result: r, duration_ms })
|
||||
}
|
||||
|
||||
/// Surgical dedup of one chapter — the fix half of the audit
|
||||
/// loop. Receives the chapter's prose plus the whole story's
|
||||
/// audit findings, and rephrases ONLY the flagged repetitions
|
||||
/// that occur in this chapter; everything the findings do not
|
||||
/// flag stays verbatim. Author REQUIRED — the fresh phrasing
|
||||
/// lands in the author's voice (SystemMode::Replace). High
|
||||
/// effort: it is prose-craft, same posture as rewrite.
|
||||
pub async fn dedup(
|
||||
&self,
|
||||
prose: &str,
|
||||
findings: &str,
|
||||
author: &AuthorWithRevision,
|
||||
) -> anyhow::Result<PassOutput> {
|
||||
let scaffold = author
|
||||
.revision
|
||||
.system_template
|
||||
.as_deref()
|
||||
.unwrap_or(DEFAULT_AUTHOR_SCAFFOLD);
|
||||
let system = scaffold
|
||||
.replace("{{display_name}}", &author.author.display_name)
|
||||
.replace("{{pass_directive}}", DEDUP_DIRECTIVE)
|
||||
.replace("{{soul}}", &author.revision.soul);
|
||||
let user_prompt = dedup_user_prompt(prose, findings);
|
||||
let body = RunRequest {
|
||||
prompt: user_prompt,
|
||||
model: Some(self.model.clone()),
|
||||
system: Some(system),
|
||||
system_mode: Some(SystemMode::Replace),
|
||||
effort: Some(Effort::High),
|
||||
timeout_secs: Some(1800),
|
||||
..Default::default()
|
||||
};
|
||||
let r = self.client.run(body).await?;
|
||||
let duration_ms = r.duration_ms;
|
||||
Ok(PassOutput { kind: PassKind::Dedup, result: r, duration_ms })
|
||||
}
|
||||
|
||||
/// Annotate prose with narration control tags. The model
|
||||
/// receives the full chapter prose and returns the SAME prose
|
||||
/// with `[pause:Xs]`, `[breath]`, `[scene]` markers inserted
|
||||
|
|
@ -470,6 +512,8 @@ const HOUSE_NARRATE_PREP_SYSTEM: &str = "You are a senior audiobook director ann
|
|||
/// voice tags, one narrator throughout.
|
||||
const HOUSE_NARRATE_PREP_SYSTEM_SINGLE: &str = "You are a senior audiobook director annotating prose for a SINGLE-narrator reading. You insert (a) beat markers — `[breath]`, `[pause:Xs]`, `[scene]` — where a skilled narrator would breathe or pause, and (b) occasional humanizing narrator stumbles using em-dash repetition or self-correction (sparingly — maybe 1-3 per chapter, on proper nouns or hard words). Do NOT add `[voice:...]` speaker tags — the whole chapter is one voice. Apart from those stumbles you do NOT change a word of the prose. Return the prose verbatim plus beat markers and (rare) stumbles inline. No preamble, no commentary.";
|
||||
|
||||
/// Pass directive for the dedup pass. `Forge::dedup` substitutes it for the
/// `{{pass_directive}}` placeholder of the author scaffold. It tells the
/// author to rephrase only the flagged repetitions that occur in the given
/// chapter, fix flagged continuity errors, and return everything else
/// unchanged.
const DEDUP_DIRECTIVE: &str = "This is a DEDUP pass. The user prompt contains ONE chapter of a story you wrote, plus a list of audit findings — repeated phrases, motifs, similes, sentence templates and continuity errors found across the whole book. Your job: return this chapter with every flagged repetition that occurs IN IT rephrased fresh, and everything else byte-identical.\n\nHARD RULES:\n- For any motif, simile, phrase, image or structural tic the findings flag as recurring: if it appears in THIS chapter, render this chapter's occurrence in fresh, distinctive wording. Never reuse the flagged original phrasing. The other chapters' occurrences are being revised separately — do NOT try to coordinate with them; just make yours distinct from the flagged original.\n- Fix any continuity error the findings flag that touches this chapter (a wrong age, number, name, date) — use the correct value the findings identify.\n- Change NOTHING the findings do not flag. Every sentence not implicated by a finding stays EXACTLY as written, word for word. This is not a rewrite, not a polish, not an edit for taste — it is a surgical dedup. When in doubt, leave it.\n- Canon is absolute: names, dates, events, the order they happen, every fact — unchanged. The chapter stays the same length and shape.\n- Return ONLY the chapter prose. No heading unless the source had one. No preamble, no commentary, no list of what you changed.\n\n";
|
||||
|
||||
const REWRITE_DIRECTIVE: &str = "This is a REWRITE pass. The user prompt contains a chapter of prose written by another hand. Re-author it entirely in YOUR voice — every sentence reworked in your style: your sentence rhythm, your word choice, your paragraph shape, your way of landing a beat. This is not editing or polishing. It is re-authoring. The reader should not be able to tell another writer ever touched it.\n\nHARD CONSTRAINTS — canon is non-negotiable:\n- Every character name, every date, every place name stays exactly as written.\n- Every event, and the ORDER events happen in, stays exactly as written.\n- Every technical or historical fact stays exactly as written.\n- Do not add new scenes, characters, or events. Do not cut any scene or beat. Same story, same shape — your telling.\n\nReturn ONLY the rewritten chapter prose. Begin with the chapter heading line (`## Chapter N — title`) exactly as in the source. No preamble, no commentary about the rewrite.";
|
||||
|
||||
// ─── User-prompt builders ───────────────────────────────────────
|
||||
|
|
@ -518,6 +562,25 @@ pub struct CharacterSpeaker {
|
|||
pub hint: Option<String>,
|
||||
}
|
||||
|
||||
/// Assembles the user prompt for the dedup pass.
///
/// Layout, in order: a findings heading with a short instruction, the
/// rendered `findings`, a chapter heading, the chapter `prose`, and a
/// closing task statement.
fn dedup_user_prompt(prose: &str, findings: &str) -> String {
    const FINDINGS_HEADING: &str = "# Audit findings for this story\n\n";
    const FINDINGS_INTRO: &str = "These repetitions and errors were found across the whole book. \
        Fix only the ones that occur in the chapter below.\n\n";
    const CHAPTER_HEADING: &str = "\n\n# Chapter to dedup\n\n";
    const TASK: &str = "\n\n# Task\n\nReturn the chapter above with every flagged repetition \
        that appears in it rephrased fresh, and any flagged continuity error \
        touching it corrected. Leave every unflagged sentence verbatim. \
        Return only the chapter prose.\n";

    [
        FINDINGS_HEADING,
        FINDINGS_INTRO,
        findings,
        CHAPTER_HEADING,
        prose,
        TASK,
    ]
    .concat()
}
|
||||
|
||||
fn rewrite_user_prompt(prose: &str) -> String {
|
||||
let mut out = String::with_capacity(prose.len() + 256);
|
||||
out.push_str("# Chapter to re-author\n\n");
|
||||
|
|
|
|||
227
skald/src/dedup.rs
Normal file
227
skald/src/dedup.rs
Normal file
|
|
@ -0,0 +1,227 @@
|
|||
//! `skald dedup` — the fix half of the audit loop. Reads a story's
|
||||
//! most recent prose-audit findings and walks the chapters, handing
|
||||
//! each chapter + the findings to the author with instructions to
|
||||
//! rephrase ONLY the flagged repetitions and leave everything else
|
||||
//! verbatim. Overwrites body_md and clears body_md_tts so the
|
||||
//! chapter gets re-prepped before narration.
|
||||
//!
|
||||
//! Run `skald audit` first — dedup needs findings to act on.
|
||||
|
||||
use std::time::Instant;
|
||||
|
||||
use anyhow::{Context, bail};
|
||||
use chrono::Utc;
|
||||
use skald_core::authors::{self, AuthorWithRevision};
|
||||
use skald_core::config::ForgeConfig;
|
||||
use skald_core::db;
|
||||
use skald_core::forge::{Forge, PassKind, PassOutput};
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub async fn run(
|
||||
database_url: &str,
|
||||
story_id: Uuid,
|
||||
author_slug: Option<&str>,
|
||||
) -> anyhow::Result<()> {
|
||||
let cfg = load_forge_config()?;
|
||||
tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
|
||||
|
||||
let pool = db::connect_and_migrate(database_url).await?;
|
||||
let forge = Forge::new(&cfg)?;
|
||||
|
||||
let story: Option<(String, Option<Uuid>)> =
|
||||
sqlx::query_as("SELECT title, author_id FROM stories WHERE id = $1")
|
||||
.bind(story_id)
|
||||
.fetch_optional(&pool)
|
||||
.await?;
|
||||
let (title, story_author_id) =
|
||||
story.with_context(|| format!("story {story_id} not found"))?;
|
||||
|
||||
let author = resolve_author(&pool, story_author_id, author_slug)
|
||||
.await?
|
||||
.ok_or_else(|| {
|
||||
anyhow::anyhow!(
|
||||
"dedup needs an author for the rephrasing — pass --author <slug> \
|
||||
or bind one to the story"
|
||||
)
|
||||
})?;
|
||||
|
||||
// Findings from the most recent succeeded prose-audit run.
|
||||
let findings = load_latest_findings(&pool, story_id).await?;
|
||||
if findings.is_empty() {
|
||||
bail!(
|
||||
"no prose-audit findings for story {story_id} — run `skald audit \
|
||||
--story {story_id}` first"
|
||||
);
|
||||
}
|
||||
let findings_block = render_findings(&findings);
|
||||
tracing::info!(
|
||||
story = %title,
|
||||
author = %author.author.slug,
|
||||
finding_count = findings.len(),
|
||||
"dedup starting",
|
||||
);
|
||||
|
||||
let chapters: Vec<(Uuid, i32)> = sqlx::query_as(
|
||||
"SELECT id, n FROM chapters WHERE story_id = $1 ORDER BY n",
|
||||
)
|
||||
.bind(story_id)
|
||||
.fetch_all(&pool)
|
||||
.await?;
|
||||
if chapters.is_empty() {
|
||||
bail!("story {story_id} has no chapters");
|
||||
}
|
||||
|
||||
for (chapter_id, n) in &chapters {
|
||||
let body_md: String =
|
||||
sqlx::query_scalar("SELECT body_md FROM chapters WHERE id = $1")
|
||||
.bind(chapter_id)
|
||||
.fetch_one(&pool)
|
||||
.await?;
|
||||
|
||||
let run_id: Uuid = sqlx::query_scalar(
|
||||
"INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
|
||||
)
|
||||
.bind(story_id)
|
||||
.bind(PassKind::Dedup.as_str())
|
||||
.fetch_one(&pool)
|
||||
.await?;
|
||||
|
||||
let started = Instant::now();
|
||||
let out_res = forge.dedup(&body_md, &findings_block, &author).await;
|
||||
let elapsed = started.elapsed();
|
||||
|
||||
let out: PassOutput = match out_res {
|
||||
Ok(o) => o,
|
||||
Err(e) => {
|
||||
sqlx::query(
|
||||
"UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
|
||||
)
|
||||
.bind(format!("{e:#}"))
|
||||
.bind(Utc::now())
|
||||
.bind(run_id)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
return Err(e).with_context(|| format!("dedup failed on chapter {n}"));
|
||||
}
|
||||
};
|
||||
|
||||
let deduped = pass_text(&out)?;
|
||||
// Overwrite body_md and clear body_md_tts — the chapter must be
|
||||
// re-prepped before it is narrated again. body_md_original is
|
||||
// left untouched (it belongs to the rewrite pass).
|
||||
sqlx::query("UPDATE chapters SET body_md = $1, body_md_tts = NULL WHERE id = $2")
|
||||
.bind(&deduped)
|
||||
.bind(chapter_id)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
|
||||
.bind(Utc::now())
|
||||
.bind(run_id)
|
||||
.execute(&pool)
|
||||
.await?;
|
||||
|
||||
let before = body_md.len();
|
||||
let after = deduped.len();
|
||||
println!(
|
||||
"deduped chapter {n} ({before}c -> {after}c) in {:.1}s",
|
||||
elapsed.as_secs_f32(),
|
||||
);
|
||||
}
|
||||
|
||||
println!(
|
||||
"dedup complete: \"{title}\" — {} chapter(s), {} finding(s) applied",
|
||||
chapters.len(),
|
||||
findings.len(),
|
||||
);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// One row of `audit_findings`, as loaded for the dedup prompt.
#[derive(Debug, Clone)]
struct Finding {
    /// Severity label; `crit` sorts first, `warn` second, anything else last.
    severity: String,
    /// Area label shown next to the severity in the rendered heading.
    area: String,
    /// Free-text description of the finding.
    body: String,
}
|
||||
|
||||
async fn load_latest_findings(pool: &PgPool, story_id: Uuid) -> anyhow::Result<Vec<Finding>> {
|
||||
let rows: Vec<(String, String, String)> = sqlx::query_as(
|
||||
"SELECT severity, area, body FROM audit_findings
|
||||
WHERE story_id = $1
|
||||
AND run_id = (
|
||||
SELECT id FROM generation_runs
|
||||
WHERE story_id = $1 AND kind = 'prose_audit' AND status = 'succeeded'
|
||||
ORDER BY started_at DESC LIMIT 1
|
||||
)
|
||||
ORDER BY
|
||||
CASE severity WHEN 'crit' THEN 0 WHEN 'warn' THEN 1 ELSE 2 END,
|
||||
area",
|
||||
)
|
||||
.bind(story_id)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
Ok(rows
|
||||
.into_iter()
|
||||
.map(|(severity, area, body)| Finding { severity, area, body })
|
||||
.collect())
|
||||
}
|
||||
|
||||
fn render_findings(findings: &[Finding]) -> String {
|
||||
let mut out = String::new();
|
||||
for f in findings {
|
||||
out.push_str(&format!(
|
||||
"[{} · {}]\n{}\n\n",
|
||||
f.severity.to_uppercase(),
|
||||
f.area,
|
||||
f.body,
|
||||
));
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
async fn resolve_author(
|
||||
pool: &PgPool,
|
||||
story_author_id: Option<Uuid>,
|
||||
flag_slug: Option<&str>,
|
||||
) -> anyhow::Result<Option<AuthorWithRevision>> {
|
||||
if let Some(slug) = flag_slug {
|
||||
return authors::get_with_current_revision(pool, slug)
|
||||
.await?
|
||||
.map(Some)
|
||||
.with_context(|| format!("author '{slug}' not found"));
|
||||
}
|
||||
if let Some(aid) = story_author_id {
|
||||
let row: Option<(String,)> = sqlx::query_as("SELECT slug FROM authors WHERE id = $1")
|
||||
.bind(aid)
|
||||
.fetch_optional(pool)
|
||||
.await?;
|
||||
if let Some((slug,)) = row {
|
||||
return Ok(authors::get_with_current_revision(pool, &slug).await?);
|
||||
}
|
||||
}
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
fn pass_text(out: &PassOutput) -> anyhow::Result<String> {
|
||||
let text = out
|
||||
.result
|
||||
.as_text()
|
||||
.map(|s| s.to_string())
|
||||
.or_else(|| out.result.result.as_str().map(|s| s.to_string()))
|
||||
.unwrap_or_else(|| out.result.result.to_string());
|
||||
if text.trim().is_empty() {
|
||||
bail!("dedup pass returned empty");
|
||||
}
|
||||
Ok(text)
|
||||
}
|
||||
|
||||
fn load_forge_config() -> anyhow::Result<ForgeConfig> {
|
||||
let base_url = std::env::var("CLAWDFORGE_URL").context("CLAWDFORGE_URL not set")?;
|
||||
let app_token = std::env::var("CLAWDFORGE_TOKEN").context("CLAWDFORGE_TOKEN not set")?;
|
||||
let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
|
||||
Ok(ForgeConfig {
|
||||
base_url,
|
||||
app_token,
|
||||
model,
|
||||
})
|
||||
}
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
mod audit;
|
||||
mod authors_seed;
|
||||
mod continue_story;
|
||||
mod dedup;
|
||||
mod import;
|
||||
mod narrate;
|
||||
mod narrate_prep;
|
||||
|
|
@ -185,6 +186,20 @@ enum Cmd {
|
|||
#[arg(long)]
|
||||
story: Uuid,
|
||||
},
|
||||
/// Dedup a story against its most recent prose-audit findings.
|
||||
/// Walks every chapter, rephrasing only the flagged repetitions
|
||||
/// and fixing flagged continuity errors — everything else stays
|
||||
/// verbatim. Overwrites body_md and clears body_md_tts. Run
|
||||
/// `skald audit` first.
|
||||
Dedup {
|
||||
/// Story to dedup.
|
||||
#[arg(long)]
|
||||
story: Uuid,
|
||||
/// Author slug for the rephrasing. Falls back to the story's
|
||||
/// bound author if omitted.
|
||||
#[arg(long)]
|
||||
author: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -274,6 +289,9 @@ async fn run() -> anyhow::Result<()> {
|
|||
rewrite::run(&cli.database_url, chapter, author.as_deref()).await
|
||||
}
|
||||
Cmd::Audit { story } => audit::run(&cli.database_url, story).await,
|
||||
Cmd::Dedup { story, author } => {
|
||||
dedup::run(&cli.database_url, story, author.as_deref()).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue