From d2442f0a87c6854bf912a070e005413a5bd83cab Mon Sep 17 00:00:00 2001
From: Kayos
Date: Thu, 14 May 2026 21:35:20 -0700
Subject: [PATCH] =?UTF-8?q?forge:=20rewrite=20pass=20=E2=80=94=20re-author?=
 =?UTF-8?q?=20prose=20in=20an=20author's=20voice?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New Forge::rewrite + PassKind::Rewrite. An author re-authors existing
chapter prose entirely in their voice — sentence rhythm, word choice,
paragraph shape all become theirs — while canon (names, dates, places,
events, order, technical facts) is preserved exactly. Not editing;
re-authoring. SystemMode::Replace, max effort.

skald rewrite --chapter <uuid> [--author slug] overwrites body_md with
the rewritten version. The pre-rewrite prose is stashed in the new
chapters.body_md_original column on first rewrite (migration 0008,
idempotent) so the original is never lost. body_md_tts is cleared — it
was annotated against the old prose and must be regenerated by a fresh
prepare-narration.

prepare-narration gains --single-voice: skips the character speaker
roster so no [voice:X] dialogue tags are inserted, only beat markers.
Right for one-voice narration.

Migration 0008 also extends generation_runs.kind to allow 'rewrite'.
---
 migrations/0008_chapter_rewrite.sql |  15 ++
 skald-core/src/forge.rs             |  59 +++++++
 skald/src/main.rs                   |  31 +++-
 skald/src/narrate_prep.rs           |  19 ++-
 skald/src/rewrite.rs                | 252 ++++++++++++++++++++++++++++
 5 files changed, 371 insertions(+), 5 deletions(-)
 create mode 100644 migrations/0008_chapter_rewrite.sql
 create mode 100644 skald/src/rewrite.rs

diff --git a/migrations/0008_chapter_rewrite.sql b/migrations/0008_chapter_rewrite.sql
new file mode 100644
index 0000000..c6ce9b5
--- /dev/null
+++ b/migrations/0008_chapter_rewrite.sql
@@ -0,0 +1,15 @@
+-- The rewrite pass: an author re-authors existing chapter prose in
+-- their own voice (canon preserved, prose reworked). 
body_md gets +-- overwritten with the rewritten version; body_md_original keeps +-- the pre-rewrite prose so the original is never lost. Populated +-- only on the FIRST rewrite of a chapter (if NULL) — subsequent +-- rewrites leave the original alone. +ALTER TABLE chapters + ADD COLUMN IF NOT EXISTS body_md_original text; + +-- Allow 'rewrite' as a generation_runs.kind. +ALTER TABLE generation_runs + DROP CONSTRAINT generation_runs_kind_check; +ALTER TABLE generation_runs + ADD CONSTRAINT generation_runs_kind_check + CHECK (kind = ANY (ARRAY['gen', 'cleanup', 'audit', 'summary', 'embed', 'narrate_prep', 'rewrite'])); diff --git a/skald-core/src/forge.rs b/skald-core/src/forge.rs index 14f0da6..75c22b1 100644 --- a/skald-core/src/forge.rs +++ b/skald-core/src/forge.rs @@ -74,6 +74,10 @@ pub enum PassKind { /// prose; output should be byte-identical except for the /// tag insertions. NarratePrep, + /// Re-author existing chapter prose in an author's voice. Canon + /// (names, dates, events, places, facts) is preserved exactly; + /// the prose itself is rewritten. Not editing — re-authoring. + Rewrite, } impl PassKind { @@ -84,6 +88,7 @@ impl PassKind { Self::Audit => "audit", Self::Summary => "summary", Self::NarratePrep => "narrate_prep", + Self::Rewrite => "rewrite", } } } @@ -237,6 +242,45 @@ impl Forge { Ok(PassOutput { kind: PassKind::NarratePrep, result: r, duration_ms }) } + /// Re-author existing chapter prose in the author's voice. The + /// model receives prose written by another hand and rewrites it + /// entirely in its own style — sentence rhythm, word choice, + /// paragraph shape all become the author's. Canon is preserved + /// exactly: names, dates, events, places, technical facts, and + /// the sequence of what happens do not change. + /// + /// Author REQUIRED — a rewrite without an author has no target + /// voice. SystemMode::Replace; the model BECOMES the author. + /// Max effort: re-authoring is the heaviest prose-craft task. 
+    pub async fn rewrite(
+        &self,
+        prose: &str,
+        author: &AuthorWithRevision,
+    ) -> anyhow::Result<PassOutput> {
+        let scaffold = author
+            .revision
+            .system_template
+            .as_deref()
+            .unwrap_or(DEFAULT_AUTHOR_SCAFFOLD);
+        let system = scaffold
+            .replace("{{display_name}}", &author.author.display_name)
+            .replace("{{pass_directive}}", REWRITE_DIRECTIVE)
+            .replace("{{soul}}", &author.revision.soul);
+        let user_prompt = rewrite_user_prompt(prose);
+        let body = RunRequest {
+            prompt: user_prompt,
+            model: Some(self.model.clone()),
+            system: Some(system),
+            system_mode: Some(SystemMode::Replace),
+            effort: Some(Effort::Max),
+            timeout_secs: Some(1800),
+            ..Default::default()
+        };
+        let r = self.client.run(body).await?;
+        let duration_ms = r.duration_ms;
+        Ok(PassOutput { kind: PassKind::Rewrite, result: r, duration_ms })
+    }
+
     /// Summarize one chapter to ~250 words. The summary feeds into
     /// the continuation context for older chapters so the token
     /// budget stays sane on long series (book 12 doesn't carry book 1
@@ -349,6 +393,8 @@ const NARRATE_PREP_DIRECTIVE: &str = "This is a NARRATION-ANNOTATION pass. You r
 
 const HOUSE_NARRATE_PREP_SYSTEM: &str = "You are a senior audiobook director annotating prose for narration. You insert (a) beat markers — `[breath]`, `[pause:Xs]`, `[scene]` — where a skilled narrator would breathe or pause, (b) speaker voice tags `[voice:]\"...\"[/voice]` wrapping dialogue based on who is speaking (roster supplied in user prompt; leave unattributed dialogue unwrapped), and (c) occasional humanizing narrator stumbles using em-dash repetition or self-correction (sparingly — maybe 1-3 per chapter, on proper nouns or hard words). Apart from those stumbles you do NOT change a word of the prose. Return the prose verbatim plus beat markers, voice tags, and (rare) stumbles inline. No preamble, no commentary.";
 
+const REWRITE_DIRECTIVE: &str = "This is a REWRITE pass. The user prompt contains a chapter of prose written by another hand. 
Re-author it entirely in YOUR voice — every sentence reworked in your style: your sentence rhythm, your word choice, your paragraph shape, your way of landing a beat. This is not editing or polishing. It is re-authoring. The reader should not be able to tell another writer ever touched it.\n\nHARD CONSTRAINTS — canon is non-negotiable:\n- Every character name, every date, every place name stays exactly as written.\n- Every event, and the ORDER events happen in, stays exactly as written.\n- Every technical or historical fact stays exactly as written.\n- Do not add new scenes, characters, or events. Do not cut any scene or beat. Same story, same shape — your telling.\n\nReturn ONLY the rewritten chapter prose. Begin with the chapter heading line (`## Chapter N — title`) exactly as in the source. No preamble, no commentary about the rewrite."; + // ─── User-prompt builders ─────────────────────────────────────── fn gen_user_prompt( @@ -395,6 +441,19 @@ pub struct CharacterSpeaker { pub hint: Option, } +fn rewrite_user_prompt(prose: &str) -> String { + let mut out = String::with_capacity(prose.len() + 256); + out.push_str("# Chapter to re-author\n\n"); + out.push_str(prose); + out.push_str( + "\n\n# Task\n\nRe-author the chapter above entirely in your voice. \ + Preserve all canon — names, dates, places, events, the order they \ + happen, every technical fact. Change only the prose. Return only \ + the rewritten chapter, starting with its `## Chapter N` heading.\n", + ); + out +} + fn narrate_prep_user_prompt(prose: &str, characters: &[CharacterSpeaker]) -> String { let mut out = String::with_capacity(prose.len() + 512); diff --git a/skald/src/main.rs b/skald/src/main.rs index b6b6d7a..503a5f0 100644 --- a/skald/src/main.rs +++ b/skald/src/main.rs @@ -9,6 +9,7 @@ mod continue_story; mod import; mod narrate; mod narrate_prep; +mod rewrite; mod serve; mod show_context; mod summarize; @@ -155,6 +156,23 @@ enum Cmd { /// errors out to avoid clobbering a hand-tuned version. 
#[arg(long)] overwrite: bool, + /// Single-voice mode: skip the character speaker roster so + /// no [voice:X] dialogue tags are inserted. Use when the + /// whole chapter narrates in one voice. + #[arg(long)] + single_voice: bool, + }, + /// Re-author one chapter's prose in an author's voice. Canon + /// preserved, prose reworked. Overwrites body_md (stashing the + /// original in body_md_original) and clears body_md_tts. + Rewrite { + /// Chapter UUID to re-author. + #[arg(long)] + chapter: Uuid, + /// Author slug to rewrite as. Falls back to the story's + /// bound author if omitted. + #[arg(long)] + author: Option, }, } @@ -230,8 +248,19 @@ async fn run() -> anyhow::Result<()> { chapter, author, overwrite, + single_voice, } => { - narrate_prep::run(&cli.database_url, chapter, author.as_deref(), overwrite).await + narrate_prep::run( + &cli.database_url, + chapter, + author.as_deref(), + overwrite, + single_voice, + ) + .await + } + Cmd::Rewrite { chapter, author } => { + rewrite::run(&cli.database_url, chapter, author.as_deref()).await } } } diff --git a/skald/src/narrate_prep.rs b/skald/src/narrate_prep.rs index bc92228..8f4b806 100644 --- a/skald/src/narrate_prep.rs +++ b/skald/src/narrate_prep.rs @@ -24,6 +24,7 @@ pub async fn run( chapter_id: Uuid, author_slug: Option<&str>, overwrite: bool, + single_voice: bool, ) -> anyhow::Result<()> { let cfg = load_forge_config()?; tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured"); @@ -60,10 +61,20 @@ pub async fn run( .fetch_one(&pool) .await?; - let characters = load_speakers(&pool, chapter.story_id).await?; - if !characters.is_empty() { - tracing::info!(speaker_count = characters.len(), "speaker roster loaded"); - } + // Single-voice mode skips the speaker roster entirely — the + // narrate_prep pass then inserts only [breath]/[pause]/[scene] + // beats, no [voice:X] dialogue tags. Right when the whole + // chapter narrates in one voice. 
+    let characters = if single_voice {
+        tracing::info!("single-voice mode — skipping speaker roster");
+        Vec::new()
+    } else {
+        let c = load_speakers(&pool, chapter.story_id).await?;
+        if !c.is_empty() {
+            tracing::info!(speaker_count = c.len(), "speaker roster loaded");
+        }
+        c
+    };
 
     let started = Instant::now();
     let out_res = forge
diff --git a/skald/src/rewrite.rs b/skald/src/rewrite.rs
new file mode 100644
index 0000000..8692826
--- /dev/null
+++ b/skald/src/rewrite.rs
@@ -0,0 +1,277 @@
+//! `skald rewrite` — re-author one chapter's prose in an author's
+//! voice. Canon preserved, prose reworked. Overwrites chapters.body_md
+//! with the rewritten version; the pre-rewrite prose is stashed in
+//! chapters.body_md_original on the first rewrite (if NULL) so the
+//! original is never lost.
+//!
+//! Author resolution: --author flag wins, else the chapter's
+//! story.author_id. A rewrite with no author errors — there's no
+//! target voice.
+
+use std::time::Instant;
+
+use anyhow::{Context, bail};
+use chrono::Utc;
+use skald_core::authors::{self, AuthorWithRevision};
+use skald_core::config::ForgeConfig;
+use skald_core::db;
+use skald_core::forge::{Forge, PassKind, PassOutput};
+use sqlx::PgPool;
+use uuid::Uuid;
+
+pub async fn run(
+    database_url: &str,
+    chapter_id: Uuid,
+    author_slug: Option<&str>,
+) -> anyhow::Result<()> {
+    let cfg = load_forge_config()?;
+    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
+
+    let pool = db::connect_and_migrate(database_url).await?;
+    let forge = Forge::new(&cfg)?;
+
+    let chapter = load_chapter(&pool, chapter_id).await?;
+    let author = resolve_author(&pool, &chapter, author_slug)
+        .await?
+        .ok_or_else(|| {
+            anyhow::anyhow!(
+                "rewrite needs an author — pass --author or bind one to the story"
+            )
+        })?;
+    tracing::info!(
+        slug = %author.author.slug,
+        revision_n = author.revision.n,
+        chapter_n = chapter.n,
+        word_count_in = word_count(&chapter.body_md),
+        "re-authoring chapter",
+    );
+
+    let run_id: Uuid = sqlx::query_scalar(
+        "INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
+    )
+    .bind(chapter.story_id)
+    .bind(PassKind::Rewrite.as_str())
+    .fetch_one(&pool)
+    .await?;
+
+    let started = Instant::now();
+    let out_res = forge.rewrite(&chapter.body_md, &author).await;
+    let elapsed = started.elapsed();
+
+    // Once the run row exists, any failure — the forge call, an empty
+    // reply, a database write — must close it out as 'failed', or the
+    // row would stay at 'running' forever.
+    let persisted = match out_res {
+        Ok(out) => persist_rewrite(&pool, chapter_id, chapter.story_id, &out).await,
+        Err(e) => Err(e),
+    };
+    let body = match persisted {
+        Ok(body) => body,
+        Err(e) => {
+            let marked = sqlx::query(
+                "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
+            )
+            .bind(format!("{e:#}"))
+            .bind(Utc::now())
+            .bind(run_id)
+            .execute(&pool)
+            .await;
+            // Best effort: report the original failure, not the bookkeeping one.
+            if let Err(db_err) = marked {
+                tracing::warn!(error = %db_err, "could not mark generation run as failed");
+            }
+            return Err(e);
+        }
+    };
+
+    sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
+        .bind(Utc::now())
+        .bind(run_id)
+        .execute(&pool)
+        .await?;
+
+    println!(
+        "rewrote chapter {} of story {} as {} ({} → {} words) in {:.1}s",
+        chapter.n,
+        chapter.story_id,
+        author.author.slug,
+        word_count(&chapter.body_md),
+        word_count(&body),
+        elapsed.as_secs_f32(),
+    );
+    Ok(())
+}
+
+/// Store a finished rewrite: stash the original prose on first
+/// rewrite, overwrite the chapter, rebuild its passages, and refresh
+/// the story's word total. Returns the chapter body that was stored.
+///
+/// NOTE(review): the statements run one by one on the pool, not in a
+/// transaction — a mid-way failure can leave passages out of step.
+async fn persist_rewrite(
+    pool: &PgPool,
+    chapter_id: Uuid,
+    story_id: Uuid,
+    out: &PassOutput,
+) -> anyhow::Result<String> {
+    let rewritten = pass_text(out)?;
+    let (_n, title, body) = parse_chapter(&rewritten);
+
+    // Stash the original on first rewrite, then overwrite body_md.
+    // body_md_tts is cleared — it was annotated against the OLD
+    // prose and must be regenerated by a fresh prepare-narration.
+    sqlx::query(
+        "UPDATE chapters
+         SET body_md_original = COALESCE(body_md_original, body_md),
+             body_md = $1,
+             title = COALESCE($2, title),
+             body_md_tts = NULL,
+             word_count = $3,
+             generated_at = now()
+         WHERE id = $4",
+    )
+    .bind(&body)
+    .bind(title.as_deref())
+    .bind(word_count(&body))
+    .bind(chapter_id)
+    .execute(pool)
+    .await?;
+
+    // Replace passages with the rewritten paragraphs.
+    sqlx::query("DELETE FROM passages WHERE chapter_id = $1")
+        .bind(chapter_id)
+        .execute(pool)
+        .await?;
+    for (i, para) in body.split("\n\n").enumerate() {
+        let p = para.trim();
+        if p.is_empty() || p == "---" {
+            continue;
+        }
+        sqlx::query("INSERT INTO passages (chapter_id, paragraph_n, body) VALUES ($1, $2, $3)")
+            .bind(chapter_id)
+            .bind(i as i32 + 1)
+            .bind(p)
+            .execute(pool)
+            .await?;
+    }
+    sqlx::query(
+        "UPDATE stories SET word_count_actual = (SELECT COALESCE(SUM(word_count), 0) FROM chapters WHERE story_id = $1) WHERE id = $1",
+    )
+    .bind(story_id)
+    .execute(pool)
+    .await?;
+    Ok(body)
+}
+
+#[derive(Debug, Clone)]
+struct ChapterRow {
+    story_id: Uuid,
+    n: i32,
+    body_md: String,
+    story_author_id: Option<Uuid>,
+}
+
+async fn load_chapter(pool: &PgPool, id: Uuid) -> anyhow::Result<ChapterRow> {
+    let row: Option<(Uuid, i32, String, Option<Uuid>)> = sqlx::query_as(
+        "SELECT c.story_id, c.n, c.body_md, s.author_id
+         FROM chapters c JOIN stories s ON s.id = c.story_id
+         WHERE c.id = $1",
+    )
+    .bind(id)
+    .fetch_optional(pool)
+    .await?;
+    let (story_id, n, body_md, story_author_id) =
+        row.with_context(|| format!("chapter {id} not found"))?;
+    Ok(ChapterRow {
+        story_id,
+        n,
+        body_md,
+        story_author_id,
+    })
+}
+
+async fn resolve_author(
+    pool: &PgPool,
+    chapter: &ChapterRow,
+    flag_slug: Option<&str>,
+) -> anyhow::Result<Option<AuthorWithRevision>> {
+    if let Some(slug) = flag_slug {
+        return authors::get_with_current_revision(pool, slug)
+            .await? 
+            .map(Some)
+            .with_context(|| format!("author '{slug}' not found"));
+    }
+    if let Some(aid) = chapter.story_author_id {
+        let row: Option<(String,)> = sqlx::query_as("SELECT slug FROM authors WHERE id = $1")
+            .bind(aid)
+            .fetch_optional(pool)
+            .await?;
+        if let Some((slug,)) = row {
+            return Ok(authors::get_with_current_revision(pool, &slug).await?);
+        }
+    }
+    Ok(None)
+}
+
+fn pass_text(out: &PassOutput) -> anyhow::Result<String> {
+    let text = out
+        .result
+        .as_text()
+        .map(|s| s.to_string())
+        .or_else(|| out.result.result.as_str().map(|s| s.to_string()))
+        .unwrap_or_else(|| out.result.result.to_string());
+    if text.trim().is_empty() {
+        bail!("rewrite pass returned empty");
+    }
+    Ok(text)
+}
+
+/// Parse (n, title, body) out of the rewritten chapter. Tolerant of
+/// a missing heading — if the first line isn't a heading we keep the
+/// whole text as body and return n=0 (caller keeps the existing n).
+fn parse_chapter(text: &str) -> (i32, Option<String>, String) {
+    let trimmed = text.trim_start();
+    let first = trimmed.lines().next().unwrap_or("").trim();
+    if let Some(heading) = first.strip_prefix('#') {
+        let heading = heading.trim_start_matches('#').trim();
+        let n = heading
+            .to_ascii_lowercase() // same byte layout, so `idx` is valid in `heading`
+            .find("chapter")
+            .and_then(|idx| {
+                heading[idx + 7..]
+                    .trim_start()
+                    .split([' ', '—', '-', ':', ','])
+                    .next()
+                    .and_then(|w| w.parse::<i32>().ok())
+            })
+            .unwrap_or(0);
+        let title = heading
+            .split_once(" — ")
+            .or_else(|| heading.split_once(" - "))
+            .map(|(_, t)| t.trim().to_string())
+            .filter(|t| !t.is_empty());
+        let body = trimmed
+            .lines()
+            .skip(1)
+            .collect::<Vec<_>>()
+            .join("\n")
+            .trim_start()
+            .to_string();
+        let body = if body.is_empty() { text.trim().to_string() } else { body };
+        return (n, title, body);
+    }
+    (0, None, text.trim().to_string())
+}
+
+fn word_count(s: &str) -> i32 {
+    s.split_whitespace().count() as i32
+}
+
+fn load_forge_config() -> anyhow::Result<ForgeConfig> {
+    let base_url = std::env::var("CLAWDFORGE_URL")
+        .context("CLAWDFORGE_URL not set")?;
+    let app_token = std::env::var("CLAWDFORGE_TOKEN")
+        .context("CLAWDFORGE_TOKEN not set")?;
+    let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
+    Ok(ForgeConfig { base_url, app_token, model })
+}