forge: always --effort max + multi-chapter batch (cap 20)

forge.rs threads Effort::Max on gen + cleanup. Audit + summarize stay default — they're structured-output / tool-shaped tasks where extended thinking doesn't help. Bumps subprocess timeout from 600s to 1800s so max-effort prose-craft has the wall clock it needs. continue_story::run takes a chapter_count param; loops gen+cleanup per chapter with each iteration's just-written prose appended to context. Audit fires once at end against the combined batch vs parent canon. Cap is 20 (~5h wall clock, ~$600 at max effort — beyond that is operationally absurd). CLI: 'skald continue --chapters N'. Web: numeric field on both new-story and continue forms, 1..=20, defaults to 1. Vendored clawdforge SDK refreshed for the Effort enum.
2026-05-13 14:26:53 -07:00 · 2026-05-13 14:26:53 -07:00 · 3a749b7643
commit 3a749b7643
parent 20e262c85d
6 changed files with 168 additions and 54 deletions
--- a/skald-core/src/forge.rs
+++ b/skald-core/src/forge.rs
@ -30,7 +30,7 @@

 use std::time::Duration;

-use clawdforge::{Client, ClientBuilder, RunRequest, RunResult, SystemMode};
+use clawdforge::{Client, ClientBuilder, Effort, RunRequest, RunResult, SystemMode};
 use serde::{Deserialize, Serialize};

 use crate::authors::AuthorWithRevision;
@ -87,11 +87,10 @@ impl Forge {
        let client = ClientBuilder::default()
            .base_url(&cfg.base_url)
            .token(&cfg.app_token)
-            // Generation passes are slow — 600s is the clawdforge
-            // server-side max anyway, and gen passes routinely hit
-            // 5+ minutes on opus max-effort. Default 120s would
-            // strand them.
-            .timeout(Duration::from_secs(600))
+            // Generation passes at --effort max can run 10–20 min wall
+            // clock. clawdforge's server-side cap is 1800s — match it.
+            // Default 120s would strand any prose-craft pass.
+            .timeout(Duration::from_secs(1800))
            .user_agent(concat!("skald/", env!("CARGO_PKG_VERSION")))
            .build()?;
        Ok(Self {
@ -125,7 +124,8 @@ impl Forge {
            model: Some(self.model.clone()),
            system: Some(system),
            system_mode: Some(mode),
-            timeout_secs: Some(600),
+            effort: Some(Effort::Max),
+            timeout_secs: Some(1800),
            ..Default::default()
        };
        let r = self.client.run(body).await?;
@ -148,7 +148,8 @@ impl Forge {
            model: Some(self.model.clone()),
            system: Some(system),
            system_mode: Some(mode),
-            timeout_secs: Some(600),
+            effort: Some(Effort::Max),
+            timeout_secs: Some(1800),
            ..Default::default()
        };
        let r = self.client.run(body).await?;
--- a/skald/src/continue_story.rs
+++ b/skald/src/continue_story.rs
@ -30,6 +30,11 @@ use skald_core::forge::{AuditResponse, Forge, PassKind, PassOutput};
 use sqlx::PgPool;
 use uuid::Uuid;

+/// Hard cap on chapters-per-run. 20 = ~5h wall-clock at --effort max,
+/// ~$600 in API cost. Beyond that is operationally absurd — split
+/// into separate runs.
+pub const MAX_CHAPTERS_PER_RUN: usize = 20;
+
 pub async fn run(
    database_url: &str,
    story_id: Uuid,
@ -38,9 +43,11 @@ pub async fn run(
    target_words: Option<i32>,
    recent_n: usize,
    skip_audit: bool,
+    chapter_count: usize,
 ) -> anyhow::Result<()> {
+    let chapter_count = chapter_count.clamp(1, MAX_CHAPTERS_PER_RUN);
    let cfg = load_forge_config()?;
-    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
+    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, chapter_count, "forge configured");

    let pool = db::connect_and_migrate(database_url).await?;
    let forge = Forge::new(&cfg)?;
@ -70,7 +77,7 @@ pub async fn run(
    }

    let parent_id = story.parent_story_id;
-    let context_md = match parent_id {
+    let parent_context_md = match parent_id {
        Some(pid) => {
            let ctx = ContinuationContext::assemble(&pool, pid, recent_n).await?;
            tracing::info!(
@ -93,24 +100,49 @@ pub async fn run(
        }
    };

-    // ─── gen pass ────────────────────────────────────────────────
+    // Chapters this batch has written so far; appended to context
+    // for each subsequent iteration so the LLM sees what it just
+    // wrote. Each entry: (n, title, body).
+    let mut written_this_batch: Vec<(i32, Option<String>, String)> = Vec::new();
    let target = target_words.or(story.word_count_target);
+
+    for batch_i in 0..chapter_count {
+        // Build context = parent_context + chapters already written this batch.
+        let context_md = if written_this_batch.is_empty() {
+            parent_context_md.clone()
+        } else {
+            let mut s = parent_context_md.clone();
+            s.push_str("\n\n## Already written in this run\n\n");
+            for (n, title, body) in &written_this_batch {
+                let t = title.as_deref().unwrap_or("");
+                s.push_str(&format!("### Chapter {n} {t}\n\n{body}\n\n"));
+            }
+            s
+        };
+
+        // User direction applies to the FIRST chapter only. Continuations
+        // follow naturally from the just-written prose.
+        let dir = if batch_i == 0 { direction } else { None };
+
+        // ─── gen pass ──────────────────────────────────────────
        set_status(&pool, story_id, "generating").await?;
        let gen_out = run_pass(&pool, story_id, PassKind::Gen, async {
-        forge.generate(&context_md, direction, target, author.as_ref()).await
+            forge.generate(&context_md, dir, target, author.as_ref()).await
        })
        .await?;
        let gen_text = pass_text(&gen_out, "gen")?;
        let (chapter_n, chapter_title, chapter_body) = parse_chapter(&gen_text)?;
        let chapter_id = insert_chapter(&pool, story_id, chapter_n, chapter_title.as_deref(), &chapter_body).await?;
        tracing::info!(
+            batch_i = batch_i + 1,
+            of = chapter_count,
            chapter_n,
            title = %chapter_title.as_deref().unwrap_or(""),
            body_chars = chapter_body.len(),
            "gen pass stored",
        );

-    // ─── cleanup pass ────────────────────────────────────────────
+        // ─── cleanup pass ──────────────────────────────────────
        set_status(&pool, story_id, "cleaning").await?;
        let cleanup_out = run_pass(&pool, story_id, PassKind::Cleanup, async {
            forge.cleanup(&chapter_body, &context_md, author.as_ref()).await
@ -127,20 +159,35 @@ pub async fn run(
        }
        replace_chapter(&pool, chapter_id, ct2.as_deref(), &cb2).await?;
        tracing::info!(
+            batch_i = batch_i + 1,
+            of = chapter_count,
            body_chars = cb2.len(),
-        "cleanup pass stored (replaced chapter body)",
+            "cleanup pass stored",
        );

-    // ─── audit pass ──────────────────────────────────────────────
+        written_this_batch.push((cn2, ct2, cb2));
+    }
+
+    // ─── audit pass (once at end) ────────────────────────────────
+    let total_words: i32 = written_this_batch
+        .iter()
+        .map(|(_, _, b)| word_count(b))
+        .sum();
+
    let audit_summary = if skip_audit {
        set_status(&pool, story_id, "complete").await?;
        "skipped".to_string()
    } else if let Some(pid) = parent_id {
        set_status(&pool, story_id, "auditing").await?;
        let parent_prose = fetch_parent_prose(&pool, pid).await?;
-        let bible_md = context_md.clone();
+        // Combine all batch chapters into one sequel-prose blob for the audit.
+        let sequel_prose: String = written_this_batch
+            .iter()
+            .map(|(n, t, b)| format!("## Chapter {n} {}\n\n{b}\n\n", t.as_deref().unwrap_or("")))
+            .collect();
+        let bible_md = parent_context_md.clone();
        let audit_out_res = run_pass(&pool, story_id, PassKind::Audit, async {
-            forge.audit(&parent_prose, &cb2, &bible_md).await
+            forge.audit(&parent_prose, &sequel_prose, &bible_md).await
        })
        .await;

@ -168,9 +215,11 @@ pub async fn run(
        "no parent — audit skipped".to_string()
    };

+    let chapter_ns: Vec<String> = written_this_batch.iter().map(|(n, _, _)| n.to_string()).collect();
    println!(
-        "continued story {story_id}: chapter {chapter_n} written ({} words) — audit: {audit_summary}",
-        word_count(&cb2),
+        "story {story_id}: wrote {n} chapter(s) [{}] totaling {total_words} words — audit: {audit_summary}",
+        chapter_ns.join(", "),
+        n = written_this_batch.len(),
    );
    Ok(())
 }
--- a/skald/src/main.rs
+++ b/skald/src/main.rs
@ -121,6 +121,10 @@ enum Cmd {
        /// Skip the canon audit pass.
        #[arg(long)]
        skip_audit: bool,
+        /// How many chapters to write in this run. Loops gen+cleanup
+        /// per chapter; audit fires once at end. Capped at 20.
+        #[arg(long, default_value = "1")]
+        chapters: usize,
    },
 }

@ -173,6 +177,7 @@ async fn run() -> anyhow::Result<()> {
            target_words,
            recent,
            skip_audit,
+            chapters,
        } => {
            continue_story::run(
                &cli.database_url,
@ -182,6 +187,7 @@ async fn run() -> anyhow::Result<()> {
                target_words,
                recent,
                skip_audit,
+                chapters,
            )
            .await
        }
--- a/skald/src/web.rs
+++ b/skald/src/web.rs
@ -130,6 +130,8 @@ pub struct NewStoryForm {
    author_slug: String,
    #[serde(default)]
    fire: String, // "now" = spawn background gen; empty = just queue the seed row
+    #[serde(default)]
+    chapters: String, // 1..=20, parsed downstream
 }

 async fn new_story_form(State(state): State<Arc<WebState>>) -> Html<String> {
@ -193,6 +195,7 @@ async fn new_story_create(
        } else {
            Some(author_slug.to_string())
        };
+        let chapters = parse_chapters(&form.chapters);
        tokio::spawn(async move {
            if let Err(e) = crate::continue_story::run(
                &database_url,
@ -202,6 +205,7 @@ async fn new_story_create(
                target,
                2,       // recent_n
                true,    // skip_audit (no parent → can't audit)
+                chapters,
            )
            .await
            {
@ -226,6 +230,23 @@ pub struct ContinueForm {
    author_slug: String,
    #[serde(default)]
    fire: String, // "now" = spawn background gen; empty = just queue
+    #[serde(default)]
+    chapters: String, // 1..=20, parsed downstream
+}
+
+/// Parse the `chapters` form field. Falls back to 1 on empty or
+/// unparseable input, clamps to [1, 20] (matches the server-side
+/// cap in continue_story::MAX_CHAPTERS_PER_RUN). Form-level parse
+/// — the continue_story loop also clamps defensively.
+fn parse_chapters(raw: &str) -> usize {
+    let trimmed = raw.trim();
+    if trimmed.is_empty() {
+        return 1;
+    }
+    trimmed
+        .parse::<usize>()
+        .unwrap_or(1)
+        .clamp(1, crate::continue_story::MAX_CHAPTERS_PER_RUN)
 }

 async fn continue_form(
@ -306,6 +327,7 @@ async fn continue_create(
        let database_url = std::env::var("DATABASE_URL").unwrap_or_else(|_| "postgresql://skald:skald@localhost:5432/skald".into());
        let author_owned = if author_slug.is_empty() { None } else { Some(author_slug.to_string()) };
        let direction_owned = direction.clone();
+        let chapters = parse_chapters(&form.chapters);
        tokio::spawn(async move {
            if let Err(e) = crate::continue_story::run(
                &database_url,
@ -315,6 +337,7 @@ async fn continue_create(
                target,
                2, // recent_n
                false, // skip_audit
+                chapters,
            )
            .await
            {
@ -516,12 +539,15 @@ fn new_story_panel(err: Option<&str>, authors: &[AuthorOpt]) -> Markup {
                label { "Seed prompt"
                    textarea name="prompt" rows="5" placeholder="What is this saga about? Setting, frame, conflict, vibe. The richer this is, the better the first chapter." {}
                }
-                label { "Target word count for the first chapter"
+                label { "Target word count per chapter"
                    input type="number" name="word_count_target" min="500" step="500" placeholder="3000";
                }
+                label { "Chapters to write this run (1–20)"
+                    input type="number" name="chapters" min="1" max="20" value="1";
+                }
                label.checkbox-label {
                    input type="checkbox" name="fire" value="now";
-                    span { " fire generation now (background task, ~7 min — no audit pass on the first chapter since there's no parent yet)" }
+                    span { " fire generation now (background task, ~15 min per chapter at --effort max — no audit pass on a new story since there's no parent yet)" }
                }
                button type="submit" { "Create" }
            }
@ -559,12 +585,15 @@ fn continue_panel(parent: &StoryRow, authors: &[AuthorOpt], parent_author_slug:
                label { "Direction (optional)"
                    textarea name="direction" rows="5" placeholder="What should the sequel explore? Specific scenes, characters, beats." {}
                }
-                label { "Target word count (optional)"
+                label { "Target word count per chapter (optional)"
                    input type="number" name="word_count_target" min="500" step="500" placeholder="3000";
                }
+                label { "Chapters to write this run (1–20)"
+                    input type="number" name="chapters" min="1" max="20" value="1";
+                }
                label.checkbox-label {
                    input type="checkbox" name="fire" value="now";
-                    span { " fire generation now (background task, ~9 min)" }
+                    span { " fire generation now (background task, ~15 min per chapter at --effort max; audit runs once at end)" }
                }
                button type="submit" { "Queue sequel" }
            }
--- a/vendor/clawdforge/src/lib.rs
+++ b/vendor/clawdforge/src/lib.rs
@ -53,6 +53,6 @@ pub use client::{Client, ClientBuilder};
 pub use error::Error;
 pub use session::{Session, SessionList, SessionOptions, SessionState, TurnEvent, TurnResult};
 pub use types::{
-    AppToken, AppTokenInfo, FileToken, Healthz, RunFailure, RunRequest, RunResult,
+    AppToken, AppTokenInfo, Effort, FileToken, Healthz, RunFailure, RunRequest, RunResult,
    SystemMode, TokenCreateRequest, TokenList,
 };
--- a/vendor/clawdforge/src/types.rs
+++ b/vendor/clawdforge/src/types.rs
@ -26,6 +26,30 @@ impl Default for SystemMode {
    }
 }

+/// Maps to `claude -p --effort <level>`. Controls the extended-thinking
+/// budget on the reasoning models. `None` on [`RunRequest::effort`] means
+/// "let the CLI default decide" (today: medium).
+///
+/// Use [`Effort::Max`] for prose-craft tasks (fiction generation,
+/// long-form rewrite) where the author persona benefits from extra
+/// thinking. Leave default for tool-use / structured-JSON tasks.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum Effort {
+    /// `--effort low` — minimum thinking budget.
+    Low,
+    /// `--effort medium` — claude's default if no flag is passed.
+    Medium,
+    /// `--effort high`.
+    High,
+    /// `--effort xhigh`.
+    Xhigh,
+    /// `--effort max` — maximum thinking budget. Right for long-form
+    /// prose generation where the author persona benefits from extra
+    /// thinking before each paragraph.
+    Max,
+}
+
 /// `GET /healthz` response body.
 #[derive(Debug, Clone, Deserialize, Serialize)]
 pub struct Healthz {
@ -80,6 +104,11 @@ pub struct RunRequest {
    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_mode: Option<SystemMode>,

+    /// Maps to `claude -p --effort <level>`. `None` lets the CLI default
+    /// decide. Use [`Effort::Max`] on prose-craft / long-form generation.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<Effort>,
+
    /// File tokens previously returned from [`Client::upload_file`].
    ///
    /// [`Client::upload_file`]: crate::Client::upload_file