forge: always --effort max + multi-chapter batch (cap 20)

forge.rs threads Effort::Max on gen + cleanup. Audit + summarize stay
default — they're structured-output / tool-shaped tasks where extended
thinking doesn't help. Bumps subprocess timeout from 600s to 1800s so
max-effort prose-craft has the wall clock it needs.

continue_story::run takes a chapter_count param; loops gen+cleanup per
chapter with each iteration's just-written prose appended to context.
Audit fires once at end against the combined batch vs parent canon.
Cap is 20 (~5h wall clock, ~$600 at max effort — beyond that is
operationally absurd).

CLI: 'skald continue --chapters N'. Web: numeric field on both new-
story and continue forms, 1..=20, defaults to 1.

Vendored clawdforge SDK refreshed for the Effort enum.
This commit is contained in:
Kayos 2026-05-13 14:26:53 -07:00
parent 20e262c85d
commit 3a749b7643
6 changed files with 168 additions and 54 deletions

View file

@ -30,7 +30,7 @@
use std::time::Duration;
use clawdforge::{Client, ClientBuilder, RunRequest, RunResult, SystemMode};
use clawdforge::{Client, ClientBuilder, Effort, RunRequest, RunResult, SystemMode};
use serde::{Deserialize, Serialize};
use crate::authors::AuthorWithRevision;
@ -87,11 +87,10 @@ impl Forge {
let client = ClientBuilder::default()
.base_url(&cfg.base_url)
.token(&cfg.app_token)
// Generation passes are slow — 600s is the clawdforge
// server-side max anyway, and gen passes routinely hit
// 5+ minutes on opus max-effort. Default 120s would
// strand them.
.timeout(Duration::from_secs(600))
// Generation passes at --effort max can run 10–20 min wall
// clock. clawdforge's server-side cap is 1800s — match it.
// Default 120s would strand any prose-craft pass.
.timeout(Duration::from_secs(1800))
.user_agent(concat!("skald/", env!("CARGO_PKG_VERSION")))
.build()?;
Ok(Self {
@ -125,7 +124,8 @@ impl Forge {
model: Some(self.model.clone()),
system: Some(system),
system_mode: Some(mode),
timeout_secs: Some(600),
effort: Some(Effort::Max),
timeout_secs: Some(1800),
..Default::default()
};
let r = self.client.run(body).await?;
@ -148,7 +148,8 @@ impl Forge {
model: Some(self.model.clone()),
system: Some(system),
system_mode: Some(mode),
timeout_secs: Some(600),
effort: Some(Effort::Max),
timeout_secs: Some(1800),
..Default::default()
};
let r = self.client.run(body).await?;

View file

@ -30,6 +30,11 @@ use skald_core::forge::{AuditResponse, Forge, PassKind, PassOutput};
use sqlx::PgPool;
use uuid::Uuid;
/// Hard cap on the number of chapters a single run will write; `run`
/// clamps its `chapter_count` argument into `1..=MAX_CHAPTERS_PER_RUN`.
///
/// 20 chapters is roughly 5h of wall-clock time at --effort max and
/// about $600 in API cost. Beyond that is operationally absurd — split
/// the work into separate runs instead.
pub const MAX_CHAPTERS_PER_RUN: usize = 20;
pub async fn run(
database_url: &str,
story_id: Uuid,
@ -38,9 +43,11 @@ pub async fn run(
target_words: Option<i32>,
recent_n: usize,
skip_audit: bool,
chapter_count: usize,
) -> anyhow::Result<()> {
let chapter_count = chapter_count.clamp(1, MAX_CHAPTERS_PER_RUN);
let cfg = load_forge_config()?;
tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
tracing::info!(base_url = %cfg.base_url, model = %cfg.model, chapter_count, "forge configured");
let pool = db::connect_and_migrate(database_url).await?;
let forge = Forge::new(&cfg)?;
@ -70,7 +77,7 @@ pub async fn run(
}
let parent_id = story.parent_story_id;
let context_md = match parent_id {
let parent_context_md = match parent_id {
Some(pid) => {
let ctx = ContinuationContext::assemble(&pool, pid, recent_n).await?;
tracing::info!(
@ -93,24 +100,49 @@ pub async fn run(
}
};
// ─── gen pass ────────────────────────────────────────────────
// Chapters this batch has written so far; appended to context
// for each subsequent iteration so the LLM sees what it just
// wrote. Each entry: (n, title, body).
let mut written_this_batch: Vec<(i32, Option<String>, String)> = Vec::new();
let target = target_words.or(story.word_count_target);
for batch_i in 0..chapter_count {
// Build context = parent_context + chapters already written this batch.
let context_md = if written_this_batch.is_empty() {
parent_context_md.clone()
} else {
let mut s = parent_context_md.clone();
s.push_str("\n\n## Already written in this run\n\n");
for (n, title, body) in &written_this_batch {
let t = title.as_deref().unwrap_or("");
s.push_str(&format!("### Chapter {n} {t}\n\n{body}\n\n"));
}
s
};
// User direction applies to the FIRST chapter only. Continuations
// follow naturally from the just-written prose.
let dir = if batch_i == 0 { direction } else { None };
// ─── gen pass ──────────────────────────────────────────
set_status(&pool, story_id, "generating").await?;
let gen_out = run_pass(&pool, story_id, PassKind::Gen, async {
forge.generate(&context_md, direction, target, author.as_ref()).await
forge.generate(&context_md, dir, target, author.as_ref()).await
})
.await?;
let gen_text = pass_text(&gen_out, "gen")?;
let (chapter_n, chapter_title, chapter_body) = parse_chapter(&gen_text)?;
let chapter_id = insert_chapter(&pool, story_id, chapter_n, chapter_title.as_deref(), &chapter_body).await?;
tracing::info!(
batch_i = batch_i + 1,
of = chapter_count,
chapter_n,
title = %chapter_title.as_deref().unwrap_or(""),
body_chars = chapter_body.len(),
"gen pass stored",
);
// ─── cleanup pass ────────────────────────────────────────────
// ─── cleanup pass ──────────────────────────────────────
set_status(&pool, story_id, "cleaning").await?;
let cleanup_out = run_pass(&pool, story_id, PassKind::Cleanup, async {
forge.cleanup(&chapter_body, &context_md, author.as_ref()).await
@ -127,20 +159,35 @@ pub async fn run(
}
replace_chapter(&pool, chapter_id, ct2.as_deref(), &cb2).await?;
tracing::info!(
batch_i = batch_i + 1,
of = chapter_count,
body_chars = cb2.len(),
"cleanup pass stored (replaced chapter body)",
"cleanup pass stored",
);
// ─── audit pass ──────────────────────────────────────────────
written_this_batch.push((cn2, ct2, cb2));
}
// ─── audit pass (once at end) ────────────────────────────────
let total_words: i32 = written_this_batch
.iter()
.map(|(_, _, b)| word_count(b))
.sum();
let audit_summary = if skip_audit {
set_status(&pool, story_id, "complete").await?;
"skipped".to_string()
} else if let Some(pid) = parent_id {
set_status(&pool, story_id, "auditing").await?;
let parent_prose = fetch_parent_prose(&pool, pid).await?;
let bible_md = context_md.clone();
// Combine all batch chapters into one sequel-prose blob for the audit.
let sequel_prose: String = written_this_batch
.iter()
.map(|(n, t, b)| format!("## Chapter {n} {}\n\n{b}\n\n", t.as_deref().unwrap_or("")))
.collect();
let bible_md = parent_context_md.clone();
let audit_out_res = run_pass(&pool, story_id, PassKind::Audit, async {
forge.audit(&parent_prose, &cb2, &bible_md).await
forge.audit(&parent_prose, &sequel_prose, &bible_md).await
})
.await;
@ -168,9 +215,11 @@ pub async fn run(
"no parent — audit skipped".to_string()
};
let chapter_ns: Vec<String> = written_this_batch.iter().map(|(n, _, _)| n.to_string()).collect();
println!(
"continued story {story_id}: chapter {chapter_n} written ({} words) — audit: {audit_summary}",
word_count(&cb2),
"story {story_id}: wrote {n} chapter(s) [{}] totaling {total_words} words — audit: {audit_summary}",
chapter_ns.join(", "),
n = written_this_batch.len(),
);
Ok(())
}

View file

@ -121,6 +121,10 @@ enum Cmd {
/// Skip the canon audit pass.
#[arg(long)]
skip_audit: bool,
/// How many chapters to write in this run. Loops gen+cleanup
/// per chapter; audit fires once at end. Capped at 20.
#[arg(long, default_value = "1")]
chapters: usize,
},
}
@ -173,6 +177,7 @@ async fn run() -> anyhow::Result<()> {
target_words,
recent,
skip_audit,
chapters,
} => {
continue_story::run(
&cli.database_url,
@ -182,6 +187,7 @@ async fn run() -> anyhow::Result<()> {
target_words,
recent,
skip_audit,
chapters,
)
.await
}

View file

@ -130,6 +130,8 @@ pub struct NewStoryForm {
author_slug: String,
#[serde(default)]
fire: String, // "now" = spawn background gen; empty = just queue the seed row
#[serde(default)]
chapters: String, // 1..=20, parsed downstream
}
async fn new_story_form(State(state): State<Arc<WebState>>) -> Html<String> {
@ -193,6 +195,7 @@ async fn new_story_create(
} else {
Some(author_slug.to_string())
};
let chapters = parse_chapters(&form.chapters);
tokio::spawn(async move {
if let Err(e) = crate::continue_story::run(
&database_url,
@ -202,6 +205,7 @@ async fn new_story_create(
target,
2, // recent_n
true, // skip_audit (no parent → can't audit)
chapters,
)
.await
{
@ -226,6 +230,23 @@ pub struct ContinueForm {
author_slug: String,
#[serde(default)]
fire: String, // "now" = spawn background gen; empty = just queue
#[serde(default)]
chapters: String, // 1..=20, parsed downstream
}
/// Turn the raw `chapters` form field into a usable chapter count.
///
/// Surrounding whitespace is ignored. Any input that does not parse as a
/// `usize` — empty, non-numeric, negative, fractional, or too large to
/// fit — yields 1. A successfully parsed value is clamped into
/// `1..=MAX_CHAPTERS_PER_RUN`, so `0` becomes 1 and anything above the
/// cap becomes the cap.
///
/// This is the form-level parse; `continue_story::run` clamps its
/// argument again as a defensive measure.
fn parse_chapters(raw: &str) -> usize {
    match raw.trim().parse::<usize>() {
        Ok(requested) => requested.clamp(1, crate::continue_story::MAX_CHAPTERS_PER_RUN),
        // Empty and malformed input both land here: default to one chapter.
        Err(_) => 1,
    }
}
async fn continue_form(
@ -306,6 +327,7 @@ async fn continue_create(
let database_url = std::env::var("DATABASE_URL").unwrap_or_else(|_| "postgresql://skald:skald@localhost:5432/skald".into());
let author_owned = if author_slug.is_empty() { None } else { Some(author_slug.to_string()) };
let direction_owned = direction.clone();
let chapters = parse_chapters(&form.chapters);
tokio::spawn(async move {
if let Err(e) = crate::continue_story::run(
&database_url,
@ -315,6 +337,7 @@ async fn continue_create(
target,
2, // recent_n
false, // skip_audit
chapters,
)
.await
{
@ -516,12 +539,15 @@ fn new_story_panel(err: Option<&str>, authors: &[AuthorOpt]) -> Markup {
label { "Seed prompt"
textarea name="prompt" rows="5" placeholder="What is this saga about? Setting, frame, conflict, vibe. The richer this is, the better the first chapter." {}
}
label { "Target word count for the first chapter"
label { "Target word count per chapter"
input type="number" name="word_count_target" min="500" step="500" placeholder="3000";
}
label { "Chapters to write this run (1–20)"
input type="number" name="chapters" min="1" max="20" value="1";
}
label.checkbox-label {
input type="checkbox" name="fire" value="now";
span { " fire generation now (background task, ~7 min — no audit pass on the first chapter since there's no parent yet)" }
span { " fire generation now (background task, ~15 min per chapter at --effort max — no audit pass on a new story since there's no parent yet)" }
}
button type="submit" { "Create" }
}
@ -559,12 +585,15 @@ fn continue_panel(parent: &StoryRow, authors: &[AuthorOpt], parent_author_slug:
label { "Direction (optional)"
textarea name="direction" rows="5" placeholder="What should the sequel explore? Specific scenes, characters, beats." {}
}
label { "Target word count (optional)"
label { "Target word count per chapter (optional)"
input type="number" name="word_count_target" min="500" step="500" placeholder="3000";
}
label { "Chapters to write this run (1–20)"
input type="number" name="chapters" min="1" max="20" value="1";
}
label.checkbox-label {
input type="checkbox" name="fire" value="now";
span { " fire generation now (background task, ~9 min)" }
span { " fire generation now (background task, ~15 min per chapter at --effort max; audit runs once at end)" }
}
button type="submit" { "Queue sequel" }
}

View file

@ -53,6 +53,6 @@ pub use client::{Client, ClientBuilder};
pub use error::Error;
pub use session::{Session, SessionList, SessionOptions, SessionState, TurnEvent, TurnResult};
pub use types::{
AppToken, AppTokenInfo, FileToken, Healthz, RunFailure, RunRequest, RunResult,
AppToken, AppTokenInfo, Effort, FileToken, Healthz, RunFailure, RunRequest, RunResult,
SystemMode, TokenCreateRequest, TokenList,
};

View file

@ -26,6 +26,30 @@ impl Default for SystemMode {
}
}
/// Thinking-effort level, mapped to `claude -p --effort <level>`.
///
/// Controls the extended-thinking budget on the reasoning models. `None`
/// on [`RunRequest::effort`] means "let the CLI default decide" (today:
/// medium).
///
/// Use [`Effort::Max`] for prose-craft tasks (fiction generation,
/// long-form rewrite) where the author persona benefits from extra
/// thinking. Leave default for tool-use / structured-JSON tasks.
///
/// Serialized with lowercase variant names (`"low"`, `"medium"`,
/// `"high"`, `"xhigh"`, `"max"`) via `#[serde(rename_all = "lowercase")]`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum Effort {
/// `--effort low` — minimum thinking budget.
Low,
/// `--effort medium` — claude's default if no flag is passed.
Medium,
/// `--effort high`.
High,
/// `--effort xhigh`.
Xhigh,
/// `--effort max` — maximum thinking budget. Right for long-form
/// prose generation where the author persona benefits from extra
/// thinking before each paragraph.
Max,
}
/// `GET /healthz` response body.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Healthz {
@ -80,6 +104,11 @@ pub struct RunRequest {
#[serde(skip_serializing_if = "Option::is_none")]
pub system_mode: Option<SystemMode>,
/// Maps to `claude -p --effort <level>`. `None` lets the CLI default
/// decide. Use [`Effort::Max`] on prose-craft / long-form generation.
#[serde(skip_serializing_if = "Option::is_none")]
pub effort: Option<Effort>,
/// File tokens previously returned from [`Client::upload_file`].
///
/// [`Client::upload_file`]: crate::Client::upload_file