summarize: first real forge call — generate per-chapter summaries
`skald summarize --story <uuid>` walks every chapter without an existing summary, calls Forge::summarize() (clawdforge → opus → ~250 words of plot/character/setting/threads), and inserts the result into chapter_summaries.

Side effects:
- One generation_runs row per chapter (kind='summary', status flow running → succeeded|failed). Errors update the row and bail; the happy path closes it with ended_at + tokens.
- ON CONFLICT (chapter_id) means re-running with --force replaces the previous summary cleanly.

CLI:
  skald summarize --story <uuid>          # only-missing
  skald summarize --story <uuid> --force  # re-summarize all

Reads from env (loaded by skald.env in the container):
  CLAWDFORGE_URL   — base URL of the clawdforge HTTP service
  CLAWDFORGE_TOKEN — app-level bearer (per-app, not the admin token)
  SKALD_MODEL      — defaults to 'opus'

This is the first subcommand that actually exercises the forge. It unlocks ContinuationContext::assemble's coverage metric (which was stuck at 24% on Coast-Down because the 5 placeholder summaries don't actually carry the prose). After running summarize against Coast-Down, coverage should jump to ~100% and the context blob for any sequel becomes fully canon-faithful without dragging the full ~21k words of earlier-chapter prose along.

The forge prompt template for summarize ships REAL (not stubbed) — it's the simplest pass and has a well-defined shape. The gen/cleanup/audit prompts remain stubs pending the deeper prose-craft session.
This commit is contained in:
parent
b32938ef43
commit
39e991240a
3 changed files with 247 additions and 0 deletions
|
|
@ -66,6 +66,8 @@ pub enum PassKind {
|
||||||
Cleanup,
|
Cleanup,
|
||||||
/// Canon audit across parent + sequel. Outputs findings JSON.
|
/// Canon audit across parent + sequel. Outputs findings JSON.
|
||||||
Audit,
|
Audit,
|
||||||
|
/// Chapter summary for cheap context loading on long series.
|
||||||
|
Summary,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PassKind {
|
impl PassKind {
|
||||||
|
|
@ -74,6 +76,7 @@ impl PassKind {
|
||||||
Self::Gen => "gen",
|
Self::Gen => "gen",
|
||||||
Self::Cleanup => "cleanup",
|
Self::Cleanup => "cleanup",
|
||||||
Self::Audit => "audit",
|
Self::Audit => "audit",
|
||||||
|
Self::Summary => "summary",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -143,8 +146,46 @@ impl Forge {
|
||||||
let duration_ms = r.duration_ms;
|
let duration_ms = r.duration_ms;
|
||||||
Ok(PassOutput { kind: PassKind::Audit, result: r, duration_ms })
|
Ok(PassOutput { kind: PassKind::Audit, result: r, duration_ms })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Summarize one chapter to ~250 words. The summary feeds into
|
||||||
|
/// the continuation context for older chapters so the token
|
||||||
|
/// budget stays sane on long series (book 12 doesn't carry book 1
|
||||||
|
/// in full prose; carries summaries of books 1-10 + full prose of
|
||||||
|
/// books 11-12).
|
||||||
|
///
|
||||||
|
/// Unlike gen/cleanup/audit, summarize has a real prompt template
|
||||||
|
/// shipped here — summarization is a simple, well-defined task
|
||||||
|
/// and doesn't need the prose-craft TODO treatment.
|
||||||
|
pub async fn summarize(&self, chapter_body_md: &str, chapter_label: &str) -> anyhow::Result<PassOutput> {
|
||||||
|
let prompt = format!(
|
||||||
|
"Summarize the following chapter in ~250 words for use as future \
|
||||||
|
sequel context. Capture: (1) plot beats in order, (2) character \
|
||||||
|
developments and emotional shifts, (3) setting changes, (4) any \
|
||||||
|
explicit or implied unresolved threads, (5) the chapter's \
|
||||||
|
closing position for each named character.\n\nReturn prose only \
|
||||||
|
— no headings, no bullet lists, no commentary about the task. \
|
||||||
|
Write as if you're handing this to another author who needs to \
|
||||||
|
write the next chapter without re-reading this one.\n\n\
|
||||||
|
## {chapter_label}\n\n{chapter_body_md}"
|
||||||
|
);
|
||||||
|
let body = RunRequest {
|
||||||
|
prompt,
|
||||||
|
model: Some(self.model.clone()),
|
||||||
|
system: Some(SYSTEM_SUMMARIZE.to_string()),
|
||||||
|
timeout_secs: Some(300),
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let r = self.client.run(body).await?;
|
||||||
|
let duration_ms = r.duration_ms;
|
||||||
|
Ok(PassOutput { kind: PassKind::Summary, result: r, duration_ms })
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// System prompt sent with every summarize request: frames the model as a
/// continuity assistant that compresses events without commentary.
const SYSTEM_SUMMARIZE: &str = concat!(
    "You are a continuity assistant for a long-form fiction author. ",
    "You write chapter summaries that future authors of sequels will read to understand what happened. ",
    "Be specific. Names, dates, locations. ",
    "Don't editorialize — just compress the events.",
);
|
||||||
|
|
||||||
fn build_request(model: &str, kind: PassKind, primary: &str, context: &str, system: &str) -> RunRequest {
|
fn build_request(model: &str, kind: PassKind, primary: &str, context: &str, system: &str) -> RunRequest {
|
||||||
let prompt = format!(
|
let prompt = format!(
|
||||||
"# Pass: {kind}\n\n## Context\n\n{context}\n\n## Input\n\n{primary}",
|
"# Pass: {kind}\n\n## Context\n\n{context}\n\n## Input\n\n{primary}",
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
mod import;
|
mod import;
|
||||||
mod serve;
|
mod serve;
|
||||||
mod show_context;
|
mod show_context;
|
||||||
|
mod summarize;
|
||||||
|
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::process::ExitCode;
|
use std::process::ExitCode;
|
||||||
|
|
@ -60,6 +61,18 @@ enum Cmd {
|
||||||
#[arg(long, default_value = "3")]
|
#[arg(long, default_value = "3")]
|
||||||
recent: usize,
|
recent: usize,
|
||||||
},
|
},
|
||||||
|
/// Generate per-chapter summaries via clawdforge so older chapters
|
||||||
|
/// carry their own digests in continuation context. First real
|
||||||
|
/// LLM-touching subcommand. Requires CLAWDFORGE_URL + CLAWDFORGE_TOKEN.
|
||||||
|
Summarize {
|
||||||
|
/// Story to summarize.
|
||||||
|
#[arg(long)]
|
||||||
|
story: Uuid,
|
||||||
|
/// Re-summarize chapters that already have summaries. Defaults
|
||||||
|
/// to skipping them.
|
||||||
|
#[arg(long)]
|
||||||
|
force: bool,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
|
@ -84,6 +97,9 @@ async fn run() -> anyhow::Result<()> {
|
||||||
Cmd::ShowContext { story, recent } => {
|
Cmd::ShowContext { story, recent } => {
|
||||||
show_context::run(&cli.database_url, story, recent).await
|
show_context::run(&cli.database_url, story, recent).await
|
||||||
}
|
}
|
||||||
|
Cmd::Summarize { story, force } => {
|
||||||
|
summarize::run(&cli.database_url, story, !force).await
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
190
skald/src/summarize.rs
Normal file
190
skald/src/summarize.rs
Normal file
|
|
@ -0,0 +1,190 @@
|
||||||
|
//! `skald summarize` subcommand.
|
||||||
|
//!
|
||||||
|
//! Generates per-chapter summaries so [`ContinuationContext`]'s older
|
||||||
|
//! chapters carry their own ~250-word digests instead of placeholder
|
||||||
|
//! text. This is the first real `clawdforge` call in skald — every
|
||||||
|
//! other subcommand is DB-only.
|
||||||
|
//!
|
||||||
|
//! [`ContinuationContext`]: skald_core::context::ContinuationContext
|
||||||
|
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
use anyhow::{Context, bail};
|
||||||
|
use chrono::Utc;
|
||||||
|
use skald_core::config::ForgeConfig;
|
||||||
|
use skald_core::db;
|
||||||
|
use skald_core::forge::{Forge, PassKind};
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
pub async fn run(
|
||||||
|
database_url: &str,
|
||||||
|
story_id: Uuid,
|
||||||
|
only_missing: bool,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let cfg = load_forge_config()?;
|
||||||
|
tracing::info!(
|
||||||
|
base_url = %cfg.base_url,
|
||||||
|
model = %cfg.model,
|
||||||
|
"forge configured",
|
||||||
|
);
|
||||||
|
|
||||||
|
let pool = db::connect_and_migrate(database_url).await?;
|
||||||
|
let forge = Forge::new(&cfg)?;
|
||||||
|
|
||||||
|
let chapters = pick_chapters(&pool, story_id, only_missing).await?;
|
||||||
|
if chapters.is_empty() {
|
||||||
|
tracing::info!(story_id = %story_id, "no chapters need summarizing");
|
||||||
|
println!("nothing to do");
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
story_id = %story_id,
|
||||||
|
count = chapters.len(),
|
||||||
|
"starting summarize pass",
|
||||||
|
);
|
||||||
|
|
||||||
|
for (chapter_id, n, title, body_md) in &chapters {
|
||||||
|
summarize_one(&pool, &forge, story_id, *chapter_id, *n, title.as_deref(), body_md).await?;
|
||||||
|
}
|
||||||
|
|
||||||
|
println!("summarized {} chapter(s) for story {story_id}", chapters.len());
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_forge_config() -> anyhow::Result<ForgeConfig> {
|
||||||
|
let base_url = std::env::var("CLAWDFORGE_URL")
|
||||||
|
.context("CLAWDFORGE_URL not set — point at clawdforge HTTP service")?;
|
||||||
|
let app_token = std::env::var("CLAWDFORGE_TOKEN")
|
||||||
|
.context("CLAWDFORGE_TOKEN not set — see vault 'clawdforge — skald app token'")?;
|
||||||
|
let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
|
||||||
|
Ok(ForgeConfig {
|
||||||
|
base_url,
|
||||||
|
app_token,
|
||||||
|
model,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn pick_chapters(
|
||||||
|
pool: &PgPool,
|
||||||
|
story_id: Uuid,
|
||||||
|
only_missing: bool,
|
||||||
|
) -> anyhow::Result<Vec<(Uuid, i32, Option<String>, String)>> {
|
||||||
|
let sql = if only_missing {
|
||||||
|
"SELECT c.id, c.n, c.title, c.body_md
|
||||||
|
FROM chapters c
|
||||||
|
LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id
|
||||||
|
WHERE c.story_id = $1 AND cs.chapter_id IS NULL
|
||||||
|
ORDER BY c.n"
|
||||||
|
} else {
|
||||||
|
"SELECT id, n, title, body_md FROM chapters
|
||||||
|
WHERE story_id = $1
|
||||||
|
ORDER BY n"
|
||||||
|
};
|
||||||
|
let rows = sqlx::query_as::<_, (Uuid, i32, Option<String>, String)>(sql)
|
||||||
|
.bind(story_id)
|
||||||
|
.fetch_all(pool)
|
||||||
|
.await?;
|
||||||
|
Ok(rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn summarize_one(
|
||||||
|
pool: &PgPool,
|
||||||
|
forge: &Forge,
|
||||||
|
story_id: Uuid,
|
||||||
|
chapter_id: Uuid,
|
||||||
|
n: i32,
|
||||||
|
title: Option<&str>,
|
||||||
|
body_md: &str,
|
||||||
|
) -> anyhow::Result<()> {
|
||||||
|
let label = match title {
|
||||||
|
Some(t) => format!("Chapter {n} — {t}"),
|
||||||
|
None => format!("Chapter {n}"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let run_id: Uuid = sqlx::query_scalar(
|
||||||
|
"INSERT INTO generation_runs (story_id, kind, status)
|
||||||
|
VALUES ($1, $2, 'running')
|
||||||
|
RETURNING id",
|
||||||
|
)
|
||||||
|
.bind(story_id)
|
||||||
|
.bind(PassKind::Summary.as_str())
|
||||||
|
.fetch_one(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
let started = Instant::now();
|
||||||
|
tracing::info!(
|
||||||
|
chapter = %label,
|
||||||
|
chapter_id = %chapter_id,
|
||||||
|
run_id = %run_id,
|
||||||
|
body_chars = body_md.len(),
|
||||||
|
"summarizing",
|
||||||
|
);
|
||||||
|
|
||||||
|
let result = forge.summarize(body_md, &label).await;
|
||||||
|
|
||||||
|
let elapsed_ms = started.elapsed().as_millis() as i64;
|
||||||
|
|
||||||
|
let output = match result {
|
||||||
|
Ok(o) => o,
|
||||||
|
Err(e) => {
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE generation_runs SET status='failed', error=$1, ended_at=$2
|
||||||
|
WHERE id=$3",
|
||||||
|
)
|
||||||
|
.bind(format!("{e:#}"))
|
||||||
|
.bind(Utc::now())
|
||||||
|
.bind(run_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
bail!("forge call failed for {label}: {e}");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let summary_text = output
|
||||||
|
.result
|
||||||
|
.as_text()
|
||||||
|
.or_else(|| output.result.result.as_str())
|
||||||
|
.map(|s| s.to_string())
|
||||||
|
.unwrap_or_else(|| output.result.result.to_string());
|
||||||
|
|
||||||
|
if summary_text.trim().is_empty() {
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE generation_runs SET status='failed', error='empty response', ended_at=$1
|
||||||
|
WHERE id=$2",
|
||||||
|
)
|
||||||
|
.bind(Utc::now())
|
||||||
|
.bind(run_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
bail!("forge returned empty summary for {label}");
|
||||||
|
}
|
||||||
|
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO chapter_summaries (chapter_id, body)
|
||||||
|
VALUES ($1, $2)
|
||||||
|
ON CONFLICT (chapter_id) DO UPDATE SET body = EXCLUDED.body, generated_at = now()",
|
||||||
|
)
|
||||||
|
.bind(chapter_id)
|
||||||
|
.bind(&summary_text)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
sqlx::query(
|
||||||
|
"UPDATE generation_runs SET status='succeeded', ended_at=$1
|
||||||
|
WHERE id=$2",
|
||||||
|
)
|
||||||
|
.bind(Utc::now())
|
||||||
|
.bind(run_id)
|
||||||
|
.execute(pool)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
chapter = %label,
|
||||||
|
summary_chars = summary_text.len(),
|
||||||
|
elapsed_ms,
|
||||||
|
"summary stored",
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue