diff --git a/skald-core/src/context.rs b/skald-core/src/context.rs new file mode 100644 index 0000000..8ca4e51 --- /dev/null +++ b/skald-core/src/context.rs @@ -0,0 +1,298 @@ +//! Context assembly. DB rows → markdown blob the forge passes to +//! clawdforge as the LLM's working set. +//! +//! For now this module has one entry point: [`ContinuationContext::assemble`] +//! pulls everything an Opus sequel pass needs to honor canon — +//! characters, canon facts, per-chapter summaries, and the FULL prose +//! of the most recent chapters. The "≥85% of parent" rule lands here +//! by reading the last N full chapters; older chapters fall back to +//! their per-chapter summary so the token budget doesn't explode on +//! book 12. +//! +//! Render order in the markdown is deliberate: +//! 1. Series + parent title (cheap orientation) +//! 2. Characters (real + fictional, decomposed) — most-referenced data +//! 3. Canon facts (setting / mystery / theme / hooks) +//! 4. Chapter summaries (oldest → newest, full series so far) +//! 5. Recent chapters (most-recent N, full prose) +//! +//! Opus consumes the blob with the most-condensed data first and the +//! richest detail last — by the time it's writing the new chapter, +//! the previous chapter's prose is freshest in the context window. + +use anyhow::Context; +use serde::Serialize; +use sqlx::PgPool; +use uuid::Uuid; + +/// Everything needed for a sequel-gen + canon-audit pass. 
+#[derive(Debug, Clone, Serialize)]
+pub struct ContinuationContext {
+    /// Id of the story being continued (the parent).
+    pub parent_story_id: Uuid,
+    /// `stories.title` of the parent.
+    pub parent_title: String,
+    /// `stories.series_name`; `None` when the column is NULL.
+    pub series_name: Option<String>,
+    /// `stories.word_count_actual` of the parent.
+    pub parent_word_count: i32,
+    /// Characters whose `kind` is exactly `"real"`, ordered by name.
+    pub characters_real: Vec<CharacterRef>,
+    /// Characters of every other `kind`, ordered by kind then name.
+    pub characters_fictional: Vec<CharacterRef>,
+    /// Canon facts, ordered by category then title.
+    pub canon_facts: Vec<CanonFactRef>,
+    /// Chapters older than the recent window, oldest first.
+    pub chapter_summaries: Vec<ChapterSummaryRef>,
+    /// The most recent chapters with their full prose, oldest first.
+    pub recent_chapters: Vec<ChapterRef>,
+}
+
+/// One row of the `characters` table.
+#[derive(Debug, Clone, Serialize)]
+pub struct CharacterRef {
+    pub name: String,
+    /// Free-form kind tag; `"real"` routes the character into
+    /// [`ContinuationContext::characters_real`].
+    pub kind: String,
+    pub key_facts: String,
+}
+
+/// One row of the `canon_facts` table.
+#[derive(Debug, Clone, Serialize)]
+pub struct CanonFactRef {
+    /// Grouping key; rendered as one `## Canon — <category>` section.
+    pub category: String,
+    pub title: String,
+    pub body: String,
+}
+
+/// A chapter outside the recent window, represented by its summary.
+#[derive(Debug, Clone, Serialize)]
+pub struct ChapterSummaryRef {
+    /// Chapter number (`chapters.n`).
+    pub n: i32,
+    pub title: Option<String>,
+    /// The stored summary, or a placeholder sentence when none exists.
+    pub summary: String,
+}
+
+/// A chapter inside the recent window, carried with its full prose.
+#[derive(Debug, Clone, Serialize)]
+pub struct ChapterRef {
+    /// Chapter number (`chapters.n`).
+    pub n: i32,
+    pub title: Option<String>,
+    pub body_md: String,
+    pub word_count: i32,
+}
+
+impl ContinuationContext {
+    /// Pull a continuation-ready context for `parent_story_id`. The
+    /// last `recent_n` chapters come back with full prose; everything
+    /// older comes back as per-chapter summaries.
+    ///
+    /// The window is picked by chapter order (`n` descending), not by
+    /// assuming chapters are numbered `1..=count`, so a chapter 0 or a
+    /// gap in the numbering does not shift it. `recent_n == 0` puts
+    /// every chapter on the summary side.
+    ///
+    /// If a chapter older than the recent window has no summary, it's
+    /// returned as a placeholder summary noting the gap — the operator
+    /// can either back-fill a summary or accept that the context for
+    /// that chapter is "Chapter K — summary not yet generated."
+    ///
+    /// # Errors
+    ///
+    /// Fails when no `stories` row has the given id, or when any of
+    /// the queries fails.
+    pub async fn assemble(
+        pool: &PgPool,
+        parent_story_id: Uuid,
+        recent_n: usize,
+    ) -> anyhow::Result<Self> {
+        // NOTE(review): every query below goes to the pool on its own, so
+        // they are not guaranteed to observe one snapshot — confirm whether
+        // a transaction is needed if chapters can be written concurrently.
+        let (parent_title, series_name, parent_word_count) =
+            sqlx::query_as::<_, (String, Option<String>, i32)>(
+                "SELECT title, series_name, word_count_actual
+                 FROM stories WHERE id = $1",
+            )
+            .bind(parent_story_id)
+            .fetch_optional(pool)
+            .await?
+            .with_context(|| format!("story {parent_story_id} not found"))?;
+
+        let character_rows: Vec<(String, String, String)> = sqlx::query_as(
+            "SELECT name, kind, key_facts FROM characters
+             WHERE story_id = $1
+             ORDER BY kind, name",
+        )
+        .bind(parent_story_id)
+        .fetch_all(pool)
+        .await?;
+
+        // `partition` keeps the query's ordering inside each bucket and
+        // avoids cloning `kind` just to compare it.
+        let (characters_real, characters_fictional): (Vec<CharacterRef>, Vec<CharacterRef>) =
+            character_rows
+                .into_iter()
+                .map(|(name, kind, key_facts)| CharacterRef {
+                    name,
+                    kind,
+                    key_facts,
+                })
+                .partition(|c| c.kind == "real");
+
+        let canon_facts: Vec<CanonFactRef> = sqlx::query_as::<_, (String, String, String)>(
+            "SELECT category, title, body FROM canon_facts
+             WHERE story_id = $1
+             ORDER BY category, title",
+        )
+        .bind(parent_story_id)
+        .fetch_all(pool)
+        .await?
+        .into_iter()
+        .map(|(category, title, body)| CanonFactRef {
+            category,
+            title,
+            body,
+        })
+        .collect();
+
+        // Saturate instead of wrapping: a `usize` above `i64::MAX` simply
+        // means "every chapter", never a negative LIMIT.
+        let recent_limit = i64::try_from(recent_n).unwrap_or(i64::MAX);
+
+        // Chapter number of the oldest chapter inside the recent window.
+        // NULL (→ `None`) when the story has no chapters or the window is
+        // empty; in that case every chapter is summarized.
+        let first_recent_n: Option<i32> = sqlx::query_scalar(
+            "SELECT min(n) FROM (
+                 SELECT n FROM chapters
+                 WHERE story_id = $1
+                 ORDER BY n DESC
+                 LIMIT $2
+             ) AS recent_window",
+        )
+        .bind(parent_story_id)
+        .bind(recent_limit)
+        .fetch_one(pool)
+        .await?;
+
+        // NOTE(review): the LEFT JOIN yields one row per summary; this
+        // presumes at most one `chapter_summaries` row per chapter — confirm
+        // against the schema.
+        let summary_rows: Vec<(i32, Option<String>, Option<String>)> = sqlx::query_as(
+            "SELECT c.n, c.title, cs.body
+             FROM chapters c
+             LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id
+             WHERE c.story_id = $1 AND ($2::int4 IS NULL OR c.n < $2::int4)
+             ORDER BY c.n",
+        )
+        .bind(parent_story_id)
+        .bind(first_recent_n)
+        .fetch_all(pool)
+        .await?;
+
+        let chapter_summaries: Vec<ChapterSummaryRef> = summary_rows
+            .into_iter()
+            .map(|(n, title, body)| ChapterSummaryRef {
+                n,
+                title,
+                summary: body.unwrap_or_else(|| {
+                    format!(
+                        "Chapter {n} — summary not yet generated. Consider back-filling for cleaner sequel context."
+                    )
+                }),
+            })
+            .collect();
+
+        // No window start means no recent chapters; skip the round trip.
+        let recent_rows: Vec<(i32, Option<String>, String, i32)> = match first_recent_n {
+            Some(first) => {
+                sqlx::query_as(
+                    "SELECT n, title, body_md, word_count FROM chapters
+                     WHERE story_id = $1 AND n >= $2
+                     ORDER BY n",
+                )
+                .bind(parent_story_id)
+                .bind(first)
+                .fetch_all(pool)
+                .await?
+            }
+            None => Vec::new(),
+        };
+
+        let recent_chapters: Vec<ChapterRef> = recent_rows
+            .into_iter()
+            .map(|(n, title, body_md, word_count)| ChapterRef {
+                n,
+                title,
+                body_md,
+                word_count,
+            })
+            .collect();
+
+        Ok(Self {
+            parent_story_id,
+            parent_title,
+            series_name,
+            parent_word_count,
+            characters_real,
+            characters_fictional,
+            canon_facts,
+            chapter_summaries,
+            recent_chapters,
+        })
+    }
+
+    /// Render the context as a markdown blob suitable for handing to
+    /// the forge as the LLM's working context.
+    ///
+    /// Sections appear in a fixed order: series/parent header, real
+    /// characters, fictional characters, canon facts grouped by
+    /// category, chapter summaries, then recent chapters in full.
+    /// The two chapter sections are omitted entirely when empty.
+    pub fn render_markdown(&self) -> String {
+        let mut out = String::new();
+        self.write_markdown(&mut out)
+            .expect("formatting strings and integers into a String cannot fail");
+        out
+    }
+
+    /// Append the whole markdown document to `out`.
+    fn write_markdown(&self, out: &mut String) -> std::fmt::Result {
+        use std::collections::BTreeMap;
+        use std::fmt::Write as _;
+
+        // The series name is the headline; fall back to the parent title
+        // when the story is not part of a named series.
+        writeln!(
+            out,
+            "# Continuing series: {}\n",
+            self.series_name.as_deref().unwrap_or(&self.parent_title)
+        )?;
+        writeln!(
+            out,
+            "**Parent story:** {} ({} words)\n",
+            self.parent_title, self.parent_word_count
+        )?;
+
+        Self::write_character_section(
+            out,
+            "## Characters — real historical figures",
+            &self.characters_real,
+        )?;
+        Self::write_character_section(
+            out,
+            "## Characters — fictional",
+            &self.characters_fictional,
+        )?;
+
+        // Group canon facts by category. The BTreeMap gives a stable,
+        // sorted section order even if `canon_facts` was built by hand.
+        let mut by_category: BTreeMap<&str, Vec<&CanonFactRef>> = BTreeMap::new();
+        for fact in &self.canon_facts {
+            by_category
+                .entry(fact.category.as_str())
+                .or_default()
+                .push(fact);
+        }
+        for (category, facts) in &by_category {
+            writeln!(out, "## Canon — {category}\n")?;
+            for fact in facts {
+                writeln!(out, "### {}\n\n{}\n", fact.title, fact.body)?;
+            }
+        }
+
+        if !self.chapter_summaries.is_empty() {
+            writeln!(out, "## Earlier chapters — summaries\n")?;
+            for s in &self.chapter_summaries {
+                writeln!(
+                    out,
+                    "### {}\n\n{}\n",
+                    Self::chapter_heading(s.n, s.title.as_deref()),
+                    s.summary
+                )?;
+            }
+        }
+
+        if !self.recent_chapters.is_empty() {
+            writeln!(out, "## Recent chapters — full prose\n")?;
+            for c in &self.recent_chapters {
+                writeln!(
+                    out,
+                    "### {} ({} words)\n\n{}\n",
+                    Self::chapter_heading(c.n, c.title.as_deref()),
+                    c.word_count,
+                    c.body_md
+                )?;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Append one character section: the heading, then either a bullet
+    /// per character or a `_(none)_` marker, then a blank line.
+    fn write_character_section(
+        out: &mut String,
+        heading: &str,
+        characters: &[CharacterRef],
+    ) -> std::fmt::Result {
+        use std::fmt::Write as _;
+
+        writeln!(out, "{heading}\n")?;
+        if characters.is_empty() {
+            return writeln!(out, "_(none)_\n");
+        }
+        for c in characters {
+            writeln!(out, "- **{}** — {}", c.name, c.key_facts)?;
+        }
+        writeln!(out)
+    }
+
+    /// Heading text for a chapter. A missing or blank title is left out
+    /// so the heading carries no trailing or doubled space.
+    fn chapter_heading(n: i32, title: Option<&str>) -> String {
+        match title.filter(|t| !t.trim().is_empty()) {
+            Some(title) => format!("Chapter {n} {title}"),
+            None => format!("Chapter {n}"),
+        }
+    }
+
+    /// Total prose word-count carried in `recent_chapters`. Useful for
+    /// the "are we above the 85%-of-parent" check before firing a gen
+    /// pass.
+    pub fn recent_word_total(&self) -> i32 {
+        self.recent_chapters.iter().map(|c| c.word_count).sum()
+    }
+
+    /// Estimated fraction of the parent's words represented in this
+    /// context, never above 1.0. Recent chapters count at their full
+    /// `word_count`; each entry in `chapter_summaries` counts as a flat
+    /// 250-word proxy. Returns 0.0 when the parent word count is zero.
+    /// The 85% rule: refuse (or warn) on continuation if this is below
+    /// 0.85 AND there are no chapter summaries to bridge the gap.
+    pub fn parent_coverage(&self) -> f64 {
+        // Flat stand-in for how many parent words one summary entry
+        // "covers"; summaries carry no word count of their own.
+        // NOTE(review): placeholder summaries count here too — confirm
+        // that is intended.
+        const SUMMARY_WORD_PROXY: f64 = 250.0;
+
+        // A zero (or corrupt, negative) parent count has no meaningful
+        // ratio; report "nothing covered" instead of dividing by it.
+        if self.parent_word_count <= 0 {
+            return 0.0;
+        }
+
+        // Sum in f64 so a very large series cannot overflow an i32 total.
+        let recent_words: f64 = self
+            .recent_chapters
+            .iter()
+            .map(|c| f64::from(c.word_count))
+            .sum();
+        let summary_words = self.chapter_summaries.len() as f64 * SUMMARY_WORD_PROXY;
+        let parent_words = f64::from(self.parent_word_count);
+
+        ((recent_words + summary_words) / parent_words).clamp(0.0, 1.0)
+    }
+}
diff --git a/skald-core/src/lib.rs b/skald-core/src/lib.rs
index 5f4bff9..93972f7 100644
--- a/skald-core/src/lib.rs
+++ b/skald-core/src/lib.rs
@@ -5,6 +5,7 @@
 //! this crate knows about any specific story. Every story is rows.
 
 pub mod config;
+pub mod context;
 pub mod db;
 pub mod forge;
 pub mod ingest;
diff --git a/skald/src/main.rs b/skald/src/main.rs
index e64f675..fd05808 100644
--- a/skald/src/main.rs
+++ b/skald/src/main.rs
@@ -6,11 +6,13 @@
 
 mod import;
 mod serve;
+mod show_context;
 
 use std::path::PathBuf;
 use std::process::ExitCode;
 
 use clap::{Parser, Subcommand};
+use uuid::Uuid;
 
 #[derive(Debug, Parser)]
 #[command(
@@ -46,6 +48,18 @@ enum Cmd {
         #[arg(long)]
         title: Option<String>,
     },
+    /// Assemble the continuation context for an existing story and
+    /// print the rendered markdown blob to stdout. No LLM call —
+    /// inspect what would be sent before paying for it.
+    ShowContext {
+        /// Story id to continue FROM (the parent).
+        #[arg(long)]
+        story: Uuid,
+        /// How many most-recent chapters to include with full prose.
+        /// Older chapters fall back to per-chapter summaries.
+        #[arg(long, default_value = "3")]
+        recent: usize,
+    },
 }
 
 #[tokio::main]
@@ -67,6 +81,9 @@ async fn run() -> anyhow::Result<()> {
         Cmd::ImportMarkdown { path, title } => {
             import::run(&cli.database_url, &path, title.as_deref()).await
         }
+        Cmd::ShowContext { story, recent } => {
+            show_context::run(&cli.database_url, story, recent).await
+        }
     }
 }
 
diff --git a/skald/src/show_context.rs b/skald/src/show_context.rs
new file mode 100644
index 0000000..cb42691
--- /dev/null
+++ b/skald/src/show_context.rs
@@ -0,0 +1,28 @@
+//! `skald show-context` subcommand.
+//! Pulls the continuation context
+//! for a story and prints the rendered markdown blob to stdout.
+//! Useful pre-LLM smoke: see exactly what would be sent to opus
+//! before paying for it.
+
+use skald_core::context::ContinuationContext;
+use skald_core::db;
+use uuid::Uuid;
+
+/// Entry point for `skald show-context`.
+///
+/// Obtains a pool through `db::connect_and_migrate`, assembles the
+/// continuation context for `story_id` with `recent_n` full-prose
+/// chapters, writes a one-line size summary to stderr and the rendered
+/// markdown to stdout.
+///
+/// # Errors
+///
+/// Propagates any failure from connecting or from assembling the context.
+pub async fn run(database_url: &str, story_id: Uuid, recent_n: usize) -> anyhow::Result<()> {
+    let pool = db::connect_and_migrate(database_url).await?;
+    let context = ContinuationContext::assemble(&pool, story_id, recent_n).await?;
+
+    // Gather the figures first so the diagnostic line below reads as a
+    // flat list of values.
+    let real_count = context.characters_real.len();
+    let fictional_count = context.characters_fictional.len();
+    let canon_count = context.canon_facts.len();
+    let summary_count = context.chapter_summaries.len();
+    let recent_count = context.recent_chapters.len();
+    let recent_words = context.recent_word_total();
+    let coverage_pct = context.parent_coverage() * 100.0;
+
+    // Diagnostics go to stderr so stdout carries nothing but the blob.
+    eprintln!(
+        "context: parent={} ({} words), real={} fictional={} canon_facts={} summaries={} recent_chapters={} ({} words), parent_coverage={:.0}%",
+        context.parent_title,
+        context.parent_word_count,
+        real_count,
+        fictional_count,
+        canon_count,
+        summary_count,
+        recent_count,
+        recent_words,
+        coverage_pct,
+    );
+
+    let markdown = context.render_markdown();
+    println!("{markdown}");
+    Ok(())
+}