From 5b418369c002993716cdf09c20cc1635addb5102 Mon Sep 17 00:00:00 2001 From: Kayos Date: Wed, 13 May 2026 10:30:16 -0700 Subject: [PATCH] =?UTF-8?q?context:=20assemble=20DB=E2=86=92opus=20blob=20?= =?UTF-8?q?+=20skald=20show-context=20CLI?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit skald-core::context is the bridge between 'rows in postgres' and 'prompt-ready markdown blob.' ContinuationContext::assemble(pool, parent_story_id, recent_n) pulls: - parent story meta (title, series, total word count) - characters split real / fictional - canon_facts grouped by category - chapter summaries for everything older than the recent window - FULL prose for the last recent_n chapters render_markdown() formats it with the most-condensed data first (characters, canon) and the richest detail last (recent chapter prose). Opus reads it linearly, so by the time it's writing the new chapter, the previous chapter's prose is freshest in its context window. The 'continuation reads ≥85% of parent' rule lands here via parent_coverage(), which counts recent prose + summaries-as-proxy (250 words / summary) against parent word_count. The web UI / CLI can warn before firing a gen pass if coverage is below threshold. New CLI subcommand: skald show-context --story UUID --recent N Assembles + prints the blob to stdout (eprintln'd stats summary goes to stderr). No LLM call — pre-flight inspection so we see what would be sent before paying for it. Useful for prompt-eng work in the next session. 
Module structure now: skald-core/ config.rs ForgeConfig context.rs ContinuationContext (new) db.rs connect_and_migrate forge.rs Forge — three-pass orchestration ingest.rs markdown parser models.rs row types lib.rs MIGRATOR + module exports skald/ main.rs clap CLI serve.rs axum + /health + migrations import.rs skald import-markdown show_context.rs skald show-context (new) --- skald-core/src/context.rs | 298 ++++++++++++++++++++++++++++++++++++++ skald-core/src/lib.rs | 1 + skald/src/main.rs | 17 +++ skald/src/show_context.rs | 28 ++++ 4 files changed, 344 insertions(+) create mode 100644 skald-core/src/context.rs create mode 100644 skald/src/show_context.rs diff --git a/skald-core/src/context.rs b/skald-core/src/context.rs new file mode 100644 index 0000000..8ca4e51 --- /dev/null +++ b/skald-core/src/context.rs @@ -0,0 +1,298 @@ +//! Context assembly. DB rows → markdown blob the forge passes to +//! clawdforge as the LLM's working set. +//! +//! For now this module has one entry point, [`ContinuationContext::assemble`], +//! which pulls everything an Opus sequel pass needs to honor canon — +//! characters, canon facts, per-chapter summaries, and the FULL prose +//! of the most recent chapters. The "≥85% of parent" rule lands here +//! by reading the last N full chapters; older chapters fall back to +//! their per-chapter summary so the token budget doesn't explode on +//! book 12. +//! +//! Render order in the markdown is deliberate: +//! 1. Series + parent title (cheap orientation) +//! 2. Characters (real + fictional, decomposed) — most-referenced data +//! 3. Canon facts (setting / mystery / theme / hooks) +//! 4. Chapter summaries (oldest → newest, chapters older than the recent window) +//! 5. Recent chapters (most-recent N, full prose) +//! +//! Opus consumes the blob with the most-condensed data first and the +//! richest detail last — by the time it's writing the new chapter, +//! the previous chapter's prose is freshest in the context window. 
+ +use anyhow::Context; +use serde::Serialize; +use sqlx::PgPool; +use uuid::Uuid; + +/// Everything needed for a sequel-gen + canon-audit pass. +#[derive(Debug, Clone, Serialize)] +pub struct ContinuationContext { + pub parent_story_id: Uuid, + pub parent_title: String, + pub series_name: Option, + pub parent_word_count: i32, + pub characters_real: Vec, + pub characters_fictional: Vec, + pub canon_facts: Vec, + pub chapter_summaries: Vec, + pub recent_chapters: Vec, +} + +#[derive(Debug, Clone, Serialize)] +pub struct CharacterRef { + pub name: String, + pub kind: String, + pub key_facts: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct CanonFactRef { + pub category: String, + pub title: String, + pub body: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ChapterSummaryRef { + pub n: i32, + pub title: Option, + pub summary: String, +} + +#[derive(Debug, Clone, Serialize)] +pub struct ChapterRef { + pub n: i32, + pub title: Option, + pub body_md: String, + pub word_count: i32, +} + +impl ContinuationContext { + /// Pull a continuation-ready context for `parent_story_id`. The + /// last `recent_n` chapters come back with full prose; everything + /// older comes back as per-chapter summaries. + /// + /// If a chapter older than the recent window has no summary, it's + /// returned as a placeholder summary noting the gap — the operator + /// can either back-fill a summary or accept that the context for + /// that chapter is "Chapter K — summary not yet generated." + pub async fn assemble( + pool: &PgPool, + parent_story_id: Uuid, + recent_n: usize, + ) -> anyhow::Result { + let (parent_title, series_name, parent_word_count) = sqlx::query_as::< + _, + (String, Option, i32), + >( + "SELECT title, series_name, word_count_actual + FROM stories WHERE id = $1", + ) + .bind(parent_story_id) + .fetch_optional(pool) + .await? 
+ .with_context(|| format!("story {parent_story_id} not found"))?; + + let chars: Vec<(String, String, String)> = sqlx::query_as( + "SELECT name, kind, key_facts FROM characters + WHERE story_id = $1 + ORDER BY kind, name", + ) + .bind(parent_story_id) + .fetch_all(pool) + .await?; + + let mut characters_real: Vec = Vec::new(); + let mut characters_fictional: Vec = Vec::new(); + for (name, kind, key_facts) in chars { + let r = CharacterRef { + name, + kind: kind.clone(), + key_facts, + }; + if kind == "real" { + characters_real.push(r); + } else { + characters_fictional.push(r); + } + } + + let canon_facts: Vec = sqlx::query_as::<_, (String, String, String)>( + "SELECT category, title, body FROM canon_facts + WHERE story_id = $1 + ORDER BY category, title", + ) + .bind(parent_story_id) + .fetch_all(pool) + .await? + .into_iter() + .map(|(category, title, body)| CanonFactRef { + category, + title, + body, + }) + .collect(); + + // Total chapter count → split: last `recent_n` get full prose, + // earlier chapters get summaries (or placeholders). + let total_chapters: i64 = + sqlx::query_scalar("SELECT count(*) FROM chapters WHERE story_id = $1") + .bind(parent_story_id) + .fetch_one(pool) + .await?; + let recent_n = recent_n as i64; + let summary_threshold = (total_chapters - recent_n).max(0); + + let summary_rows: Vec<(i32, Option, Option)> = sqlx::query_as( + "SELECT c.n, c.title, cs.body + FROM chapters c + LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id + WHERE c.story_id = $1 AND c.n <= $2 + ORDER BY c.n", + ) + .bind(parent_story_id) + .bind(summary_threshold as i32) + .fetch_all(pool) + .await?; + + let chapter_summaries: Vec = summary_rows + .into_iter() + .map(|(n, title, body)| ChapterSummaryRef { + n, + title, + summary: body.unwrap_or_else(|| { + format!( + "Chapter {n} — summary not yet generated. Consider back-filling for cleaner sequel context." 
+ ) + }), + }) + .collect(); + + let recent_rows: Vec<(i32, Option, String, i32)> = sqlx::query_as( + "SELECT n, title, body_md, word_count FROM chapters + WHERE story_id = $1 AND n > $2 + ORDER BY n", + ) + .bind(parent_story_id) + .bind(summary_threshold as i32) + .fetch_all(pool) + .await?; + + let recent_chapters: Vec = recent_rows + .into_iter() + .map(|(n, title, body_md, word_count)| ChapterRef { + n, + title, + body_md, + word_count, + }) + .collect(); + + Ok(Self { + parent_story_id, + parent_title, + series_name, + parent_word_count, + characters_real, + characters_fictional, + canon_facts, + chapter_summaries, + recent_chapters, + }) + } + + /// Render the context as a markdown blob suitable for handing to + /// the forge as the LLM's working context. + pub fn render_markdown(&self) -> String { + let mut out = String::new(); + + out.push_str(&format!( + "# Continuing series: {}\n\n", + self.series_name.as_deref().unwrap_or(&self.parent_title) + )); + out.push_str(&format!( + "**Parent story:** {} ({} words)\n\n", + self.parent_title, self.parent_word_count + )); + + out.push_str("## Characters — real historical figures\n\n"); + if self.characters_real.is_empty() { + out.push_str("_(none)_\n\n"); + } else { + for c in &self.characters_real { + out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts)); + } + out.push('\n'); + } + + out.push_str("## Characters — fictional\n\n"); + if self.characters_fictional.is_empty() { + out.push_str("_(none)_\n\n"); + } else { + for c in &self.characters_fictional { + out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts)); + } + out.push('\n'); + } + + // Group canon facts by category. 
+ let mut by_category: std::collections::BTreeMap<&str, Vec<&CanonFactRef>> = + std::collections::BTreeMap::new(); + for fact in &self.canon_facts { + by_category + .entry(fact.category.as_str()) + .or_default() + .push(fact); + } + for (category, facts) in &by_category { + out.push_str(&format!("## Canon — {}\n\n", category)); + for fact in facts { + out.push_str(&format!("### {}\n\n{}\n\n", fact.title, fact.body)); + } + } + + if !self.chapter_summaries.is_empty() { + out.push_str("## Earlier chapters — summaries\n\n"); + for s in &self.chapter_summaries { + let title = s.title.as_deref().unwrap_or(""); + out.push_str(&format!("### Chapter {} {}\n\n{}\n\n", s.n, title, s.summary)); + } + } + + if !self.recent_chapters.is_empty() { + out.push_str("## Recent chapters — full prose\n\n"); + for c in &self.recent_chapters { + let title = c.title.as_deref().unwrap_or(""); + out.push_str(&format!( + "### Chapter {} {} ({} words)\n\n{}\n\n", + c.n, title, c.word_count, c.body_md + )); + } + } + + out + } + + /// Total prose word-count carried in `recent_chapters`. Useful for + /// the "are we above the 85%-of-parent" check before firing a gen + /// pass. + pub fn recent_word_total(&self) -> i32 { + self.recent_chapters.iter().map(|c| c.word_count).sum() + } + + /// Ratio of recent-prose words to total parent words. 1.0 = the + /// recent window covers the entire parent. The 85% rule: refuse + /// (or warn) on continuation if this is below 0.85 AND there are + /// no chapter summaries to bridge the gap. 
+ pub fn parent_coverage(&self) -> f64 { + if self.parent_word_count == 0 { + return 0.0; + } + let recent = self.recent_word_total() as f64; + let summaries_proxy = (self.chapter_summaries.len() as f64) * 250.0; + let total_covered = recent + summaries_proxy; + let parent = self.parent_word_count as f64; + (total_covered / parent).min(1.0) + } +} diff --git a/skald-core/src/lib.rs b/skald-core/src/lib.rs index 5f4bff9..93972f7 100644 --- a/skald-core/src/lib.rs +++ b/skald-core/src/lib.rs @@ -5,6 +5,7 @@ //! this crate knows about any specific story. Every story is rows. pub mod config; +pub mod context; pub mod db; pub mod forge; pub mod ingest; diff --git a/skald/src/main.rs b/skald/src/main.rs index e64f675..fd05808 100644 --- a/skald/src/main.rs +++ b/skald/src/main.rs @@ -6,11 +6,13 @@ mod import; mod serve; +mod show_context; use std::path::PathBuf; use std::process::ExitCode; use clap::{Parser, Subcommand}; +use uuid::Uuid; #[derive(Debug, Parser)] #[command( @@ -46,6 +48,18 @@ enum Cmd { #[arg(long)] title: Option, }, + /// Assemble the continuation context for an existing story and + /// print the rendered markdown blob to stdout. No LLM call — + /// inspect what would be sent before paying for it. + ShowContext { + /// Story id to continue FROM (the parent). + #[arg(long)] + story: Uuid, + /// How many most-recent chapters to include with full prose. + /// Older chapters fall back to per-chapter summaries. + #[arg(long, default_value = "3")] + recent: usize, + }, } #[tokio::main] @@ -67,6 +81,9 @@ async fn run() -> anyhow::Result<()> { Cmd::ImportMarkdown { path, title } => { import::run(&cli.database_url, &path, title.as_deref()).await } + Cmd::ShowContext { story, recent } => { + show_context::run(&cli.database_url, story, recent).await + } } } diff --git a/skald/src/show_context.rs b/skald/src/show_context.rs new file mode 100644 index 0000000..cb42691 --- /dev/null +++ b/skald/src/show_context.rs @@ -0,0 +1,28 @@ +//! `skald show-context` subcommand. 
Pulls the continuation context +//! for a story and prints the rendered markdown blob to stdout. +//! Useful pre-LLM smoke: see exactly what would be sent to opus +//! before paying for it. + +use skald_core::context::ContinuationContext; +use skald_core::db; +use uuid::Uuid; + +pub async fn run(database_url: &str, story_id: Uuid, recent_n: usize) -> anyhow::Result<()> { + let pool = db::connect_and_migrate(database_url).await?; + let ctx = ContinuationContext::assemble(&pool, story_id, recent_n).await?; + + eprintln!( + "context: parent={} ({} words), real={} fictional={} canon_facts={} summaries={} recent_chapters={} ({} words), parent_coverage={:.0}%", + ctx.parent_title, + ctx.parent_word_count, + ctx.characters_real.len(), + ctx.characters_fictional.len(), + ctx.canon_facts.len(), + ctx.chapter_summaries.len(), + ctx.recent_chapters.len(), + ctx.recent_word_total(), + ctx.parent_coverage() * 100.0, + ); + println!("{}", ctx.render_markdown()); + Ok(()) +}