//! Context assembly. DB rows → markdown blob the forge passes to //! clawdforge as the LLM's working set. //! //! For now this module has one entry point: [`ContinuationContext::assemble`] //! pulls everything an Opus sequel pass needs to honor canon — //! characters, canon facts, per-chapter summaries, and the FULL prose //! of the most recent chapters. The "≥85% of parent" rule lands here //! by reading the last N full chapters; older chapters fall back to //! their per-chapter summary so the token budget doesn't explode on //! book 12. //! //! Render order in the markdown is deliberate: //! 1. Series + parent title (cheap orientation) //! 2. Characters (real + fictional, decomposed) — most-referenced data //! 3. Canon facts (setting / mystery / theme / hooks) //! 4. Chapter summaries (oldest → newest, full series so far) //! 5. Recent chapters (most-recent N, full prose) //! //! Opus consumes the blob with the most-condensed data first and the //! richest detail last — by the time it's writing the new chapter, //! the previous chapter's prose is freshest in the context window. use anyhow::Context; use serde::Serialize; use sqlx::PgPool; use uuid::Uuid; /// Everything needed for a sequel-gen + canon-audit pass. #[derive(Debug, Clone, Serialize)] pub struct ContinuationContext { pub parent_story_id: Uuid, pub parent_title: String, pub series_name: Option, pub parent_word_count: i32, pub characters_real: Vec, pub characters_fictional: Vec, pub canon_facts: Vec, pub chapter_summaries: Vec, pub recent_chapters: Vec, } #[derive(Debug, Clone, Serialize)] pub struct CharacterRef { pub name: String, pub kind: String, pub key_facts: String, } #[derive(Debug, Clone, Serialize)] pub struct CanonFactRef { pub category: String, pub title: String, pub body: String, } #[derive(Debug, Clone, Serialize)] pub struct ChapterSummaryRef { pub n: i32, pub title: Option, pub summary: String, } #[derive(Debug, Clone, Serialize)] pub struct ChapterRef { pub n: i32, pub title: Option, pub body_md: String, pub word_count: i32, } impl ContinuationContext { /// Pull a continuation-ready context for `parent_story_id`. The /// last `recent_n` chapters come back with full prose; everything /// older comes back as per-chapter summaries. /// /// If a chapter older than the recent window has no summary, it's /// returned as a placeholder summary noting the gap — the operator /// can either back-fill a summary or accept that the context for /// that chapter is "Chapter K — summary not yet generated." pub async fn assemble( pool: &PgPool, parent_story_id: Uuid, recent_n: usize, ) -> anyhow::Result { let (parent_title, series_name, parent_word_count) = sqlx::query_as::< _, (String, Option, i32), >( "SELECT title, series_name, word_count_actual FROM stories WHERE id = $1", ) .bind(parent_story_id) .fetch_optional(pool) .await? .with_context(|| format!("story {parent_story_id} not found"))?; let chars: Vec<(String, String, String)> = sqlx::query_as( "SELECT name, kind, key_facts FROM characters WHERE story_id = $1 ORDER BY kind, name", ) .bind(parent_story_id) .fetch_all(pool) .await?; let mut characters_real: Vec = Vec::new(); let mut characters_fictional: Vec = Vec::new(); for (name, kind, key_facts) in chars { let r = CharacterRef { name, kind: kind.clone(), key_facts, }; if kind == "real" { characters_real.push(r); } else { characters_fictional.push(r); } } let canon_facts: Vec = sqlx::query_as::<_, (String, String, String)>( "SELECT category, title, body FROM canon_facts WHERE story_id = $1 ORDER BY category, title", ) .bind(parent_story_id) .fetch_all(pool) .await? .into_iter() .map(|(category, title, body)| CanonFactRef { category, title, body, }) .collect(); // Total chapter count → split: last `recent_n` get full prose, // earlier chapters get summaries (or placeholders). let total_chapters: i64 = sqlx::query_scalar("SELECT count(*) FROM chapters WHERE story_id = $1") .bind(parent_story_id) .fetch_one(pool) .await?; let recent_n = recent_n as i64; let summary_threshold = (total_chapters - recent_n).max(0); let summary_rows: Vec<(i32, Option, Option)> = sqlx::query_as( "SELECT c.n, c.title, cs.body FROM chapters c LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id WHERE c.story_id = $1 AND c.n <= $2 ORDER BY c.n", ) .bind(parent_story_id) .bind(summary_threshold as i32) .fetch_all(pool) .await?; let chapter_summaries: Vec = summary_rows .into_iter() .map(|(n, title, body)| ChapterSummaryRef { n, title, summary: body.unwrap_or_else(|| { format!( "Chapter {n} — summary not yet generated. Consider back-filling for cleaner sequel context." ) }), }) .collect(); let recent_rows: Vec<(i32, Option, String, i32)> = sqlx::query_as( "SELECT n, title, body_md, word_count FROM chapters WHERE story_id = $1 AND n > $2 ORDER BY n", ) .bind(parent_story_id) .bind(summary_threshold as i32) .fetch_all(pool) .await?; let recent_chapters: Vec = recent_rows .into_iter() .map(|(n, title, body_md, word_count)| ChapterRef { n, title, body_md, word_count, }) .collect(); Ok(Self { parent_story_id, parent_title, series_name, parent_word_count, characters_real, characters_fictional, canon_facts, chapter_summaries, recent_chapters, }) } /// Render the context as a markdown blob suitable for handing to /// the forge as the LLM's working context. pub fn render_markdown(&self) -> String { let mut out = String::new(); out.push_str(&format!( "# Continuing series: {}\n\n", self.series_name.as_deref().unwrap_or(&self.parent_title) )); out.push_str(&format!( "**Parent story:** {} ({} words)\n\n", self.parent_title, self.parent_word_count )); out.push_str("## Characters — real historical figures\n\n"); if self.characters_real.is_empty() { out.push_str("_(none)_\n\n"); } else { for c in &self.characters_real { out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts)); } out.push('\n'); } out.push_str("## Characters — fictional\n\n"); if self.characters_fictional.is_empty() { out.push_str("_(none)_\n\n"); } else { for c in &self.characters_fictional { out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts)); } out.push('\n'); } // Group canon facts by category. let mut by_category: std::collections::BTreeMap<&str, Vec<&CanonFactRef>> = std::collections::BTreeMap::new(); for fact in &self.canon_facts { by_category .entry(fact.category.as_str()) .or_default() .push(fact); } for (category, facts) in &by_category { out.push_str(&format!("## Canon — {}\n\n", category)); for fact in facts { out.push_str(&format!("### {}\n\n{}\n\n", fact.title, fact.body)); } } if !self.chapter_summaries.is_empty() { out.push_str("## Earlier chapters — summaries\n\n"); for s in &self.chapter_summaries { let title = s.title.as_deref().unwrap_or(""); out.push_str(&format!("### Chapter {} {}\n\n{}\n\n", s.n, title, s.summary)); } } if !self.recent_chapters.is_empty() { out.push_str("## Recent chapters — full prose\n\n"); for c in &self.recent_chapters { let title = c.title.as_deref().unwrap_or(""); out.push_str(&format!( "### Chapter {} {} ({} words)\n\n{}\n\n", c.n, title, c.word_count, c.body_md )); } } out } /// Total prose word-count carried in `recent_chapters`. Useful for /// the "are we above the 85%-of-parent" check before firing a gen /// pass. pub fn recent_word_total(&self) -> i32 { self.recent_chapters.iter().map(|c| c.word_count).sum() } /// Raw-prose coverage: opus-readable words / parent words. /// Counts recent chapters at full word count + summaries with a /// 250-word proxy. Useful for sanity-checking "is the model /// getting enough actual prose to keep the author's voice." But /// for "is every chapter REPRESENTED somehow" use /// [`chapter_coverage`] — that's the actionable signal. /// /// [`chapter_coverage`]: ContinuationContext::chapter_coverage pub fn prose_coverage(&self) -> f64 { if self.parent_word_count == 0 { return 0.0; } let recent = self.recent_word_total() as f64; let summaries_proxy = (self.chapter_summaries.len() as f64) * 250.0; let total_covered = recent + summaries_proxy; let parent = self.parent_word_count as f64; (total_covered / parent).min(1.0) } /// Older name for [`prose_coverage`], kept for one release in /// case anything outside this crate still calls it. #[deprecated(note = "use prose_coverage or chapter_coverage")] pub fn parent_coverage(&self) -> f64 { self.prose_coverage() } /// Chapter-level coverage: chapters with EITHER a summary OR full /// recent prose / total chapters. The "is the parent fully /// represented in the context blob" signal. With well-written /// summaries this should be 1.0 on a stable parent. pub fn chapter_coverage(&self) -> f64 { let total = self.chapter_summaries.len() + self.recent_chapters.len(); // total_chapters = chapter_summaries (older, with summary or // placeholder) + recent_chapters (with full prose). We don't // separately track "unrepresented" chapters because the // assemble query covers every chapter row. if total == 0 { return 0.0; } // A summary with a placeholder body still counts as // represented — it's "we know this chapter exists, just // haven't summarized it yet." That's fine for the metric; // the operator-facing warning lives elsewhere. 1.0 } }