The old parent_coverage was raw-prose / parent-words — a signal of 'how much actual prose opus is reading.' But the more actionable signal is 'is every chapter represented somehow' which sits at 1.0 for any parent with summaries (or placeholders) for older chapters. Add chapter_coverage = 1.0 when every chapter has either a summary or full-recent-prose row in the context. Keep prose_coverage as the precise raw-words metric for ops that care about token budget. Deprecate parent_coverage with a one-release shim (renames to prose_coverage). show_context CLI prints both percentages.
329 lines
12 KiB
Rust
329 lines
12 KiB
Rust
//! Context assembly: DB rows → the markdown blob the forge passes to
//! clawdforge as the LLM's working set.
//!
//! For now this module has one entry point: [`ContinuationContext::assemble`]
//! pulls everything an Opus sequel pass needs to honor canon —
//! characters, canon facts, per-chapter summaries, and the FULL prose
//! of the most recent chapters. The "≥85% of parent" rule lands here
//! by reading the last N chapters in full; older chapters fall back to
//! their per-chapter summary so the token budget doesn't explode on
//! book 12.
//!
//! Render order in the markdown is deliberate:
//!
//! 1. Series + parent title (cheap orientation)
//! 2. Characters (real + fictional, decomposed) — most-referenced data
//! 3. Canon facts (setting / mystery / theme / hooks)
//! 4. Chapter summaries (oldest → newest, full series so far)
//! 5. Recent chapters (most-recent N, full prose)
//!
//! Opus consumes the blob with the most-condensed data first and the
//! richest detail last — by the time it's writing the new chapter,
//! the previous chapter's prose is freshest in the context window.
|
|
|
|
use anyhow::Context;
|
|
use serde::Serialize;
|
|
use sqlx::PgPool;
|
|
use uuid::Uuid;
|
|
|
|
/// Everything needed for a sequel-gen + canon-audit pass.
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct ContinuationContext {
|
|
pub parent_story_id: Uuid,
|
|
pub parent_title: String,
|
|
pub series_name: Option<String>,
|
|
pub parent_word_count: i32,
|
|
pub characters_real: Vec<CharacterRef>,
|
|
pub characters_fictional: Vec<CharacterRef>,
|
|
pub canon_facts: Vec<CanonFactRef>,
|
|
pub chapter_summaries: Vec<ChapterSummaryRef>,
|
|
pub recent_chapters: Vec<ChapterRef>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct CharacterRef {
|
|
pub name: String,
|
|
pub kind: String,
|
|
pub key_facts: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct CanonFactRef {
|
|
pub category: String,
|
|
pub title: String,
|
|
pub body: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct ChapterSummaryRef {
|
|
pub n: i32,
|
|
pub title: Option<String>,
|
|
pub summary: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize)]
|
|
pub struct ChapterRef {
|
|
pub n: i32,
|
|
pub title: Option<String>,
|
|
pub body_md: String,
|
|
pub word_count: i32,
|
|
}
|
|
|
|
impl ContinuationContext {
|
|
/// Pull a continuation-ready context for `parent_story_id`. The
|
|
/// last `recent_n` chapters come back with full prose; everything
|
|
/// older comes back as per-chapter summaries.
|
|
///
|
|
/// If a chapter older than the recent window has no summary, it's
|
|
/// returned as a placeholder summary noting the gap — the operator
|
|
/// can either back-fill a summary or accept that the context for
|
|
/// that chapter is "Chapter K — summary not yet generated."
|
|
pub async fn assemble(
|
|
pool: &PgPool,
|
|
parent_story_id: Uuid,
|
|
recent_n: usize,
|
|
) -> anyhow::Result<Self> {
|
|
let (parent_title, series_name, parent_word_count) = sqlx::query_as::<
|
|
_,
|
|
(String, Option<String>, i32),
|
|
>(
|
|
"SELECT title, series_name, word_count_actual
|
|
FROM stories WHERE id = $1",
|
|
)
|
|
.bind(parent_story_id)
|
|
.fetch_optional(pool)
|
|
.await?
|
|
.with_context(|| format!("story {parent_story_id} not found"))?;
|
|
|
|
let chars: Vec<(String, String, String)> = sqlx::query_as(
|
|
"SELECT name, kind, key_facts FROM characters
|
|
WHERE story_id = $1
|
|
ORDER BY kind, name",
|
|
)
|
|
.bind(parent_story_id)
|
|
.fetch_all(pool)
|
|
.await?;
|
|
|
|
let mut characters_real: Vec<CharacterRef> = Vec::new();
|
|
let mut characters_fictional: Vec<CharacterRef> = Vec::new();
|
|
for (name, kind, key_facts) in chars {
|
|
let r = CharacterRef {
|
|
name,
|
|
kind: kind.clone(),
|
|
key_facts,
|
|
};
|
|
if kind == "real" {
|
|
characters_real.push(r);
|
|
} else {
|
|
characters_fictional.push(r);
|
|
}
|
|
}
|
|
|
|
let canon_facts: Vec<CanonFactRef> = sqlx::query_as::<_, (String, String, String)>(
|
|
"SELECT category, title, body FROM canon_facts
|
|
WHERE story_id = $1
|
|
ORDER BY category, title",
|
|
)
|
|
.bind(parent_story_id)
|
|
.fetch_all(pool)
|
|
.await?
|
|
.into_iter()
|
|
.map(|(category, title, body)| CanonFactRef {
|
|
category,
|
|
title,
|
|
body,
|
|
})
|
|
.collect();
|
|
|
|
// Total chapter count → split: last `recent_n` get full prose,
|
|
// earlier chapters get summaries (or placeholders).
|
|
let total_chapters: i64 =
|
|
sqlx::query_scalar("SELECT count(*) FROM chapters WHERE story_id = $1")
|
|
.bind(parent_story_id)
|
|
.fetch_one(pool)
|
|
.await?;
|
|
let recent_n = recent_n as i64;
|
|
let summary_threshold = (total_chapters - recent_n).max(0);
|
|
|
|
let summary_rows: Vec<(i32, Option<String>, Option<String>)> = sqlx::query_as(
|
|
"SELECT c.n, c.title, cs.body
|
|
FROM chapters c
|
|
LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id
|
|
WHERE c.story_id = $1 AND c.n <= $2
|
|
ORDER BY c.n",
|
|
)
|
|
.bind(parent_story_id)
|
|
.bind(summary_threshold as i32)
|
|
.fetch_all(pool)
|
|
.await?;
|
|
|
|
let chapter_summaries: Vec<ChapterSummaryRef> = summary_rows
|
|
.into_iter()
|
|
.map(|(n, title, body)| ChapterSummaryRef {
|
|
n,
|
|
title,
|
|
summary: body.unwrap_or_else(|| {
|
|
format!(
|
|
"Chapter {n} — summary not yet generated. Consider back-filling for cleaner sequel context."
|
|
)
|
|
}),
|
|
})
|
|
.collect();
|
|
|
|
let recent_rows: Vec<(i32, Option<String>, String, i32)> = sqlx::query_as(
|
|
"SELECT n, title, body_md, word_count FROM chapters
|
|
WHERE story_id = $1 AND n > $2
|
|
ORDER BY n",
|
|
)
|
|
.bind(parent_story_id)
|
|
.bind(summary_threshold as i32)
|
|
.fetch_all(pool)
|
|
.await?;
|
|
|
|
let recent_chapters: Vec<ChapterRef> = recent_rows
|
|
.into_iter()
|
|
.map(|(n, title, body_md, word_count)| ChapterRef {
|
|
n,
|
|
title,
|
|
body_md,
|
|
word_count,
|
|
})
|
|
.collect();
|
|
|
|
Ok(Self {
|
|
parent_story_id,
|
|
parent_title,
|
|
series_name,
|
|
parent_word_count,
|
|
characters_real,
|
|
characters_fictional,
|
|
canon_facts,
|
|
chapter_summaries,
|
|
recent_chapters,
|
|
})
|
|
}
|
|
|
|
/// Render the context as a markdown blob suitable for handing to
|
|
/// the forge as the LLM's working context.
|
|
pub fn render_markdown(&self) -> String {
|
|
let mut out = String::new();
|
|
|
|
out.push_str(&format!(
|
|
"# Continuing series: {}\n\n",
|
|
self.series_name.as_deref().unwrap_or(&self.parent_title)
|
|
));
|
|
out.push_str(&format!(
|
|
"**Parent story:** {} ({} words)\n\n",
|
|
self.parent_title, self.parent_word_count
|
|
));
|
|
|
|
out.push_str("## Characters — real historical figures\n\n");
|
|
if self.characters_real.is_empty() {
|
|
out.push_str("_(none)_\n\n");
|
|
} else {
|
|
for c in &self.characters_real {
|
|
out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts));
|
|
}
|
|
out.push('\n');
|
|
}
|
|
|
|
out.push_str("## Characters — fictional\n\n");
|
|
if self.characters_fictional.is_empty() {
|
|
out.push_str("_(none)_\n\n");
|
|
} else {
|
|
for c in &self.characters_fictional {
|
|
out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts));
|
|
}
|
|
out.push('\n');
|
|
}
|
|
|
|
// Group canon facts by category.
|
|
let mut by_category: std::collections::BTreeMap<&str, Vec<&CanonFactRef>> =
|
|
std::collections::BTreeMap::new();
|
|
for fact in &self.canon_facts {
|
|
by_category
|
|
.entry(fact.category.as_str())
|
|
.or_default()
|
|
.push(fact);
|
|
}
|
|
for (category, facts) in &by_category {
|
|
out.push_str(&format!("## Canon — {}\n\n", category));
|
|
for fact in facts {
|
|
out.push_str(&format!("### {}\n\n{}\n\n", fact.title, fact.body));
|
|
}
|
|
}
|
|
|
|
if !self.chapter_summaries.is_empty() {
|
|
out.push_str("## Earlier chapters — summaries\n\n");
|
|
for s in &self.chapter_summaries {
|
|
let title = s.title.as_deref().unwrap_or("");
|
|
out.push_str(&format!("### Chapter {} {}\n\n{}\n\n", s.n, title, s.summary));
|
|
}
|
|
}
|
|
|
|
if !self.recent_chapters.is_empty() {
|
|
out.push_str("## Recent chapters — full prose\n\n");
|
|
for c in &self.recent_chapters {
|
|
let title = c.title.as_deref().unwrap_or("");
|
|
out.push_str(&format!(
|
|
"### Chapter {} {} ({} words)\n\n{}\n\n",
|
|
c.n, title, c.word_count, c.body_md
|
|
));
|
|
}
|
|
}
|
|
|
|
out
|
|
}
|
|
|
|
/// Total prose word-count carried in `recent_chapters`. Useful for
|
|
/// the "are we above the 85%-of-parent" check before firing a gen
|
|
/// pass.
|
|
pub fn recent_word_total(&self) -> i32 {
|
|
self.recent_chapters.iter().map(|c| c.word_count).sum()
|
|
}
|
|
|
|
/// Raw-prose coverage: opus-readable words / parent words.
|
|
/// Counts recent chapters at full word count + summaries with a
|
|
/// 250-word proxy. Useful for sanity-checking "is the model
|
|
/// getting enough actual prose to keep the author's voice." But
|
|
/// for "is every chapter REPRESENTED somehow" use
|
|
/// [`chapter_coverage`] — that's the actionable signal.
|
|
///
|
|
/// [`chapter_coverage`]: ContinuationContext::chapter_coverage
|
|
pub fn prose_coverage(&self) -> f64 {
|
|
if self.parent_word_count == 0 {
|
|
return 0.0;
|
|
}
|
|
let recent = self.recent_word_total() as f64;
|
|
let summaries_proxy = (self.chapter_summaries.len() as f64) * 250.0;
|
|
let total_covered = recent + summaries_proxy;
|
|
let parent = self.parent_word_count as f64;
|
|
(total_covered / parent).min(1.0)
|
|
}
|
|
|
|
/// Older name for [`prose_coverage`], kept for one release in
|
|
/// case anything outside this crate still calls it.
|
|
#[deprecated(note = "use prose_coverage or chapter_coverage")]
|
|
pub fn parent_coverage(&self) -> f64 {
|
|
self.prose_coverage()
|
|
}
|
|
|
|
/// Chapter-level coverage: chapters with EITHER a summary OR full
|
|
/// recent prose / total chapters. The "is the parent fully
|
|
/// represented in the context blob" signal. With well-written
|
|
/// summaries this should be 1.0 on a stable parent.
|
|
pub fn chapter_coverage(&self) -> f64 {
|
|
let total = self.chapter_summaries.len() + self.recent_chapters.len();
|
|
// total_chapters = chapter_summaries (older, with summary or
|
|
// placeholder) + recent_chapters (with full prose). We don't
|
|
// separately track "unrepresented" chapters because the
|
|
// assemble query covers every chapter row.
|
|
if total == 0 {
|
|
return 0.0;
|
|
}
|
|
// A summary with a placeholder body still counts as
|
|
// represented — it's "we know this chapter exists, just
|
|
// haven't summarized it yet." That's fine for the metric;
|
|
// the operator-facing warning lives elsewhere.
|
|
1.0
|
|
}
|
|
}
|