context: assemble DB→opus blob + skald show-context CLI
skald-core::context is the bridge between 'rows in postgres' and
'prompt-ready markdown blob.' ContinuationContext::assemble(pool,
parent_story_id, recent_n) pulls:
- parent story meta (title, series, total word count)
- characters split real / fictional
- canon_facts grouped by category
- chapter summaries for everything older than the recent window
- FULL prose for the last recent_n chapters
render_markdown() formats it with the most-condensed data first
(characters, canon) and the richest detail last (recent chapter
prose). Opus reads it linearly so by the time it's writing the new
chapter, the previous chapter's prose is freshest in its context
window.
The 'continuation reads ≥85% of parent' rule lands here via
parent_coverage() which counts recent prose + summaries-as-proxy
(250 words / summary) against parent word_count. The web UI / CLI
can warn before firing a gen pass if coverage is below threshold.
New CLI subcommand:
skald show-context --story <uuid> --recent <N>
Assembles + prints the blob to stdout (eprintln'd stats summary
goes to stderr). No LLM call — pre-flight inspection so we see
what would be sent before paying for it. Useful for prompt-eng
work in the next session.
Module structure now:
skald-core/
config.rs ForgeConfig
context.rs ContinuationContext (new)
db.rs connect_and_migrate
forge.rs Forge — three-pass orchestration
ingest.rs markdown parser
models.rs row types
lib.rs MIGRATOR + module exports
skald/
main.rs clap CLI
serve.rs axum + /health + migrations
import.rs skald import-markdown
show_context.rs skald show-context (new)
This commit is contained in:
parent
f71b533e52
commit
5b418369c0
4 changed files with 344 additions and 0 deletions
298
skald-core/src/context.rs
Normal file
298
skald-core/src/context.rs
Normal file
|
|
@ -0,0 +1,298 @@
|
|||
//! Context assembly. DB rows → markdown blob the forge passes to
|
||||
//! clawdforge as the LLM's working set.
|
||||
//!
|
||||
//! For now this module has one entry point: [`ContinuationContext::assemble`]
|
||||
//! pulls everything an Opus sequel pass needs to honor canon —
|
||||
//! characters, canon facts, per-chapter summaries, and the FULL prose
|
||||
//! of the most recent chapters. The "≥85% of parent" rule lands here
|
||||
//! by reading the last N full chapters; older chapters fall back to
|
||||
//! their per-chapter summary so the token budget doesn't explode on
|
||||
//! book 12.
|
||||
//!
|
||||
//! Render order in the markdown is deliberate:
|
||||
//! 1. Series + parent title (cheap orientation)
|
||||
//! 2. Characters (real + fictional, decomposed) — most-referenced data
|
||||
//! 3. Canon facts (setting / mystery / theme / hooks)
|
||||
//! 4. Chapter summaries (oldest → newest, every parent chapter older than the recent window)
|
||||
//! 5. Recent chapters (most-recent N, full prose)
|
||||
//!
|
||||
//! Opus consumes the blob with the most-condensed data first and the
|
||||
//! richest detail last — by the time it's writing the new chapter,
|
||||
//! the previous chapter's prose is freshest in the context window.
|
||||
|
||||
use anyhow::Context;
|
||||
use serde::Serialize;
|
||||
use sqlx::PgPool;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Everything needed for a sequel-gen + canon-audit pass.
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ContinuationContext {
|
||||
pub parent_story_id: Uuid,
|
||||
pub parent_title: String,
|
||||
pub series_name: Option<String>,
|
||||
pub parent_word_count: i32,
|
||||
pub characters_real: Vec<CharacterRef>,
|
||||
pub characters_fictional: Vec<CharacterRef>,
|
||||
pub canon_facts: Vec<CanonFactRef>,
|
||||
pub chapter_summaries: Vec<ChapterSummaryRef>,
|
||||
pub recent_chapters: Vec<ChapterRef>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CharacterRef {
|
||||
pub name: String,
|
||||
pub kind: String,
|
||||
pub key_facts: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct CanonFactRef {
|
||||
pub category: String,
|
||||
pub title: String,
|
||||
pub body: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ChapterSummaryRef {
|
||||
pub n: i32,
|
||||
pub title: Option<String>,
|
||||
pub summary: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize)]
|
||||
pub struct ChapterRef {
|
||||
pub n: i32,
|
||||
pub title: Option<String>,
|
||||
pub body_md: String,
|
||||
pub word_count: i32,
|
||||
}
|
||||
|
||||
impl ContinuationContext {
|
||||
/// Pull a continuation-ready context for `parent_story_id`. The
|
||||
/// last `recent_n` chapters come back with full prose; everything
|
||||
/// older comes back as per-chapter summaries.
|
||||
///
|
||||
/// If a chapter older than the recent window has no summary, it's
|
||||
/// returned as a placeholder summary noting the gap — the operator
|
||||
/// can either back-fill a summary or accept that the context for
|
||||
/// that chapter is "Chapter K — summary not yet generated."
|
||||
pub async fn assemble(
|
||||
pool: &PgPool,
|
||||
parent_story_id: Uuid,
|
||||
recent_n: usize,
|
||||
) -> anyhow::Result<Self> {
|
||||
let (parent_title, series_name, parent_word_count) = sqlx::query_as::<
|
||||
_,
|
||||
(String, Option<String>, i32),
|
||||
>(
|
||||
"SELECT title, series_name, word_count_actual
|
||||
FROM stories WHERE id = $1",
|
||||
)
|
||||
.bind(parent_story_id)
|
||||
.fetch_optional(pool)
|
||||
.await?
|
||||
.with_context(|| format!("story {parent_story_id} not found"))?;
|
||||
|
||||
let chars: Vec<(String, String, String)> = sqlx::query_as(
|
||||
"SELECT name, kind, key_facts FROM characters
|
||||
WHERE story_id = $1
|
||||
ORDER BY kind, name",
|
||||
)
|
||||
.bind(parent_story_id)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let mut characters_real: Vec<CharacterRef> = Vec::new();
|
||||
let mut characters_fictional: Vec<CharacterRef> = Vec::new();
|
||||
for (name, kind, key_facts) in chars {
|
||||
let r = CharacterRef {
|
||||
name,
|
||||
kind: kind.clone(),
|
||||
key_facts,
|
||||
};
|
||||
if kind == "real" {
|
||||
characters_real.push(r);
|
||||
} else {
|
||||
characters_fictional.push(r);
|
||||
}
|
||||
}
|
||||
|
||||
let canon_facts: Vec<CanonFactRef> = sqlx::query_as::<_, (String, String, String)>(
|
||||
"SELECT category, title, body FROM canon_facts
|
||||
WHERE story_id = $1
|
||||
ORDER BY category, title",
|
||||
)
|
||||
.bind(parent_story_id)
|
||||
.fetch_all(pool)
|
||||
.await?
|
||||
.into_iter()
|
||||
.map(|(category, title, body)| CanonFactRef {
|
||||
category,
|
||||
title,
|
||||
body,
|
||||
})
|
||||
.collect();
|
||||
|
||||
// Total chapter count → split: last `recent_n` get full prose,
|
||||
// earlier chapters get summaries (or placeholders).
|
||||
let total_chapters: i64 =
|
||||
sqlx::query_scalar("SELECT count(*) FROM chapters WHERE story_id = $1")
|
||||
.bind(parent_story_id)
|
||||
.fetch_one(pool)
|
||||
.await?;
|
||||
let recent_n = recent_n as i64;
|
||||
let summary_threshold = (total_chapters - recent_n).max(0);
|
||||
|
||||
let summary_rows: Vec<(i32, Option<String>, Option<String>)> = sqlx::query_as(
|
||||
"SELECT c.n, c.title, cs.body
|
||||
FROM chapters c
|
||||
LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id
|
||||
WHERE c.story_id = $1 AND c.n <= $2
|
||||
ORDER BY c.n",
|
||||
)
|
||||
.bind(parent_story_id)
|
||||
.bind(summary_threshold as i32)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let chapter_summaries: Vec<ChapterSummaryRef> = summary_rows
|
||||
.into_iter()
|
||||
.map(|(n, title, body)| ChapterSummaryRef {
|
||||
n,
|
||||
title,
|
||||
summary: body.unwrap_or_else(|| {
|
||||
format!(
|
||||
"Chapter {n} — summary not yet generated. Consider back-filling for cleaner sequel context."
|
||||
)
|
||||
}),
|
||||
})
|
||||
.collect();
|
||||
|
||||
let recent_rows: Vec<(i32, Option<String>, String, i32)> = sqlx::query_as(
|
||||
"SELECT n, title, body_md, word_count FROM chapters
|
||||
WHERE story_id = $1 AND n > $2
|
||||
ORDER BY n",
|
||||
)
|
||||
.bind(parent_story_id)
|
||||
.bind(summary_threshold as i32)
|
||||
.fetch_all(pool)
|
||||
.await?;
|
||||
|
||||
let recent_chapters: Vec<ChapterRef> = recent_rows
|
||||
.into_iter()
|
||||
.map(|(n, title, body_md, word_count)| ChapterRef {
|
||||
n,
|
||||
title,
|
||||
body_md,
|
||||
word_count,
|
||||
})
|
||||
.collect();
|
||||
|
||||
Ok(Self {
|
||||
parent_story_id,
|
||||
parent_title,
|
||||
series_name,
|
||||
parent_word_count,
|
||||
characters_real,
|
||||
characters_fictional,
|
||||
canon_facts,
|
||||
chapter_summaries,
|
||||
recent_chapters,
|
||||
})
|
||||
}
|
||||
|
||||
/// Render the context as a markdown blob suitable for handing to
|
||||
/// the forge as the LLM's working context.
|
||||
pub fn render_markdown(&self) -> String {
|
||||
let mut out = String::new();
|
||||
|
||||
out.push_str(&format!(
|
||||
"# Continuing series: {}\n\n",
|
||||
self.series_name.as_deref().unwrap_or(&self.parent_title)
|
||||
));
|
||||
out.push_str(&format!(
|
||||
"**Parent story:** {} ({} words)\n\n",
|
||||
self.parent_title, self.parent_word_count
|
||||
));
|
||||
|
||||
out.push_str("## Characters — real historical figures\n\n");
|
||||
if self.characters_real.is_empty() {
|
||||
out.push_str("_(none)_\n\n");
|
||||
} else {
|
||||
for c in &self.characters_real {
|
||||
out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts));
|
||||
}
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
out.push_str("## Characters — fictional\n\n");
|
||||
if self.characters_fictional.is_empty() {
|
||||
out.push_str("_(none)_\n\n");
|
||||
} else {
|
||||
for c in &self.characters_fictional {
|
||||
out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts));
|
||||
}
|
||||
out.push('\n');
|
||||
}
|
||||
|
||||
// Group canon facts by category.
|
||||
let mut by_category: std::collections::BTreeMap<&str, Vec<&CanonFactRef>> =
|
||||
std::collections::BTreeMap::new();
|
||||
for fact in &self.canon_facts {
|
||||
by_category
|
||||
.entry(fact.category.as_str())
|
||||
.or_default()
|
||||
.push(fact);
|
||||
}
|
||||
for (category, facts) in &by_category {
|
||||
out.push_str(&format!("## Canon — {}\n\n", category));
|
||||
for fact in facts {
|
||||
out.push_str(&format!("### {}\n\n{}\n\n", fact.title, fact.body));
|
||||
}
|
||||
}
|
||||
|
||||
if !self.chapter_summaries.is_empty() {
|
||||
out.push_str("## Earlier chapters — summaries\n\n");
|
||||
for s in &self.chapter_summaries {
|
||||
let title = s.title.as_deref().unwrap_or("");
|
||||
out.push_str(&format!("### Chapter {} {}\n\n{}\n\n", s.n, title, s.summary));
|
||||
}
|
||||
}
|
||||
|
||||
if !self.recent_chapters.is_empty() {
|
||||
out.push_str("## Recent chapters — full prose\n\n");
|
||||
for c in &self.recent_chapters {
|
||||
let title = c.title.as_deref().unwrap_or("");
|
||||
out.push_str(&format!(
|
||||
"### Chapter {} {} ({} words)\n\n{}\n\n",
|
||||
c.n, title, c.word_count, c.body_md
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
out
|
||||
}
|
||||
|
||||
/// Total prose word-count carried in `recent_chapters`. Useful for
|
||||
/// the "are we above the 85%-of-parent" check before firing a gen
|
||||
/// pass.
|
||||
pub fn recent_word_total(&self) -> i32 {
|
||||
self.recent_chapters.iter().map(|c| c.word_count).sum()
|
||||
}
|
||||
|
||||
/// Ratio of recent-prose words to total parent words. 1.0 = the
|
||||
/// recent window covers the entire parent. The 85% rule: refuse
|
||||
/// (or warn) on continuation if this is below 0.85 AND there are
|
||||
/// no chapter summaries to bridge the gap.
|
||||
pub fn parent_coverage(&self) -> f64 {
|
||||
if self.parent_word_count == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
let recent = self.recent_word_total() as f64;
|
||||
let summaries_proxy = (self.chapter_summaries.len() as f64) * 250.0;
|
||||
let total_covered = recent + summaries_proxy;
|
||||
let parent = self.parent_word_count as f64;
|
||||
(total_covered / parent).min(1.0)
|
||||
}
|
||||
}
|
||||
|
|
@ -5,6 +5,7 @@
|
|||
//! this crate knows about any specific story. Every story is rows.
|
||||
|
||||
pub mod config;
|
||||
pub mod context;
|
||||
pub mod db;
|
||||
pub mod forge;
|
||||
pub mod ingest;
|
||||
|
|
|
|||
|
|
@ -6,11 +6,13 @@
|
|||
|
||||
mod import;
|
||||
mod serve;
|
||||
mod show_context;
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::process::ExitCode;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
use uuid::Uuid;
|
||||
|
||||
#[derive(Debug, Parser)]
|
||||
#[command(
|
||||
|
|
@ -46,6 +48,18 @@ enum Cmd {
|
|||
#[arg(long)]
|
||||
title: Option<String>,
|
||||
},
|
||||
/// Assemble the continuation context for an existing story and
|
||||
/// print the rendered markdown blob to stdout. No LLM call —
|
||||
/// inspect what would be sent before paying for it.
|
||||
ShowContext {
|
||||
/// Story id to continue FROM (the parent).
|
||||
#[arg(long)]
|
||||
story: Uuid,
|
||||
/// How many most-recent chapters to include with full prose.
|
||||
/// Older chapters fall back to per-chapter summaries.
|
||||
#[arg(long, default_value = "3")]
|
||||
recent: usize,
|
||||
},
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -67,6 +81,9 @@ async fn run() -> anyhow::Result<()> {
|
|||
Cmd::ImportMarkdown { path, title } => {
|
||||
import::run(&cli.database_url, &path, title.as_deref()).await
|
||||
}
|
||||
Cmd::ShowContext { story, recent } => {
|
||||
show_context::run(&cli.database_url, story, recent).await
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
28
skald/src/show_context.rs
Normal file
28
skald/src/show_context.rs
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
//! `skald show-context` subcommand. Pulls the continuation context
|
||||
//! for a story and prints the rendered markdown blob to stdout.
|
||||
//! Useful pre-LLM smoke: see exactly what would be sent to opus
|
||||
//! before paying for it.
|
||||
|
||||
use skald_core::context::ContinuationContext;
|
||||
use skald_core::db;
|
||||
use uuid::Uuid;
|
||||
|
||||
pub async fn run(database_url: &str, story_id: Uuid, recent_n: usize) -> anyhow::Result<()> {
|
||||
let pool = db::connect_and_migrate(database_url).await?;
|
||||
let ctx = ContinuationContext::assemble(&pool, story_id, recent_n).await?;
|
||||
|
||||
eprintln!(
|
||||
"context: parent={} ({} words), real={} fictional={} canon_facts={} summaries={} recent_chapters={} ({} words), parent_coverage={:.0}%",
|
||||
ctx.parent_title,
|
||||
ctx.parent_word_count,
|
||||
ctx.characters_real.len(),
|
||||
ctx.characters_fictional.len(),
|
||||
ctx.canon_facts.len(),
|
||||
ctx.chapter_summaries.len(),
|
||||
ctx.recent_chapters.len(),
|
||||
ctx.recent_word_total(),
|
||||
ctx.parent_coverage() * 100.0,
|
||||
);
|
||||
println!("{}", ctx.render_markdown());
|
||||
Ok(())
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue