skald/skald-core/src/context.rs

//! Context assembly. DB rows → markdown blob the forge passes to
//! clawdforge as the LLM's working set.
//!
//! For now this module has one entry point: [`ContinuationContext::assemble`]
//! pulls everything an Opus sequel pass needs to honor canon —
//! characters, canon facts, per-chapter summaries, and the FULL prose
//! of the most recent chapters. The "≥85% of parent" rule lands here
//! by reading the last N full chapters; older chapters fall back to
//! their per-chapter summary so the token budget doesn't explode on
//! book 12.
//!
//! Render order in the markdown is deliberate:
//!   1. Series + parent title (cheap orientation)
//!   2. Characters (real + fictional, decomposed) — most-referenced data
//!   3. Canon facts (setting / mystery / theme / hooks)
//!   4. Chapter summaries (oldest → newest, full series so far)
//!   5. Recent chapters (most-recent N, full prose)
//!
//! Opus consumes the blob with the most-condensed data first and the
//! richest detail last — by the time it's writing the new chapter,
//! the previous chapter's prose is freshest in the context window.

use anyhow::Context;
use serde::Serialize;
use sqlx::PgPool;
use uuid::Uuid;

/// Everything needed for a sequel-gen + canon-audit pass.
///
/// Populated from the database by [`ContinuationContext::assemble`] and
/// turned into the LLM working set by
/// [`ContinuationContext::render_markdown`].
#[derive(Debug, Clone, Serialize)]
pub struct ContinuationContext {
    /// Id of the story being continued (matched against `stories.id`).
    pub parent_story_id: Uuid,
    /// `stories.title` of the parent story.
    pub parent_title: String,
    /// `stories.series_name`; when `None`, the rendered top heading falls
    /// back to `parent_title`.
    pub series_name: Option<String>,
    /// `stories.word_count_actual` of the parent story; the denominator
    /// of the prose-coverage ratio.
    pub parent_word_count: i32,
    /// Characters whose `kind` is exactly `"real"`.
    pub characters_real: Vec<CharacterRef>,
    /// Characters with any other `kind` value.
    pub characters_fictional: Vec<CharacterRef>,
    /// Canon facts for the story, loaded ordered by category, then title.
    pub canon_facts: Vec<CanonFactRef>,
    /// One entry per chapter older than the recent window, ascending by
    /// chapter number. Chapters without a stored summary carry
    /// placeholder text instead.
    pub chapter_summaries: Vec<ChapterSummaryRef>,
    /// The most recent chapters with their full prose, ascending by
    /// chapter number.
    pub recent_chapters: Vec<ChapterRef>,
}

/// One row of the `characters` table, as carried in the context.
#[derive(Debug, Clone, Serialize)]
pub struct CharacterRef {
    /// Character name; rendered in bold at the start of its bullet.
    pub name: String,
    /// Free-form kind tag. Only the exact value `"real"` is treated
    /// specially by this module; every other value is bucketed as
    /// fictional.
    pub kind: String,
    /// Text rendered after the name on the character's bullet line.
    pub key_facts: String,
}

/// One row of the `canon_facts` table, as carried in the context.
#[derive(Debug, Clone, Serialize)]
pub struct CanonFactRef {
    /// Grouping key: the renderer emits one `## Canon — <category>`
    /// section per distinct value (presumably setting / mystery / theme /
    /// hooks, per the module docs — verify against the data).
    pub category: String,
    /// Rendered as the `###` heading of the fact.
    pub title: String,
    /// Rendered verbatim beneath the fact's heading.
    pub body: String,
}

/// Condensed stand-in for a chapter that falls outside the recent,
/// full-prose window.
#[derive(Debug, Clone, Serialize)]
pub struct ChapterSummaryRef {
    /// Chapter number (`chapters.n`).
    pub n: i32,
    /// Chapter title, if the chapter has one.
    pub title: Option<String>,
    /// Body of the joined `chapter_summaries` row, or placeholder text
    /// when the chapter has no summary row yet.
    pub summary: String,
}

/// A recent chapter carried with its full prose.
#[derive(Debug, Clone, Serialize)]
pub struct ChapterRef {
    /// Chapter number (`chapters.n`).
    pub n: i32,
    /// Chapter title, if the chapter has one.
    pub title: Option<String>,
    /// Full chapter text (`chapters.body_md`), rendered verbatim.
    pub body_md: String,
    /// Stored `chapters.word_count`; shown in the chapter heading and
    /// summed by `recent_word_total`.
    pub word_count: i32,
}

impl ContinuationContext {
    /// Pull a continuation-ready context for `parent_story_id`. The
    /// last `recent_n` chapters come back with full prose; everything
    /// older comes back as per-chapter summaries.
    ///
    /// If a chapter older than the recent window has no summary, it's
    /// returned as a placeholder summary noting the gap — the operator
    /// can either back-fill a summary or accept that the context for
    /// that chapter is "Chapter K — summary not yet generated."
    pub async fn assemble(
        pool: &PgPool,
        parent_story_id: Uuid,
        recent_n: usize,
    ) -> anyhow::Result<Self> {
        let (parent_title, series_name, parent_word_count) = sqlx::query_as::<
            _,
            (String, Option<String>, i32),
        >(
            "SELECT title, series_name, word_count_actual
             FROM stories WHERE id = $1",
        )
        .bind(parent_story_id)
        .fetch_optional(pool)
        .await?
        .with_context(|| format!("story {parent_story_id} not found"))?;

        let chars: Vec<(String, String, String)> = sqlx::query_as(
            "SELECT name, kind, key_facts FROM characters
             WHERE story_id = $1
             ORDER BY kind, name",
        )
        .bind(parent_story_id)
        .fetch_all(pool)
        .await?;

        let mut characters_real: Vec<CharacterRef> = Vec::new();
        let mut characters_fictional: Vec<CharacterRef> = Vec::new();
        for (name, kind, key_facts) in chars {
            let r = CharacterRef {
                name,
                kind: kind.clone(),
                key_facts,
            };
            if kind == "real" {
                characters_real.push(r);
            } else {
                characters_fictional.push(r);
            }
        }

        let canon_facts: Vec<CanonFactRef> = sqlx::query_as::<_, (String, String, String)>(
            "SELECT category, title, body FROM canon_facts
             WHERE story_id = $1
             ORDER BY category, title",
        )
        .bind(parent_story_id)
        .fetch_all(pool)
        .await?
        .into_iter()
        .map(|(category, title, body)| CanonFactRef {
            category,
            title,
            body,
        })
        .collect();

        // Total chapter count → split: last `recent_n` get full prose,
        // earlier chapters get summaries (or placeholders).
        let total_chapters: i64 =
            sqlx::query_scalar("SELECT count(*) FROM chapters WHERE story_id = $1")
                .bind(parent_story_id)
                .fetch_one(pool)
                .await?;
        let recent_n = recent_n as i64;
        let summary_threshold = (total_chapters - recent_n).max(0);

        let summary_rows: Vec<(i32, Option<String>, Option<String>)> = sqlx::query_as(
            "SELECT c.n, c.title, cs.body
             FROM chapters c
             LEFT JOIN chapter_summaries cs ON cs.chapter_id = c.id
             WHERE c.story_id = $1 AND c.n <= $2
             ORDER BY c.n",
        )
        .bind(parent_story_id)
        .bind(summary_threshold as i32)
        .fetch_all(pool)
        .await?;

        let chapter_summaries: Vec<ChapterSummaryRef> = summary_rows
            .into_iter()
            .map(|(n, title, body)| ChapterSummaryRef {
                n,
                title,
                summary: body.unwrap_or_else(|| {
                    format!(
                        "Chapter {n} — summary not yet generated. Consider back-filling for cleaner sequel context."
                    )
                }),
            })
            .collect();

        let recent_rows: Vec<(i32, Option<String>, String, i32)> = sqlx::query_as(
            "SELECT n, title, body_md, word_count FROM chapters
             WHERE story_id = $1 AND n > $2
             ORDER BY n",
        )
        .bind(parent_story_id)
        .bind(summary_threshold as i32)
        .fetch_all(pool)
        .await?;

        let recent_chapters: Vec<ChapterRef> = recent_rows
            .into_iter()
            .map(|(n, title, body_md, word_count)| ChapterRef {
                n,
                title,
                body_md,
                word_count,
            })
            .collect();

        Ok(Self {
            parent_story_id,
            parent_title,
            series_name,
            parent_word_count,
            characters_real,
            characters_fictional,
            canon_facts,
            chapter_summaries,
            recent_chapters,
        })
    }

    /// Render the context as a markdown blob suitable for handing to
    /// the forge as the LLM's working context.
    pub fn render_markdown(&self) -> String {
        let mut out = String::new();

        out.push_str(&format!(
            "# Continuing series: {}\n\n",
            self.series_name.as_deref().unwrap_or(&self.parent_title)
        ));
        out.push_str(&format!(
            "**Parent story:** {} ({} words)\n\n",
            self.parent_title, self.parent_word_count
        ));

        out.push_str("## Characters — real historical figures\n\n");
        if self.characters_real.is_empty() {
            out.push_str("_(none)_\n\n");
        } else {
            for c in &self.characters_real {
                out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts));
            }
            out.push('\n');
        }

        out.push_str("## Characters — fictional\n\n");
        if self.characters_fictional.is_empty() {
            out.push_str("_(none)_\n\n");
        } else {
            for c in &self.characters_fictional {
                out.push_str(&format!("- **{}** — {}\n", c.name, c.key_facts));
            }
            out.push('\n');
        }

        // Group canon facts by category.
        let mut by_category: std::collections::BTreeMap<&str, Vec<&CanonFactRef>> =
            std::collections::BTreeMap::new();
        for fact in &self.canon_facts {
            by_category
                .entry(fact.category.as_str())
                .or_default()
                .push(fact);
        }
        for (category, facts) in &by_category {
            out.push_str(&format!("## Canon — {}\n\n", category));
            for fact in facts {
                out.push_str(&format!("### {}\n\n{}\n\n", fact.title, fact.body));
            }
        }

        if !self.chapter_summaries.is_empty() {
            out.push_str("## Earlier chapters — summaries\n\n");
            for s in &self.chapter_summaries {
                let title = s.title.as_deref().unwrap_or("");
                out.push_str(&format!("### Chapter {} {}\n\n{}\n\n", s.n, title, s.summary));
            }
        }

        if !self.recent_chapters.is_empty() {
            out.push_str("## Recent chapters — full prose\n\n");
            for c in &self.recent_chapters {
                let title = c.title.as_deref().unwrap_or("");
                out.push_str(&format!(
                    "### Chapter {} {}  ({} words)\n\n{}\n\n",
                    c.n, title, c.word_count, c.body_md
                ));
            }
        }

        out
    }

    /// Sum of the stored `word_count` of every chapter in
    /// `recent_chapters`, i.e. how many words of full prose the context
    /// carries. Handy for the "are we above 85% of the parent" check
    /// before firing a generation pass.
    pub fn recent_word_total(&self) -> i32 {
        let mut words = 0;
        for chapter in &self.recent_chapters {
            words += chapter.word_count;
        }
        words
    }

    /// Raw-prose coverage: opus-readable words / parent words, as a
    /// ratio in `[0.0, 1.0]`.
    ///
    /// Recent chapters count at their full stored word count; every
    /// entry in `chapter_summaries` — placeholder entries included —
    /// counts as a flat 250-word proxy. Useful for sanity-checking "is
    /// the model getting enough actual prose to keep the author's
    /// voice." For "is every chapter REPRESENTED somehow" use
    /// [`chapter_coverage`] — that's the actionable signal.
    ///
    /// Returns `0.0` when the parent word count is zero or negative,
    /// since no meaningful ratio exists in that case.
    ///
    /// [`chapter_coverage`]: ContinuationContext::chapter_coverage
    pub fn prose_coverage(&self) -> f64 {
        // Stand-in word count credited to each summarized chapter.
        const SUMMARY_WORD_PROXY: f64 = 250.0;

        // `<= 0` rather than `== 0`: a bogus negative parent count would
        // otherwise produce a negative "coverage".
        if self.parent_word_count <= 0 {
            return 0.0;
        }

        let recent = f64::from(self.recent_word_total());
        let summaries_proxy = self.chapter_summaries.len() as f64 * SUMMARY_WORD_PROXY;
        let parent = f64::from(self.parent_word_count);

        // Clamp both ends: over-coverage caps at 1.0, and negative stored
        // chapter word counts cannot drag the ratio below 0.0.
        ((recent + summaries_proxy) / parent).clamp(0.0, 1.0)
    }

    /// Older name for [`prose_coverage`], kept for one release in
    /// case anything outside this crate still calls it.
    #[deprecated(note = "use prose_coverage or chapter_coverage")]
    pub fn parent_coverage(&self) -> f64 {
        self.prose_coverage()
    }

    /// Chapter-level coverage: the share of chapters represented in the
    /// context by EITHER a summary entry OR full recent prose — the "is
    /// the parent fully represented in the context blob" signal.
    ///
    /// Every chapter held by this context sits in exactly one of
    /// `chapter_summaries` or `recent_chapters`, and a placeholder
    /// summary still counts as represented ("we know the chapter exists,
    /// it just hasn't been summarized yet"). The result is therefore
    /// `1.0` whenever at least one chapter is present and `0.0` when
    /// there are none. The operator-facing warning about missing
    /// summaries lives elsewhere.
    pub fn chapter_coverage(&self) -> f64 {
        let no_chapters = self.chapter_summaries.is_empty() && self.recent_chapters.is_empty();
        if no_chapters {
            // Nothing to represent, so report zero rather than claim full
            // coverage of an empty story.
            0.0
        } else {
            1.0
        }
    }
}