Skald is a generic story-writer. The database is the product; the binary is the tooling. Everything story-specific lives in rows, not in code. cwho's monorepo + binary-per-role pattern transplanted to this domain.

What this commit ships:
- Cargo workspace (resolver=3, edition 2024): skald-core (lib) + skald (bin)
- Migration 0001: stories, characters, canon_facts, chapters, chapter_summaries, passages (vector(1536)), generation_runs, audit_findings, tags. pgvector + pg_trgm extensions. ivfflat index deferred until we have data (post-import the first ~1k passages and add the index).
- skald-core::ingest — markdown parser for the cwho/coast-down shape: '# Title' → '## Chapter N — date' headings → '# Continuity Bible' section with character roster (real + fictional sub-sections) + setting / mystery / historical / liberty / hook sub-sections. Decomposed into structured rows; original bullet body preserved in key_facts/body fields for fidelity. 6 unit tests cover the shape.
- skald-core::db — Postgres connection pool + migration runner.
- skald-core::models — row types via sqlx::FromRow.
- skald binary — clap CLI: 'serve' (http + migrations) and 'import-markdown' (one-shot ingest).
- Dockerfile — multi-stage: rust:1.95-bookworm builder, pgvector/pgvector:pg17 runtime, tini under PID 1, custom entrypoint.sh that boots embedded postgres then execs skald serve.
- compose.yml — singleton container, postgres data in volume, story corpus mounted read-only at /seed.

Decisions locked 2026-05-13:
1. DB in same container 'till we have a real working tool' (cobb)
2. postgres+pgvector (NOT sqlite) — keeps semantic-search story
3. Network-not-socket connection (postgresql://localhost:5432) from day one so future split is config-only, not code-rewrite

Not yet wired:
- Web UI
- clawdforge calls (gen → cleanup → canon-audit pipeline)
- Embedding pass
- TTS sidecar
189 lines
7.8 KiB
PL/PgSQL
189 lines
7.8 KiB
PL/PgSQL
-- Skald v0.1 schema.
--
-- The database is the source of truth. The writer is generic tooling
-- with nothing hardcoded about any specific story: every story is rows.
--
-- Extensions used by this schema:
--   vector  (pgvector) : embedding-based callback search across past prose
--   pg_trgm            : fuzzy character-name lookups

CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
|
|
|
-- stories: one row per story, or per sequel within a series.
--   parent_story_id : the story this one follows; chains a series.
--   root_story_id   : head of that chain, denormalized so a whole
--                     series can be scanned cheaply.
-- Both self-references are set to NULL (not cascaded) when the story
-- they point at is deleted.
CREATE TABLE stories (
    id                UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    title             TEXT        NOT NULL,
    status            TEXT        DEFAULT 'seed' NOT NULL
        CHECK (status IN (
            'seed',
            'draft',
            'generating',
            'cleaning',
            'auditing',
            'complete',
            'failed'
        )),
    prompt            TEXT,
    model             TEXT,
    parent_story_id   UUID
        REFERENCES stories (id) ON DELETE SET NULL,
    root_story_id     UUID
        REFERENCES stories (id) ON DELETE SET NULL,
    series_name       TEXT,
    word_count_target INTEGER,
    word_count_actual INTEGER     DEFAULT 0 NOT NULL,
    summary           TEXT,
    created_at        TIMESTAMPTZ DEFAULT now() NOT NULL,
    updated_at        TIMESTAMPTZ DEFAULT now() NOT NULL
);

-- Lookup paths: walk a series via parent/root, filter by lifecycle
-- status, and find stories by series name. The series index is
-- partial, so rows without a series_name are not indexed.
CREATE INDEX idx_stories_parent
    ON stories (parent_story_id);
CREATE INDEX idx_stories_root
    ON stories (root_story_id);
CREATE INDEX idx_stories_status
    ON stories (status);
CREATE INDEX idx_stories_series
    ON stories (series_name)
    WHERE series_name IS NOT NULL;
|
|
|
|
-- characters: the cast of a story, each either 'real' (historical) or
-- 'fictional'. The bible entry is decomposed into searchable columns,
-- while key_facts keeps the original prose blob for full fidelity.
-- Rows are removed together with their story.
CREATE TABLE characters (
    id                 UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    story_id           UUID        NOT NULL
        REFERENCES stories (id) ON DELETE CASCADE,
    name               TEXT        NOT NULL,
    kind               TEXT        NOT NULL
        CHECK (kind IN ('real', 'fictional')),
    role               TEXT,
    voice_traits       TEXT,
    key_facts          TEXT        NOT NULL,
    aliases            TEXT[]      DEFAULT '{}' NOT NULL,
    first_seen_chapter INTEGER,
    state_at_latest    TEXT,
    created_at         TIMESTAMPTZ DEFAULT now() NOT NULL
);

-- Per-story lookups, trigram (pg_trgm) matching on name, and
-- per-story filtering by kind.
-- NOTE(review): idx_characters_story_kind also leads with story_id, so
-- idx_characters_story may be redundant; confirm before dropping it.
CREATE INDEX idx_characters_story
    ON characters (story_id);
CREATE INDEX idx_characters_name_trgm
    ON characters USING gin (name gin_trgm_ops);
CREATE INDEX idx_characters_story_kind
    ON characters (story_id, kind);
|
|
|
|
-- canon_facts: everything that is bible-shaped but is not a character,
-- such as setting details, mystery threads, themes, rules, historical
-- anchors, fictional liberties, and suggested hooks for sequels.
-- Rows are removed together with their story.
CREATE TABLE canon_facts (
    id             UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    story_id       UUID        NOT NULL
        REFERENCES stories (id) ON DELETE CASCADE,
    category       TEXT        NOT NULL
        CHECK (category IN (
            'setting',
            'event',
            'rule',
            'theme',
            'mystery',
            'liberty',
            'hook',
            'historical_anchor'
        )),
    title          TEXT        NOT NULL,
    body           TEXT        NOT NULL,
    weight         INTEGER     DEFAULT 1 NOT NULL,
    source_chapter INTEGER,
    resolved       BOOLEAN     DEFAULT false NOT NULL,
    created_at     TIMESTAMPTZ DEFAULT now() NOT NULL
);

-- Fetch a story's facts, optionally narrowed to one category.
CREATE INDEX idx_canon_facts_story_category
    ON canon_facts (story_id, category);
|
|
|
|
-- chapters: the full prose body of each chapter, stored in the
-- database as markdown. One row per chapter; the UNIQUE (story_id, n)
-- constraint rejects a second row for the same chapter number.
-- Rows are removed together with their story.
CREATE TABLE chapters (
    id           UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    story_id     UUID        NOT NULL
        REFERENCES stories (id) ON DELETE CASCADE,
    n            INTEGER     NOT NULL,
    title        TEXT,
    body_md      TEXT        NOT NULL,
    word_count   INTEGER     DEFAULT 0 NOT NULL,
    generated_at TIMESTAMPTZ DEFAULT now() NOT NULL,

    UNIQUE (story_id, n)
);

-- NOTE(review): the UNIQUE (story_id, n) constraint already creates an
-- index led by story_id, so this one may be redundant; confirm before
-- dropping it.
CREATE INDEX idx_chapters_story
    ON chapters (story_id);
|
|
|
|
-- chapter_summaries: a short summary per chapter. When assembling
-- context for a sequel the writer pulls these instead of the full
-- chapter prose, which is much cheaper on tokens. Summaries are
-- produced by a separate LLM pass once the chapter is finished.
-- chapter_id is both primary key and foreign key, so each chapter has
-- at most one summary, and it is deleted with its chapter.
CREATE TABLE chapter_summaries (
    chapter_id   UUID        PRIMARY KEY
        REFERENCES chapters (id) ON DELETE CASCADE,
    body         TEXT        NOT NULL,
    generated_at TIMESTAMPTZ DEFAULT now() NOT NULL
);
|
|
|
|
-- passages: paragraph-level prose, each with an optional embedding
-- vector for similarity search. embedding and embedded_at are nullable
-- so the v0.1 import does not require an embedding pass; they are
-- filled in lazily once semantic recall is actually needed.
-- Rows are removed together with their chapter.
CREATE TABLE passages (
    id          UUID         DEFAULT gen_random_uuid() PRIMARY KEY,
    chapter_id  UUID         NOT NULL
        REFERENCES chapters (id) ON DELETE CASCADE,
    paragraph_n INTEGER      NOT NULL,
    body        TEXT         NOT NULL,
    embedding   vector(1536),
    embedded_at TIMESTAMPTZ,

    UNIQUE (chapter_id, paragraph_n)
);

-- NOTE(review): the UNIQUE (chapter_id, paragraph_n) constraint already
-- creates an index led by chapter_id, so this one may be redundant;
-- confirm before dropping it.
CREATE INDEX idx_passages_chapter
    ON passages (chapter_id);

-- The ivfflat index on `embedding` is deliberately deferred until the
-- table has data: ivfflat derives its lists from the rows present when
-- the index is built, so building it on an empty table gives poor
-- results. Add it after the first ~1k passages have been imported.
|
|
|
|
-- generation_runs: a log row for every LLM call we make. Useful for
-- cost tracking, forensics, and "why is this chapter weird?"
-- investigations. A run starts as 'running' and ends as 'succeeded'
-- or 'failed'; ended_at and error stay NULL until they are set.
-- Rows are removed together with their story.
CREATE TABLE generation_runs (
    id                    UUID        DEFAULT gen_random_uuid() PRIMARY KEY,
    story_id              UUID        NOT NULL
        REFERENCES stories (id) ON DELETE CASCADE,
    kind                  TEXT        NOT NULL
        CHECK (kind IN (
            'gen',
            'cleanup',
            'audit',
            'summary',
            'embed'
        )),
    clawdforge_session_id TEXT,
    tokens_in             INTEGER,
    tokens_out            INTEGER,
    cost_estimate_cents   INTEGER,
    started_at            TIMESTAMPTZ DEFAULT now() NOT NULL,
    ended_at              TIMESTAMPTZ,
    status                TEXT        DEFAULT 'running' NOT NULL
        CHECK (status IN ('running', 'succeeded', 'failed')),
    error                 TEXT
);

-- Look up runs by owning story and by run kind.
CREATE INDEX idx_generation_runs_story
    ON generation_runs (story_id);
CREATE INDEX idx_generation_runs_kind
    ON generation_runs (kind);
|
|
|
|
-- audit_findings: results of the canon audit. The auditing pass
-- ("Third-Opus") reads parent + sequel + bible and flags continuity
-- drift, character voice shift, retconned facts, and timeline
-- contradictions.
--   story_id : owning story; findings are deleted with it.
--   run_id   : the generation run that produced the finding, if any.
--              Set to NULL when that run is deleted, so the finding
--              outlives its run record.
--   severity : 'info' | 'warn' | 'crit'.
--   area     : which aspect of canon the finding concerns.
--   resolved : starts false.
CREATE TABLE audit_findings (
    id         UUID        PRIMARY KEY DEFAULT gen_random_uuid(),
    story_id   UUID        NOT NULL
        REFERENCES stories (id) ON DELETE CASCADE,
    run_id     UUID
        REFERENCES generation_runs (id) ON DELETE SET NULL,
    severity   TEXT        NOT NULL
        CHECK (severity IN ('info', 'warn', 'crit')),
    area       TEXT        NOT NULL
        CHECK (area IN (
            'character', 'continuity', 'tone',
            'fact', 'timeline', 'other'
        )),
    body       TEXT        NOT NULL,
    resolved   BOOLEAN     NOT NULL DEFAULT false,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);

CREATE INDEX idx_audit_findings_story ON audit_findings (story_id);

-- Index the run_id foreign key. Deleting a generation_runs row (also
-- when cascaded from a story delete) must find every finding that
-- references it in order to apply ON DELETE SET NULL; without an index
-- each such delete scans the whole table. It also serves "findings for
-- this run" lookups.
CREATE INDEX idx_audit_findings_run ON audit_findings (run_id);
|
|
|
|
-- tags: arbitrary user-applied labels on a story, such as genre, mood,
-- or status filters. A given name can be attached to a story only
-- once. Rows are removed together with their story.
CREATE TABLE tags (
    id       UUID DEFAULT gen_random_uuid() PRIMARY KEY,
    story_id UUID NOT NULL
        REFERENCES stories (id) ON DELETE CASCADE,
    name     TEXT NOT NULL,

    UNIQUE (story_id, name)
);

-- NOTE(review): the UNIQUE (story_id, name) constraint already creates
-- an index led by story_id, so this one may be redundant; confirm
-- before dropping it.
CREATE INDEX idx_tags_story
    ON tags (story_id);
|
|
|
|
-- touch_updated_at(): trigger function that overwrites the incoming
-- row's updated_at with now() and returns the row so the write
-- proceeds. It is attached to stories, so stories.updated_at is
-- refreshed whenever the story row itself is updated. Writes to child
-- tables do not fire it and therefore do not bump the timestamp.
CREATE OR REPLACE FUNCTION touch_updated_at()
RETURNS TRIGGER
LANGUAGE plpgsql
AS $touch$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$touch$;
|
|
|
|
-- Refresh stories.updated_at on every row-level UPDATE of stories.
-- The trigger exists on stories only, so writes to child tables leave
-- the timestamp untouched.
CREATE TRIGGER stories_updated_at
    BEFORE UPDATE
    ON stories
    FOR EACH ROW
    EXECUTE FUNCTION touch_updated_at();
|