scaffold v0.1: postgres+pgvector inside-container, schema, markdown ingest, CLI
Skald is a generic story-writer. The database is the product; the binary is the tooling. Everything story-specific lives in rows, not in code. cwho's monorepo + binary-per-role pattern transplanted to this domain. What this commit ships: - Cargo workspace (resolver=3, edition 2024): skald-core (lib) + skald (bin) - Migration 0001: stories, characters, canon_facts, chapters, chapter_summaries, passages (vector(1536)), generation_runs, audit_findings, tags. pgvector + pg_trgm extensions. ivfflat index deferred until we have data (post-import the first ~1k passages and add the index). - skald-core::ingest — markdown parser for the cwho/coast-down shape: '# Title' → '## Chapter N — date' headings → '# Continuity Bible' section with character roster (real + fictional sub-sections) + setting / mystery / historical / liberty / hook sub-sections. Decomposed into structured rows; original bullet body preserved in key_facts/body fields for fidelity. 6 unit tests cover the shape. - skald-core::db — Postgres connection pool + migration runner. - skald-core::models — row types via sqlx::FromRow. - skald binary — clap CLI: 'serve' (http + migrations) and 'import-markdown' (one-shot ingest). - Dockerfile — multi-stage: rust:1.95-bookworm builder, pgvector/ pgvector:pg17 runtime, tini under PID 1, custom entrypoint.sh that boots embedded postgres then execs skald serve. - compose.yml — singleton container, postgres data in volume, story corpus mounted read-only at /seed. Decisions locked 2026-05-13: 1. DB in same container 'till we have a real working tool' (cobb) 2. postgres+pgvector (NOT sqlite) — keeps semantic-search story 3. Network-not-socket connection (postgresql://localhost:5432) from day one so future split is config-only, not code-rewrite Not yet wired: - Web UI - clawdforge calls (gen → cleanup → canon-audit pipeline) - Embedding pass - TTS sidecar
This commit is contained in:
commit
f575ad3722
17 changed files with 4065 additions and 0 deletions
189
migrations/0001_init.sql
Normal file
189
migrations/0001_init.sql
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
-- Skald v0.1 schema. Database is the source of truth; the writer is
|
||||
-- generic tooling that knows nothing hardcoded about any specific
|
||||
-- story. Every story is rows.
|
||||
--
|
||||
-- pgvector for embedding-based callback search across past prose;
|
||||
-- pg_trgm for fuzzy character-name lookups.
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
|
||||
-- stories: one row per story (or per sequel).
--   parent_story_id  chains a sequel to the story it follows.
--   root_story_id    head of that chain, denormalized so a whole
--                    series can be scanned cheaply.
-- Both self-references are set to NULL (not cascaded) when the
-- referenced story is deleted.
CREATE TABLE stories (
    id                UUID        DEFAULT gen_random_uuid(),
    title             TEXT        NOT NULL,
    status            TEXT        NOT NULL DEFAULT 'seed',
    prompt            TEXT,
    model             TEXT,
    parent_story_id   UUID,
    root_story_id     UUID,
    series_name       TEXT,
    word_count_target INTEGER,
    word_count_actual INTEGER     NOT NULL DEFAULT 0,
    summary           TEXT,
    created_at        TIMESTAMPTZ NOT NULL DEFAULT now(),
    updated_at        TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Constraint names are spelled out using the names PostgreSQL
    -- assigns by default, so errors and later migrations are greppable.
    CONSTRAINT stories_pkey
        PRIMARY KEY (id),
    CONSTRAINT stories_status_check
        CHECK (status IN (
            'seed', 'draft', 'generating', 'cleaning',
            'auditing', 'complete', 'failed'
        )),
    CONSTRAINT stories_parent_story_id_fkey
        FOREIGN KEY (parent_story_id) REFERENCES stories (id)
        ON DELETE SET NULL,
    CONSTRAINT stories_root_story_id_fkey
        FOREIGN KEY (root_story_id) REFERENCES stories (id)
        ON DELETE SET NULL
);

CREATE INDEX idx_stories_parent ON stories (parent_story_id);
CREATE INDEX idx_stories_root   ON stories (root_story_id);
CREATE INDEX idx_stories_status ON stories (status);
-- Partial index: stories without a series_name are left out.
CREATE INDEX idx_stories_series ON stories (series_name)
    WHERE series_name IS NOT NULL;
|
||||
|
||||
-- characters: the character roster of a story. Each character is
-- either 'real' (historical) or 'fictional'. The bible entry is
-- decomposed into searchable columns, while the original prose blob
-- is kept verbatim in key_facts for full fidelity.
-- Rows are removed together with their story.
CREATE TABLE characters (
    id                 UUID        DEFAULT gen_random_uuid(),
    story_id           UUID        NOT NULL,
    name               TEXT        NOT NULL,
    kind               TEXT        NOT NULL,
    role               TEXT,
    voice_traits       TEXT,
    key_facts          TEXT        NOT NULL,
    aliases            TEXT[]      NOT NULL DEFAULT '{}',
    first_seen_chapter INTEGER,
    state_at_latest    TEXT,
    created_at         TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT characters_pkey
        PRIMARY KEY (id),
    CONSTRAINT characters_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id)
        ON DELETE CASCADE,
    CONSTRAINT characters_kind_check
        CHECK (kind IN ('real', 'fictional'))
);

-- Trigram GIN index for fuzzy character-name lookups (pg_trgm).
CREATE INDEX idx_characters_name_trgm
    ON characters USING gin (name gin_trgm_ops);

-- Serves both "all characters of a story" and "characters of a story
-- by kind". A separate single-column index on story_id would be
-- redundant because story_id is the leading column here, so none is
-- created.
CREATE INDEX idx_characters_story_kind ON characters (story_id, kind);
|
||||
|
||||
-- canon_facts: every bible entry that is not a character — setting
-- details, mystery threads, themes, rules, historical anchors,
-- fictional liberties, and suggested hooks for sequels.
-- Rows are removed together with their story.
CREATE TABLE canon_facts (
    id             UUID        DEFAULT gen_random_uuid(),
    story_id       UUID        NOT NULL,
    category       TEXT        NOT NULL,
    title          TEXT        NOT NULL,
    body           TEXT        NOT NULL,
    weight         INTEGER     NOT NULL DEFAULT 1,
    source_chapter INTEGER,
    resolved       BOOLEAN     NOT NULL DEFAULT false,
    created_at     TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT canon_facts_pkey
        PRIMARY KEY (id),
    CONSTRAINT canon_facts_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id)
        ON DELETE CASCADE,
    CONSTRAINT canon_facts_category_check
        CHECK (category IN (
            'setting', 'event', 'rule', 'theme',
            'mystery', 'liberty', 'hook', 'historical_anchor'
        ))
);

-- Lookup of a story's facts, optionally narrowed to one category.
CREATE INDEX idx_canon_facts_story_category
    ON canon_facts (story_id, category);
|
||||
|
||||
-- chapters: the full prose body of each chapter, stored in the
-- database as markdown. One row per chapter; the unique constraint
-- on (story_id, n) prevents inserting the same chapter twice.
-- Rows are removed together with their story.
CREATE TABLE chapters (
    id           UUID        DEFAULT gen_random_uuid(),
    story_id     UUID        NOT NULL,
    n            INTEGER     NOT NULL,
    title        TEXT,
    body_md      TEXT        NOT NULL,
    word_count   INTEGER     NOT NULL DEFAULT 0,
    generated_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT chapters_pkey
        PRIMARY KEY (id),
    CONSTRAINT chapters_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id)
        ON DELETE CASCADE,
    CONSTRAINT chapters_story_id_n_key
        UNIQUE (story_id, n)
);

-- No separate index on story_id: the unique index backing
-- (story_id, n) has story_id as its leading column and already
-- serves story_id lookups, so a second index would only add write
-- cost.
|
||||
|
||||
-- chapter_summaries: one short summary per chapter. When assembling
-- context for a sequel the writer reads these instead of the full
-- chapter prose, which is much cheaper on tokens. Summaries are
-- produced by a separate LLM pass once the chapter is finished.
-- chapter_id is both the primary key and the link to chapters, so a
-- chapter has at most one summary; it is removed with its chapter.
CREATE TABLE chapter_summaries (
    chapter_id   UUID,
    body         TEXT        NOT NULL,
    generated_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT chapter_summaries_pkey
        PRIMARY KEY (chapter_id),
    CONSTRAINT chapter_summaries_chapter_id_fkey
        FOREIGN KEY (chapter_id) REFERENCES chapters (id)
        ON DELETE CASCADE
);
|
||||
|
||||
-- passages: paragraph-level prose with an optional embedding vector
-- for similarity search. embedding and embedded_at are nullable so
-- the v0.1 import does not require an embedding pass; they are
-- filled in lazily when semantic recall is actually needed.
-- Rows are removed together with their chapter.
CREATE TABLE passages (
    id          UUID         DEFAULT gen_random_uuid(),
    chapter_id  UUID         NOT NULL,
    paragraph_n INTEGER      NOT NULL,
    body        TEXT         NOT NULL,
    embedding   vector(1536),
    embedded_at TIMESTAMPTZ,

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT passages_pkey
        PRIMARY KEY (id),
    CONSTRAINT passages_chapter_id_fkey
        FOREIGN KEY (chapter_id) REFERENCES chapters (id)
        ON DELETE CASCADE,
    CONSTRAINT passages_chapter_id_paragraph_n_key
        UNIQUE (chapter_id, paragraph_n)
);

-- No separate index on chapter_id: the unique index backing
-- (chapter_id, paragraph_n) has chapter_id as its leading column and
-- already serves chapter_id lookups.
|
||||
-- ivfflat index on `embedding` is deferred until we have data —
|
||||
-- ivfflat derives its list centroids from the rows present at build
|
||||
-- time, so an empty-table index gives poor recall. Add after first ~1k passages.
|
||||
|
||||
-- generation_runs: a log row for every LLM call we make. Used for
-- cost tracking, forensics, and "why is this chapter weird?"
-- investigations. A run starts as 'running' and ends as 'succeeded'
-- or 'failed'. Rows are removed together with their story.
CREATE TABLE generation_runs (
    id                    UUID        DEFAULT gen_random_uuid(),
    story_id              UUID        NOT NULL,
    kind                  TEXT        NOT NULL,
    clawdforge_session_id TEXT,
    tokens_in             INTEGER,
    tokens_out            INTEGER,
    cost_estimate_cents   INTEGER,
    started_at            TIMESTAMPTZ NOT NULL DEFAULT now(),
    ended_at              TIMESTAMPTZ,
    status                TEXT        NOT NULL DEFAULT 'running',
    error                 TEXT,

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT generation_runs_pkey
        PRIMARY KEY (id),
    CONSTRAINT generation_runs_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id)
        ON DELETE CASCADE,
    CONSTRAINT generation_runs_kind_check
        CHECK (kind IN (
            'gen', 'cleanup', 'audit',
            'summary', 'embed'
        )),
    CONSTRAINT generation_runs_status_check
        CHECK (status IN ('running', 'succeeded', 'failed'))
);

CREATE INDEX idx_generation_runs_story ON generation_runs (story_id);
CREATE INDEX idx_generation_runs_kind  ON generation_runs (kind);
|
||||
|
||||
-- audit_findings: results of the canon audit pass, which reads the
-- parent story, the sequel, and the bible and flags continuity
-- drift, character voice shifts, retconned facts, and timeline
-- contradictions.
-- A finding is removed with its story. If only the generation run
-- that produced it is deleted, the finding is kept and run_id is set
-- to NULL.
CREATE TABLE audit_findings (
    id         UUID        DEFAULT gen_random_uuid(),
    story_id   UUID        NOT NULL,
    run_id     UUID,
    severity   TEXT        NOT NULL,
    area       TEXT        NOT NULL,
    body       TEXT        NOT NULL,
    resolved   BOOLEAN     NOT NULL DEFAULT false,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT audit_findings_pkey
        PRIMARY KEY (id),
    CONSTRAINT audit_findings_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id)
        ON DELETE CASCADE,
    CONSTRAINT audit_findings_run_id_fkey
        FOREIGN KEY (run_id) REFERENCES generation_runs (id)
        ON DELETE SET NULL,
    CONSTRAINT audit_findings_severity_check
        CHECK (severity IN ('info', 'warn', 'crit')),
    CONSTRAINT audit_findings_area_check
        CHECK (area IN (
            'character', 'continuity', 'tone',
            'fact', 'timeline', 'other'
        ))
);

CREATE INDEX idx_audit_findings_story ON audit_findings (story_id);

-- Supports the ON DELETE SET NULL action of the run_id foreign key:
-- without it every deleted generation_runs row (including those
-- removed by a story cascade) forces a scan of audit_findings to find
-- the referencing rows.
CREATE INDEX idx_audit_findings_run ON audit_findings (run_id);
|
||||
|
||||
-- tags: arbitrary user-applied labels on a story (genre, mood,
-- status filters, etc.). A given name can be applied to a story only
-- once. Rows are removed together with their story.
CREATE TABLE tags (
    id       UUID DEFAULT gen_random_uuid(),
    story_id UUID NOT NULL,
    name     TEXT NOT NULL,

    -- Explicit names equal to PostgreSQL's default constraint names.
    CONSTRAINT tags_pkey
        PRIMARY KEY (id),
    CONSTRAINT tags_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id)
        ON DELETE CASCADE,
    CONSTRAINT tags_story_id_name_key
        UNIQUE (story_id, name)
);

-- No separate index on story_id: the unique index backing
-- (story_id, name) has story_id as its leading column and already
-- serves story_id lookups.
|
||||
|
||||
-- Keep stories.updated_at current: a row-level BEFORE UPDATE trigger
-- stamps the row with now() whenever the story row itself is
-- updated. Writes to child tables (chapters, characters, ...) do not
-- fire it.
CREATE OR REPLACE FUNCTION touch_updated_at()
    RETURNS trigger
    LANGUAGE plpgsql
AS $body$
BEGIN
    -- Overwrite whatever value the UPDATE supplied for updated_at.
    NEW.updated_at := now();
    RETURN NEW;
END;
$body$;

CREATE TRIGGER stories_updated_at
    BEFORE UPDATE ON stories
    FOR EACH ROW
    EXECUTE FUNCTION touch_updated_at();
|
||||
Loading…
Add table
Add a link
Reference in a new issue