-- skald/migrations/0001_init.sql

-- Skald v0.1 schema. Database is the source of truth; the writer is
-- generic tooling that knows nothing hardcoded about any specific
-- story. Every story is rows.
--
-- pgvector for embedding-based callback search across past prose;
-- pg_trgm for fuzzy character-name lookups.

CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;

-- stories: one row per story; a sequel is simply another row.
--
-- A series is a chain of rows:
--   parent_story_id -> the story this one directly follows;
--   root_story_id   -> the head of the chain, denormalized so a whole
--                      series can be scanned without walking parents.
-- Both links use ON DELETE SET NULL: deleting a story detaches the
-- rows that pointed at it instead of deleting them.
CREATE TABLE stories (
    id                 UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    title              TEXT NOT NULL,
    -- Lifecycle state; new rows start as 'seed'.
    status             TEXT NOT NULL DEFAULT 'seed'
                       CHECK (status IN (
                           'seed', 'draft', 'generating', 'cleaning',
                           'auditing', 'complete', 'failed'
                       )),
    prompt             TEXT,
    model              TEXT,
    parent_story_id    UUID REFERENCES stories(id) ON DELETE SET NULL,
    root_story_id      UUID REFERENCES stories(id) ON DELETE SET NULL,
    series_name        TEXT,
    word_count_target  INTEGER,
    word_count_actual  INTEGER NOT NULL DEFAULT 0,
    summary            TEXT,
    created_at         TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Refreshed on every UPDATE by the stories_updated_at trigger
    -- created at the end of this migration.
    updated_at         TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- A story that is its own parent would make any walk along
    -- parent_story_id loop forever. A NULL parent passes, because a
    -- CHECK only rejects rows where the expression is FALSE.
    CONSTRAINT stories_parent_not_self_check
        CHECK (parent_story_id <> id),
    -- Word counts are never negative. The target is optional, and a
    -- NULL target passes for the same reason as above.
    CONSTRAINT stories_word_count_target_check
        CHECK (word_count_target >= 0),
    CONSTRAINT stories_word_count_actual_check
        CHECK (word_count_actual >= 0)
);

-- Series navigation: direct sequels of a story, and every story that
-- shares a root.
CREATE INDEX idx_stories_parent ON stories(parent_story_id);
CREATE INDEX idx_stories_root   ON stories(root_story_id);
-- Filtering by lifecycle state.
CREATE INDEX idx_stories_status ON stories(status);
-- Partial index: only rows that actually have a series name.
CREATE INDEX idx_stories_series ON stories(series_name) WHERE series_name IS NOT NULL;

-- characters: the cast of a story, each either a real (historical)
-- person or a fictional one. The character bible is split into a few
-- searchable columns, while key_facts keeps the original prose blob
-- so nothing is lost.
--
-- Constraints are declared at table level under the names PostgreSQL
-- would otherwise generate for the inline form.
CREATE TABLE characters (
    id                  UUID        DEFAULT gen_random_uuid(),
    story_id            UUID        NOT NULL,
    name                TEXT        NOT NULL,
    kind                TEXT        NOT NULL,
    role                TEXT,
    voice_traits        TEXT,
    key_facts           TEXT        NOT NULL,
    aliases             TEXT[]      NOT NULL DEFAULT '{}',
    first_seen_chapter  INTEGER,
    state_at_latest     TEXT,
    created_at          TIMESTAMPTZ NOT NULL DEFAULT now(),

    CONSTRAINT characters_pkey
        PRIMARY KEY (id),
    -- Characters belong to one story and are removed with it.
    CONSTRAINT characters_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id) ON DELETE CASCADE,
    CONSTRAINT characters_kind_check
        CHECK (kind IN ('real', 'fictional'))
);

-- All characters of a story.
CREATE INDEX idx_characters_story
    ON characters (story_id);

-- Trigram GIN index (pg_trgm) for fuzzy matching on the name.
CREATE INDEX idx_characters_name_trgm
    ON characters USING gin (name gin_trgm_ops);

-- Characters of a story narrowed by kind.
CREATE INDEX idx_characters_story_kind
    ON characters (story_id, kind);

-- canon_facts: every piece of story bible that is not a character —
-- setting details, events, rules, themes, mystery threads, fictional
-- liberties, sequel hooks and historical anchors.
--
-- Constraints are declared at table level under the names PostgreSQL
-- would otherwise generate for the inline form.
CREATE TABLE canon_facts (
    id              UUID        DEFAULT gen_random_uuid(),
    story_id        UUID        NOT NULL,
    category        TEXT        NOT NULL,
    title           TEXT        NOT NULL,
    body            TEXT        NOT NULL,
    weight          INTEGER     NOT NULL DEFAULT 1,
    source_chapter  INTEGER,
    resolved        BOOLEAN     NOT NULL DEFAULT false,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),

    CONSTRAINT canon_facts_pkey
        PRIMARY KEY (id),
    -- Facts belong to one story and are removed with it.
    CONSTRAINT canon_facts_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id) ON DELETE CASCADE,
    -- Closed list of fact categories.
    CONSTRAINT canon_facts_category_check
        CHECK (category IN (
            'setting', 'event', 'rule', 'theme',
            'mystery', 'liberty', 'hook', 'historical_anchor'
        ))
);

-- Facts of a story, optionally narrowed to one category.
CREATE INDEX idx_canon_facts_story_category
    ON canon_facts (story_id, category);

-- chapters: the full markdown prose of each chapter, stored in the
-- database. One row per chapter; UNIQUE (story_id, n) stops the same
-- chapter number being inserted twice for one story.
CREATE TABLE chapters (
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    story_id        UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
    -- Chapter number within the story. Negative numbers are rejected;
    -- 0 stays legal. NOTE(review): tighten to n > 0 once it is
    -- confirmed that numbering always starts at 1.
    n               INTEGER NOT NULL CHECK (n >= 0),
    title           TEXT,
    body_md         TEXT NOT NULL,
    -- A word count is never negative.
    word_count      INTEGER NOT NULL DEFAULT 0 CHECK (word_count >= 0),
    generated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
    UNIQUE (story_id, n)
);

-- NOTE(review): the unique index behind UNIQUE (story_id, n) already
-- has story_id as its leading column, so it can serve story_id-only
-- lookups too. This index is kept so that anything referring to it
-- by name keeps working; consider dropping it in a later migration.
CREATE INDEX idx_chapters_story ON chapters(story_id);

-- chapter_summaries: one short summary per chapter, written by a
-- separate LLM pass once the chapter is finished. When the writer
-- assembles context for a sequel it reads these rather than the full
-- chapter prose, which costs far fewer tokens.
--
-- chapter_id is both the primary key (at most one summary per
-- chapter) and a foreign key (the summary is removed with its
-- chapter). Constraint names are the ones PostgreSQL would otherwise
-- generate for the inline form.
CREATE TABLE chapter_summaries (
    chapter_id      UUID,
    body            TEXT        NOT NULL,
    generated_at    TIMESTAMPTZ NOT NULL DEFAULT now(),

    CONSTRAINT chapter_summaries_pkey
        PRIMARY KEY (chapter_id),
    CONSTRAINT chapter_summaries_chapter_id_fkey
        FOREIGN KEY (chapter_id) REFERENCES chapters (id) ON DELETE CASCADE
);

-- passages: chapter prose split into paragraphs, each with an
-- optional embedding vector for similarity search. The embedding
-- columns are nullable so that a v0.1 import needs no embedding
-- pass; they are filled in lazily once semantic recall is actually
-- needed.
--
-- Constraints are declared at table level under the names PostgreSQL
-- would otherwise generate for the inline form.
CREATE TABLE passages (
    id              UUID          DEFAULT gen_random_uuid(),
    chapter_id      UUID          NOT NULL,
    paragraph_n     INTEGER       NOT NULL,
    body            TEXT          NOT NULL,
    embedding       vector(1536),
    embedded_at     TIMESTAMPTZ,

    CONSTRAINT passages_pkey
        PRIMARY KEY (id),
    -- Passages are removed together with their chapter.
    CONSTRAINT passages_chapter_id_fkey
        FOREIGN KEY (chapter_id) REFERENCES chapters (id) ON DELETE CASCADE,
    -- One row per paragraph position within a chapter.
    CONSTRAINT passages_chapter_id_paragraph_n_key
        UNIQUE (chapter_id, paragraph_n)
);

-- All passages of a chapter.
CREATE INDEX idx_passages_chapter
    ON passages (chapter_id);

-- There is deliberately no ivfflat index on `embedding` yet. ivfflat
-- derives its lists from the rows present when the index is built,
-- so an index built on an empty table gives poor results. Add it
-- once roughly the first 1k passages have embeddings.

-- generation_runs: a log row for every LLM call. Used for cost
-- tracking, forensics and "why is this chapter weird?"
-- investigations.
--
-- Constraints are declared at table level under the names PostgreSQL
-- would otherwise generate for the inline form.
CREATE TABLE generation_runs (
    id                      UUID        DEFAULT gen_random_uuid(),
    story_id                UUID        NOT NULL,
    kind                    TEXT        NOT NULL,
    clawdforge_session_id   TEXT,
    tokens_in               INTEGER,
    tokens_out              INTEGER,
    cost_estimate_cents     INTEGER,
    started_at              TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- NULL until the run has ended.
    ended_at                TIMESTAMPTZ,
    status                  TEXT        NOT NULL DEFAULT 'running',
    error                   TEXT,

    CONSTRAINT generation_runs_pkey
        PRIMARY KEY (id),
    -- Run logs are removed together with their story.
    CONSTRAINT generation_runs_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id) ON DELETE CASCADE,
    -- Which pipeline stage made the call.
    CONSTRAINT generation_runs_kind_check
        CHECK (kind IN (
            'gen', 'cleanup', 'audit',
            'summary', 'embed'
        )),
    CONSTRAINT generation_runs_status_check
        CHECK (status IN ('running', 'succeeded', 'failed'))
);

-- Runs of one story.
CREATE INDEX idx_generation_runs_story
    ON generation_runs (story_id);

-- Runs of one kind across all stories.
CREATE INDEX idx_generation_runs_kind
    ON generation_runs (kind);

-- audit_findings: results of the canon audit. Third-Opus reads
-- parent + sequel + bible and flags continuity drift, character
-- voice shifts, retconned facts and timeline contradictions.
CREATE TABLE audit_findings (
    id          UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    story_id    UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
    -- The run that produced the finding. Optional, and reset to NULL
    -- if that run row is deleted, so the finding outlives its run log.
    run_id      UUID REFERENCES generation_runs(id) ON DELETE SET NULL,
    severity    TEXT NOT NULL CHECK (severity IN ('info', 'warn', 'crit')),
    area        TEXT NOT NULL CHECK (area IN (
                    'character', 'continuity', 'tone',
                    'fact', 'timeline', 'other'
                )),
    body        TEXT NOT NULL,
    resolved    BOOLEAN NOT NULL DEFAULT false,
    created_at  TIMESTAMPTZ NOT NULL DEFAULT now()
);

-- Findings of one story.
CREATE INDEX idx_audit_findings_story ON audit_findings(story_id);

-- Index the referencing side of the run_id foreign key. Deleting a
-- generation_runs row (directly, or through the cascade from
-- stories) has to find every finding that points at it in order to
-- apply ON DELETE SET NULL; without this index each such delete
-- scans the whole table.
CREATE INDEX idx_audit_findings_run ON audit_findings(run_id);

-- tags: free-form labels a user attaches to a story (genre, mood,
-- status filters, and so on). A given name can be attached to a
-- story only once.
--
-- Constraints are declared at table level under the names PostgreSQL
-- would otherwise generate for the inline form.
CREATE TABLE tags (
    id          UUID    DEFAULT gen_random_uuid(),
    story_id    UUID    NOT NULL,
    name        TEXT    NOT NULL,

    CONSTRAINT tags_pkey
        PRIMARY KEY (id),
    -- Tags are removed together with their story.
    CONSTRAINT tags_story_id_fkey
        FOREIGN KEY (story_id) REFERENCES stories (id) ON DELETE CASCADE,
    CONSTRAINT tags_story_id_name_key
        UNIQUE (story_id, name)
);

-- All tags of a story.
CREATE INDEX idx_tags_story
    ON tags (story_id);

-- Keeps stories.updated_at current.
--
-- touch_updated_at() is a row-level trigger function: it overwrites
-- updated_at on the row being written with now() and returns that
-- row so the write proceeds with the new timestamp.
--
-- The stories_updated_at trigger runs it before every UPDATE of a
-- stories row. It is attached to stories only, so writes to child
-- tables (chapters, characters, ...) do not bump the timestamp.
CREATE OR REPLACE FUNCTION touch_updated_at()
    RETURNS trigger
    LANGUAGE plpgsql
AS $touch$
BEGIN
    NEW.updated_at := now();
    RETURN NEW;
END;
$touch$;

CREATE TRIGGER stories_updated_at
    BEFORE UPDATE
    ON stories
    FOR EACH ROW
    EXECUTE FUNCTION touch_updated_at();