-- skald/migrations/0003_narration_findings.sql

-- Audio-level audit findings. Populated by the v0.2 audit pipeline:
-- Whisper STT compares the rendered audio against the source text;
-- substantive deltas land here as findings. A separate audio-native
-- LLM pass (Gemini Flash audio etc) may add tone / prosody findings
-- the STT pass can't see.
--
-- Distinct from `audit_findings` which lives at the TEXT layer
-- (canon drift, character voice, continuity). This table is for
-- the AUDIO layer (mispronounced names, skipped lines, glitches,
-- weird inflection).

-- One row per audio-layer finding attached to a narration run.
-- Rows are deleted together with their owning narration_runs row
-- (ON DELETE CASCADE on run_id).
CREATE TABLE narration_findings (
    -- Surrogate key; generated server-side when the writer omits it.
    id              UUID PRIMARY KEY DEFAULT gen_random_uuid(),
    -- Owning narration run.
    run_id          UUID NOT NULL REFERENCES narration_runs(id) ON DELETE CASCADE,
    -- "pronunciation"     — wrong word came out (Whisper diff)
    -- "skip"              — source text absent in audio
    -- "insert"            — extra word in audio not in source
    -- "glitch"            — silence, clipping, dropout, etc
    -- "prosody"           — pacing / rhythm issue (audio-LLM only)
    -- "tone"              — wrong emotional register (audio-LLM only)
    kind            TEXT NOT NULL CHECK (kind IN (
                        'pronunciation', 'skip', 'insert',
                        'glitch', 'prosody', 'tone'
                    )),
    -- Window in the chapter audio where the issue lives.
    -- NOTE(review): units are presumably seconds from the start of the
    -- chapter audio — confirm against the audit pipeline.
    timestamp_start REAL NOT NULL,
    timestamp_end   REAL NOT NULL,
    -- For text-layer deltas: what we asked for and what we got.
    -- Both are nullable; presumably left NULL when a finding has no
    -- textual delta (verify against the writers).
    expected_text   TEXT,
    heard_text      TEXT,
    -- Closed set of severity levels enforced by the CHECK below.
    severity        TEXT NOT NULL CHECK (severity IN ('info', 'warn', 'crit')),
    -- Optional free-form commentary.
    notes           TEXT,
    -- Source of the finding: 'whisper' | 'gemini-flash-audio' |
    -- 'gpt-4o-audio' | 'qwen2-audio' | etc.
    -- Deliberately free-form (no CHECK) so new detectors need no migration.
    detector        TEXT NOT NULL,
    -- New findings start out unresolved.
    resolved        BOOLEAN NOT NULL DEFAULT false,
    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
    -- Reject negative or inverted windows at write time so a detector bug
    -- cannot store a span that no player or reviewer UI can seek to.
    -- Zero-length windows (start = end) remain valid for point events.
    CONSTRAINT narration_findings_window_check
        CHECK (timestamp_start >= 0 AND timestamp_end >= timestamp_start)
);

-- All findings belonging to one run (run_id is the foreign key to
-- narration_runs).
CREATE INDEX idx_narration_findings_run      ON narration_findings(run_id);
-- Filtering by severity level.
CREATE INDEX idx_narration_findings_severity ON narration_findings(severity);
-- Partial index that contains only unresolved findings.
-- It is keyed on run_id rather than on `resolved`: every row admitted by
-- the predicate has resolved = false, so a key on `resolved` would hold a
-- single constant value and add no selectivity. Keyed on run_id, the
-- index still serves any query filtered with `NOT resolved`, and can also
-- narrow to one run's open findings.
-- NOTE(review): run_id assumes open findings are mostly fetched per run —
-- confirm against the actual query patterns.
CREATE INDEX idx_narration_findings_resolved ON narration_findings(run_id) WHERE NOT resolved;