-- Audio-level audit findings. Populated by the v0.2 audit pipeline:
-- Whisper STT compares the rendered audio against the source text;
-- substantive deltas land here as findings. A separate audio-native
-- LLM pass (Gemini Flash audio, etc.) may add tone / prosody findings
-- that the STT pass can't see.
--
-- Distinct from `audit_findings`, which lives at the TEXT layer
-- (canon drift, character voice, continuity). This table is for
-- the AUDIO layer (mispronounced names, skipped lines, glitches,
-- weird inflection).
CREATE TABLE narration_findings (
    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),

    -- Owning narration run. ON DELETE CASCADE: deleting the run deletes
    -- its findings with it.
    run_id UUID NOT NULL REFERENCES narration_runs(id) ON DELETE CASCADE,

    -- Category of the finding:
    --   "pronunciation" — wrong word came out (Whisper diff)
    --   "skip"          — source text absent in audio
    --   "insert"        — extra word in audio not in source
    --   "glitch"        — silence, clipping, dropout, etc
    --   "prosody"       — pacing / rhythm issue (audio-LLM only)
    --   "tone"          — wrong emotional register (audio-LLM only)
    kind TEXT NOT NULL CHECK (kind IN (
        'pronunciation', 'skip', 'insert', 'glitch', 'prosody', 'tone'
    )),

    -- Window in the chapter audio where the issue lives.
    -- NOTE(review): units are not stated in this file — presumably
    -- seconds from the start of the chapter audio; confirm with the
    -- pipeline that writes these rows.
    timestamp_start REAL NOT NULL,
    timestamp_end REAL NOT NULL,

    -- For text-layer deltas: what we asked for and what we got.
    -- Both are nullable, so a finding may carry neither.
    expected_text TEXT,
    heard_text TEXT,

    severity TEXT NOT NULL CHECK (severity IN ('info', 'warn', 'crit')),
    notes TEXT,

    -- Source of the finding: 'whisper' | 'gemini-flash-audio' |
    -- 'gpt-4o-audio' | 'qwen2-audio' | etc. Deliberately not limited by
    -- a CHECK here, so new detectors need no schema change.
    detector TEXT NOT NULL,

    resolved BOOLEAN NOT NULL DEFAULT false,
    created_at TIMESTAMPTZ NOT NULL DEFAULT now(),

    -- Reject impossible windows (negative start, or end before start) at
    -- write time. A zero-length window (start = end) is still allowed for
    -- point-in-time findings.
    CONSTRAINT narration_findings_window_check
        CHECK (timestamp_start >= 0 AND timestamp_end >= timestamp_start)
);

-- Look up all findings that belong to one run.
CREATE INDEX idx_narration_findings_run
    ON narration_findings(run_id);

-- Filter findings by severity.
CREATE INDEX idx_narration_findings_severity
    ON narration_findings(severity);

-- Partial index: only rows with resolved = false are indexed, so it
-- covers just the open findings and stays small as findings get resolved.
CREATE INDEX idx_narration_findings_resolved
    ON narration_findings(resolved)
    WHERE NOT resolved;