skald (engine/tortoise): public release prep — AGPL-3.0, generic config, engine-variant README

engine/tortoise: sentence chunking + device fix + pitch/rate modulation
Catches up engines/tortoise/server.py with what's been deployed on the host through tonight's smoke iterations: 0.2 — _chunk_for_tortoise splits text nodes at sentence boundaries (max 220 chars) before each tts_with_preset call. Fixes the end-of-prompt gibberish past tortoise's ~20s reliable horizon. 0.3 — _get_voice now .to(DEVICE) cached samples + latents. Without this, non-lj voices crash with 'Expected all tensors to be on the same device, but found cpu and cuda:0'. 0.4 — [voice:NAME pitch=N rate=R][/voice] tag syntax. librosa pitch_shift + time_stretch applied per-chunk for single-voice multi-character renders. The strategy survived the design table — but the librosa phase-vocoder artifacts at ±5 semitones ate the quality on the 2070 Super. Parked here for the GPU rebuild; modulation works architecturally, just needs better stretching algorithm (rubberband) + more headroom. Production stayed Kokoro. Coast-Down preferred_voice_id reverted to kokoro_af_heart in the live DB after this experiment.
2026-06-28 21:38:51 -07:00 · 2026-05-14 19:08:43 -07:00
25 changed files with 324 additions and 1726 deletions
--- a/.forgejo/workflows/gitleaks.yml
+++ b/.forgejo/workflows/gitleaks.yml
@ -1,40 +0,0 @@
-# .forgejo/workflows/gitleaks.yml
-#
-# Sulkta canonical gitleaks workflow. Drop a copy into every public repo at
-# `.forgejo/workflows/gitleaks.yml` after the Forgejo act_runner is registered
-# (task #295).
-#
-# Pairs with the pre-receive hook installed on every bare repo — that one is
-# the strict enforcement layer (rejects the push); this one provides the
-# per-PR red ✗ that branch-protection rules can require before merge.
-#
-# Layer 1 (this workflow): visible per-PR status, can be a required check.
-# Layer 2 (pre-receive hook): strict enforcement at the server.
-# Layer 3 (johnny5 cron sweep): nightly full-history sweep across all repos.
-
-name: gitleaks
-
-on:
-  push:
-  pull_request:
-
-jobs:
-  scan:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          # Full history — gitleaks needs depth to scan a commit range.
-          fetch-depth: 0
-
-      - name: install gitleaks
-        run: |
-          curl -sSL -o gl.tar.gz \
-            https://github.com/gitleaks/gitleaks/releases/download/v8.21.2/gitleaks_8.21.2_linux_x64.tar.gz
-          tar xzf gl.tar.gz gitleaks
-          chmod +x gitleaks
-          ./gitleaks version
-
-      - name: scan
-        run: |
-          ./gitleaks detect --source . --no-banner --redact --verbose
--- a/Cargo.lock
+++ b/Cargo.lock
@ -78,9 +78,9 @@ dependencies = [

 [[package]]
 name = "anyhow"
-version = "1.0.103"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2a4385e2e34eb35d6b3efe798b9eb88096925d87726c0798709bf56d9ed84af3"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"

 [[package]]
 name = "assert-json-diff"
@ -237,9 +237,9 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"

 [[package]]
 name = "chrono"
-version = "0.4.45"
+version = "0.4.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1aa79e62e7697b8e29b513a68abacf485adcd1fe8284a4316c5ae868e6633327"
+checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0"
 dependencies = [
 "iana-time-zone",
 "js-sys",
@ -1583,9 +1583,9 @@ dependencies = [

 [[package]]
 name = "regex"
-version = "1.12.4"
+version = "1.12.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
+checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276"
 dependencies = [
 "aho-corasick",
 "memchr",
@ -1606,9 +1606,9 @@ dependencies = [

 [[package]]
 name = "regex-syntax"
-version = "0.8.11"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"

 [[package]]
 name = "reqwest"
@ -1796,9 +1796,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.150"
+version = "1.0.149"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
+checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86"
 dependencies = [
 "itoa",
 "memchr",
@ -2235,7 +2235,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
 dependencies = [
 "fastrand",
- "getrandom 0.3.4",
+ "getrandom 0.4.2",
 "once_cell",
 "rustix",
 "windows-sys 0.61.2",
@ -2592,9 +2592,9 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"

 [[package]]
 name = "uuid"
-version = "1.23.4"
+version = "1.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf80a72845275afea99e7f2b434723d3bc7e38470fcd1c7ed39a599c73319a53"
+checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76"
 dependencies = [
 "getrandom 0.4.2",
 "js-sys",
--- a/Dockerfile
+++ b/Dockerfile
@ -1,16 +1,26 @@
-# Multi-stage build for skald. Postgres ships in the same image
-# until the tool stabilises; to split, swap the runtime base to
-# debian:bookworm-slim, drop entrypoint.sh, point DATABASE_URL at
-# an external pg.
+# Multi-stage build for skald.
 #
-# Build context is the workspace root: `docker build -t skald:latest .`
+# Stage 1: compile the rust binary against rust:1-bookworm.
+# Stage 2: pgvector/pgvector:pg17 (debian-bookworm postgres with
+#          pgvector preinstalled) + tini + the skald binary.
+#
+# v0.1 ships postgres inside the same container ("singleton till we
+# have a real working tool"). When we extract the DB out, swap the
+# runtime base to debian:bookworm-slim, drop entrypoint.sh, point
+# DATABASE_URL at the external pg.
+#
+# Build context is the workspace root:
+#   docker build -t skald:latest .

+# ─── builder ──────────────────────────────────────────────────────
 FROM rust:1.95-bookworm AS builder
 WORKDIR /build

-# Dependency-cache layer: copy manifests + vendored path-dep first,
-# build stubs, then drop in real sources. clawdforge is a path dep
-# resolved at workspace load time.
+# Cache the dependency graph: copy manifests first, fetch + build
+# stubs, THEN drop in real sources. The vendored clawdforge SDK
+# needs its own manifest + source available during the cache layer
+# (path dep — Cargo resolves it at workspace load time, not at
+# crate-compile time).
 COPY Cargo.toml Cargo.lock ./
 COPY skald-core/Cargo.toml skald-core/Cargo.toml
 COPY skald/Cargo.toml      skald/Cargo.toml
--- a/README.md
+++ b/README.md
@ -1,78 +1,42 @@
-# skald
+# skald — `engine/tortoise` variant

-Long-form story-writer with canon-keeping, sequel continuity, and
-self-hosted audiobook narration. The database is the source of truth;
-the binary is the tooling.
+This branch is the **Tortoise-TTS backend** variant of
+[skald](https://git.sulkta.com/Sulkta-OSS/skald). It carries
+engine-specific tuning for Tortoise that doesn't generalise to the
+other backends; everything else tracks `main`.

-Named for the Old Norse poets who composed and memorized kings' sagas
-across generations.
+For the full project — the story-writer, the schema, the narration
+pipeline — see `main` and the root `README.md`.

-## What's wired
+## What's different here

- Rust workspace (`skald-core` + `skald`)
- Postgres schema for stories, characters, canon facts, chapters,
-  passages, generation runs, audit findings, tags
- pgvector for future similarity search
- `skald import-markdown` ingests a story file (chapters + bible)
- `skald serve` exposes `/health` + the web inspector and runs
-  migrations on boot
- `skald continue` runs gen → cleanup → audit per chapter, with
-  multi-chapter batching (cap 20)
- `skald rewrite` re-authors a chapter in a named author's voice
- `skald audit` runs whole-story prose-quality audit; `skald dedup`
-  is the surgical fix half of the loop
- `skald prepare-narration` annotates a chapter with `[breath]` /
-  `[pause:Xs]` / `[scene]` beats and per-character `[voice:...]`
-  tags
- `skald narrate` renders a chapter to audio via one of three TTS
-  engines (F5-TTS, Kokoro-82M, Tortoise-TTS) — see `engines/`
- Named-author "soul" personas via `skald authors seed`; author
-  voice replaces the model's base system prompt for gen/cleanup/
-  rewrite/dedup/narrate_prep
+Tortoise-TTS is the highest-quality narrator of the three backends,
+at the cost of speed: it is far slower than real-time and is meant
+for batched, overnight renders rather than interactive use. The
+tuning in `engines/tortoise/server.py` covers:

-## Schema (cheat sheet)
+- **Sentence chunking** — long passages are split so each chunk
+  conditions cleanly.
+- **Device selection** — explicit GPU/CPU placement.
+- **Pitch / rate modulation** — post-processing for single-voice
+  multi-character rendering.

+Tortoise has 26 named built-in voices, so no voice cloning is
+required. On an 8GB GPU it wants the card largely to itself — stop
+the other engines for the duration of a Tortoise run, or give it a
+dedicated GPU.
+
+## Usage
+
+The Tortoise sidecar speaks the same `POST /synthesize` +
+`GET /healthz` contract as the other engines (see `engines/README.md`).
+Point skald's `TORTOISE_URL` at it and route `tortoise_*` voices to
+it.
+
+```bash
+docker compose up -d            # skald + postgres
+# bring up the Tortoise sidecar from engines/tortoise/
 ```
-stories             meta + status + parent/root for series
-authors             persona identity (slug, display_name, model)
-author_revisions    versioned souls; one current per author
-characters          real or fictional, story-scoped, voice-mappable
-canon_facts         setting, mystery, theme, rule, historical_anchor, hook
-chapters            full prose body + optional body_md_tts annotation
-chapter_summaries   short summaries for cheap context loading
-passages            paragraph-level + embedding vector(1536)
-voices              TTS voice rows (F5 ref clips / Kokoro / Tortoise names)
-pronunciation_overrides  per-story + global respellings for proper nouns
-generation_runs     every LLM call logged
-audit_findings      audit pass output (severity + area)
-narration_runs      per-chapter TTS renders
-```
-
-## Quickstart
-
-```sh
-docker compose up -d
-docker exec skald skald import-markdown --path /seed/<story>.md \
-    --title "<title>"
-curl http://localhost:7780/health
-```
-
-The compose file expects `POSTGRES_PASSWORD` and (optionally)
-`CLAWDFORGE_URL` + `CLAWDFORGE_TOKEN` in `.env`. Story markdown
-goes into `./seed/`; postgres data persists in `./pgdata/`.
-
-## Architecture
-
-v0.1 ships postgres inside the skald container — singleton until
-the tool stabilises. To extract postgres later, swap the runtime
-base to `debian:bookworm-slim`, drop `entrypoint.sh`, and point
-`DATABASE_URL` at the external pg. The binary doesn't care where
-the DB lives.
-
-The generation passes call out to `clawdforge` (a bearer-token-gated
-HTTP wrapper around `claude -p`). The Rust client is vendored at
-`vendor/clawdforge/`. TTS calls go HTTP+JSON to the per-engine
-sidecars under `engines/`.

 ## License

--- a/docs/authors.md
+++ b/docs/authors.md
@ -1,163 +0,0 @@
-# Authors as personas with souls
-
-Each story has a named author with a soul. The author's voice bleeds
-through every generation pass — not as instruction stapled to the
-prompt, but as the substrate the prose grows from. The reader should
-feel "a person wrote this."
-
-Authors have memory across their corpus when the per-story
-`cross_story_memory` toggle is on. An author writing a Chernobyl
-piece can quietly echo a phrase from an earlier mining-strike story.
-Default is off — most stories stand alone.
-
-## Schema
-
-Authors live in the database, not on disk. The soul is a markdown
-blob in `author_revisions.soul`. Authors are immutable; new soul
-revisions create a new `author_revisions` row marked `is_current`
-and the previous one is demoted.
-
-```sql
-CREATE TABLE authors (
-    id              UUID PRIMARY KEY,
-    slug            TEXT NOT NULL UNIQUE,
-    display_name    TEXT NOT NULL,
-    persona_tagline TEXT,
-    model           TEXT NOT NULL DEFAULT 'opus',
-    is_synthetic    BOOLEAN NOT NULL DEFAULT true,
-    created_at      TIMESTAMPTZ NOT NULL DEFAULT now(),
-    updated_at      TIMESTAMPTZ NOT NULL DEFAULT now()
-);
-
-CREATE TABLE author_revisions (
-    id              UUID PRIMARY KEY,
-    author_id       UUID NOT NULL REFERENCES authors(id),
-    n               INT NOT NULL,
-    soul            TEXT NOT NULL,
-    system_template TEXT,
-    tools           TEXT[] NOT NULL DEFAULT '{}',
-    note            TEXT,
-    is_current      BOOLEAN NOT NULL DEFAULT false,
-    created_at      TIMESTAMPTZ NOT NULL DEFAULT now()
-);
-
-ALTER TABLE stories
-    ADD COLUMN author_id           UUID REFERENCES authors(id),
-    ADD COLUMN author_revision_id  UUID REFERENCES author_revisions(id),
-    ADD COLUMN cross_story_memory  BOOLEAN NOT NULL DEFAULT false;
-
-CREATE TABLE author_corpus (
-    author_id   UUID NOT NULL REFERENCES authors(id) ON DELETE CASCADE,
-    story_id    UUID NOT NULL REFERENCES stories(id) ON DELETE CASCADE,
-    role        TEXT NOT NULL CHECK (role IN ('authored', 'read')),
-    added_at    TIMESTAMPTZ NOT NULL DEFAULT now(),
-    PRIMARY KEY (author_id, story_id)
-);
-```
-
-## Per-pass author roles
-
- **gen** — full author voice. They are writing.
- **cleanup** — full author voice. Polishing their own draft, not a
-  neutral editor.
- **rewrite** — full author voice. Re-authoring another hand's
-  prose; canon preserved, prose reworked.
- **dedup** — full author voice. Surgical fix of audit-flagged
-  repetitions only.
- **narrate_prep** — author voice if bound; the author's beat
-  placement carries.
- **audit / prose_audit** — neutral. The audit checks the author's
-  work with fresh eyes; no author bound.
- **summarize** — neutral. Continuity utility, not prose.
-
-When an author is bound, the soul replaces the model's base system
-prompt (`SystemMode::Replace`). Without an author, a neutral house
-scaffold is appended (`SystemMode::Append`).
-
-## Soul template
-
-Following the persona-soul shape, recalibrated for authorial identity.
-Free-form prose under each section.
-
-```markdown
-# Author: {{display_name}}
-
-_Tagline: {{persona_tagline}}_
-
-## Voice
-
-Sentence rhythm. Vocabulary register. Paragraph length tendencies.
-Dialogue density. Punctuation habits (dashes, semicolons, sentence
-fragments). What your prose SOUNDS like read aloud.
-
-## Worldview
-
-What you believe about people. Power. Money. Labor. Cities. Nature.
-What makes you angry. What makes you tender. Whose side you're on
-in the implicit moral architecture of any scene.
-
-## Specifics over abstractions
-
-The concrete details you reach for. The five senses you favor.
-Smells? Cold? Texture of cloth? Sound of machines?
-
-## Pet peeves
-
-Words you refuse to write. Tropes you avoid. Sentimentalities you
-gut.
-
-## Sense of humor
-
-Dry? Dark? Absent? Bitter? Self-deprecating? Where in a paragraph
-does humor live — end of a sentence, mid-clause, or never?
-
-## Biography (real or invented)
-
-A few biographical facts that EXPLAIN the voice — the formative
-cuts, not a CV.
-
-## Anchor authors
-
-Living or dead authors the prose draws from. Useful for the model
-to triangulate voice.
-
-## Do / Don't
-
-Concrete prose moves to reach for and to avoid.
-```
-
-The default scaffold (`DEFAULT_AUTHOR_SCAFFOLD` in `skald-core::forge`)
-wraps the soul in a system prompt that:
-
- declares the model IS the author, not playing one
- pins canon as non-negotiable (names, dates, established events)
- forbids preamble, meta-commentary, fourth-wall breaks
- substitutes the per-pass directive (`GEN_DIRECTIVE`,
-  `CLEANUP_DIRECTIVE`, `REWRITE_DIRECTIVE`, `DEDUP_DIRECTIVE`,
-  `NARRATE_PREP_DIRECTIVE`)
-
-A per-author `system_template` overrides the default scaffold when
-set; otherwise the default is used.
-
-## Cross-story memory
-
-When `stories.cross_story_memory = true`, the continuation context
-pulls characters / canon_facts / passages from every story the
-author has authored or marked-read, not just the parent chain.
-
-To keep token budget sane, cross-corpus pulls are summary-only by
-default. Embeddings-similarity (once wired) can surface direct
-callbacks.
-
-## Seeding an author
-
-```sh
-skald authors seed \
-    --slug orson-black \
-    --display-name "Orson Black" \
-    --tagline "Orwell but more rebel and pissed off" \
-    --file seeds/authors/orson-black.md
-```
-
-This creates the author + first revision (or adds a new revision to
-an existing author, which becomes current).
--- a/engines/README.md
+++ b/engines/README.md
@ -35,10 +35,10 @@ generalise. Examples:
  question intonation, paragraph/scene/breath gap durations tuned
  for af_heart's pacing, notes on how respellings need to be all-
  lowercase to avoid letter-by-letter spell-out by misaki.
- `engine/tortoise` — GPU exclusivity coordinator (stops F5 +
-  Kokoro before a Tortoise run since the 2070 Super can't host
-  all three at once), preset choice ergonomics, character→tortoise-
-  voice seed assignments.
+- `engine/tortoise` — sentence chunking, device selection, and
+  pitch/rate modulation tuned for Tortoise's audiobook-quality
+  output. On an 8GB card Tortoise wants the GPU largely to itself,
+  so run it when the other engines are stopped.

 To deploy a tuned engine, check out the engine's branch in the build
 dir and `docker compose up -d --build`:
@ -51,7 +51,7 @@ docker compose up -d --build
 ## GPU coordination

 On an 8GB card F5 + Kokoro can co-reside (~5GB + ~1GB). Tortoise
-pushes the budget over and needs the GPU largely to itself — the
-`engine/tortoise` branch carries a script to stop kokoro + f5
-before a Tortoise run and restart them after. Replace with proper
-coordination once more VRAM is available.
+pushes the budget over and needs the GPU largely to itself — stop
+kokoro + f5 before a Tortoise run and restart them after, or give
+Tortoise its own card. Proper multi-engine coordination is left to
+the deployment once more VRAM is available.
--- a/engines/kokoro/Dockerfile
+++ b/engines/kokoro/Dockerfile
@ -1,8 +1,13 @@
-# Kokoro-82M TTS. Apache 2.0 code AND weights — clean stack vs
-# F5-TTS's CC-BY-NC asterisk.
+# Sulkta build of Kokoro-82M TTS.
 #
-# Kokoro runs fine on CPU but we use the cuda base to stay
-# consistent with f5-tts and pick up the GPU when free.
+# License: Apache 2.0 (code AND model weights). Clean stack — no
+# CC-BY-NC asterisk like F5-TTS's Emilia weights. This is the
+# narrator engine for sleep-quality audiobook reads; F5-TTS stays
+# around for voice-cloning cases.
+#
+# Kokoro is small enough to run on CPU but we use the cuda base
+# anyway to stay consistent with f5-tts and so it'll pick up the
+# GPU when no other tenant has it.
 FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime

 ENV DEBIAN_FRONTEND=noninteractive \
@ -22,9 +27,9 @@ RUN pip install --no-cache-dir 'kokoro>=0.9.0' 'fastapi>=0.115.0' 'uvicorn>=0.32

 RUN mkdir -p /cache/hf /audio

-COPY server.py /app/server.py
+COPY kokoro_server.py /app/kokoro_server.py
 WORKDIR /app

 EXPOSE 7860

-CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "kokoro_server:app", "--host", "0.0.0.0", "--port", "7860"]
--- a/engines/kokoro/server.py
+++ b/engines/kokoro/server.py
@ -1,4 +1,4 @@
-"""Kokoro-82M FastAPI server, sibling to the f5-tts server.
+"""Kokoro-82M FastAPI server, sibling to f5_server.

 Same /synthesize contract as F5 so skald can route between engines
 just by which URL it points at. The semantic difference: Kokoro
@ -234,7 +234,7 @@ def _startup() -> None:

@app.get("/healthz")
 def healthz() -> dict:
-    # Shape matches the f5-tts server's so the same Rust HealthResponse
+    # Shape matches f5_server's so the same Rust HealthResponse
    # struct deserializes both: model/vocoder/loaded fields are
    # required by skald-core::narrate::HealthResponse.
    return {
--- a/engines/tortoise/Dockerfile
+++ b/engines/tortoise/Dockerfile
@ -1,13 +1,17 @@
-# Tortoise-TTS. Apache 2.0 code + weights.
+# Sulkta build of Tortoise-TTS.
 #
-# ~26 built-in voices (no cloning): angie, daniel, deniro, emma,
-# freeman, geralt, halle, jlaw, lj, mol, myself, pat, pat2, rainbow,
-# snakes, tim_reynolds, tom, train_atkins, train_dotrice,
+# Voice roster (built-in, no cloning needed): angie, daniel, deniro,
+# emma, freeman, geralt, halle, jlaw, lj, mol, myself, pat, pat2,
+# rainbow, snakes, tim_reynolds, tom, train_atkins, train_dotrice,
 # train_dreams, train_grace, train_kennard, train_lescault,
-# train_mouse, weaver, william.
+# train_mouse, weaver, william. ~26 voices baked in.
 #
-# Slow: standard preset is ~10x slower than Kokoro; high_quality is
-# ~30x. Trade for quality.
+# License: Apache 2.0 (code) + Apache 2.0 (model weights). Clean
+# stack for share/publish.
+#
+# Speed: slow. Trade for quality. Standard preset is ~10x slower
+# than Kokoro; high_quality is ~30x slower. Worth it for the
+# audiobook-quality bar.

 FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime

@ -33,9 +37,9 @@ RUN pip install --no-cache-dir \

 RUN mkdir -p /cache/hf /cache/tortoise-models /audio

-COPY server.py /app/server.py
+COPY tortoise_server.py /app/tortoise_server.py
 WORKDIR /app

 EXPOSE 7860

-CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]
+CMD ["uvicorn", "tortoise_server:app", "--host", "0.0.0.0", "--port", "7860"]
--- a/engines/tortoise/server.py
+++ b/engines/tortoise/server.py
@ -1,4 +1,4 @@
-"""Tortoise-TTS FastAPI server. Sibling to the kokoro server.
+"""Tortoise-TTS FastAPI server. Sibling to kokoro_server.

 Same /synthesize contract as the kokoro server so skald only has to
 route by voice.source. Differences:
@ -23,6 +23,7 @@ import time
 import uuid
 from pathlib import Path

+import librosa
 import numpy as np
 import soundfile as sf
 import torch
@ -62,28 +63,70 @@ def _get_tts() -> TextToSpeech:
    return _tts


+def _move_to_device(obj):
+    """Recursively .to(DEVICE) tensors inside the structure tortoise
+    returns from load_voice. voice_samples is a list of tensors;
+    conditioning_latents is a tuple of tensors. Anything else
+    passes through unchanged (e.g. None, ints)."""
+    if obj is None:
+        return obj
+    if isinstance(obj, torch.Tensor):
+        return obj.to(DEVICE)
+    if isinstance(obj, list):
+        return [_move_to_device(x) for x in obj]
+    if isinstance(obj, tuple):
+        return tuple(_move_to_device(x) for x in obj)
+    return obj
+
+
 def _get_voice(name: str) -> tuple:
    """Cache voice latents to avoid re-loading reference clips on
    every synthesis call. Tortoise's load_voice returns
-    (voice_samples, conditioning_latents)."""
+    (voice_samples, conditioning_latents) — but they're created on
+    CPU; we move them to DEVICE so the autoregressive model (on
+    CUDA) doesn't fail with cpu/cuda tensor-device mismatch."""
    if name not in _voice_cache:
-        _voice_cache[name] = load_voice(name)
+        samples, latents = load_voice(name)
+        _voice_cache[name] = (_move_to_device(samples), _move_to_device(latents))
    return _voice_cache[name]


-# ─── tag splitter (lifted from the kokoro server) ───────────────
+# ─── tag splitter (lifted from kokoro_server) ───────────────────


 class Node:
-    __slots__ = ("kind", "value", "voice")
+    __slots__ = ("kind", "value", "voice", "pitch", "rate")

-    def __init__(self, kind: str, value, voice: str | None = None):
+    def __init__(
+        self,
+        kind: str,
+        value,
+        voice: str | None = None,
+        pitch: float = 0.0,
+        rate: float = 1.0,
+    ):
+        # kind ∈ {"text", "silence"}; value is str for text, float
+        # seconds for silence. voice/pitch/rate are character-voicing
+        # modifiers from [voice:NAME pitch=N rate=R] tags. Default:
+        # request voice, 0 semitones, 1x rate.
        self.kind = kind
        self.value = value
        self.voice = voice
+        self.pitch = pitch
+        self.rate = rate


-_VOICE_OPEN_RE = re.compile(r"\[voice:([A-Za-z0-9_-]+)\]")
+# Voice open tag — name + optional pitch (semitones) + optional rate:
+#   [voice:dyatlov]               → voice swap only
+#   [voice:lj pitch=-3]           → same voice, 3 semitones lower
+#   [voice:lj pitch=2 rate=1.1]   → higher + slightly faster (fairy)
+#   [voice:lj pitch=-4 rate=0.9]  → lower + slower (troll)
+_VOICE_OPEN_RE = re.compile(
+    r"\[voice:([A-Za-z0-9_-]+)"
+    r"(?:\s+pitch=(-?[0-9]+(?:\.[0-9]+)?))?"
+    r"(?:\s+rate=([0-9]+(?:\.[0-9]+)?))?"
+    r"\]"
+)
 _VOICE_CLOSE = "[/voice]"
 _TAG_RE = re.compile(
    r"\[(pause:(?P<dur>[0-9]+(?:\.[0-9]+)?)(?P<unit>s|ms)?|breath|scene)\]",
@ -102,7 +145,70 @@ def _parse_tag(match: re.Match) -> float:
    return dur / 1000.0 if unit == "ms" else dur


-def _expand_inline(text: str, voice: str | None) -> list[Node]:
+# Tortoise's autoregressive head loses coherence past ~20s of generated
+# audio per inference call. lj's pace is roughly 14 chars/s, so anything
+# past ~280 chars per call risks gibberish at the end. synthesize() runs
+# each text node through _chunk_for_tortoise (sentence-boundary splits,
+# capped below that limit) so every tts_with_preset call stays in range.
+TORTOISE_MAX_CHUNK_CHARS = 220
+
+# Sentence boundary regex — splits on `.`/`?`/`!` followed by whitespace
+# and a capital letter, `"` or `(` (abbreviations like "Mr. Smith" still
+# split here; _chunk_for_tortoise re-joins short pieces) OR after any newline.
+_SENTENCE_BOUNDARY = re.compile(r"(?<=[\.!?])\s+(?=[A-Z\"\(])|(?<=\n)\s*")
+
+
+def _chunk_for_tortoise(text: str, max_chars: int = TORTOISE_MAX_CHUNK_CHARS) -> list[str]:
+    """Split text into chunks <= max_chars at sentence boundaries.
+    If a single sentence exceeds max_chars (rare for prose), fall
+    back to splitting that sentence at commas or just hard-cutting.
+    """
+    sentences = [s.strip() for s in _SENTENCE_BOUNDARY.split(text) if s and s.strip()]
+    chunks: list[str] = []
+    current = ""
+    for sent in sentences:
+        # Long sentence: emit alone, but try sub-splitting at commas.
+        if len(sent) > max_chars:
+            if current:
+                chunks.append(current.strip())
+                current = ""
+            # Split on commas
+            parts = [p.strip() for p in sent.split(",") if p.strip()]
+            sub = ""
+            for p in parts:
+                add = (sub + ", " if sub else "") + p
+                if len(add) <= max_chars:
+                    sub = add
+                else:
+                    if sub:
+                        chunks.append(sub)
+                    # If even the part alone exceeds, hard-cut at max_chars
+                    while len(p) > max_chars:
+                        chunks.append(p[:max_chars])
+                        p = p[max_chars:]
+                    sub = p
+            if sub:
+                chunks.append(sub)
+            continue
+        # Sentence fits — accumulate.
+        candidate = (current + " " if current else "") + sent
+        if len(candidate) <= max_chars:
+            current = candidate
+        else:
+            if current:
+                chunks.append(current.strip())
+            current = sent
+    if current:
+        chunks.append(current.strip())
+    return chunks
+
+
+def _expand_inline(
+    text: str,
+    voice: str | None,
+    pitch: float = 0.0,
+    rate: float = 1.0,
+) -> list[Node]:
    out: list[Node] = []
    text = text.strip()
    if not text:
@ -111,12 +217,12 @@ def _expand_inline(text: str, voice: str | None) -> list[Node]:
    for m in _TAG_RE.finditer(text):
        pre = text[cursor : m.start()].strip()
        if pre:
-            out.append(Node("text", pre, voice))
+            out.append(Node("text", pre, voice, pitch, rate))
        out.append(Node("silence", _parse_tag(m)))
        cursor = m.end()
    tail = text[cursor:].strip()
    if tail:
-        out.append(Node("text", tail, voice))
+        out.append(Node("text", tail, voice, pitch, rate))
    return out


@ -130,12 +236,14 @@ def _split_paragraph_voices(para: str) -> list[Node]:
            break
        out.extend(_expand_inline(para[cursor : m.start()], None))
        voice = m.group(1)
+        pitch = float(m.group(2)) if m.group(2) else 0.0
+        rate = float(m.group(3)) if m.group(3) else 1.0
        body_start = m.end()
        close_idx = para.find(_VOICE_CLOSE, body_start)
        if close_idx < 0:
-            out.extend(_expand_inline(para[body_start:], voice))
+            out.extend(_expand_inline(para[body_start:], voice, pitch, rate))
            break
-        out.extend(_expand_inline(para[body_start:close_idx], voice))
+        out.extend(_expand_inline(para[body_start:close_idx], voice, pitch, rate))
        cursor = close_idx + len(_VOICE_CLOSE)
    return out

@ -209,7 +317,7 @@ def _startup() -> None:

@app.get("/healthz")
 def healthz() -> dict:
-    # Shape matches the f5-tts + kokoro servers so skald's HealthResponse
+    # Shape matches f5_server/kokoro_server so skald's HealthResponse
    # struct deserializes all three.
    return {
        "ok": True,
@ -253,6 +361,7 @@ def synthesize(req: SynthesizeRequest) -> SynthesizeResponse:
    started = time.monotonic()
    pieces: list[np.ndarray] = []
    voices_used: set[str] = set()
+    tortoise_chunks_rendered = 0
    for node in nodes:
        if node.kind == "silence":
            pieces.append(_silence_samples(node.value))
@ -264,18 +373,37 @@ def synthesize(req: SynthesizeRequest) -> SynthesizeResponse:
        except Exception as e:
            log.warning("voice %s failed to load (%s); falling back to default", seg_voice, e)
            samples, latents = _get_voice(voice)
-        # Tortoise's tts_with_preset returns a torch.Tensor on the
-        # configured device.
-        audio_tensor = tts.tts_with_preset(
-            text=node.value,
-            voice_samples=samples,
-            conditioning_latents=latents,
-            preset=preset,
-        )
-        if isinstance(audio_tensor, list):
-            audio_tensor = audio_tensor[0]
-        arr = audio_tensor.squeeze().cpu().numpy().astype(np.float32)
-        pieces.append(arr)
+        # Each text node may exceed Tortoise's reliable ~20s horizon —
+        # split at sentence boundaries before feeding the model.
+        sub_chunks = _chunk_for_tortoise(node.value)
+        for sub_idx, sub in enumerate(sub_chunks):
+            audio_tensor = tts.tts_with_preset(
+                text=sub,
+                voice_samples=samples,
+                conditioning_latents=latents,
+                preset=preset,
+            )
+            if isinstance(audio_tensor, list):
+                audio_tensor = audio_tensor[0]
+            arr = audio_tensor.squeeze().cpu().numpy().astype(np.float32)
+            # Per-character voice modulation via librosa. Apply
+            # pitch first (preserves duration), then rate (preserves
+            # pitch). Default pitch=0, rate=1.0 = no-op fast path.
+            if abs(node.pitch) > 1e-3:
+                arr = librosa.effects.pitch_shift(
+                    arr, sr=SAMPLE_RATE, n_steps=node.pitch
+                )
+            if abs(node.rate - 1.0) > 1e-3:
+                arr = librosa.effects.time_stretch(arr, rate=node.rate)
+            arr = arr.astype(np.float32)
+            pieces.append(arr)
+            tortoise_chunks_rendered += 1
+            log.info(
+                "chunk %d/%d done (%d chars, pitch=%+.1f rate=%.2f, %.1fs audio so far)",
+                sub_idx + 1, len(sub_chunks), len(sub),
+                node.pitch, node.rate,
+                sum(len(p) for p in pieces) / SAMPLE_RATE,
+            )
    elapsed_ms = int((time.monotonic() - started) * 1000)

    if not pieces:
--- a/entrypoint.sh
+++ b/entrypoint.sh
@ -1,9 +1,14 @@
 #!/usr/bin/env bash
+# Skald container entrypoint.
+#
 # Boots the embedded postgres via the pgvector image's own
 # docker-entrypoint, waits for it to accept connections, then execs
-# `skald` in the foreground. Tini is PID 1; postgres becomes our
-# child. When the DB is extracted to its own container, this reduces
-# to `exec /usr/local/bin/skald "$@"`.
+# `skald` in the foreground. Tini is PID 1 (so it can reap zombies +
+# forward signals); we run as tini's direct child; postgres becomes our child.
+#
+# This is explicitly "DB in the same container, for now" — when we
+# split the DB out (see project notes), the entrypoint reduces to
+# `exec /usr/local/bin/skald "$@"` and the pg startup goes away.

 set -eo pipefail

--- a/migrations/0008_chapter_rewrite.sql
+++ b/migrations/0008_chapter_rewrite.sql
@ -1,15 +0,0 @@
-- The rewrite pass: an author re-authors existing chapter prose in
-- their own voice (canon preserved, prose reworked). body_md gets
-- overwritten with the rewritten version; body_md_original keeps
-- the pre-rewrite prose so the original is never lost. Populated
-- only on the FIRST rewrite of a chapter (if NULL) — subsequent
-- rewrites leave the original alone.
-ALTER TABLE chapters
-    ADD COLUMN IF NOT EXISTS body_md_original text;
-
-- Allow 'rewrite' as a generation_runs.kind.
-ALTER TABLE generation_runs
-    DROP CONSTRAINT generation_runs_kind_check;
-ALTER TABLE generation_runs
-    ADD CONSTRAINT generation_runs_kind_check
-    CHECK (kind = ANY (ARRAY['gen', 'cleanup', 'audit', 'summary', 'embed', 'narrate_prep', 'rewrite']));
--- a/migrations/0009_story_audiobook.sql
+++ b/migrations/0009_story_audiobook.sql
@ -1,7 +0,0 @@
-- A story can have a single stitched audiobook — all its chapter
-- renders concatenated into one chaptered file (see the m4b built
-- from per-chapter narration_runs). audiobook_path is the path the
-- web server serves it from (e.g. /audio/The-Coast-Down.m4b); NULL
-- means no audiobook has been stitched yet.
-ALTER TABLE stories
-    ADD COLUMN IF NOT EXISTS audiobook_path text;
--- a/migrations/0010_prose_audit.sql
+++ b/migrations/0010_prose_audit.sql
@ -1,20 +0,0 @@
-- The prose-quality audit pass: a QC gate that reads a finished
-- story end to end and flags repetition, template tics, self-
-- restatement and continuity drift before it goes to narration.
-- Repetition a silent reader skims is glaring once narrated aloud.
-
-- Allow 'prose_audit' as a generation_runs.kind.
-ALTER TABLE generation_runs DROP CONSTRAINT generation_runs_kind_check;
-ALTER TABLE generation_runs ADD CONSTRAINT generation_runs_kind_check
-    CHECK (kind = ANY (ARRAY[
-        'gen', 'cleanup', 'audit', 'summary', 'embed',
-        'narrate_prep', 'rewrite', 'prose_audit'
-    ]));
-
-- 'repetition' is a first-class audit finding area.
-ALTER TABLE audit_findings DROP CONSTRAINT audit_findings_area_check;
-ALTER TABLE audit_findings ADD CONSTRAINT audit_findings_area_check
-    CHECK (area = ANY (ARRAY[
-        'character', 'continuity', 'tone', 'fact',
-        'timeline', 'repetition', 'other'
-    ]));
--- a/migrations/0011_dedup_pass.sql
+++ b/migrations/0011_dedup_pass.sql
@ -1,9 +0,0 @@
-- The dedup pass: the fix half of the audit loop. Takes a chapter
-- plus the story's prose-audit findings and rephrases only the
-- flagged repetitions, leaving everything else verbatim.
-ALTER TABLE generation_runs DROP CONSTRAINT generation_runs_kind_check;
-ALTER TABLE generation_runs ADD CONSTRAINT generation_runs_kind_check
-    CHECK (kind = ANY (ARRAY[
-        'gen', 'cleanup', 'audit', 'summary', 'embed',
-        'narrate_prep', 'rewrite', 'prose_audit', 'dedup'
-    ]));
--- a/skald-core/src/config.rs
+++ b/skald-core/src/config.rs
@ -8,23 +8,24 @@ use serde::{Deserialize, Serialize};

 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ForgeConfig {
-    /// Base URL of the clawdforge HTTP service. The calling binary
-    /// resolves this from `CLAWDFORGE_URL`.
+    /// Base URL of the clawdforge HTTP service. Defaults to
+    /// `http://clawdforge.example.local:8800` in production; override
+    /// for tests via env.
    pub base_url: String,

    /// App-level bearer token. Resolved by the binary from
    /// `CLAWDFORGE_TOKEN`; should never be logged or `Display`ed.
    pub app_token: String,

-    /// Model alias passed to clawdforge → `claude -p --model`.
-    /// Defaults to opus.
+    /// Model alias passed to clawdforge → `claude -p --model`. Skald
+    /// is opinionated: always opus (the default); effort is set per pass.
    pub model: String,
 }

 impl Default for ForgeConfig {
    fn default() -> Self {
        Self {
-            base_url: "http://localhost:8800".into(),
+            base_url: "http://clawdforge.example.local:8800".into(),
            app_token: String::new(),
            model: "opus".into(),
        }
--- a/skald-core/src/forge.rs
+++ b/skald-core/src/forge.rs
@ -7,7 +7,7 @@
 //!
 //! 1. **gen** — produces a new chapter draft from an assembled
 //!    context blob (parent prose + bible + characters + similarity-
-//!    matched passages, all from the database). Opus, high effort.
+//!    matched passages, all from the database). Opus, max effort.
 //!
 //! 2. **cleanup** — polishes the draft for prose quality, voice
 //!    consistency, dialogue rhythm, pacing dead spots. Same Opus,
@ -43,11 +43,8 @@ use crate::config::ForgeConfig;
 pub struct Forge {
    client: Client,
    /// The model alias we pass to clawdforge. Skald is opinionated:
-    /// always opus. Story-writing passes (gen/cleanup/narrate_prep/
-    /// rewrite) run at HIGH effort; only the audit pass runs at MAX —
-    /// audit genuinely needs the frontier reasoning, prose-craft does
-    /// not, and the $200/mo `claude -p` cap makes max-everywhere
-    /// unaffordable. `clawdforge` resolves the alias to the CLI flag.
+    /// always opus max effort. (See `project_story_writer_container.md`.)
+    /// `clawdforge` resolves the alias to the actual claude CLI flag.
    model: String,
 }

@ -77,18 +74,6 @@ pub enum PassKind {
    /// prose; output should be byte-identical except for the
    /// tag insertions.
    NarratePrep,
-    /// Re-author existing chapter prose in an author's voice. Canon
-    /// (names, dates, events, places, facts) is preserved exactly;
-    /// the prose itself is rewritten. Not editing — re-authoring.
-    Rewrite,
-    /// Whole-story prose-quality audit — reads a finished story end
-    /// to end and flags repetition, template tics, self-restatement
-    /// and continuity drift. The QC gate before narration.
-    ProseAudit,
-    /// Surgical dedup — takes a chapter plus the story's audit
-    /// findings and rephrases only the flagged repetitions, leaving
-    /// everything else verbatim. The fix half of the audit loop.
-    Dedup,
 }

 impl PassKind {
@ -99,9 +84,6 @@ impl PassKind {
            Self::Audit => "audit",
            Self::Summary => "summary",
            Self::NarratePrep => "narrate_prep",
-            Self::Rewrite => "rewrite",
-            Self::ProseAudit => "prose_audit",
-            Self::Dedup => "dedup",
        }
    }
 }
@ -111,12 +93,10 @@ impl Forge {
        let client = ClientBuilder::default()
            .base_url(&cfg.base_url)
            .token(&cfg.app_token)
-            // Generation passes at --effort max run 10–20 min wall clock;
-            // the max-effort whole-book prose audit can run past an hour.
-            // This is the client-side ceiling — set high enough to cover
-            // the slowest pass; each pass still sends its own server-side
-            // timeout_secs. Default 120s would strand any real pass.
-            .timeout(Duration::from_secs(7200))
+            // Generation passes at --effort max can run 10–20 min wall
+            // clock. clawdforge's server-side cap is 1800s — match it.
+            // Default 120s would strand any prose-craft pass.
+            .timeout(Duration::from_secs(1800))
            .user_agent(concat!("skald/", env!("CARGO_PKG_VERSION")))
            .build()?;
        Ok(Self {
@ -151,7 +131,7 @@ impl Forge {
            model: Some(self.model.clone()),
            system: Some(system),
            system_mode: Some(mode),
-            effort: Some(Effort::High),
+            effort: Some(Effort::Max),
            timeout_secs: Some(1800),
            ..Default::default()
        };
@ -176,7 +156,7 @@ impl Forge {
            model: Some(self.model.clone()),
            system: Some(system),
            system_mode: Some(mode),
-            effort: Some(Effort::High),
+            effort: Some(Effort::Max),
            timeout_secs: Some(1800),
            ..Default::default()
        };
@ -205,75 +185,6 @@ impl Forge {
        })
    }

-    /// Whole-story prose-quality audit. Receives every chapter of a
-    /// finished story concatenated in order and returns findings JSON
-    /// — repetition, template tics, self-restatement, continuity
-    /// drift. This is the QC gate before a story goes to narration,
-    /// where repetition a silent reader skims becomes glaring.
-    ///
-    /// HIGH effort: finding repeated passages and motif reuse is
-    /// comparison work, not deep reasoning — high-effort Opus does it
-    /// well in minutes. A max-effort pass over a whole-book input
-    /// (quarter-million chars) runs an impractical hour-plus.
-    pub async fn prose_audit(&self, full_story: &str) -> anyhow::Result<PassOutput> {
-        let prompt = format!(
-            "Audit the complete story below for repetition, template tics, \
-             self-restatement and continuity errors. Read every chapter. \
-             Return JSON only, matching the schema in the system prompt.\n\n\
-             {full_story}"
-        );
-        let body = RunRequest {
-            prompt,
-            model: Some(self.model.clone()),
-            system: Some(SYSTEM_PROSE_AUDIT.to_string()),
-            effort: Some(Effort::High),
-            // High effort over a whole book lands in minutes; 3600s is
-            // a generous ceiling that won't bite.
-            timeout_secs: Some(3600),
-            ..Default::default()
-        };
-        let r = self.client.run(body).await?;
-        let duration_ms = r.duration_ms;
-        Ok(PassOutput { kind: PassKind::ProseAudit, result: r, duration_ms })
-    }
-
-    /// Surgical dedup of one chapter — the fix half of the audit
-    /// loop. Receives the chapter's prose plus the whole story's
-    /// audit findings, and rephrases ONLY the flagged repetitions
-    /// that occur in this chapter; everything the findings do not
-    /// flag stays verbatim. Author REQUIRED — the fresh phrasing
-    /// lands in the author's voice (SystemMode::Replace). High
-    /// effort: it is prose-craft, same posture as rewrite.
-    pub async fn dedup(
-        &self,
-        prose: &str,
-        findings: &str,
-        author: &AuthorWithRevision,
-    ) -> anyhow::Result<PassOutput> {
-        let scaffold = author
-            .revision
-            .system_template
-            .as_deref()
-            .unwrap_or(DEFAULT_AUTHOR_SCAFFOLD);
-        let system = scaffold
-            .replace("{{display_name}}", &author.author.display_name)
-            .replace("{{pass_directive}}", DEDUP_DIRECTIVE)
-            .replace("{{soul}}", &author.revision.soul);
-        let user_prompt = dedup_user_prompt(prose, findings);
-        let body = RunRequest {
-            prompt: user_prompt,
-            model: Some(self.model.clone()),
-            system: Some(system),
-            system_mode: Some(SystemMode::Replace),
-            effort: Some(Effort::High),
-            timeout_secs: Some(1800),
-            ..Default::default()
-        };
-        let r = self.client.run(body).await?;
-        let duration_ms = r.duration_ms;
-        Ok(PassOutput { kind: PassKind::Dedup, result: r, duration_ms })
-    }
-
    /// Annotate prose with narration control tags. The model
    /// receives the full chapter prose and returns the SAME prose
    /// with `[pause:Xs]`, `[breath]`, `[scene]` markers inserted
@ -281,12 +192,10 @@ impl Forge {
    /// Orson Black places beats differently than another author
    /// would. Replace-mode if author is set; Append otherwise.
    ///
-    /// `characters` is the story's character roster. When non-empty,
+    /// `characters` is the story's character roster. When provided,
    /// the system prompt instructs the model to wrap dialogue in
-    /// `[voice:<slug>]"..."[/voice]` for multi-voice rendering; the
+    /// `[voice:<slug>]"..."[/voice]` for multi-voice rendering. The
    /// slug is mapped to a Kokoro voice id by skald's narrate path.
-    /// An EMPTY roster selects single-voice mode — the prompt then
-    /// forbids `[voice:...]` tags entirely (one narrator, no cast).
    ///
    /// Hard rule the system prompt enforces: do not change a word
    /// of prose. Tags are additive only.
@ -296,12 +205,6 @@ impl Forge {
        author: Option<&AuthorWithRevision>,
        characters: &[CharacterSpeaker],
    ) -> anyhow::Result<PassOutput> {
-        // An empty character roster means single-voice narration —
-        // the whole chapter reads in one voice. In that mode the
-        // prompt must NOT invite `[voice:...]` tags, or the model
-        // invents speaker slugs from names in the prose that the
-        // narrate path then has to detect and neutralize.
-        let single_voice = characters.is_empty();
        let user_prompt = narrate_prep_user_prompt(prose, characters);
        let (system, mode) = match author {
            Some(a) => {
@ -310,34 +213,22 @@ impl Forge {
                    .system_template
                    .as_deref()
                    .unwrap_or(DEFAULT_AUTHOR_SCAFFOLD);
-                let directive = if single_voice {
-                    NARRATE_PREP_DIRECTIVE_SINGLE
-                } else {
-                    NARRATE_PREP_DIRECTIVE
-                };
                let composed = scaffold
                    .replace("{{display_name}}", &a.author.display_name)
-                    .replace("{{pass_directive}}", directive)
+                    .replace("{{pass_directive}}", NARRATE_PREP_DIRECTIVE)
                    .replace("{{soul}}", &a.revision.soul);
                (composed, SystemMode::Replace)
            }
-            None => {
-                let house = if single_voice {
-                    HOUSE_NARRATE_PREP_SYSTEM_SINGLE
-                } else {
-                    HOUSE_NARRATE_PREP_SYSTEM
-                };
-                (house.to_string(), SystemMode::Append)
-            }
+            None => (HOUSE_NARRATE_PREP_SYSTEM.to_string(), SystemMode::Append),
        };
        let body = RunRequest {
            prompt: user_prompt,
            model: Some(self.model.clone()),
            system: Some(system),
            system_mode: Some(mode),
-            // Tag placement IS a craft choice; high effort is
-            // plenty for beat sense. Same posture as gen/cleanup.
-            effort: Some(Effort::High),
+            // Tag placement IS a craft choice; max effort buys
+            // better beat sense. Same posture as gen/cleanup.
+            effort: Some(Effort::Max),
            timeout_secs: Some(1800),
            ..Default::default()
        };
@ -346,46 +237,6 @@ impl Forge {
        Ok(PassOutput { kind: PassKind::NarratePrep, result: r, duration_ms })
    }

-    /// Re-author existing chapter prose in the author's voice. The
-    /// model receives prose written by another hand and rewrites it
-    /// entirely in its own style — sentence rhythm, word choice,
-    /// paragraph shape all become the author's. Canon is preserved
-    /// exactly: names, dates, events, places, technical facts, and
-    /// the sequence of what happens do not change.
-    ///
-    /// Author REQUIRED — a rewrite without an author has no target
-    /// voice. SystemMode::Replace; the model BECOMES the author.
-    /// High effort: re-authoring is heavy prose-craft, but it's
-    /// still craft, not reasoning — max is reserved for audit.
-    pub async fn rewrite(
-        &self,
-        prose: &str,
-        author: &AuthorWithRevision,
-    ) -> anyhow::Result<PassOutput> {
-        let scaffold = author
-            .revision
-            .system_template
-            .as_deref()
-            .unwrap_or(DEFAULT_AUTHOR_SCAFFOLD);
-        let system = scaffold
-            .replace("{{display_name}}", &author.author.display_name)
-            .replace("{{pass_directive}}", REWRITE_DIRECTIVE)
-            .replace("{{soul}}", &author.revision.soul);
-        let user_prompt = rewrite_user_prompt(prose);
-        let body = RunRequest {
-            prompt: user_prompt,
-            model: Some(self.model.clone()),
-            system: Some(system),
-            system_mode: Some(SystemMode::Replace),
-            effort: Some(Effort::High),
-            timeout_secs: Some(1800),
-            ..Default::default()
-        };
-        let r = self.client.run(body).await?;
-        let duration_ms = r.duration_ms;
-        Ok(PassOutput { kind: PassKind::Rewrite, result: r, duration_ms })
-    }
-
    /// Summarize one chapter to ~250 words. The summary feeds into
    /// the continuation context for older chapters so the token
    /// budget stays sane on long series (book 12 doesn't carry book 1
@ -484,38 +335,20 @@ Hard rules:
 {{soul}}
 "#;

-const GEN_DIRECTIVE: &str = "This is a GENERATION pass. Write the next chapter from scratch. Honor canon. Begin with a chapter heading on the first line.\n\nGuard against repetition. If a recurring image, motif or descriptive beat appeared in an earlier chapter, render it in fresh words here — never reuse the same sentence shape or verb sequence for it. Do not stack consecutive sentences on one template (\"He thought… He thought…\", \"She felt… She felt…\"). Vary how you land beats. This will be read aloud by a single narrator, so phrasing a silent reader skims is glaring to a listener.";
+const GEN_DIRECTIVE: &str = "This is a GENERATION pass. Write the next chapter from scratch. Honor canon. Begin with a chapter heading on the first line.";

-const CLEANUP_DIRECTIVE: &str = "This is a CLEANUP pass. The user prompt contains a draft you wrote. Polish for prose quality — tighten dialogue, fix pacing dead spots, hold your voice steady. Do NOT add new plot, do NOT retcon canon.\n\nHunt repetition hard — this prose will be narrated aloud, where repetition is far more glaring than on the page. If a recurring image or motif appears more than once, re-phrase every occurrence so no two share a sentence shape or verb sequence. Do not stack more than two consecutive sentences on the same opening stem (\"He thought… He thought…\"). Echo a line of dialogue back verbatim at most once in the whole chapter. If a sentence merely restates something already said, cut it. Return ONLY the polished chapter.";
+const CLEANUP_DIRECTIVE: &str = "This is a CLEANUP pass. The user prompt contains a draft you wrote. Polish for prose quality — tighten dialogue, fix pacing dead spots, hold your voice steady. Do NOT add new plot, do NOT retcon canon. Return ONLY the polished chapter.";

 const HOUSE_GEN_SYSTEM: &str = "You are a long-form fiction author writing the next chapter of a series. Honor the canon (characters, setting, established facts) exactly. Return only the chapter prose, starting with a heading line. No preamble.";

 const HOUSE_CLEANUP_SYSTEM: &str = "You are a copy editor polishing a draft chapter. Tighten dialogue, fix pacing, keep voice consistent. Do not add new plot. Return only the polished chapter.";

-const SYSTEM_PROSE_AUDIT: &str = "You are a ruthless prose-quality auditor for long-form fiction destined for single-voice audiobook narration. You receive a complete story — every chapter, in order — and you read all of it. You return STRUCTURED JSON ONLY: no commentary, no preamble. You hunt four things:\n\n1. REPETITION across the whole book: a recurring image, motif or descriptive beat rendered in near-identical wording more than once; a sentence (or near-identical sentence) reused in different chapters; a line of dialogue echoed verbatim.\n2. TEMPLATE TICS: the same sentence template stacked or over-reused — e.g. clusters of consecutive sentences all opening 'He thought:' / 'She felt:' — anywhere it becomes a noticeable pattern.\n3. SELF-RESTATEMENT: a sentence or paragraph that says again, slightly reworded, something the text already said.\n4. CONTINUITY ERRORS: a fact, name, age, date or detail that contradicts an earlier one.\n\nThis prose will be read ALOUD by one narrator, so repetition a silent reader skims is glaring to a listener — be exhaustive, do not let near-duplicates pass. Return EXACTLY this shape: { \"findings\": [ { \"severity\": \"info\"|\"warn\"|\"crit\", \"area\": \"repetition\"|\"continuity\"|\"other\", \"body\": \"...\" } ] }. In each finding's body, quote the offending text exactly and name the chapter number(s). Use 'crit' for anything a listener will plainly hear as a mistake, 'warn' for noticeable repetition, 'info' for minor. If the story is clean, return { \"findings\": [] }.";
-
 const SYSTEM_AUDIT: &str = "You are a canon auditor for long-form fiction. You compare a parent story and a new chapter against the bible. You flag continuity drift, character voice shift, retconned facts, dropped threads, timeline contradictions. You return STRUCTURED JSON ONLY — no commentary, no preamble. The exact shape: { \"findings\": [ { \"severity\": \"info\"|\"warn\"|\"crit\", \"area\": \"character\"|\"continuity\"|\"tone\"|\"fact\"|\"timeline\"|\"other\", \"body\": \"...\" } ] }. If no findings, return { \"findings\": [] }.";

 const NARRATE_PREP_DIRECTIVE: &str = "This is a NARRATION-ANNOTATION pass. You receive your own prose and prepare it for an audiobook reading. Three kinds of inserts are allowed:\n\n1. BEAT MARKERS (additive, not prose): `[breath]` (~400ms), `[pause:1.2s]` (explicit silence in seconds, e.g. 0.5s, 1.2s, 2s), `[scene]` (~1500ms scene break). Place where the prose's rhythm asks for them — after a hard one-line beat, before a turn in dialogue, on a paragraph that lands with weight.\n\n2. SPEAKER VOICE TAGS (multi-voice dialogue): wrap dialogue lines in `[voice:<slug>]\"...\"[/voice]` based on who is speaking. The roster of available speaker slugs is given in the user prompt. The dialogue itself stays verbatim — only the wrapper is added. If a line of dialogue is not clearly attributable to a roster speaker, leave it unwrapped (the narrator voice will read it). Quoted thoughts (italicized interior monologue) stay unwrapped — only spoken aloud dialogue gets a voice tag.\n\n3. NARRATOR STUMBLES (humanizing prose-level inserts): a real narrator occasionally stumbles on a hard word, catches themselves, repeats. You may add these *sparingly* where the prose's pacing makes them feel right. Patterns: em-dash repetition (`Prip— Pripyat`), self-correction (`she — no, the wife — had been told`), hesitation (`the dose, the dose was`). USE SPARINGLY. Maybe 1-3 per chapter. Pick proper nouns, technical terms, or moments where the narrator might genuinely catch herself. Avoid stumbling on emotional climaxes — those should land clean.\n\nApart from stumbles, do NOT change a word of the original prose. Return the prose with beat markers, voice tags, and stumbles inline. No preamble. No commentary about your choices.";

-/// Single-voice variant of [`NARRATE_PREP_DIRECTIVE`]. Used when the
-/// chapter narrates in one voice (no speaker roster). The multi-voice
-/// directive's section 2 is dropped entirely AND a hard prohibition
-/// is added — without it the model invents `[voice:<slug>]` tags from
-/// character names in the prose, which the narrate path then has to
-/// detect and neutralize.
-const NARRATE_PREP_DIRECTIVE_SINGLE: &str = "This is a NARRATION-ANNOTATION pass. You receive your own prose and prepare it for a SINGLE-narrator audiobook reading — the whole chapter, dialogue included, is read aloud in ONE voice. Two kinds of inserts are allowed:\n\n1. BEAT MARKERS (additive, not prose): `[breath]` (~400ms), `[pause:1.2s]` (explicit silence in seconds, e.g. 0.5s, 1.2s, 2s), `[scene]` (~1500ms scene break). Place where the prose's rhythm asks for them — after a hard one-line beat, before a turn in dialogue, on a paragraph that lands with weight.\n\n2. NARRATOR STUMBLES (humanizing prose-level inserts): a real narrator occasionally stumbles on a hard word, catches themselves, repeats. You may add these *sparingly* where the prose's pacing makes them feel right. Patterns: em-dash repetition (`Prip— Pripyat`), self-correction (`she — no, the wife — had been told`), hesitation (`the dose, the dose was`). USE SPARINGLY. Maybe 1-3 per chapter. Pick proper nouns, technical terms, or moments where the narrator might genuinely catch herself. Avoid stumbling on emotional climaxes — those should land clean.\n\nDo NOT add `[voice:...]` speaker tags of any kind — there is one narrator, not a cast. Apart from stumbles, do NOT change a word of the original prose. Return the prose with beat markers and stumbles inline. No preamble. No commentary about your choices.";
-
 const HOUSE_NARRATE_PREP_SYSTEM: &str = "You are a senior audiobook director annotating prose for narration. You insert (a) beat markers — `[breath]`, `[pause:Xs]`, `[scene]` — where a skilled narrator would breathe or pause, (b) speaker voice tags `[voice:<slug>]\"...\"[/voice]` wrapping dialogue based on who is speaking (roster supplied in user prompt; leave unattributed dialogue unwrapped), and (c) occasional humanizing narrator stumbles using em-dash repetition or self-correction (sparingly — maybe 1-3 per chapter, on proper nouns or hard words). Apart from those stumbles you do NOT change a word of the prose. Return the prose verbatim plus beat markers, voice tags, and (rare) stumbles inline. No preamble, no commentary.";

-/// Single-voice variant of [`HOUSE_NARRATE_PREP_SYSTEM`] — no speaker
-/// voice tags, one narrator throughout.
-const HOUSE_NARRATE_PREP_SYSTEM_SINGLE: &str = "You are a senior audiobook director annotating prose for a SINGLE-narrator reading. You insert (a) beat markers — `[breath]`, `[pause:Xs]`, `[scene]` — where a skilled narrator would breathe or pause, and (b) occasional humanizing narrator stumbles using em-dash repetition or self-correction (sparingly — maybe 1-3 per chapter, on proper nouns or hard words). Do NOT add `[voice:...]` speaker tags — the whole chapter is one voice. Apart from those stumbles you do NOT change a word of the prose. Return the prose verbatim plus beat markers and (rare) stumbles inline. No preamble, no commentary.";
-
-const DEDUP_DIRECTIVE: &str = "This is a DEDUP pass. The user prompt contains ONE chapter of a story you wrote, plus a list of audit findings — repeated phrases, motifs, similes, sentence templates and continuity errors found across the whole book. Your job: return this chapter with every flagged repetition that occurs IN IT rephrased fresh, and everything else byte-identical.\n\nHARD RULES:\n- For any motif, simile, phrase, image or structural tic the findings flag as recurring: if it appears in THIS chapter, render this chapter's occurrence in fresh, distinctive wording. Never reuse the flagged original phrasing. The other chapters' occurrences are being revised separately — do NOT try to coordinate with them; just make yours distinct from the flagged original.\n- Fix any continuity error the findings flag that touches this chapter (a wrong age, number, name, date) — use the correct value the findings identify.\n- Change NOTHING the findings do not flag. Every sentence not implicated by a finding stays EXACTLY as written, word for word. This is not a rewrite, not a polish, not an edit for taste — it is a surgical dedup. When in doubt, leave it.\n- Canon is absolute: names, dates, events, the order they happen, every fact — unchanged. The chapter stays the same length and shape.\n- Return ONLY the chapter prose. No heading unless the source had one. No preamble, no commentary, no list of what you changed.\n\n";
-
-const REWRITE_DIRECTIVE: &str = "This is a REWRITE pass. The user prompt contains a chapter of prose written by another hand. Re-author it entirely in YOUR voice — every sentence reworked in your style: your sentence rhythm, your word choice, your paragraph shape, your way of landing a beat. This is not editing or polishing. It is re-authoring. The reader should not be able to tell another writer ever touched it.\n\nHARD CONSTRAINTS — canon is non-negotiable:\n- Every character name, every date, every place name stays exactly as written.\n- Every event, and the ORDER events happen in, stays exactly as written.\n- Every technical or historical fact stays exactly as written.\n- Do not add new scenes, characters, or events. Do not cut any scene or beat. Same story, same shape — your telling.\n\nReturn ONLY the rewritten chapter prose. Begin with the chapter heading line (`## Chapter N — title`) exactly as in the source. No preamble, no commentary about the rewrite.";
-
 // ─── User-prompt builders ───────────────────────────────────────

 fn gen_user_prompt(
@ -562,38 +395,6 @@ pub struct CharacterSpeaker {
    pub hint: Option<String>,
 }

-fn dedup_user_prompt(prose: &str, findings: &str) -> String {
-    let mut out = String::with_capacity(prose.len() + findings.len() + 512);
-    out.push_str("# Audit findings for this story\n\n");
-    out.push_str(
-        "These repetitions and errors were found across the whole book. \
-         Fix only the ones that occur in the chapter below.\n\n",
-    );
-    out.push_str(findings);
-    out.push_str("\n\n# Chapter to dedup\n\n");
-    out.push_str(prose);
-    out.push_str(
-        "\n\n# Task\n\nReturn the chapter above with every flagged repetition \
-         that appears in it rephrased fresh, and any flagged continuity error \
-         touching it corrected. Leave every unflagged sentence verbatim. \
-         Return only the chapter prose.\n",
-    );
-    out
-}
-
-fn rewrite_user_prompt(prose: &str) -> String {
-    let mut out = String::with_capacity(prose.len() + 256);
-    out.push_str("# Chapter to re-author\n\n");
-    out.push_str(prose);
-    out.push_str(
-        "\n\n# Task\n\nRe-author the chapter above entirely in your voice. \
-         Preserve all canon — names, dates, places, events, the order they \
-         happen, every technical fact. Change only the prose. Return only \
-         the rewritten chapter, starting with its `## Chapter N` heading.\n",
-    );
-    out
-}
-
 fn narrate_prep_user_prompt(prose: &str, characters: &[CharacterSpeaker]) -> String {
    let mut out = String::with_capacity(prose.len() + 512);

@ -623,22 +424,12 @@ fn narrate_prep_user_prompt(prose: &str, characters: &[CharacterSpeaker]) -> Str

    out.push_str("# Prose to annotate\n\n");
    out.push_str(prose);
-    if characters.is_empty() {
-        out.push_str(
-            "\n\n# Task\n\nReturn the prose above with `[breath]`, `[pause:Xs]`, \
-             `[scene]` beat markers inserted appropriately. Do NOT add any \
-             `[voice:...]` tags — this is a single-voice reading. Do not \
-             change any word. Do not skip any sentence. Return only the \
-             annotated prose.\n",
-        );
-    } else {
-        out.push_str(
-            "\n\n# Task\n\nReturn the prose above with `[breath]`, `[pause:Xs]`, \
-             `[scene]` markers and `[voice:<slug>]\"...\"[/voice]` dialogue wrappers \
-             inserted appropriately. Do not change any word. Do not skip any \
-             sentence. Return only the annotated prose.\n",
-        );
-    }
+    out.push_str(
+        "\n\n# Task\n\nReturn the prose above with `[breath]`, `[pause:Xs]`, \
+         `[scene]` markers and `[voice:<slug>]\"...\"[/voice]` dialogue wrappers \
+         inserted appropriately. Do not change any word. Do not skip any \
+         sentence. Return only the annotated prose.\n",
+    );
    out
 }

@ -666,10 +457,6 @@ fn build_audit_request(model: &str, parent: &str, sequel: &str, bible: &str) ->
        prompt,
        model: Some(model.to_string()),
        system: Some(SYSTEM_AUDIT.to_string()),
-        // Audit is the one pass that keeps MAX effort — catching
-        // canon drift, timeline gaps and retcons is reasoning work
-        // worth the frontier spend; prose-craft passes run at high.
-        effort: Some(Effort::Max),
        timeout_secs: Some(600),
        ..Default::default()
    }
--- a/skald-core/src/narrate.rs
+++ b/skald-core/src/narrate.rs
@ -27,7 +27,7 @@ use uuid::Uuid;

 #[derive(Debug, Clone)]
 pub struct F5Config {
-    /// e.g. http://localhost:7792
+    /// e.g. http://127.0.0.1:7792
    pub base_url: String,
    /// Inference subprocess timeout. Long-form chapters (3000 words)
    /// take 60-180s on an 8GB GPU; cap at 1800s to match clawdforge.
--- a/skald/src/audit.rs
+++ b/skald/src/audit.rs
@ -1,144 +0,0 @@
-//! `skald audit` — whole-story prose-quality audit. Reads every
-//! chapter of a story end to end and flags repetition, template
-//! tics, self-restatement and continuity drift. The QC gate before
-//! a story goes to narration, where repetition a silent reader
-//! skims becomes glaring once read aloud.
-//!
-//! Findings land in the `audit_findings` table (area 'repetition' /
-//! 'continuity' / 'other') and are printed to stdout. v1 flags
-//! only — acting on the findings (a fix pass) is a separate step.
-
-use std::time::Instant;
-
-use anyhow::{Context, bail};
-use chrono::Utc;
-use skald_core::config::ForgeConfig;
-use skald_core::db;
-use skald_core::forge::{AuditFinding, AuditResponse, Forge, PassKind, PassOutput};
-use uuid::Uuid;
-
-pub async fn run(database_url: &str, story_id: Uuid) -> anyhow::Result<()> {
-    let cfg = load_forge_config()?;
-    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
-
-    let pool = db::connect_and_migrate(database_url).await?;
-    let forge = Forge::new(&cfg)?;
-
-    let title: String = sqlx::query_scalar("SELECT title FROM stories WHERE id = $1")
-        .bind(story_id)
-        .fetch_optional(&pool)
-        .await?
-        .with_context(|| format!("story {story_id} not found"))?;
-
-    let chapters: Vec<(i32, Option<String>, String)> = sqlx::query_as(
-        "SELECT n, title, body_md FROM chapters WHERE story_id = $1 ORDER BY n",
-    )
-    .bind(story_id)
-    .fetch_all(&pool)
-    .await?;
-    if chapters.is_empty() {
-        bail!("story {story_id} has no chapters to audit");
-    }
-
-    // Concatenate the whole story in chapter order — the audit needs
-    // to see across chapter boundaries to catch cross-chapter reuse.
-    let mut full = String::new();
-    for (n, ct, body) in &chapters {
-        full.push_str(&format!(
-            "## Chapter {n} — {}\n\n",
-            ct.as_deref().unwrap_or("")
-        ));
-        full.push_str(body);
-        full.push_str("\n\n");
-    }
-    tracing::info!(
-        story = %title,
-        chapters = chapters.len(),
-        chars = full.len(),
-        "prose audit starting",
-    );
-
-    let run_id: Uuid = sqlx::query_scalar(
-        "INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
-    )
-    .bind(story_id)
-    .bind(PassKind::ProseAudit.as_str())
-    .fetch_one(&pool)
-    .await?;
-
-    let started = Instant::now();
-    let out_res = forge.prose_audit(&full).await;
-    let elapsed = started.elapsed();
-
-    let out: PassOutput = match out_res {
-        Ok(o) => o,
-        Err(e) => {
-            sqlx::query(
-                "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
-            )
-            .bind(format!("{e:#}"))
-            .bind(Utc::now())
-            .bind(run_id)
-            .execute(&pool)
-            .await?;
-            return Err(e);
-        }
-    };
-
-    let findings = parse_findings(&out);
-    for f in &findings {
-        sqlx::query(
-            "INSERT INTO audit_findings (story_id, run_id, severity, area, body)
-             VALUES ($1, $2, $3, $4, $5)",
-        )
-        .bind(story_id)
-        .bind(run_id)
-        .bind(&f.severity)
-        .bind(&f.area)
-        .bind(&f.body)
-        .execute(&pool)
-        .await?;
-    }
-    sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
-        .bind(Utc::now())
-        .bind(run_id)
-        .execute(&pool)
-        .await?;
-
-    let crit = findings.iter().filter(|f| f.severity == "crit").count();
-    let warn = findings.iter().filter(|f| f.severity == "warn").count();
-    let info = findings.iter().filter(|f| f.severity == "info").count();
-    println!(
-        "prose audit: \"{title}\" — {} finding(s): {crit} crit, {warn} warn, {info} info ({:.1}s)",
-        findings.len(),
-        elapsed.as_secs_f32(),
-    );
-    for f in &findings {
-        println!("\n[{} · {}]\n{}", f.severity.to_uppercase(), f.area, f.body);
-    }
-    Ok(())
-}
-
-fn parse_findings(out: &PassOutput) -> Vec<AuditFinding> {
-    if let Ok(typed) = out.result.as_json::<AuditResponse>() {
-        return typed.findings;
-    }
-    if let Some(s) = out.result.as_text() {
-        if let Ok(typed) = serde_json::from_str::<AuditResponse>(s) {
-            return typed.findings;
-        }
-    }
-    tracing::warn!("prose audit output did not parse as AuditResponse — no findings recorded");
-    Vec::new()
-}
-
-fn load_forge_config() -> anyhow::Result<ForgeConfig> {
-    let base_url = std::env::var("CLAWDFORGE_URL").context("CLAWDFORGE_URL not set")?;
-    let app_token = std::env::var("CLAWDFORGE_TOKEN").context("CLAWDFORGE_TOKEN not set")?;
-    let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
-    Ok(ForgeConfig {
-        base_url,
-        app_token,
-        model,
-    })
-}
--- a/skald/src/dedup.rs
+++ b/skald/src/dedup.rs
@ -1,271 +0,0 @@
-//! `skald dedup` — the fix half of the audit loop. Reads a story's
-//! most recent prose-audit findings and walks the chapters, handing
-//! each chapter + the findings to the author with instructions to
-//! rephrase ONLY the flagged repetitions and leave everything else
-//! verbatim. Overwrites body_md and clears body_md_tts so the
-//! chapter gets re-prepped before narration.
-//!
-//! Run `skald audit` first — dedup needs findings to act on.
-
-use std::time::Instant;
-
-use anyhow::{Context, bail};
-use chrono::Utc;
-use skald_core::authors::{self, AuthorWithRevision};
-use skald_core::config::ForgeConfig;
-use skald_core::db;
-use skald_core::forge::{Forge, PassKind, PassOutput};
-use sqlx::PgPool;
-use uuid::Uuid;
-
-pub async fn run(
-    database_url: &str,
-    story_id: Uuid,
-    author_slug: Option<&str>,
-    chapter_filter: Option<i32>,
-) -> anyhow::Result<()> {
-    let cfg = load_forge_config()?;
-    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
-
-    let pool = db::connect_and_migrate(database_url).await?;
-    let forge = Forge::new(&cfg)?;
-
-    let story: Option<(String, Option<Uuid>)> =
-        sqlx::query_as("SELECT title, author_id FROM stories WHERE id = $1")
-            .bind(story_id)
-            .fetch_optional(&pool)
-            .await?;
-    let (title, story_author_id) =
-        story.with_context(|| format!("story {story_id} not found"))?;
-
-    let author = resolve_author(&pool, story_author_id, author_slug)
-        .await?
-        .ok_or_else(|| {
-            anyhow::anyhow!(
-                "dedup needs an author for the rephrasing — pass --author <slug> \
-                 or bind one to the story"
-            )
-        })?;
-
-    // Findings from the most recent succeeded prose-audit run.
-    let findings = load_latest_findings(&pool, story_id).await?;
-    if findings.is_empty() {
-        bail!(
-            "no prose-audit findings for story {story_id} — run `skald audit \
-             --story {story_id}` first"
-        );
-    }
-    let findings_block = render_findings(&findings);
-    tracing::info!(
-        story = %title,
-        author = %author.author.slug,
-        finding_count = findings.len(),
-        "dedup starting",
-    );
-
-    let mut chapters: Vec<(Uuid, i32)> = sqlx::query_as(
-        "SELECT id, n FROM chapters WHERE story_id = $1 ORDER BY n",
-    )
-    .bind(story_id)
-    .fetch_all(&pool)
-    .await?;
-    if chapters.is_empty() {
-        bail!("story {story_id} has no chapters");
-    }
-    // --chapter narrows the run to one chapter — used to retry a
-    // chapter the length guard skipped on an earlier run.
-    if let Some(target) = chapter_filter {
-        chapters.retain(|(_, n)| *n == target);
-        if chapters.is_empty() {
-            bail!("chapter {target} not found in story {story_id}");
-        }
-    }
-
-    let mut skipped = 0usize;
-    for (chapter_id, n) in &chapters {
-        let body_md: String =
-            sqlx::query_scalar("SELECT body_md FROM chapters WHERE id = $1")
-                .bind(chapter_id)
-                .fetch_one(&pool)
-                .await?;
-
-        let run_id: Uuid = sqlx::query_scalar(
-            "INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
-        )
-        .bind(story_id)
-        .bind(PassKind::Dedup.as_str())
-        .fetch_one(&pool)
-        .await?;
-
-        let started = Instant::now();
-        let out_res = forge.dedup(&body_md, &findings_block, &author).await;
-        let elapsed = started.elapsed();
-
-        let out: PassOutput = match out_res {
-            Ok(o) => o,
-            Err(e) => {
-                sqlx::query(
-                    "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
-                )
-                .bind(format!("{e:#}"))
-                .bind(Utc::now())
-                .bind(run_id)
-                .execute(&pool)
-                .await?;
-                return Err(e).with_context(|| format!("dedup failed on chapter {n}"));
-            }
-        };
-
-        let deduped = pass_text(&out)?;
-
-        // Sanity guard: a surgical dedup nudges a chapter's length by
-        // a little. An output wildly off the input means the model
-        // duplicated or ballooned the chapter — reject it, leave the
-        // chapter untouched, move on. A re-run with --chapter retries
-        // just the skipped one.
-        let before = body_md.len();
-        let after = deduped.len();
-        if after > before * 3 / 2 || after < before * 3 / 5 {
-            sqlx::query(
-                "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
-            )
-            .bind(format!(
-                "rejected: dedup output {after}c is wildly off input {before}c \
-                 — likely a duplicated or ballooned output"
-            ))
-            .bind(Utc::now())
-            .bind(run_id)
-            .execute(&pool)
-            .await?;
-            skipped += 1;
-            println!(
-                "SKIPPED chapter {n}: dedup returned {after}c from {before}c \
-                 — chapter left untouched (retry with --chapter {n})"
-            );
-            continue;
-        }
-
-        // Overwrite body_md and clear body_md_tts — the chapter must be
-        // re-prepped before it is narrated again. body_md_original is
-        // left untouched (it belongs to the rewrite pass).
-        sqlx::query("UPDATE chapters SET body_md = $1, body_md_tts = NULL WHERE id = $2")
-            .bind(&deduped)
-            .bind(chapter_id)
-            .execute(&pool)
-            .await?;
-        sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
-            .bind(Utc::now())
-            .bind(run_id)
-            .execute(&pool)
-            .await?;
-
-        println!(
-            "deduped chapter {n} ({before}c -> {after}c) in {:.1}s",
-            elapsed.as_secs_f32(),
-        );
-    }
-
-    if skipped > 0 {
-        println!(
-            "dedup complete: \"{title}\" — {} chapter(s) processed, {skipped} SKIPPED \
-             (retry each with --chapter)",
-            chapters.len(),
-        );
-    } else {
-        println!(
-            "dedup complete: \"{title}\" — {} chapter(s) deduped against {} finding(s)",
-            chapters.len(),
-            findings.len(),
-        );
-    }
-    Ok(())
-}
-
-#[derive(Debug, Clone)]
-struct Finding {
-    severity: String,
-    area: String,
-    body: String,
-}
-
-async fn load_latest_findings(pool: &PgPool, story_id: Uuid) -> anyhow::Result<Vec<Finding>> {
-    let rows: Vec<(String, String, String)> = sqlx::query_as(
-        "SELECT severity, area, body FROM audit_findings
-         WHERE story_id = $1
-           AND run_id = (
-             SELECT id FROM generation_runs
-             WHERE story_id = $1 AND kind = 'prose_audit' AND status = 'succeeded'
-             ORDER BY started_at DESC LIMIT 1
-           )
-         ORDER BY
-           CASE severity WHEN 'crit' THEN 0 WHEN 'warn' THEN 1 ELSE 2 END,
-           area",
-    )
-    .bind(story_id)
-    .fetch_all(pool)
-    .await?;
-    Ok(rows
-        .into_iter()
-        .map(|(severity, area, body)| Finding { severity, area, body })
-        .collect())
-}
-
-fn render_findings(findings: &[Finding]) -> String {
-    let mut out = String::new();
-    for f in findings {
-        out.push_str(&format!(
-            "[{} · {}]\n{}\n\n",
-            f.severity.to_uppercase(),
-            f.area,
-            f.body,
-        ));
-    }
-    out
-}
-
-async fn resolve_author(
-    pool: &PgPool,
-    story_author_id: Option<Uuid>,
-    flag_slug: Option<&str>,
-) -> anyhow::Result<Option<AuthorWithRevision>> {
-    if let Some(slug) = flag_slug {
-        return authors::get_with_current_revision(pool, slug)
-            .await?
-            .map(Some)
-            .with_context(|| format!("author '{slug}' not found"));
-    }
-    if let Some(aid) = story_author_id {
-        let row: Option<(String,)> = sqlx::query_as("SELECT slug FROM authors WHERE id = $1")
-            .bind(aid)
-            .fetch_optional(pool)
-            .await?;
-        if let Some((slug,)) = row {
-            return Ok(authors::get_with_current_revision(pool, &slug).await?);
-        }
-    }
-    Ok(None)
-}
-
-fn pass_text(out: &PassOutput) -> anyhow::Result<String> {
-    let text = out
-        .result
-        .as_text()
-        .map(|s| s.to_string())
-        .or_else(|| out.result.result.as_str().map(|s| s.to_string()))
-        .unwrap_or_else(|| out.result.result.to_string());
-    if text.trim().is_empty() {
-        bail!("dedup pass returned empty");
-    }
-    Ok(text)
-}
-
-fn load_forge_config() -> anyhow::Result<ForgeConfig> {
-    let base_url = std::env::var("CLAWDFORGE_URL").context("CLAWDFORGE_URL not set")?;
-    let app_token = std::env::var("CLAWDFORGE_TOKEN").context("CLAWDFORGE_TOKEN not set")?;
-    let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
-    Ok(ForgeConfig {
-        base_url,
-        app_token,
-        model,
-    })
-}
--- a/skald/src/main.rs
+++ b/skald/src/main.rs
@ -4,14 +4,11 @@
 //!   skald serve              — boot the http server (v0.1 = /health + migrations)
 //!   skald import-markdown    — ingest a story markdown file into the DB

-mod audit;
 mod authors_seed;
 mod continue_story;
-mod dedup;
 mod import;
 mod narrate;
 mod narrate_prep;
-mod rewrite;
 mod serve;
 mod show_context;
 mod summarize;
@ -30,8 +27,8 @@ use uuid::Uuid;
    about = "Long-form story-writer. Database is the source of truth; the writer is the tooling."
 )]
 struct Cli {
-    /// Postgres connection URL. Read from `DATABASE_URL` if unset.
-    #[arg(long, env = "DATABASE_URL")]
+    /// Postgres connection URL. Defaults to `postgresql://skald:skald@localhost:5432/skald`.
+    #[arg(long, env = "DATABASE_URL", default_value = "postgresql://skald:skald@localhost:5432/skald")]
    database_url: String,

    #[command(subcommand)]
@ -158,51 +155,6 @@ enum Cmd {
        /// errors out to avoid clobbering a hand-tuned version.
        #[arg(long)]
        overwrite: bool,
-        /// Single-voice mode: skip the character speaker roster so
-        /// no [voice:X] dialogue tags are inserted. Use when the
-        /// whole chapter narrates in one voice.
-        #[arg(long)]
-        single_voice: bool,
-    },
-    /// Re-author one chapter's prose in an author's voice. Canon
-    /// preserved, prose reworked. Overwrites body_md (stashing the
-    /// original in body_md_original) and clears body_md_tts.
-    Rewrite {
-        /// Chapter UUID to re-author.
-        #[arg(long)]
-        chapter: Uuid,
-        /// Author slug to rewrite as. Falls back to the story's
-        /// bound author if omitted.
-        #[arg(long)]
-        author: Option<String>,
-    },
-    /// Whole-story prose-quality audit. Reads every chapter end to
-    /// end and flags repetition, template tics, self-restatement
-    /// and continuity drift. The QC gate before narration. Findings
-    /// land in audit_findings and print to stdout. Requires
-    /// CLAWDFORGE_URL + CLAWDFORGE_TOKEN.
-    Audit {
-        /// Story to audit.
-        #[arg(long)]
-        story: Uuid,
-    },
-    /// Dedup a story against its most recent prose-audit findings.
-    /// Walks every chapter, rephrasing only the flagged repetitions
-    /// and fixing flagged continuity errors — everything else stays
-    /// verbatim. Overwrites body_md and clears body_md_tts. Run
-    /// `skald audit` first.
-    Dedup {
-        /// Story to dedup.
-        #[arg(long)]
-        story: Uuid,
-        /// Author slug for the rephrasing. Falls back to the story's
-        /// bound author if omitted.
-        #[arg(long)]
-        author: Option<String>,
-        /// Restrict the run to a single chapter number — used to
-        /// retry a chapter the length guard skipped.
-        #[arg(long)]
-        chapter: Option<i32>,
    },
 }

@ -278,23 +230,8 @@ async fn run() -> anyhow::Result<()> {
            chapter,
            author,
            overwrite,
-            single_voice,
        } => {
-            narrate_prep::run(
-                &cli.database_url,
-                chapter,
-                author.as_deref(),
-                overwrite,
-                single_voice,
-            )
-            .await
-        }
-        Cmd::Rewrite { chapter, author } => {
-            rewrite::run(&cli.database_url, chapter, author.as_deref()).await
-        }
-        Cmd::Audit { story } => audit::run(&cli.database_url, story).await,
-        Cmd::Dedup { story, author, chapter } => {
-            dedup::run(&cli.database_url, story, author.as_deref(), chapter).await
+            narrate_prep::run(&cli.database_url, chapter, author.as_deref(), overwrite).await
        }
    }
 }
--- a/skald/src/narrate.rs
+++ b/skald/src/narrate.rs
@ -65,16 +65,12 @@ pub async fn run(
    let run_id = Uuid::new_v4();
    let output_filename = format!("{}-{}-{}.wav", chapter.story_id, chapter.n, run_id);

-    // Engine + version threaded from the voice row's source. Three
-    // engines on the host currently:
-    //   kokoro_*    → kokoro 82M
-    //   tortoise_*  → tortoise-tts
-    //   anything else (lj_speech etc.) → f5-tts
-    // Future: a dedicated voices.engine column to make this explicit.
+    // Engine + version are derived from the prefix of the voice
+    // row's `source`: kokoro_* voices live behind kokoro; all other
+    // voices (lj_speech-style PD) live behind f5-tts. Future: a
+    // dedicated voices.engine column to make this explicit.
    let (engine, engine_version) = if voice.source.starts_with("kokoro") {
        ("kokoro-82m", "0.9")
-    } else if voice.source.starts_with("tortoise") {
-        ("tortoise-tts", "3.0")
    } else {
        ("f5-tts", "1.1.20")
    };
@ -95,20 +91,14 @@ pub async fn run(
    // the Kokoro server only ever sees real voice ids. Only kicks
    // in for kokoro-routed renders; F5 voice-tag handling isn't
    // implemented and any tags pass through unchanged.
-    // Two pre-processing passes (kokoro + tortoise — engines that
-    // parse [voice:X] dialogue tags). Order matters:
-    //   1. Speaker voice substitution rewrites [voice:slug] → the
-    //      engine's named voice id. Must run BEFORE pronunciation
-    //      overrides so we don't try to respell character slugs.
-    //      Tortoise: characters with no tortoise-voice mapping
-    //      gracefully fall back to the narrator voice server-side.
+    // Two pre-processing passes (kokoro only). Order matters:
+    //   1. Speaker voice substitution rewrites [voice:slug] → [voice:kokoro_id].
+    //      This must run BEFORE pronunciation overrides so we don't
+    //      accidentally try to respell character slugs.
    //   2. Pronunciation overrides word-substitute proper nouns
-    //      (Pripyat, Dyatlov, etc.) with English-readable
-    //      respellings. The respellings are kokoro/misaki-tuned but
-    //      pass through tortoise's g2p_en well enough to apply.
-    let routes_to_engine_with_voice_tags =
-        voice.source.starts_with("kokoro") || voice.source.starts_with("tortoise");
-    let gen_text = if routes_to_engine_with_voice_tags {
+    //      (Pripyat, Dyatlov, etc.) with English-readable respellings
+    //      so Kokoro's small phonemizer doesn't mangle them.
+    let gen_text = if voice.source.starts_with("kokoro") {
        let voiced = substitute_speaker_voices(
            &pool,
            chapter.story_id,
@ -161,12 +151,6 @@ pub async fn run(
    .execute(&pool)
    .await?;

-    // This chapter now has a fresh canonical render. Prior render
-    // WAVs are dead weight — every re-render otherwise leaves its
-    // predecessor on disk forever. Reclaim it. Best-effort: a
-    // cleanup failure must never fail an otherwise-good render.
-    cleanup_superseded_renders(&pool, chapter_id, run_row_id).await;
-
    println!(
        "narrated chapter {} of story {}: {} ({:.2}s audio, {:.1}s wall clock)",
        chapter.n,
@ -389,84 +373,15 @@ async fn apply_pronunciation_overrides(
    Ok(out)
 }

-/// Delete the WAV files of prior renders of this chapter and clear
-/// their `output_path`. The newest succeeded render is the canonical
-/// one; older renders are superseded the moment a new one lands, and
-/// without this every re-render would leave a stale ~80MB file on
-/// disk forever.
-///
-/// The `narration_runs` rows themselves are KEPT — engine, voice,
-/// timing and status stay as render history. Only `output_path` is
-/// nulled, so no row ever points at a file that no longer exists.
-///
-/// Best-effort throughout: this runs *after* the current render has
-/// already been recorded as succeeded, so any failure here (a query
-/// error, a permission problem on the audio dir) is logged and
-/// swallowed — it must never turn a good render into a failed one.
-async fn cleanup_superseded_renders(pool: &PgPool, chapter_id: Uuid, current_run: Uuid) {
-    // output_path is only ever set on the success UPDATE, so
-    // "output_path IS NOT NULL AND id != current" is exactly the set
-    // of prior completed renders.
-    let prior: Vec<(Uuid, String)> = match sqlx::query_as(
-        "SELECT id, output_path FROM narration_runs
-         WHERE chapter_id = $1 AND id <> $2 AND output_path IS NOT NULL",
-    )
-    .bind(chapter_id)
-    .bind(current_run)
-    .fetch_all(pool)
-    .await
-    {
-        Ok(rows) => rows,
-        Err(e) => {
-            tracing::warn!(error = %e, "superseded-render cleanup: query failed, skipping");
-            return;
-        }
-    };
-
-    for (run_id, output_path) in prior {
-        // output_path is the HTTP-facing path "/audio/<file>"; the
-        // `/audio` bind mount means that is also the on-disk path
-        // inside this container.
-        match std::fs::remove_file(&output_path) {
-            Ok(()) => {
-                tracing::info!(run_id = %run_id, path = %output_path, "removed superseded render");
-            }
-            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
-                // File already gone — still clear the dangling row.
-            }
-            Err(e) => {
-                // Could not delete — leave output_path intact rather
-                // than pointing the row at nothing.
-                tracing::warn!(
-                    run_id = %run_id, path = %output_path, error = %e,
-                    "superseded-render cleanup: could not delete file, leaving row intact",
-                );
-                continue;
-            }
-        }
-        if let Err(e) = sqlx::query("UPDATE narration_runs SET output_path = NULL WHERE id = $1")
-            .bind(run_id)
-            .execute(pool)
-            .await
-        {
-            tracing::warn!(run_id = %run_id, error = %e, "superseded-render cleanup: could not null output_path");
-        }
+/// Pick the engine base URL for a given voice.source. A source that
+/// starts with "kokoro" reads KOKORO_URL; anything else reads F5_TTS_URL.
+/// If the variable cannot be read, a loopback (127.0.0.1) default is used.
+fn engine_url_for(source: &str) -> anyhow::Result<String> {
+    if source.starts_with("kokoro") {
+        Ok(std::env::var("KOKORO_URL")
+            .unwrap_or_else(|_| "http://127.0.0.1:7794".into()))
+    } else {
+        Ok(std::env::var("F5_TTS_URL")
+            .unwrap_or_else(|_| "http://127.0.0.1:7792".into()))
     }
 }
-
-/// Pick the engine base URL for a given voice.source.
-///   kokoro_*    → KOKORO_URL
-///   tortoise_*  → TORTOISE_URL
-///   anything else (lj_speech etc.) → F5_TTS_URL
-fn engine_url_for(source: &str) -> anyhow::Result<String> {
-    let (env_var, engine) = if source.starts_with("kokoro") {
-        ("KOKORO_URL", "kokoro")
-    } else if source.starts_with("tortoise") {
-        ("TORTOISE_URL", "tortoise")
-    } else {
-        ("F5_TTS_URL", "f5-tts")
-    };
-    std::env::var(env_var).map_err(|_| {
-        anyhow::anyhow!("{env_var} not set — point at the {engine} sidecar")
-    })
-}
--- a/skald/src/narrate_prep.rs
+++ b/skald/src/narrate_prep.rs
@ -24,7 +24,6 @@ pub async fn run(
    chapter_id: Uuid,
    author_slug: Option<&str>,
    overwrite: bool,
-    single_voice: bool,
 ) -> anyhow::Result<()> {
    let cfg = load_forge_config()?;
    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
@ -61,20 +60,10 @@ pub async fn run(
    .fetch_one(&pool)
    .await?;

-    // Single-voice mode skips the speaker roster entirely — the
-    // narrate_prep pass then inserts only [breath]/[pause]/[scene]
-    // beats, no [voice:X] dialogue tags. Right when the whole
-    // chapter narrates in one voice.
-    let characters = if single_voice {
-        tracing::info!("single-voice mode — skipping speaker roster");
-        Vec::new()
-    } else {
-        let c = load_speakers(&pool, chapter.story_id).await?;
-        if !c.is_empty() {
-            tracing::info!(speaker_count = c.len(), "speaker roster loaded");
-        }
-        c
-    };
+    let characters = load_speakers(&pool, chapter.story_id).await?;
+    if !characters.is_empty() {
+        tracing::info!(speaker_count = characters.len(), "speaker roster loaded");
+    }

    let started = Instant::now();
    let out_res = forge
--- a/skald/src/rewrite.rs
+++ b/skald/src/rewrite.rs
@ -1,252 +0,0 @@
-//! `skald rewrite` — re-author one chapter's prose in an author's
-//! voice. Canon preserved, prose reworked. Overwrites chapters.body_md
-//! with the rewritten version; the pre-rewrite prose is stashed in
-//! chapters.body_md_original on the first rewrite (if NULL) so the
-//! original is never lost.
-//!
-//! Author resolution: --author flag wins, else the chapter's
-//! story.author_id. A rewrite with no author errors — there's no
-//! target voice.
-
-use std::time::Instant;
-
-use anyhow::{Context, bail};
-use chrono::Utc;
-use skald_core::authors::{self, AuthorWithRevision};
-use skald_core::config::ForgeConfig;
-use skald_core::db;
-use skald_core::forge::{Forge, PassKind, PassOutput};
-use sqlx::PgPool;
-use uuid::Uuid;
-
-pub async fn run(
-    database_url: &str,
-    chapter_id: Uuid,
-    author_slug: Option<&str>,
-) -> anyhow::Result<()> {
-    let cfg = load_forge_config()?;
-    tracing::info!(base_url = %cfg.base_url, model = %cfg.model, "forge configured");
-
-    let pool = db::connect_and_migrate(database_url).await?;
-    let forge = Forge::new(&cfg)?;
-
-    let chapter = load_chapter(&pool, chapter_id).await?;
-    let author = resolve_author(&pool, &chapter, author_slug)
-        .await?
-        .ok_or_else(|| {
-            anyhow::anyhow!(
-                "rewrite needs an author — pass --author <slug> or bind one to the story"
-            )
-        })?;
-    tracing::info!(
-        slug = %author.author.slug,
-        revision_n = author.revision.n,
-        chapter_n = chapter.n,
-        word_count_in = word_count(&chapter.body_md),
-        "re-authoring chapter",
-    );
-
-    let run_id: Uuid = sqlx::query_scalar(
-        "INSERT INTO generation_runs (story_id, kind, status) VALUES ($1, $2, 'running') RETURNING id",
-    )
-    .bind(chapter.story_id)
-    .bind(PassKind::Rewrite.as_str())
-    .fetch_one(&pool)
-    .await?;
-
-    let started = Instant::now();
-    let out_res = forge.rewrite(&chapter.body_md, &author).await;
-    let elapsed = started.elapsed();
-
-    let out: PassOutput = match out_res {
-        Ok(o) => o,
-        Err(e) => {
-            sqlx::query(
-                "UPDATE generation_runs SET status='failed', error=$1, ended_at=$2 WHERE id=$3",
-            )
-            .bind(format!("{e:#}"))
-            .bind(Utc::now())
-            .bind(run_id)
-            .execute(&pool)
-            .await?;
-            return Err(e);
-        }
-    };
-
-    let rewritten = pass_text(&out)?;
-    let (_n, title, body) = parse_chapter(&rewritten);
-
-    // Stash the original on first rewrite, then overwrite body_md.
-    // body_md_tts is cleared — it was annotated against the OLD
-    // prose and must be regenerated by a fresh prepare-narration.
-    sqlx::query(
-        "UPDATE chapters
-         SET body_md_original = COALESCE(body_md_original, body_md),
-             body_md = $1,
-             title = COALESCE($2, title),
-             body_md_tts = NULL,
-             word_count = $3,
-             generated_at = now()
-         WHERE id = $4",
-    )
-    .bind(&body)
-    .bind(title.as_deref())
-    .bind(word_count(&body))
-    .bind(chapter_id)
-    .execute(&pool)
-    .await?;
-
-    // Replace passages with the rewritten paragraphs.
-    sqlx::query("DELETE FROM passages WHERE chapter_id = $1")
-        .bind(chapter_id)
-        .execute(&pool)
-        .await?;
-    for (i, para) in body.split("\n\n").enumerate() {
-        let p = para.trim();
-        if p.is_empty() || p == "---" {
-            continue;
-        }
-        sqlx::query("INSERT INTO passages (chapter_id, paragraph_n, body) VALUES ($1, $2, $3)")
-            .bind(chapter_id)
-            .bind(i as i32 + 1)
-            .bind(p)
-            .execute(&pool)
-            .await?;
-    }
-    sqlx::query(
-        "UPDATE stories SET word_count_actual = (SELECT COALESCE(SUM(word_count), 0) FROM chapters WHERE story_id = $1) WHERE id = $1",
-    )
-    .bind(chapter.story_id)
-    .execute(&pool)
-    .await?;
-
-    sqlx::query("UPDATE generation_runs SET status='succeeded', ended_at=$1 WHERE id=$2")
-        .bind(Utc::now())
-        .bind(run_id)
-        .execute(&pool)
-        .await?;
-
-    println!(
-        "rewrote chapter {} of story {} as {} ({} → {} words) in {:.1}s",
-        chapter.n,
-        chapter.story_id,
-        author.author.slug,
-        word_count(&chapter.body_md),
-        word_count(&body),
-        elapsed.as_secs_f32(),
-    );
-    Ok(())
-}
-
-#[derive(Debug, Clone)]
-struct ChapterRow {
-    story_id: Uuid,
-    n: i32,
-    body_md: String,
-    story_author_id: Option<Uuid>,
-}
-
-async fn load_chapter(pool: &PgPool, id: Uuid) -> anyhow::Result<ChapterRow> {
-    let row: Option<(Uuid, i32, String, Option<Uuid>)> = sqlx::query_as(
-        "SELECT c.story_id, c.n, c.body_md, s.author_id
-         FROM chapters c JOIN stories s ON s.id = c.story_id
-         WHERE c.id = $1",
-    )
-    .bind(id)
-    .fetch_optional(pool)
-    .await?;
-    let (story_id, n, body_md, story_author_id) =
-        row.with_context(|| format!("chapter {id} not found"))?;
-    Ok(ChapterRow {
-        story_id,
-        n,
-        body_md,
-        story_author_id,
-    })
-}
-
-async fn resolve_author(
-    pool: &PgPool,
-    chapter: &ChapterRow,
-    flag_slug: Option<&str>,
-) -> anyhow::Result<Option<AuthorWithRevision>> {
-    if let Some(slug) = flag_slug {
-        return authors::get_with_current_revision(pool, slug)
-            .await?
-            .map(Some)
-            .with_context(|| format!("author '{slug}' not found"));
-    }
-    if let Some(aid) = chapter.story_author_id {
-        let row: Option<(String,)> = sqlx::query_as("SELECT slug FROM authors WHERE id = $1")
-            .bind(aid)
-            .fetch_optional(pool)
-            .await?;
-        if let Some((slug,)) = row {
-            return Ok(authors::get_with_current_revision(pool, &slug).await?);
-        }
-    }
-    Ok(None)
-}
-
-fn pass_text(out: &PassOutput) -> anyhow::Result<String> {
-    let text = out
-        .result
-        .as_text()
-        .map(|s| s.to_string())
-        .or_else(|| out.result.result.as_str().map(|s| s.to_string()))
-        .unwrap_or_else(|| out.result.result.to_string());
-    if text.trim().is_empty() {
-        bail!("rewrite pass returned empty");
-    }
-    Ok(text)
-}
-
-/// Parse (n, title, body) out of the rewritten chapter. Tolerant of
-/// a missing heading — if the first line isn't a heading we keep the
-/// whole text as body and return n=0 (caller keeps the existing n).
-fn parse_chapter(text: &str) -> (i32, Option<String>, String) {
-    let trimmed = text.trim_start();
-    let first = trimmed.lines().next().unwrap_or("").trim();
-    if let Some(heading) = first.strip_prefix('#') {
-        let heading = heading.trim_start_matches('#').trim();
-        let n = heading
-            .to_lowercase()
-            .find("chapter")
-            .and_then(|idx| {
-                heading[idx + 7..]
-                    .trim_start()
-                    .split([' ', '—', '-', ':', ','])
-                    .next()
-                    .and_then(|w| w.parse::<i32>().ok())
-            })
-            .unwrap_or(0);
-        let title = heading
-            .split_once(" — ")
-            .or_else(|| heading.split_once(" - "))
-            .map(|(_, t)| t.trim().to_string())
-            .filter(|t| !t.is_empty());
-        let body = trimmed
-            .lines()
-            .skip(1)
-            .collect::<Vec<_>>()
-            .join("\n")
-            .trim_start()
-            .to_string();
-        let body = if body.is_empty() { text.trim().to_string() } else { body };
-        return (n, title, body);
-    }
-    (0, None, text.trim().to_string())
-}
-
-fn word_count(s: &str) -> i32 {
-    s.split_whitespace().count() as i32
-}
-
-fn load_forge_config() -> anyhow::Result<ForgeConfig> {
-    let base_url = std::env::var("CLAWDFORGE_URL")
-        .context("CLAWDFORGE_URL not set")?;
-    let app_token = std::env::var("CLAWDFORGE_TOKEN")
-        .context("CLAWDFORGE_TOKEN not set")?;
-    let model = std::env::var("SKALD_MODEL").unwrap_or_else(|_| "opus".into());
-    Ok(ForgeConfig { base_url, app_token, model })
-}
--- a/skald/src/web.rs
+++ b/skald/src/web.rs
@ -45,7 +45,6 @@ pub fn router(state: WebState) -> Router {
            "/stories/{id}/chapters/{n}/narrate",
            post(chapter_narrate_fire),
        )
-        .route("/stories/{id}/listen", get(listen_view))
        .route("/stories/{id}/runs", get(runs_view))
        .nest_service("/audio", ServeDir::new(audio_dir))
        .with_state(Arc::new(state))
@ -201,12 +200,8 @@ async fn new_story_create(
    // parent to compare against. So you get a single first-chapter
    // gen + cleanup pass and status flows to 'complete'.
    if form.fire == "now" {
-        let Ok(database_url) = std::env::var("DATABASE_URL") else {
-            return Err((
-                StatusCode::INTERNAL_SERVER_ERROR,
-                "DATABASE_URL not set — cannot spawn background gen".into(),
-            ));
-        };
+        let database_url = std::env::var("DATABASE_URL") // required: never fall back to a baked-in credentialed URL
+            .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "DATABASE_URL not set — cannot spawn background gen".into()))?;
        let author_owned = if author_slug.is_empty() {
            None
        } else {
@ -341,12 +336,7 @@ async fn continue_create(
    // If user clicked "fire now," spawn a background gen task.
    // Otherwise the sequel sits in seed state until CLI fires it.
    if form.fire == "now" {
-        let Ok(database_url) = std::env::var("DATABASE_URL") else {
-            return Err((
-                StatusCode::INTERNAL_SERVER_ERROR,
-                "DATABASE_URL not set — cannot spawn background gen".into(),
-            ));
-        };
+        let database_url = std::env::var("DATABASE_URL").map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "DATABASE_URL not set — cannot spawn background gen".into()))?; // required: no baked-in credentialed fallback
        let author_owned = if author_slug.is_empty() { None } else { Some(author_slug.to_string()) };
        let direction_owned = direction.clone();
        let chapters = parse_chapters(&form.chapters);
@ -412,18 +402,7 @@ async fn story_detail(
    .await
    .unwrap_or_default();

-    let has_audiobook: bool = sqlx::query_scalar::<_, Option<String>>(
-        "SELECT audiobook_path FROM stories WHERE id = $1",
-    )
-    .bind(id)
-    .fetch_optional(&state.pool)
-    .await
-    .ok()
-    .flatten()
-    .flatten()
-    .is_some();
-
-    let panel = story_panel(&story, &chapters, &characters, &canon_facts, has_audiobook);
+    let panel = story_panel(&story, &chapters, &characters, &canon_facts);
    Ok(Html(render_shell(&stories, Some(id), panel).into_string()))
 }

@ -521,12 +500,8 @@ async fn chapter_narrate_fire(
    let chapter_id =
        chapter_id.ok_or((StatusCode::NOT_FOUND, "chapter not found".into()))?;

-    let Ok(database_url) = std::env::var("DATABASE_URL") else {
-        return Err((
-            StatusCode::INTERNAL_SERVER_ERROR,
-            "DATABASE_URL not set — cannot spawn background narrate".into(),
-        ));
-    };
+    let database_url = std::env::var("DATABASE_URL") // required: never fall back to a baked-in credentialed URL
+        .map_err(|_| (StatusCode::INTERNAL_SERVER_ERROR, "DATABASE_URL not set — cannot spawn background narrate".into()))?;
    tokio::spawn(async move {
        if let Err(e) = crate::narrate::run(&database_url, chapter_id, None, 1.0).await {
            tracing::error!(chapter_id = %chapter_id, error = %e, "background narrate failed");
@ -538,73 +513,6 @@ async fn chapter_narrate_fire(
    Ok(Redirect::to(&format!("/stories/{id}/chapters/{n}")))
 }

-/// One chapter as the audiobook player sees it: its start offset
-/// (seconds from the top of the stitched file) and its length.
-#[derive(Debug, Clone)]
-struct AudiobookChapter {
-    n: i32,
-    title: Option<String>,
-    offset_seconds: f64,
-    duration_seconds: f64,
-}
-
-async fn listen_view(
-    State(state): State<Arc<WebState>>,
-    Path(id): Path<Uuid>,
-) -> Result<Html<String>, StatusCode> {
-    let stories = fetch_stories(&state.pool).await;
-    let Some(story) = stories.iter().find(|s| s.id == id).cloned() else {
-        return Err(StatusCode::NOT_FOUND);
-    };
-
-    let audiobook_path: Option<String> = sqlx::query_scalar::<_, Option<String>>(
-        "SELECT audiobook_path FROM stories WHERE id = $1",
-    )
-    .bind(id)
-    .fetch_optional(&state.pool)
-    .await
-    .ok()
-    .flatten()
-    .flatten();
-
-    // Per-chapter durations come from each chapter's most recent
-    // succeeded narration_run. Summed cumulatively in chapter order
-    // they give every chapter's start offset inside the stitched
-    // file — the same order the file was stitched in.
-    let rows: Vec<(i32, Option<String>, Option<f32>)> = sqlx::query_as(
-        r#"
-        SELECT c.n, c.title,
-            (SELECT nr.duration_seconds FROM narration_runs nr
-             WHERE nr.chapter_id = c.id AND nr.status = 'succeeded'
-               AND nr.duration_seconds IS NOT NULL
-             ORDER BY nr.ended_at DESC LIMIT 1)
-        FROM chapters c
-        WHERE c.story_id = $1
-        ORDER BY c.n
-        "#,
-    )
-    .bind(id)
-    .fetch_all(&state.pool)
-    .await
-    .unwrap_or_default();
-
-    let mut chapters = Vec::new();
-    let mut cursor = 0.0_f64;
-    for (n, title, dur) in rows {
-        let d = dur.unwrap_or(0.0) as f64;
-        chapters.push(AudiobookChapter {
-            n,
-            title,
-            offset_seconds: cursor,
-            duration_seconds: d,
-        });
-        cursor += d;
-    }
-
-    let panel = audiobook_panel(&story, audiobook_path.as_deref(), &chapters);
-    Ok(Html(render_shell(&stories, Some(id), panel).into_string()))
-}
-
 async fn runs_view(
    State(state): State<Arc<WebState>>,
    Path(id): Path<Uuid>,
@ -684,7 +592,7 @@ fn render_shell(stories: &[StoryRow], current: Option<Uuid>, main: Markup) -> Ma
                }
                footer.footbar {
                    span { "skald · v0.3 · written down · "
-                        a href="https://git.sulkta.com/Sulkta-OSS/skald" { "Sulkta-OSS/skald" }
+                        a href="https://git.sulkta.com/Sulkta-OSS/skald" { "Sulkta-OSS/skald" }
                    }
                }
            }
@ -806,7 +714,6 @@ fn story_panel(
    chapters: &[(i32, Option<String>, i32, bool)],
    characters: &[(String, String, String)],
    canon_facts: &[(String, String, String)],
-    has_audiobook: bool,
 ) -> Markup {
    let real_chars: Vec<_> = characters.iter().filter(|c| c.1 == "real").collect();
    let fictional_chars: Vec<_> = characters.iter().filter(|c| c.1 == "fictional").collect();
@ -824,9 +731,6 @@ fn story_panel(
            }
            nav.story-actions {
                a.action-primary href=(format!("/stories/{}/continue", s.id)) { "✦ continue this saga" }
-                @if has_audiobook {
-                    a.action-listen href=(format!("/stories/{}/listen", s.id)) { "♪ listen" }
-                }
                a.action-secondary href=(format!("/stories/{}/runs", s.id)) { "generation log →" }
            }

@ -1073,96 +977,6 @@ fn runs_panel(story_id: Uuid, runs: &[(Uuid, String, String, DateTime<Utc>, Opti
    }
 }

-/// Audiobook player — one `<audio>` element over the whole stitched
-/// file plus a clickable chapter list. Clicking a chapter seeks; the
-/// chapter under the playhead highlights as it plays. All client-side
-/// in one small inline script (the file's only JS).
-fn audiobook_panel(
-    story: &StoryRow,
-    audiobook_path: Option<&str>,
-    chapters: &[AudiobookChapter],
-) -> Markup {
-    html! {
-        article.audiobook {
-            a.back href=(format!("/stories/{}", story.id)) { "← back to story" }
-            h1 { (story.title) }
-            (ornament())
-            @match audiobook_path {
-                Some(path) => {
-                    @let basename = path.rsplit('/').next().unwrap_or(path);
-                    @let audio_url = format!("/audio/{}", basename);
-                    @let total: f64 = chapters.iter().map(|c| c.duration_seconds).sum();
-                    p.muted {
-                        (chapters.len()) " chapters · " (fmt_hms(total)) " · "
-                        a href=(audio_url) download=(basename) { "download" }
-                    }
-                    audio #book controls preload="metadata" src=(audio_url) {}
-                    ol.audiobook-chapters {
-                        @for c in chapters {
-                            @let end = c.offset_seconds + c.duration_seconds;
-                            li.ab-chapter
-                                data-seek=(format!("{:.3}", c.offset_seconds))
-                                data-end=(format!("{:.3}", end))
-                            {
-                                span.ab-n { "Chapter " (c.n) }
-                                @if let Some(t) = &c.title {
-                                    span.ab-title { (strip_chapter_prefix(t, c.n)) }
-                                }
-                                span.ab-time { (fmt_hms(c.offset_seconds)) }
-                            }
-                        }
-                    }
-                    script {
-                        (maud::PreEscaped(AUDIOBOOK_JS))
-                    }
-                }
-                None => {
-                    p.muted {
-                        "No audiobook stitched for this saga yet. Render every "
-                        "chapter to audio, then stitch the per-chapter files into one."
-                    }
-                }
-            }
-        }
-    }
-}
-
-const AUDIOBOOK_JS: &str = r#"
-(function () {
-  var audio = document.getElementById('book');
-  if (!audio) return;
-  var rows = Array.prototype.slice.call(document.querySelectorAll('.ab-chapter'));
-  rows.forEach(function (row) {
-    row.addEventListener('click', function () {
-      var t = parseFloat(row.getAttribute('data-seek'));
-      if (!isNaN(t)) { audio.currentTime = t; audio.play(); }
-    });
-  });
-  audio.addEventListener('timeupdate', function () {
-    var now = audio.currentTime;
-    rows.forEach(function (row) {
-      var s = parseFloat(row.getAttribute('data-seek'));
-      var e = parseFloat(row.getAttribute('data-end'));
-      if (now >= s && now < e) { row.classList.add('playing'); }
-      else { row.classList.remove('playing'); }
-    });
-  });
-})();
-"#;
-
-/// Format seconds as H:MM:SS, or M:SS when under an hour.
-fn fmt_hms(s: f64) -> String {
-    let total = s.max(0.0) as i64;
-    let h = total / 3600;
-    let m = (total % 3600) / 60;
-    let sec = total % 60;
-    if h > 0 {
-        format!("{h}:{m:02}:{sec:02}")
-    } else {
-        format!("{m}:{sec:02}")
-    }
-}
-
 // ─── helpers ─────────────────────────────────────────────────────

 /// Knotwork divider — a small SVG ornament used as section break.
@ -1504,45 +1318,6 @@ code { font-family: var(--mono); font-size: 0.9em; background: var(--surface-2);

 .empty { color: var(--ink-faint); font-style: italic; }

-/* ─── audiobook player ─────────────────────────────────────── */
-.action-listen {
-    font-family: var(--display); letter-spacing: 2px; font-size: 12px;
-    text-transform: uppercase; color: var(--bronze);
-    border: 1px solid var(--bronze-dim); padding: 9px 18px;
-    background: var(--surface);
-}
-.action-listen:hover { color: var(--bg); background: var(--bronze); border-color: var(--bronze); }
-.audiobook .back {
-    display: inline-block; color: var(--ink-faint); font-size: 12px;
-    margin-bottom: 18px; letter-spacing: 0.5px;
-}
-.audiobook h1 {
-    font-family: var(--display); font-size: 32px; color: var(--ink);
-    margin: 0; font-weight: 700; letter-spacing: 1px;
-}
-.audiobook > .muted { font-size: 13px; margin: 0 0 14px 0; }
-.audiobook > .muted a { color: var(--bronze); }
-.audiobook audio { width: 100%; margin: 4px 0 26px 0; }
-.audiobook-chapters { list-style: none; margin: 0; padding: 0; }
-.ab-chapter {
-    display: grid; grid-template-columns: 120px 1fr auto; gap: 20px;
-    align-items: baseline; padding: 13px 16px; cursor: pointer;
-    border-left: 2px solid transparent; border-bottom: 1px solid var(--surface-2);
-    transition: background 80ms ease;
-}
-.ab-chapter:hover { background: var(--surface-2); border-left-color: var(--bronze-dim); }
-.ab-chapter.playing { background: var(--surface-2); border-left-color: var(--accent); }
-.ab-n {
-    font-family: var(--mono); font-size: 12px; color: var(--bronze-dim);
-    letter-spacing: 1px;
-}
-.ab-chapter.playing .ab-n { color: var(--accent); }
-.ab-title { font-family: var(--serif); font-size: 16px; color: var(--ink); }
-.ab-time {
-    font-family: var(--mono); font-size: 12px; color: var(--ink-faint);
-    text-align: right;
-}
-
 /* ─── forms (new-saga + continue) ──────────────────────────── */
 .form-panel h1 {
    font-family: var(--display); font-size: 30px; color: var(--ink);
@ -1604,7 +1379,6 @@ code { font-family: var(--mono); font-size: 0.9em; background: var(--surface-2);
    .topnav { margin-left: 0; width: 100%; padding-top: 6px; }
    .chapter-list a { grid-template-columns: 80px 1fr auto; }
    .chapter-list .wc { display: none; }
-    .ab-chapter { grid-template-columns: 80px 1fr auto; gap: 12px; }
    .char-list li { grid-template-columns: 1fr; gap: 4px; }
    .cname { font-size: 14px; }
    .brand { font-size: 22px; letter-spacing: 3px; }