diff --git a/Cargo.toml b/Cargo.toml index e61e49c..b966fc4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ version = "0.0.1" edition = "2024" license = "MIT" authors = ["Sulkta"] -repository = "http://127.0.0.1:3001/Sulkta/skald" +repository = "https://git.sulkta.com/Sulkta/skald" [workspace.dependencies] tokio = { version = "1", features = ["full"] } diff --git a/Dockerfile b/Dockerfile index f2d3850..c8f191f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,26 +1,16 @@ -# Multi-stage build for skald. +# Multi-stage build for skald. Postgres ships in the same image +# until the tool stabilises; to split, swap the runtime base to +# debian:bookworm-slim, drop entrypoint.sh, point DATABASE_URL at +# an external pg. # -# Stage 1: compile the rust binary against rust:1-bookworm. -# Stage 2: pgvector/pgvector:pg17 (debian-bookworm postgres with -# pgvector preinstalled) + tini + the skald binary. -# -# v0.1 ships postgres inside the same container ("singleton till we -# have a real working tool"). When we extract the DB out, swap the -# runtime base to debian:bookworm-slim, drop entrypoint.sh, point -# DATABASE_URL at the external pg. -# -# Build context is the workspace root: -# docker build -t skald:latest . +# Build context is the workspace root: `docker build -t skald:latest .` -# ─── builder ────────────────────────────────────────────────────── FROM rust:1.95-bookworm AS builder WORKDIR /build -# Cache the dependency graph: copy manifests first, fetch + build -# stubs, THEN drop in real sources. The vendored clawdforge SDK -# needs its own manifest + source available during the cache layer -# (path dep — Cargo resolves it at workspace load time, not at -# crate-compile time). +# Dependency-cache layer: copy manifests + vendored path-dep first, +# build stubs, then drop in real sources. clawdforge is a path dep +# resolved at workspace load time. 
COPY Cargo.toml Cargo.lock ./ COPY skald-core/Cargo.toml skald-core/Cargo.toml COPY skald/Cargo.toml skald/Cargo.toml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4430ab4 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Sulkta + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 97ae616..c53a3bf 100644 --- a/README.md +++ b/README.md @@ -1,95 +1,79 @@ # skald Long-form story-writer with canon-keeping, sequel continuity, and -(future) self-hosted audiobook narration. Database is the source of -truth — the writer is the tooling. +self-hosted audiobook narration. The database is the source of truth; +the binary is the tooling. -Named for the Old Norse poets who composed and memorized kings' -sagas across generations. +Named for the Old Norse poets who composed and memorized kings' sagas +across generations. 
-## Status: v0.1 — scaffold - -What's wired: +## What's wired - Rust workspace (`skald-core` + `skald`) - Postgres schema for stories, characters, canon facts, chapters, passages, generation runs, audit findings, tags -- pgvector extension installed for future similarity search +- pgvector for future similarity search - `skald import-markdown` ingests a story file (chapters + bible) - into the schema -- `skald serve` exposes `/health` and runs migrations on boot -- Single-container deploy: postgres + skald in one image - -Wired (this commit): - -- clawdforge Rust SDK vendored at `vendor/clawdforge/` (upstream: - `Sulkta-OSS/clawdforge` `clients/rust/`) -- `skald-core::forge` — three-pass orchestration shell (gen / cleanup / - audit). Prompts are TODO stubs; pipeline plumbing is in place. - -Not yet wired: - -- Web UI (the inbox + browse + queue surface) -- Prompt templates for the three passes (heavy prompt-engineering - work — own session) -- `skald-core::context` — assemble the LLM context blob from DB rows - (bible + characters + parent prose summaries + similarity-matched - passages) -- Embeddings backfill + ivfflat index -- TTS sidecar container + post-render audit chain (see - `docs/tts-pipeline.md`) - -## v0.1 smoke - -```bash -docker compose -p skald up -d -docker exec skald skald import-markdown \ - --path /seed/coast-down.md \ - --title "The Coast-Down" - -curl http://localhost:7780/health -# → { ok: true, db_ok: true, story_count: 1, ... 
} -``` +- `skald serve` exposes `/health` + the web inspector and runs + migrations on boot +- `skald continue` runs gen → cleanup → audit per chapter, with + multi-chapter batching (cap 20) +- `skald rewrite` re-authors a chapter in a named author's voice +- `skald audit` runs whole-story prose-quality audit; `skald dedup` + is the surgical fix half of the loop +- `skald prepare-narration` annotates a chapter with `[breath]` / + `[pause:Xs]` / `[scene]` beats and per-character `[voice:...]` + tags +- `skald narrate` renders a chapter to audio via one of three TTS + engines (F5-TTS, Kokoro-82M, Tortoise-TTS) — see `engines/` +- Named-author "soul" personas via `skald authors seed`; author + voice replaces the model's base system prompt for gen/cleanup/ + rewrite/dedup/narrate_prep ## Schema (cheat sheet) ``` -stories → meta + status + parent/root for series -characters → real or fictional, story-scoped -canon_facts → setting, mystery, theme, rule, historical_anchor, hook -chapters → full prose body -chapter_summaries → short summaries for cheap context loading -passages → paragraph-level + embedding vector(1536) -generation_runs → every LLM call logged -audit_findings → canon audit output (severity + area) -tags → arbitrary labels +stories meta + status + parent/root for series +authors persona identity (slug, display_name, model) +author_revisions versioned souls; one current per author +characters real or fictional, story-scoped, voice-mappable +canon_facts setting, mystery, theme, rule, historical_anchor, hook +chapters full prose body + optional body_md_tts annotation +chapter_summaries short summaries for cheap context loading +passages paragraph-level + embedding vector(1536) +voices TTS voice rows (F5 ref clips / Kokoro / Tortoise names) +pronunciation_overrides per-story + global respellings for proper nouns +generation_runs every LLM call logged +audit_findings audit pass output (severity + area) +narration_runs per-chapter TTS renders ``` -## Architecture 
(v0.1 + the plan) +## Quickstart -``` -┌─────────────────────────────────┐ -│ skald container │ -│ ┌───────────┐ ┌────────────┐ │ -│ │ postgres │ │ skald-rust │ │ -│ │ pgvector │←─│ axum + cli │ │ -│ │ localhost │ │ :7780 │ │ -│ └───────────┘ └─────┬──────┘ │ -└─────────────────────────┼────────┘ - │ HTTP (future) - ↓ - ┌──────────┐ - │clawdforge│ - └─────┬────┘ - ↓ - opus calls +```sh +docker compose up -d +docker exec skald skald import-markdown --path /seed/<story>.md \ + --title "<title>" +curl http://localhost:7780/health ``` -v1.0+: extract postgres to its own container on db-net. skald -becomes pure stateless rust, connects via `DATABASE_URL`. Migration -is a connection-string change + a network move; the binary doesn't -care where the DB lives. +The compose file expects `POSTGRES_PASSWORD` and (optionally) +`CLAWDFORGE_URL` + `CLAWDFORGE_TOKEN` in `.env`. Story markdown +goes into `./seed/`; postgres data persists in `./pgdata/`. + +## Architecture + +v0.1 ships postgres inside the skald container — singleton until +the tool stabilises. To extract postgres later, swap the runtime +base to `debian:bookworm-slim`, drop `entrypoint.sh`, and point +`DATABASE_URL` at the external pg. The binary doesn't care where +the DB lives. + +The generation passes call out to `clawdforge` (a bearer-token-gated +HTTP wrapper around `claude -p`). The Rust client is vendored at +`vendor/clawdforge/`. TTS calls go HTTP+JSON to the per-engine +sidecars under `engines/`. ## License -MIT. +MIT — see `LICENSE`. diff --git a/compose.yml b/compose.yml index 6feb7b0..d37e4f0 100644 --- a/compose.yml +++ b/compose.yml @@ -1,37 +1,42 @@ -# Standalone compose stack for skald v0.1. Postgres lives in the -# same container — single deployable unit "till we have a real -# working tool" (Sulkta's call, 2026-05-13). +# Standalone compose stack for skald. Postgres lives inside the +# skald container — single deployable unit until the tool stabilises. 
# -# To deploy on the host: -# sudo mkdir -p /srv/appdata/skald/{pgdata,seed} -# sudo cp <story>.md /srv/appdata/skald/seed/ -# sudo cp skald.env /srv/appdata/secrets/skald.env # POSTGRES_PASSWORD=... -# docker compose -p skald up -d +# Set in .env (or the environment): +# POSTGRES_PASSWORD=... # required +# CLAWDFORGE_URL=http://...:8800 # if running gen / cleanup / audit +# CLAWDFORGE_TOKEN=cf_... +# SKALD_DATA=./pgdata # optional override; defaults to ./pgdata +# SKALD_SEED=./seed # optional override; defaults to ./seed # # To import the first story: -# docker exec skald skald import-markdown \ +# docker compose exec skald skald import-markdown \ # --path /seed/<story>.md \ # --title "<title>" +name: skald services: skald: - image: registry.example.local:5000/skald:latest + build: . + image: skald:latest container_name: skald restart: unless-stopped ports: - "7780:7780" - env_file: - - /srv/appdata/secrets/skald.env + environment: + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:?POSTGRES_PASSWORD is required} + POSTGRES_USER: ${POSTGRES_USER:-skald} + POSTGRES_DB: ${POSTGRES_DB:-skald} + DATABASE_URL: ${DATABASE_URL:-postgresql://skald:${POSTGRES_PASSWORD}@localhost:5432/skald} + CLAWDFORGE_URL: ${CLAWDFORGE_URL:-} + CLAWDFORGE_TOKEN: ${CLAWDFORGE_TOKEN:-} + SKALD_MODEL: ${SKALD_MODEL:-opus} + F5_TTS_URL: ${F5_TTS_URL:-} + KOKORO_URL: ${KOKORO_URL:-} + TORTOISE_URL: ${TORTOISE_URL:-} + RUST_LOG: ${RUST_LOG:-info} + SKALD_LOG_FORMAT: ${SKALD_LOG_FORMAT:-json} volumes: # Postgres data — persist across container recreates. - - /srv/appdata/skald/pgdata:/var/lib/postgresql/data - # Markdown corpus to import via `docker exec skald skald import-markdown`. 
- - /srv/appdata/skald/seed:/seed:ro - environment: - RUST_LOG: ${RUST_LOG:-info} - SKALD_LOG_FORMAT: json - labels: - org.sulkta.domain: "sulkta" - org.sulkta.owner: "Sulkta" - org.sulkta.managed-by: "compose" - org.sulkta.role: "skald" + - ${SKALD_DATA:-./pgdata}:/var/lib/postgresql/data + # Markdown corpus to import via `skald import-markdown`. + - ${SKALD_SEED:-./seed}:/seed:ro diff --git a/engines/README.md b/engines/README.md index ff3154e..ab11976 100644 --- a/engines/README.md +++ b/engines/README.md @@ -40,19 +40,18 @@ generalise. Examples: all three at once), preset choice ergonomics, character→tortoise- voice seed assignments. -When deploying an engine to the host, the build dir at -`/srv/appdata/<engine>/build/` tracks the engine's branch: +To deploy a tuned engine, check out the engine's branch in the build +dir and `docker compose up -d --build`: ```bash -cd /srv/appdata/kokoro/build git fetch && git checkout engine/kokoro -docker compose -p <name> up -d --build +docker compose up -d --build ``` -## GPU coordination (2070 Super) +## GPU coordination -The 8GB card is the bottleneck. F5 + Kokoro can co-reside (~5GB + -~1GB). Tortoise pushes the budget over and needs the GPU largely -to itself — the `engine/tortoise` branch will carry the script -that stops kokoro + f5 before a tortoise run and restarts them -after. Replace with proper coordination once we have more VRAM. +On an 8GB card F5 + Kokoro can co-reside (~5GB + ~1GB). Tortoise +pushes the budget over and needs the GPU largely to itself — the +`engine/tortoise` branch carries a script to stop kokoro + f5 +before a Tortoise run and restart them after. Replace with proper +coordination once more VRAM is available. 
diff --git a/engines/f5-tts/Dockerfile b/engines/f5-tts/Dockerfile index fae0af6..f07de8c 100644 --- a/engines/f5-tts/Dockerfile +++ b/engines/f5-tts/Dockerfile @@ -1,9 +1,6 @@ -# Sulkta build of F5-TTS — upstream ghcr.io/swivid/f5-tts:main was -# shipped with torch 2.11/torchaudio 2.4 ABI mismatch on 2026-05-13, -# breaking import torchaudio at boot. We rebuild on a known-good -# pytorch base + pip install f5-tts. -# -# Image tag in localhost-registry: registry.example.local:5000/f5-tts:<ver> +# F5-TTS rebuild on a known-good pytorch base. Upstream +# ghcr.io/swivid/f5-tts:main shipped a torch/torchaudio ABI mismatch +# that broke `import torchaudio` at boot; this image bypasses that. # # License: Apache 2.0 (code) / CC-BY-NC (Emilia-trained weights). # Personal use OK; redistribution gray-area — flagged. @@ -30,12 +27,11 @@ RUN pip install --no-cache-dir 'f5-tts>=1.0.0' # Pre-warm the HF cache directory. RUN mkdir -p /cache/hf /audio /voices -COPY f5_server.py /app/f5_server.py +COPY server.py /app/server.py WORKDIR /app EXPOSE 7860 -# Skald talks to our purpose-built FastAPI server, not Gradio. -# Models load at startup (first request would otherwise pay the -# cold-start cost). uvicorn on :7860 to keep the port stable. -CMD ["uvicorn", "f5_server:app", "--host", "0.0.0.0", "--port", "7860"] +# Purpose-built FastAPI server, not Gradio. Models load at startup +# so the first request doesn't pay the cold-start cost. +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/engines/f5-tts/compose.yml b/engines/f5-tts/compose.yml index 2294531..87aaccb 100644 --- a/engines/f5-tts/compose.yml +++ b/engines/f5-tts/compose.yml @@ -1,19 +1,21 @@ -# F5-TTS standalone stack on the host. +# F5-TTS sidecar. # -# License posture (acknowledged 2026-05-13): code is Apache 2.0, but -# the pretrained model weights are CC-BY-NC (Emilia training data). -# Personal listen is fine; public sharing is a flagged gray area. -# Sulkta's call: ship anyway. 
+# Code is Apache 2.0; pretrained F5TTS_v1_Base weights are CC-BY-NC +# (Emilia training data). Personal use is fine; redistribution is a +# flagged gray area. # -# Runtime: 8GB GPU is plenty (F5 inference ~4-6GB peak). +# First run downloads ~2GB of model weights from HuggingFace into +# the hf-cache volume; subsequent runs are warm. # -# First-run cost: ~2GB model download from HuggingFace into hf-cache, -# happens on first inference request. Subsequent runs are warm. +# Set in .env (or override): +# F5_HOST_PORT=7792 +# F5_DATA=./data # ${F5_DATA}/hf-cache + voices + audio name: f5-tts services: f5-tts: - image: registry.example.local:5000/f5-tts:0.3 + build: . + image: f5-tts:0.3 container_name: f5-tts restart: unless-stopped deploy: @@ -24,20 +26,11 @@ services: count: all capabilities: [gpu] ports: - - "127.0.0.1:7792:7860" - - "127.0.0.1:7792:7860" + - "${F5_HOST_PORT:-7792}:7860" volumes: - # HF model weights cache — persists ~2GB after first download. - - /srv/appdata/f5-tts/hf-cache:/cache/hf - # Reference voice clips (lj_speech.wav, etc). - - /srv/appdata/f5-tts/voices:/voices:ro - # Rendered audio output — skald writes story narrations here. - - /srv/appdata/f5-tts/audio:/audio + - ${F5_DATA:-./data}/hf-cache:/cache/hf + - ${F5_DATA:-./data}/voices:/voices:ro + - ${F5_DATA:-./data}/audio:/audio environment: HF_HOME: /cache/hf HF_HUB_DISABLE_TELEMETRY: "1" - labels: - org.sulkta.domain: "sulkta" - org.sulkta.owner: "Sulkta" - org.sulkta.managed-by: "compose" - org.sulkta.role: "f5-tts" diff --git a/engines/kokoro/Dockerfile b/engines/kokoro/Dockerfile index 5c7b13e..41e0b03 100644 --- a/engines/kokoro/Dockerfile +++ b/engines/kokoro/Dockerfile @@ -1,13 +1,8 @@ -# Sulkta build of Kokoro-82M TTS. +# Kokoro-82M TTS. Apache 2.0 code AND weights — clean stack vs +# F5-TTS's CC-BY-NC asterisk. # -# License: Apache 2.0 (code AND model weights). Clean stack — no -# CC-BY-NC asterisk like F5-TTS's Emilia weights. 
This is the -# narrator engine for sleep-quality audiobook reads; F5-TTS stays -# around for voice-cloning cases. -# -# Kokoro is small enough to run on CPU but we use the cuda base -# anyway to stay consistent with f5-tts and so it'll pick up the -# GPU when no other tenant has it. +# Kokoro runs fine on CPU but we use the cuda base to stay +# consistent with f5-tts and pick up the GPU when free. FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime ENV DEBIAN_FRONTEND=noninteractive \ @@ -27,9 +22,9 @@ RUN pip install --no-cache-dir 'kokoro>=0.9.0' 'fastapi>=0.115.0' 'uvicorn>=0.32 RUN mkdir -p /cache/hf /audio -COPY kokoro_server.py /app/kokoro_server.py +COPY server.py /app/server.py WORKDIR /app EXPOSE 7860 -CMD ["uvicorn", "kokoro_server:app", "--host", "0.0.0.0", "--port", "7860"] +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/engines/kokoro/compose.yml b/engines/kokoro/compose.yml index 72687d2..b2c83a8 100644 --- a/engines/kokoro/compose.yml +++ b/engines/kokoro/compose.yml @@ -1,16 +1,18 @@ -# Kokoro-82M TTS stack on the host. +# Kokoro-82M TTS sidecar. # -# Audiobook-quality narrator engine (Apache 2.0 code + weights — -# clean stack vs F5-TTS's CC-BY-NC asterisk). Sibling to f5-tts; -# both share /srv/appdata/f5-tts/audio so skald's audio -# route serves outputs from either engine through the same path. +# Apache 2.0 code AND model weights — clean stack for share/publish. +# Audiobook-quality narrator; F5-TTS stays around for voice-cloning. # -# License: Apache 2.0 top to bottom. Right for share/publish. +# Set in .env (or override): +# KOKORO_HOST_PORT=7794 +# KOKORO_DATA=./data # ${KOKORO_DATA}/hf-cache +# AUDIO_DIR=../f5-tts/data/audio # set this to share output with f5-tts; unset, it defaults to the per-engine ./data/audio name: kokoro services: kokoro: - image: registry.example.local:5000/kokoro:0.5 + build: . 
+ image: kokoro:0.5 container_name: kokoro restart: unless-stopped deploy: @@ -21,17 +23,10 @@ services: count: all capabilities: [gpu] ports: - - "127.0.0.1:7794:7860" - - "127.0.0.1:7794:7860" + - "${KOKORO_HOST_PORT:-7794}:7860" volumes: - - /srv/appdata/kokoro/hf-cache:/cache/hf - # Shared with f5-tts so skald's /audio route covers both. - - /srv/appdata/f5-tts/audio:/audio + - ${KOKORO_DATA:-./data}/hf-cache:/cache/hf + - ${AUDIO_DIR:-./data/audio}:/audio environment: HF_HOME: /cache/hf HF_HUB_DISABLE_TELEMETRY: "1" - labels: - org.sulkta.domain: "sulkta" - org.sulkta.owner: "Sulkta" - org.sulkta.managed-by: "compose" - org.sulkta.role: "kokoro" diff --git a/engines/kokoro/server.py b/engines/kokoro/server.py index 169cbbd..b2c3c6f 100644 --- a/engines/kokoro/server.py +++ b/engines/kokoro/server.py @@ -1,4 +1,4 @@ -"""Kokoro-82M FastAPI server, sibling to f5_server. +"""Kokoro-82M FastAPI server, sibling to the f5-tts server. Same /synthesize contract as F5 so skald can route between engines just by which URL it points at. The semantic difference: Kokoro @@ -234,7 +234,7 @@ def _startup() -> None: @app.get("/healthz") def healthz() -> dict: - # Shape matches f5_server's so the same Rust HealthResponse + # Shape matches the f5-tts server's so the same Rust HealthResponse # struct deserializes both: model/vocoder/loaded fields are # required by skald-core::narrate::HealthResponse. return { diff --git a/engines/tortoise/Dockerfile b/engines/tortoise/Dockerfile index d2f104a..74ab6c3 100644 --- a/engines/tortoise/Dockerfile +++ b/engines/tortoise/Dockerfile @@ -1,17 +1,13 @@ -# Sulkta build of Tortoise-TTS. +# Tortoise-TTS. Apache 2.0 code + weights. 
# -# Voice roster (built-in, no cloning needed): angie, daniel, deniro, -# emma, freeman, geralt, halle, jlaw, lj, mol, myself, pat, pat2, -# rainbow, snakes, tim_reynolds, tom, train_atkins, train_dotrice, +# ~26 built-in voices (no cloning): angie, daniel, deniro, emma, +# freeman, geralt, halle, jlaw, lj, mol, myself, pat, pat2, rainbow, +# snakes, tim_reynolds, tom, train_atkins, train_dotrice, # train_dreams, train_grace, train_kennard, train_lescault, -# train_mouse, weaver, william. ~26 voices baked in. +# train_mouse, weaver, william. # -# License: Apache 2.0 (code) + Apache 2.0 (model weights). Clean -# stack for share/publish. -# -# Speed: slow. Trade for quality. Standard preset is ~10x slower -# than Kokoro; high_quality is ~30x slower. Worth it for the -# audiobook-quality bar. +# Slow: standard preset is ~10x slower than Kokoro; high_quality is +# ~30x. Trade for quality. FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime @@ -37,9 +33,9 @@ RUN pip install --no-cache-dir \ RUN mkdir -p /cache/hf /cache/tortoise-models /audio -COPY tortoise_server.py /app/tortoise_server.py +COPY server.py /app/server.py WORKDIR /app EXPOSE 7860 -CMD ["uvicorn", "tortoise_server:app", "--host", "0.0.0.0", "--port", "7860"] +CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/engines/tortoise/compose.yml b/engines/tortoise/compose.yml index e13b192..6d3a470 100644 --- a/engines/tortoise/compose.yml +++ b/engines/tortoise/compose.yml @@ -1,19 +1,22 @@ -# Tortoise-TTS stack on the host. Audiobook-quality engine with 25+ -# named voices (no cloning). Apache 2.0 top to bottom. +# Tortoise-TTS sidecar. 25+ named voices, no cloning needed. +# Apache 2.0 top to bottom. # -# Slow: ~10x kokoro wall clock at 'standard' preset. Worth it for -# the quality bar. Sulkta's call 2026-05-14: "use higgs (now tortoise) -# and we will only let it use the full gpu for runs" — translated: -# runs are batched, slow is acceptable. 
+# Slow: ~10x kokoro wall-clock at 'standard' preset. Worth it for the +# quality bar; runs are batched. # -# Co-resides with kokoro on the 2070 Super since tortoise is ~5GB -# and kokoro is ~1GB (8GB total). If OOM hits during a render, -# we'll add a coordination layer to pause kokoro first. +# Co-resides with kokoro on an 8GB card (tortoise ~5GB + kokoro ~1GB). +# OOM during a render: add a coordinator that pauses kokoro first. +# +# Set in .env (or override): +# TORTOISE_HOST_PORT=7795 +# TORTOISE_DATA=./data # ${TORTOISE_DATA}/{hf-cache,models} +# AUDIO_DIR=../f5-tts/data/audio # set this to share output with f5-tts; unset, it defaults to the per-engine ./data/audio name: tortoise services: tortoise: - image: registry.example.local:5000/tortoise:0.1 + build: . + image: tortoise:0.1 container_name: tortoise restart: unless-stopped deploy: @@ -24,20 +27,12 @@ services: count: all capabilities: [gpu] ports: - - "127.0.0.1:7795:7860" - - "127.0.0.1:7795:7860" + - "${TORTOISE_HOST_PORT:-7795}:7860" volumes: - - /srv/appdata/tortoise/hf-cache:/cache/hf - - /srv/appdata/tortoise/models:/cache/tortoise-models - # Shared audio dir with f5/kokoro so skald serves all engines' - # outputs through the same /audio route. - - /srv/appdata/f5-tts/audio:/audio + - ${TORTOISE_DATA:-./data}/hf-cache:/cache/hf + - ${TORTOISE_DATA:-./data}/models:/cache/tortoise-models + - ${AUDIO_DIR:-./data/audio}:/audio environment: HF_HOME: /cache/hf HF_HUB_DISABLE_TELEMETRY: "1" TORTOISE_MODELS_DIR: /cache/tortoise-models - labels: - org.sulkta.domain: "sulkta" - org.sulkta.owner: "Sulkta" - org.sulkta.managed-by: "compose" - org.sulkta.role: "tortoise-tts" diff --git a/engines/tortoise/server.py b/engines/tortoise/server.py index c39eafe..4f5e500 100644 --- a/engines/tortoise/server.py +++ b/engines/tortoise/server.py @@ -1,4 +1,4 @@ -"""Tortoise-TTS FastAPI server. Sibling to kokoro_server. +"""Tortoise-TTS FastAPI server. Sibling to the kokoro server. Same /synthesize contract as the kokoro server so skald only has to route by voice.source. 
Differences: @@ -71,7 +71,7 @@ def _get_voice(name: str) -> tuple: return _voice_cache[name] -# ─── tag splitter (lifted from kokoro_server) ─────────────────── +# ─── tag splitter (lifted from the kokoro server) ─────────────── class Node: @@ -209,7 +209,7 @@ def _startup() -> None: @app.get("/healthz") def healthz() -> dict: - # Shape matches f5_server/kokoro_server so skald's HealthResponse + # Shape matches the f5-tts + kokoro servers so skald's HealthResponse # struct deserializes all three. return { "ok": True, diff --git a/entrypoint.sh b/entrypoint.sh index 2c6ca85..e2192b3 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,14 +1,9 @@ #!/usr/bin/env bash -# Skald container entrypoint. -# # Boots the embedded postgres via the pgvector image's own # docker-entrypoint, waits for it to accept connections, then execs -# `skald` in the foreground. Tini is PID 1 (so it can reap zombies + -# forward signals); we are PID 2; postgres becomes our child. -# -# This is explicitly "DB in the same container, for now" — when we -# split the DB out (see project notes), the entrypoint reduces to -# `exec /usr/local/bin/skald "$@"` and the pg startup goes away. +# `skald` in the foreground. Tini is PID 1; postgres becomes our +# child. When the DB is extracted to its own container, this reduces +# to `exec /usr/local/bin/skald "$@"`. set -eo pipefail diff --git a/skald-core/src/config.rs b/skald-core/src/config.rs index 8326d93..ea819a9 100644 --- a/skald-core/src/config.rs +++ b/skald-core/src/config.rs @@ -8,24 +8,23 @@ use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ForgeConfig { - /// Base URL of the clawdforge HTTP service. Defaults to - /// `http://clawdforge.example.local:8800` in production; override - /// for tests via env. + /// Base URL of the clawdforge HTTP service. The calling binary + /// resolves this from `CLAWDFORGE_URL`. pub base_url: String, /// App-level bearer token. 
Resolved by the binary from /// `CLAWDFORGE_TOKEN`; should never be logged or `Display`ed. pub app_token: String, - /// Model alias passed to clawdforge → `claude -p --model`. Skald - /// is opinionated: always opus max effort. Default reflects that. + /// Model alias passed to clawdforge → `claude -p --model`. + /// Defaults to opus. pub model: String, } impl Default for ForgeConfig { fn default() -> Self { Self { - base_url: "http://clawdforge.example.local:8800".into(), + base_url: "http://localhost:8800".into(), app_token: String::new(), model: "opus".into(), } diff --git a/skald-core/src/narrate.rs b/skald-core/src/narrate.rs index 61b8e37..4e3218e 100644 --- a/skald-core/src/narrate.rs +++ b/skald-core/src/narrate.rs @@ -27,7 +27,7 @@ use uuid::Uuid; #[derive(Debug, Clone)] pub struct F5Config { - /// e.g. http://127.0.0.1:7792 + /// e.g. http://localhost:7792 pub base_url: String, /// Inference subprocess timeout. Long-form chapters (3000 words) /// take 60-180s on an 8GB GPU; cap at 1800s to match clawdforge. diff --git a/skald/src/main.rs b/skald/src/main.rs index eb24342..04e715a 100644 --- a/skald/src/main.rs +++ b/skald/src/main.rs @@ -30,8 +30,8 @@ use uuid::Uuid; about = "Long-form story-writer. Database is the source of truth; the writer is the tooling." )] struct Cli { - /// Postgres connection URL. Defaults to `postgresql://skald:skald@localhost:5432/skald`. - #[arg(long, env = "DATABASE_URL", default_value = "postgresql://skald:skald@localhost:5432/skald")] + /// Postgres connection URL. Read from `DATABASE_URL` if unset. + #[arg(long, env = "DATABASE_URL")] database_url: String, #[command(subcommand)] diff --git a/skald/src/narrate.rs b/skald/src/narrate.rs index a311edc..11f4596 100644 --- a/skald/src/narrate.rs +++ b/skald/src/narrate.rs @@ -458,16 +458,15 @@ async fn cleanup_superseded_renders(pool: &PgPool, chapter_id: Uuid, current_run /// kokoro_* → KOKORO_URL /// tortoise_* → TORTOISE_URL /// anything else (lj_speech etc.) 
→ F5_TTS_URL -/// Each env var has a LAN-default for the host. fn engine_url_for(source: &str) -> anyhow::Result<String> { - if source.starts_with("kokoro") { - Ok(std::env::var("KOKORO_URL") - .unwrap_or_else(|_| "http://127.0.0.1:7794".into())) + let (env_var, engine) = if source.starts_with("kokoro") { + ("KOKORO_URL", "kokoro") } else if source.starts_with("tortoise") { - Ok(std::env::var("TORTOISE_URL") - .unwrap_or_else(|_| "http://127.0.0.1:7795".into())) + ("TORTOISE_URL", "tortoise") } else { - Ok(std::env::var("F5_TTS_URL") - .unwrap_or_else(|_| "http://127.0.0.1:7792".into())) - } + ("F5_TTS_URL", "f5-tts") + }; + std::env::var(env_var).map_err(|_| { + anyhow::anyhow!("{env_var} not set — point at the {engine} sidecar") + }) } diff --git a/skald/src/web.rs b/skald/src/web.rs index 15c08f4..66ec261 100644 --- a/skald/src/web.rs +++ b/skald/src/web.rs @@ -201,8 +201,12 @@ async fn new_story_create( // parent to compare against. So you get a single first-chapter // gen + cleanup pass and status flows to 'complete'. if form.fire == "now" { - let database_url = std::env::var("DATABASE_URL") - .unwrap_or_else(|_| "postgresql://skald:skald@localhost:5432/skald".into()); + let Ok(database_url) = std::env::var("DATABASE_URL") else { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "DATABASE_URL not set — cannot spawn background gen".into(), + )); + }; let author_owned = if author_slug.is_empty() { None } else { @@ -337,7 +341,12 @@ async fn continue_create( // If user clicked "fire now," spawn a background gen task. // Otherwise the sequel sits in seed state until CLI fires it. 
if form.fire == "now" { - let database_url = std::env::var("DATABASE_URL").unwrap_or_else(|_| "postgresql://skald:skald@localhost:5432/skald".into()); + let Ok(database_url) = std::env::var("DATABASE_URL") else { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "DATABASE_URL not set — cannot spawn background gen".into(), + )); + }; let author_owned = if author_slug.is_empty() { None } else { Some(author_slug.to_string()) }; let direction_owned = direction.clone(); let chapters = parse_chapters(&form.chapters); @@ -512,8 +521,12 @@ async fn chapter_narrate_fire( let chapter_id = chapter_id.ok_or((StatusCode::NOT_FOUND, "chapter not found".into()))?; - let database_url = std::env::var("DATABASE_URL") - .unwrap_or_else(|_| "postgresql://skald:skald@localhost:5432/skald".into()); + let Ok(database_url) = std::env::var("DATABASE_URL") else { + return Err(( + StatusCode::INTERNAL_SERVER_ERROR, + "DATABASE_URL not set — cannot spawn background narrate".into(), + )); + }; tokio::spawn(async move { if let Err(e) = crate::narrate::run(&database_url, chapter_id, None, 1.0).await { tracing::error!(chapter_id = %chapter_id, error = %e, "background narrate failed"); @@ -671,7 +684,7 @@ fn render_shell(stories: &[StoryRow], current: Option<Uuid>, main: Markup) -> Ma } footer.footbar { span { "skald · v0.3 · written down · " - a href="http://127.0.0.1:3001/Sulkta/skald" { "Sulkta/skald" } + a href="https://git.sulkta.com/Sulkta/skald" { "Sulkta/skald" } } } }