# skald/engines/f5-tts/compose.yml

# F5-TTS standalone stack on Lucy.
#
# License posture (acknowledged 2026-05-13): code is Apache 2.0, but
# the pretrained model weights are CC-BY-NC (Emilia training data).
# Personal listening is fine; public sharing is a flagged gray area.
# Cobb's call: ship anyway.
#
# Runtime: 8GB GPU is plenty (F5 inference ~4-6GB peak).
#
# First-run cost: ~2GB model download from HuggingFace into hf-cache,
# happens on first inference request. Subsequent runs are warm.
name: f5-tts

services:
  # The only service in this stack: the F5-TTS container.
  f5-tts:
    container_name: f5-tts
    image: "lucy-registry:5000/f5-tts:0.3"
    restart: unless-stopped
    labels:
      - "org.sulkta.domain=sulkta"
      - "org.sulkta.owner=cobb"
      - "org.sulkta.managed-by=compose"
      - "org.sulkta.role=f5-tts"
    environment:
      # HF_HOME matches the container side of the hf-cache bind mount
      # declared under `volumes`.
      - "HF_HOME=/cache/hf"
      - "HF_HUB_DISABLE_TELEMETRY=1"
    volumes:
      # Model weights cache from HuggingFace; about 2GB stays on disk
      # once the first download has completed.
      - /mnt/cache/appdata/f5-tts/hf-cache:/cache/hf
      # Reference voice clips (lj_speech.wav, etc), mounted read-only.
      - /mnt/cache/appdata/f5-tts/voices:/voices:ro
      # Rendered audio output; skald writes story narrations here.
      - /mnt/cache/appdata/f5-tts/audio:/audio
    ports:
      # Container port 7860 is published as host port 7792 on two
      # explicit host addresses: 192.168.0.5 and loopback.
      - host_ip: "192.168.0.5"
        published: "7792"
        target: 7860
      - host_ip: "127.0.0.1"
        published: "7792"
        target: 7860
    deploy:
      resources:
        reservations:
          devices:
            # Reserve all devices exposed by the nvidia driver, with
            # the gpu capability.
            - driver: nvidia
              count: all
              capabilities:
                - gpu