# skald/engines/f5-tts/Dockerfile

# F5-TTS rebuild on a known-good pytorch base. Upstream
# ghcr.io/swivid/f5-tts:main shipped a torch/torchaudio ABI mismatch
# that broke `import torchaudio` at boot; this image bypasses that.
#
# License: Apache 2.0 (code) / CC-BY-NC (Emilia-trained weights).
# Personal use OK; redistribution gray-area — flagged.

# Base image: official PyTorch 2.6.0 runtime build (CUDA 12.4, cuDNN 9).
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# in this build. Declared as ARG rather than ENV so it is not baked into
# the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED         - unbuffered stdout/stderr so logs show up immediately
#   HF_HOME                  - root of the Hugging Face cache
#   HF_HUB_DISABLE_TELEMETRY - opt out of Hugging Face Hub telemetry
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/cache/hf \
    HF_HUB_DISABLE_TELEMETRY=1

# OS-level dependencies, one per line in alphabetical order. The apt
# package index is deleted in the same layer so it never lands in the image.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        ca-certificates \
        curl \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# The base image provides torch 2.6.0 + torchaudio 2.6.0. Per the original
# note, f5-tts pulls a recent transformers (5.x) which needs torch >=2.5's
# modern torch.library.custom_op type signatures.
#
# torch and torchaudio are pinned in the same resolver run so that
# installing f5-tts can never swap either of them for a different release:
# if a future f5-tts cannot work with 2.6.0 the build fails loudly instead
# of reintroducing the torch/torchaudio ABI mismatch this image avoids.
# ("==2.6.0" also matches local builds such as 2.6.0+cu124.)
RUN pip install --no-cache-dir \
        'f5-tts>=1.0.0' \
        'torch==2.6.0' \
        'torchaudio==2.6.0'

# Create the Hugging Face cache directory (HF_HOME, i.e. /cache/hf) along
# with /audio and /voices. Nothing is downloaded here; the directories are
# only created so they exist in the image.
# NOTE(review): /audio and /voices are presumably used by server.py or as
# mount points — confirm against the server and the compose/run config.
RUN mkdir -p "${HF_HOME}" /audio /voices

# Switch to the application directory first (WORKDIR creates /app when it
# does not exist), then place the server module inside it as /app/server.py.
WORKDIR /app
COPY server.py ./

# Documents the port uvicorn binds to below (TCP); it is not published
# automatically — map it with `-p` / `ports:` at run time.
EXPOSE 7860/tcp

# Launches the `app` object from server.py under uvicorn, listening on all
# interfaces. Per the original author's note this is a purpose-built
# FastAPI server rather than Gradio, and models are loaded at startup so
# the first request does not pay the cold-start cost (server.py is not
# visible from this file — confirm there).
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "7860"]