# skald/engines/kokoro/Dockerfile

# Sulkta build of Kokoro-82M TTS.
#
# License: Apache 2.0 (code AND model weights). Clean stack — no
# CC-BY-NC asterisk like F5-TTS's Emilia weights. This is the
# narrator engine for sleep-quality audiobook reads; F5-TTS stays
# around for voice-cloning cases.
#
# Base image: the upstream PyTorch 2.6.0 "runtime" build (CUDA 12.4, cuDNN 9).
#
# Kokoro is small enough to run on CPU but we use the cuda base
# anyway to stay consistent with f5-tts and so it'll pick up the
# GPU when no other tenant has it.
#
# NOTE(review): the tag is not pinned by digest, so a re-pushed upstream
# tag would silently change this build — consider appending @sha256:<digest>.
FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime

# Build-time only: keeps apt/debconf from prompting during the RUN steps
# of this stage. Declared as ARG rather than ENV so it is NOT baked into
# the environment of containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment:
#   PYTHONUNBUFFERED          unbuffered stdout/stderr, so logs appear immediately
#   HF_HOME                   Hugging Face cache root (directory is created later in this file)
#   HF_HUB_DISABLE_TELEMETRY  opt out of Hugging Face Hub telemetry
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/cache/hf \
    HF_HUB_DISABLE_TELEMETRY=1

# OS-level packages, installed in a single layer. The apt index is fetched
# and removed within the same RUN so it never ends up in the image.
# Package list is kept alphabetical for clean diffs.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        espeak-ng \
        ffmpeg \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies for the server, one requirement per line.
# kokoro pulls phonemizer + soundfile + espeakng transitively; soundfile is
# still listed explicitly here, which sets its own version floor.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
        'fastapi>=0.115.0' \
        'kokoro>=0.9.0' \
        'soundfile>=0.13.0' \
        'uvicorn>=0.32.0'

# Pre-create the audio directory and the Hugging Face cache directory
# (the latter is the path HF_HOME points at).
RUN mkdir -p /audio /cache/hf

# /app is both the working directory and the home of the server module;
# WORKDIR creates it, then the single-file server is copied into it.
WORKDIR /app
COPY kokoro_server.py ./

# Documents the port uvicorn binds below (EXPOSE does not publish it).
EXPOSE 7860/tcp

# Exec form: uvicorn runs directly as PID 1 and serves the `app` object
# from kokoro_server.py on all interfaces.
CMD ["uvicorn", "kokoro_server:app", \
     "--host", "0.0.0.0", "--port", "7860"]