diff --git a/.gitignore b/.gitignore index 6310f74..af08bc6 100644 --- a/.gitignore +++ b/.gitignore @@ -34,23 +34,10 @@ ENV/ # Project specific # Model files -*.pt + *.pth *.tar* -# Voice files -api/src/voices/af_bella.pt -api/src/voices/af_nicole.pt -api/src/voices/af_sarah.pt -api/src/voices/af_sky.pt -api/src/voices/af.pt -api/src/voices/am_adam.pt -api/src/voices/am_michael.pt -api/src/voices/bf_emma.pt -api/src/voices/bf_isabella.pt -api/src/voices/bm_george.pt -api/src/voices/bm_lewis.pt - # Audio files examples/*.wav examples/*.pcm diff --git a/README.md b/README.md index 6564de0..a02abe9 100644 --- a/README.md +++ b/README.md @@ -23,47 +23,31 @@ Dockerized FastAPI wrapper for [Kokoro-82M](https://huggingface.co/hexgrad/Kokor The service can be accessed through either the API endpoints or the Gradio web interface. -1. Install prerequisites: +1. Install prerequisites, and start the service using Docker Compose (Full setup including UI): - Install [Docker Desktop](https://www.docker.com/products/docker-desktop/) - Clone the repository: ```bash git clone https://github.com/remsky/Kokoro-FastAPI.git cd Kokoro-FastAPI - ``` + + # * Switch to stable branch if any issues * + git checkout v0.0.5post1-stable -2. Start the service: - - - Using Docker Compose (Full setup including UI): - ```bash cd docker/gpu # OR # cd docker/cpu # Run this or the above docker compose up --build ``` + Once started: - The API will be available at http://localhost:8880 - The UI can be accessed at http://localhost:7860 - - - OR run the API alone using Docker (model + voice packs baked in): - ```bash - docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:latest # CPU - docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:latest # Nvidia GPU - # Minified versions are available with `:latest-slim` tag, though it is a first test and may not be functional - ``` - -3. 
Running the UI Docker Service: - - - If you only want to run the Gradio web interface separately and connect it to an existing API service: - ```bash - docker run -p 7860:7860 \ - -e API_HOST=<api-hostname-or-ip> \ - -e API_PORT=8880 \ - ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0 - ``` - - - Replace `<api-hostname-or-ip>` with: - - `kokoro-tts` if the UI container is running in the same Docker Compose setup. - - `localhost` if the API is running on your local machine. + __Or__ running the API alone using Docker (model + voice packs baked in) (Most Recent): + + ```bash + docker run -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-cpu:v0.1.0post1 # CPU + docker run --gpus all -p 8880:8880 ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0post1 # Nvidia GPU + ``` 4. Run locally as an OpenAI-Compatible Speech Endpoint @@ -74,13 +58,14 @@ The service can be accessed through either the API endpoints or the Gradio web i api_key="not-needed" ) - response = client.audio.speech.create( + with client.audio.speech.with_streaming_response.create( model="kokoro", voice="af_sky+af_bella", #single or multiple voicepack combo input="Hello world!", response_format="mp3" - ) - response.stream_to_file("output.mp3") + ) as response: + response.stream_to_file("output.mp3") + ``` or visit http://localhost:7860 @@ -200,8 +185,19 @@ If you only want the API, just comment out everything in the docker-compose.yml Currently, voices created via the API are accessible here, but voice combination/creation has not yet been added -*Note: Recent updates for streaming could lead to temporary glitches. If so, pull from the most recent stable release v0.0.2 to restore* +Running the UI Docker Service + - If you only want to run the Gradio web interface separately and connect it to an existing API service: + ```bash + docker run -p 7860:7860 \ + -e API_HOST=<api-hostname-or-ip> \ + -e API_PORT=8880 \ + ghcr.io/remsky/kokoro-fastapi-ui:v0.1.0 + ``` + - Replace `<api-hostname-or-ip>` with: + - `kokoro-tts` if the UI container is running in the same Docker Compose setup. 
+ - `localhost` if the API is running on your local machine. + ### Disabling Local Saving You can disable local saving of audio files and hide the file view in the UI by setting the `DISABLE_LOCAL_SAVING` environment variable to `true`. This is useful when running the service on a server where you don't want to store generated audio files locally. diff --git a/api/src/voices/af.pt b/api/src/voices/af.pt new file mode 100644 index 0000000..c737393 Binary files /dev/null and b/api/src/voices/af.pt differ diff --git a/api/src/voices/af_bella.pt b/api/src/voices/af_bella.pt new file mode 100644 index 0000000..83f79c2 Binary files /dev/null and b/api/src/voices/af_bella.pt differ diff --git a/api/src/voices/af_nicole.pt b/api/src/voices/af_nicole.pt new file mode 100644 index 0000000..c218f7d Binary files /dev/null and b/api/src/voices/af_nicole.pt differ diff --git a/api/src/voices/af_sarah.pt b/api/src/voices/af_sarah.pt new file mode 100644 index 0000000..57ae574 Binary files /dev/null and b/api/src/voices/af_sarah.pt differ diff --git a/api/src/voices/af_sky.pt b/api/src/voices/af_sky.pt new file mode 100644 index 0000000..d86ae26 Binary files /dev/null and b/api/src/voices/af_sky.pt differ diff --git a/api/src/voices/am_adam.pt b/api/src/voices/am_adam.pt new file mode 100644 index 0000000..16b8f95 Binary files /dev/null and b/api/src/voices/am_adam.pt differ diff --git a/api/src/voices/am_michael.pt b/api/src/voices/am_michael.pt new file mode 100644 index 0000000..ff1382f Binary files /dev/null and b/api/src/voices/am_michael.pt differ diff --git a/api/src/voices/bf_emma.pt b/api/src/voices/bf_emma.pt new file mode 100644 index 0000000..7b3eece Binary files /dev/null and b/api/src/voices/bf_emma.pt differ diff --git a/api/src/voices/bf_isabella.pt b/api/src/voices/bf_isabella.pt new file mode 100644 index 0000000..ebf95a5 Binary files /dev/null and b/api/src/voices/bf_isabella.pt differ diff --git a/api/src/voices/bm_george.pt b/api/src/voices/bm_george.pt new 
file mode 100644 index 0000000..3f8d049 Binary files /dev/null and b/api/src/voices/bm_george.pt differ diff --git a/api/src/voices/bm_lewis.pt b/api/src/voices/bm_lewis.pt new file mode 100644 index 0000000..d1e36fa Binary files /dev/null and b/api/src/voices/bm_lewis.pt differ diff --git a/docker/cpu/docker-compose.yml b/docker/cpu/docker-compose.yml index a545122..0f9003a 100644 --- a/docker/cpu/docker-compose.yml +++ b/docker/cpu/docker-compose.yml @@ -1,10 +1,10 @@ name: kokoro-tts services: kokoro-tts: - image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.1.0 - # build: - # context: ../.. - # dockerfile: docker/cpu/Dockerfile + # image: ghcr.io/remsky/kokoro-fastapi-cpu:v0.1.0 + build: + context: ../.. + dockerfile: docker/cpu/Dockerfile volumes: - ../../api/src:/app/api/src - ../../api/src/voices:/app/api/src/voices diff --git a/docker/gpu/docker-compose.yml b/docker/gpu/docker-compose.yml index 7cb4b94..9433dc6 100644 --- a/docker/gpu/docker-compose.yml +++ b/docker/gpu/docker-compose.yml @@ -1,10 +1,10 @@ name: kokoro-tts services: kokoro-tts: - image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0 - # build: - # context: ../.. - # dockerfile: docker/gpu/Dockerfile + # image: ghcr.io/remsky/kokoro-fastapi-gpu:v0.1.0 + build: + context: ../.. + dockerfile: docker/gpu/Dockerfile volumes: - ../../api/src:/app/api/src # Mount src for development - ../../api/src/voices:/app/api/src/voices # Mount voices for persistence