Kokoro-FastAPI/longshot.py

#!/usr/bin/env python3
"""
1. Uses huggingface_hub.snapshot_download() with HF_TOKEN to fetch your gated model.
2. Launches NVIDIA NIM microservice via Docker, mounting the downloaded model.
"""

import os
import subprocess
import sys
from huggingface_hub import snapshot_download

# ——— Configuration ———
HF_TOKEN = os.environ.get("HF_TOKEN")  # Expect you exported this already
if not HF_TOKEN:
    print("Error: Please set your HF_TOKEN environment variable.", file=sys.stderr)
    sys.exit(1)

# Replace with your HF org/model path
HF_REPO_ID = "yuiuo/alex-65_BATCH-gradient_65"  
# Where to cache the downloaded model
LOCAL_MODEL_DIR = os.path.expanduser("~/nim_models/your-gated-model")  

# NIM Docker image for your base model type; adjust if using a different microservice
NIM_IMAGE = "ghcr.io/nvidia/nim/text-generation-base:latest"  
# The environment variable inside the container pointing to your model store
NIM_MODEL_PATH = "/models/your-gated-model"  

# GPU flags (remove --gpus if CPU-only)
DOCKER_GPU_FLAG = "--gpus all"  

def download_model():
    """
    Pulls the gated HF model to LOCAL_MODEL_DIR using your read token.
    """
    print(f"Downloading '{HF_REPO_ID}' to '{LOCAL_MODEL_DIR}' …")
    os.makedirs(LOCAL_MODEL_DIR, exist_ok=True)
    snapshot_download(
        repo_id=HF_REPO_ID,
        cache_dir=LOCAL_MODEL_DIR,
        library_name="nim-integration",
        token=HF_TOKEN,
        local_files_only=False,  # ensure it pulls from remote
    )
    print("Download complete.\n")

def serve_with_nim():
    """
    Launches the NIM microservice Docker container, mounting the model dir.
    """
    cmd = [
        "docker", "run", "-d",
        DOCKER_GPU_FLAG,
        "--name", "nim-gated-model",
        "-p", "8000:8000",                         # expose port 8000 for inference
        "-e", f"NIM_MODEL_STORE={NIM_MODEL_PATH}", # tells NIM where to find models
        "-e", "NIM_REFRESH_INTERVAL=3600",         # reload every hour
        "-v", f"{LOCAL_MODEL_DIR}:{NIM_MODEL_PATH}:ro",  # mount your model readonly
        NIM_IMAGE
    ]
    print("Starting NIM container with command:")
    print("  " + " ".join(cmd))
    subprocess.check_call(cmd)
    print("\nNIM microservice is now running on http://localhost:8000")

def main():
    download_model()
    serve_with_nim()

if __name__ == "__main__":
    main()
Pending changes exported from your codespace 2025-06-17 08:19:37 +00:00			`#!/usr/bin/env python3`
			`"""`
			`1. Uses huggingface_hub.snapshot_download() with HF_TOKEN to fetch your gated model.`
			`2. Launches NVIDIA NIM microservice via Docker, mounting the downloaded model.`
			`"""`

			`import os`
			`import subprocess`
			`import sys`
			`from huggingface_hub import snapshot_download`

			`# ——— Configuration ———`
			`HF_TOKEN = os.environ.get("HF_TOKEN") # Expect you exported this already`
			`if not HF_TOKEN:`
			`print("Error: Please set your HF_TOKEN environment variable.", file=sys.stderr)`
			`sys.exit(1)`

			`# Replace with your HF org/model path`
			`HF_REPO_ID = "yuiuo/alex-65_BATCH-gradient_65"`
			`# Where to cache the downloaded model`
			`LOCAL_MODEL_DIR = os.path.expanduser("~/nim_models/your-gated-model")`

			`# NIM Docker image for your base model type; adjust if using a different microservice`
			`NIM_IMAGE = "ghcr.io/nvidia/nim/text-generation-base:latest"`
			`# The environment variable inside the container pointing to your model store`
			`NIM_MODEL_PATH = "/models/your-gated-model"`

			`# GPU flags (remove --gpus if CPU-only)`
			`DOCKER_GPU_FLAG = "--gpus all"`

			`def download_model():`
			`"""`
			`Pulls the gated HF model to LOCAL_MODEL_DIR using your read token.`
			`"""`
			`print(f"Downloading '{HF_REPO_ID}' to '{LOCAL_MODEL_DIR}' …")`
			`os.makedirs(LOCAL_MODEL_DIR, exist_ok=True)`
			`snapshot_download(`
			`repo_id=HF_REPO_ID,`
			`cache_dir=LOCAL_MODEL_DIR,`
			`library_name="nim-integration",`
			`token=HF_TOKEN,`
			`local_files_only=False, # ensure it pulls from remote`
			`)`
			`print("Download complete.\n")`

			`def serve_with_nim():`
			`"""`
			`Launches the NIM microservice Docker container, mounting the model dir.`
			`"""`
			`cmd = [`
			`"docker", "run", "-d",`
			`DOCKER_GPU_FLAG,`
			`"--name", "nim-gated-model",`
			`"-p", "8000:8000", # expose port 8000 for inference`
			`"-e", f"NIM_MODEL_STORE={NIM_MODEL_PATH}", # tells NIM where to find models`
			`"-e", "NIM_REFRESH_INTERVAL=3600", # reload every hour`
			`"-v", f"{LOCAL_MODEL_DIR}:{NIM_MODEL_PATH}:ro", # mount your model readonly`
			`NIM_IMAGE`
			`]`
			`print("Starting NIM container with command:")`
			`print(" " + " ".join(cmd))`
			`subprocess.check_call(cmd)`
			`print("\nNIM microservice is now running on http://localhost:8000")`

			`def main():`
			`download_model()`
			`serve_with_nim()`

			`if __name__ == "__main__":`
			`main()`