#!/usr/bin/env python3 """Benchmark script for unified streaming implementation""" import asyncio import time from pathlib import Path from typing import List, Tuple from openai import OpenAI import numpy as np import matplotlib.pyplot as plt # Initialize OpenAI client client = OpenAI(base_url="http://localhost:8880/v1", api_key="not-needed") TEST_TEXTS = { "short": "The quick brown fox jumps over the lazy dog.", "medium": """In a bustling city, life moves at a rapid pace. People hurry along the sidewalks, while cars navigate through the busy streets. The air is filled with the sounds of urban activity.""", "long": """The technological revolution has transformed how we live and work. From artificial intelligence to renewable energy, innovations continue to shape our future. As we face global challenges, scientific advances offer new solutions. The intersection of technology and human creativity drives progress forward, opening new possibilities for tomorrow.""" } async def benchmark_streaming(text_name: str, text: str) -> Tuple[float, float, int]: """Benchmark streaming performance Returns: Tuple of (time to first byte, total time, total bytes) """ start_time = time.time() total_bytes = 0 first_byte_time = None with client.audio.speech.with_streaming_response.create( model="kokoro", voice="af_bella", response_format="pcm", input=text, ) as response: for chunk in response.iter_bytes(chunk_size=1024): if first_byte_time is None: first_byte_time = time.time() - start_time total_bytes += len(chunk) total_time = time.time() - start_time return first_byte_time, total_time, total_bytes async def benchmark_non_streaming(text_name: str, text: str) -> Tuple[float, int]: """Benchmark non-streaming performance Returns: Tuple of (total time, total bytes) """ start_time = time.time() speech_file = Path(__file__).parent / f"non_stream_{text_name}.mp3" with client.audio.speech.with_streaming_response.create( model="kokoro", voice="af_bella", input=text, ) as response: response.stream_to_file(speech_file) total_time = time.time() - start_time total_bytes = speech_file.stat().st_size return total_time, total_bytes def plot_results(results: dict): """Plot benchmark results""" plt.figure(figsize=(12, 6)) # Prepare data text_lengths = [len(text) for text in TEST_TEXTS.values()] streaming_times = [r["streaming"]["total_time"] for r in results.values()] non_streaming_times = [r["non_streaming"]["total_time"] for r in results.values()] first_byte_times = [r["streaming"]["first_byte_time"] for r in results.values()] # Plot times x = np.arange(len(TEST_TEXTS)) width = 0.25 plt.bar(x - width, streaming_times, width, label='Streaming Total Time') plt.bar(x, non_streaming_times, width, label='Non-Streaming Total Time') plt.bar(x + width, first_byte_times, width, label='Time to First Byte') plt.xlabel('Text Length (characters)') plt.ylabel('Time (seconds)') plt.title('Unified Streaming Performance Comparison') plt.xticks(x, text_lengths) plt.legend() # Save plot plt.savefig(Path(__file__).parent / 'benchmark_results.png') plt.close() async def main(): """Run benchmarks""" print("Starting unified streaming benchmarks...") results = {} for name, text in TEST_TEXTS.items(): print(f"\nTesting {name} text ({len(text)} chars)...") # Test streaming print("Running streaming test...") first_byte_time, stream_total_time, stream_bytes = await benchmark_streaming(name, text) # Test non-streaming print("Running non-streaming test...") non_stream_total_time, non_stream_bytes = await benchmark_non_streaming(name, text) results[name] = { "text_length": len(text), "streaming": { "first_byte_time": first_byte_time, "total_time": stream_total_time, "total_bytes": stream_bytes, "throughput": stream_bytes / stream_total_time / 1024 # KB/s }, "non_streaming": { "total_time": non_stream_total_time, "total_bytes": non_stream_bytes, "throughput": non_stream_bytes / non_stream_total_time / 1024 # KB/s } } # Print results for this test print(f"\nResults for {name} text:") print(f"Streaming:") print(f" Time to first byte: {first_byte_time:.3f}s") print(f" Total time: {stream_total_time:.3f}s") print(f" Throughput: {stream_bytes/stream_total_time/1024:.1f} KB/s") print(f"Non-streaming:") print(f" Total time: {non_stream_total_time:.3f}s") print(f" Throughput: {non_stream_bytes/non_stream_total_time/1024:.1f} KB/s") # Plot results plot_results(results) print("\nBenchmark results have been plotted to benchmark_results.png") if __name__ == "__main__": asyncio.run(main())