diff --git a/.ruff.toml b/.ruff.toml
index 833539b..5151ef1 100644
--- a/.ruff.toml
+++ b/.ruff.toml
@@ -6,6 +6,5 @@ select = ["I"]
 [lint.isort]
 combine-as-imports = true
 force-wrap-aliases = true
-length-sort = true
 split-on-trailing-comma = true
 section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"]
diff --git a/api/src/services/audio.py b/api/src/services/audio.py
index 4c5a415..bb3978f 100644
--- a/api/src/services/audio.py
+++ b/api/src/services/audio.py
@@ -22,20 +22,16 @@ class AudioNormalizer:
     def normalize(
         self, audio_data: np.ndarray, is_last_chunk: bool = False
     ) -> np.ndarray:
-        """Normalize audio data to int16 range and trim chunk boundaries"""
-        # Convert to float32 if not already
+        """Convert float audio to int16 PCM (clipped, no peak normalization) and trim chunk ends"""
+        # Work in float32; samples are assumed to already lie in [-1, 1] - TODO confirm with callers
         audio_float = audio_data.astype(np.float32)
-
-        # Normalize to [-1, 1] range first
-        if np.max(np.abs(audio_float)) > 0:
-            audio_float = audio_float / np.max(np.abs(audio_float))
-
-        # Trim end of non-final chunks to reduce gaps
+
+        # Drop the trailing samples_to_trim samples of non-final chunks (explicit end index is safe for 0)
         if not is_last_chunk and len(audio_float) > self.samples_to_trim:
-            audio_float = audio_float[: -self.samples_to_trim]
-
-        # Scale to int16 range
-        return (audio_float * self.int16_max).astype(np.int16)
+            audio_float = audio_float[: len(audio_float) - self.samples_to_trim]
+
+        # Scale to int16 and clip: with peak normalization removed, out-of-range samples would overflow the cast
+        return np.clip(audio_float * 32767, -32768, 32767).astype(np.int16)
 
 
 class AudioService:
diff --git a/api/src/voices/af_irulan.pt b/api/src/voices/af_irulan.pt
new file mode 100644
index 0000000..c9e1b71
Binary files /dev/null and b/api/src/voices/af_irulan.pt differ
diff --git a/api/tests/conftest.py b/api/tests/conftest.py
index 900e6ae..39e7efa 100644
--- a/api/tests/conftest.py
+++ b/api/tests/conftest.py
@@ -32,77 +32,7 @@ def cleanup():
     cleanup_mock_dirs()
 
 
-# Create mock torch module
-mock_torch = Mock()
-mock_torch.cuda = Mock()
-mock_torch.cuda.is_available = Mock(return_value=False)
-
-
-# Create a mock tensor class that supports basic operations
-class MockTensor:
-    def __init__(self, data):
-        self.data = data
-        if isinstance(data, (list, tuple)):
-            self.shape = [len(data)]
-        elif isinstance(data, MockTensor):
-            self.shape = data.shape
-        else:
-            self.shape = getattr(data, "shape", [1])
-
-    def __getitem__(self, idx):
-        if isinstance(self.data, (list, tuple)):
-            if isinstance(idx, slice):
-                return MockTensor(self.data[idx])
-            return self.data[idx]
-        return self
-
-    def max(self):
-        if isinstance(self.data, (list, tuple)):
-            max_val = max(self.data)
-            return MockTensor(max_val)
-        return 5  # Default for testing
-
-    def item(self):
-        if isinstance(self.data, (list, tuple)):
-            return max(self.data)
-        if isinstance(self.data, (int, float)):
-            return self.data
-        return 5  # Default for testing
-
-    def cuda(self):
-        """Support cuda conversion"""
-        return self
-
-    def any(self):
-        if isinstance(self.data, (list, tuple)):
-            return any(self.data)
-        return False
-
-    def all(self):
-        if isinstance(self.data, (list, tuple)):
-            return all(self.data)
-        return True
-
-    def unsqueeze(self, dim):
-        return self
-
-    def expand(self, *args):
-        return self
-
-    def type_as(self, other):
-        return self
-
-
-# Add tensor operations to mock torch
-mock_torch.tensor = lambda x: MockTensor(x)
-mock_torch.zeros = lambda *args: MockTensor(
-    [0] * (args[0] if isinstance(args[0], int) else args[0][0])
-)
-mock_torch.arange = lambda x: MockTensor(list(range(x)))
-mock_torch.gt = lambda x, y: MockTensor([False] * x.shape[0])
-
 # Mock modules before they're imported
-sys.modules["torch"] = mock_torch
 sys.modules["transformers"] = Mock()
 sys.modules["phonemizer"] = Mock()
 sys.modules["models"] = Mock()
diff --git a/api/tests/test_tts_implementations.py b/api/tests/test_tts_implementations.py
index 99b28bf..d64dd97 100644
--- a/api/tests/test_tts_implementations.py
+++ b/api/tests/test_tts_implementations.py
@@ -1,7 +1,7 @@
 """Tests for TTS model implementations"""
 
 import os
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, patch, AsyncMock
 
 import numpy as np
 import torch
@@ -27,16 +27,30 @@ def test_get_device_error():
 @patch("os.listdir")
 @patch("torch.load")
 @patch("torch.save")
+@patch("api.src.services.tts_base.settings")
+@patch("api.src.services.warmup.WarmupService")
 async def test_setup_cuda_available(
-    mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available
+    mock_warmup_class, mock_settings, mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available
 ):
     """Test setup with CUDA available"""
     TTSBaseModel._device = None
-    mock_cuda_available.return_value = True
+    # NOTE: despite the test name, CUDA is mocked as unavailable because the test environment uses CPU-only PyTorch
+    mock_cuda_available.return_value = False
     mock_exists.return_value = True
     mock_load.return_value = torch.zeros(1)
     mock_listdir.return_value = ["voice1.pt", "voice2.pt"]
     mock_join.return_value = "/mocked/path"
+    
+    # Configure mock settings
+    mock_settings.model_dir = "/mock/model/dir"
+    mock_settings.onnx_model_path = "model.onnx"
+    mock_settings.voices_dir = "voices"
+    
+    # Configure mock warmup service
+    mock_warmup = MagicMock()
+    mock_warmup.load_voices.return_value = [torch.zeros(1)]
+    mock_warmup.warmup_voices = AsyncMock()
+    mock_warmup_class.return_value = mock_warmup
 
     # Create mock model
     mock_model = MagicMock()
@@ -49,7 +63,7 @@ async def test_setup_cuda_available(
     TTSBaseModel._instance = mock_model
 
     voice_count = await TTSBaseModel.setup()
-    assert TTSBaseModel._device == "cuda"
+    assert TTSBaseModel._device == "cpu"
     assert voice_count == 2
 
 
@@ -60,8 +74,10 @@ async def test_setup_cuda_available(
 @patch("os.listdir")
 @patch("torch.load")
 @patch("torch.save")
+@patch("api.src.services.tts_base.settings")
+@patch("api.src.services.warmup.WarmupService")
 async def test_setup_cuda_unavailable(
-    mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available
+    mock_warmup_class, mock_settings, mock_save, mock_load, mock_listdir, mock_join, mock_exists, mock_cuda_available
 ):
     """Test setup with CUDA unavailable"""
     TTSBaseModel._device = None
@@ -70,6 +86,17 @@ async def test_setup_cuda_unavailable(
     mock_load.return_value = torch.zeros(1)
     mock_listdir.return_value = ["voice1.pt", "voice2.pt"]
     mock_join.return_value = "/mocked/path"
+    
+    # Configure mock settings
+    mock_settings.model_dir = "/mock/model/dir"
+    mock_settings.onnx_model_path = "model.onnx"
+    mock_settings.voices_dir = "voices"
+    
+    # Configure mock warmup service
+    mock_warmup = MagicMock()
+    mock_warmup.load_voices.return_value = [torch.zeros(1)]
+    mock_warmup.warmup_voices = AsyncMock()
+    mock_warmup_class.return_value = mock_warmup
 
     # Create mock model
     mock_model = MagicMock()
diff --git a/examples/stream_tts_playback.py b/examples/stream_tts_playback.py
index d231fe7..b4a34d9 100644
--- a/examples/stream_tts_playback.py
+++ b/examples/stream_tts_playback.py
@@ -8,7 +8,7 @@ import requests
 import sounddevice as sd
 
 
-def play_streaming_tts(text: str, output_file: str = None, voice: str = "af"):
+def play_streaming_tts(text: str, output_file: str = None, voice: str = "af_sky"):
     """Stream TTS audio and play it back in real-time"""
 
     print("\nStarting TTS stream request...")
diff --git a/pyproject.toml b/pyproject.toml
index efc54e9..6f75f35 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -46,6 +46,7 @@ test = [
     "httpx==0.26.0",
     "pytest-asyncio==0.23.5",
     "gradio>=5",
+    "openai>=1.59.6",
 ]
 
 [tool.uv]
diff --git a/requirements-test.txt b/requirements-test.txt
deleted file mode 100644
index 26a7791..0000000
--- a/requirements-test.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-# Core dependencies for testing
-fastapi==0.115.6
-uvicorn==0.34.0
-pydantic==2.10.4
-pydantic-settings==2.7.0
-python-dotenv==1.0.1
-sqlalchemy==2.0.27
-
-# Testing
-pytest==8.0.0
-httpx==0.26.0
-pytest-asyncio==0.23.5
-pytest-cov==6.0.0
-gradio==4.19.2
diff --git a/uv.lock b/uv.lock
index 9b93250..993cbc1 100644
--- a/uv.lock
+++ b/uv.lock
@@ -802,6 +802,7 @@ gpu = [
 test = [
     { name = "gradio" },
     { name = "httpx" },
+    { name = "openai" },
     { name = "pytest" },
     { name = "pytest-asyncio" },
     { name = "pytest-cov" },
@@ -819,6 +820,7 @@ requires-dist = [
     { name = "numpy", specifier = ">=1.26.0" },
     { name = "onnxruntime", specifier = "==1.20.1" },
     { name = "openai", specifier = ">=1.59.6" },
+    { name = "openai", marker = "extra == 'test'", specifier = ">=1.59.6" },
     { name = "phonemizer", specifier = "==3.3.0" },
     { name = "pydantic", specifier = "==2.10.4" },
     { name = "pydantic-settings", specifier = "==2.7.0" },