diff --git a/api/src/inference/kokoro_v1.py b/api/src/inference/kokoro_v1.py
index 3fbd14e..b36978e 100644
--- a/api/src/inference/kokoro_v1.py
+++ b/api/src/inference/kokoro_v1.py
@@ -64,6 +64,93 @@ class KokoroV1(BaseModelBackend):
         except Exception as e:
             raise RuntimeError(f"Failed to load Kokoro model: {e}")
 
+    async def generate_from_tokens(
+        self,
+        tokens: str,
+        voice: Union[str, Tuple[str, Union[torch.Tensor, str]]],
+        speed: float = 1.0
+    ) -> AsyncGenerator[np.ndarray, None]:
+        """Generate audio from phoneme tokens.
+
+        Args:
+            tokens: Input phoneme tokens to synthesize
+            voice: Either a voice path string or a tuple of (voice_name, voice_tensor/path)
+            speed: Speed multiplier
+
+        Yields:
+            Generated audio chunks
+
+        Raises:
+            RuntimeError: If the model is not loaded
+            Exception: Any generation error is logged and re-raised, unless a
+                CUDA out-of-memory retry succeeds
+        """
+        # Imported once up front: both the tensor branch and the re-save step use it
+        import tempfile
+
+        if not self.is_loaded:
+            raise RuntimeError("Model not loaded")
+
+        try:
+            # Memory management for GPU
+            if self._device == "cuda":
+                if self._check_memory():
+                    self._clear_memory()
+
+            # Handle voice input
+            voice_path: str
+            if isinstance(voice, tuple):
+                voice_name, voice_data = voice
+                if isinstance(voice_data, str):
+                    voice_path = voice_data
+                else:
+                    # Save tensor to temporary file
+                    temp_dir = tempfile.gettempdir()
+                    voice_path = os.path.join(temp_dir, f"{voice_name}.pt")
+                    # Save tensor with CPU mapping for portability
+                    torch.save(voice_data.cpu(), voice_path)
+            else:
+                voice_path = voice
+
+            # Load voice tensor with proper device mapping
+            voice_tensor = await paths.load_voice_tensor(voice_path, device=self._device)
+            # Save back to a temporary file with proper device mapping
+            # NOTE(review): the temp name depends only on the voice file name, so
+            # concurrent requests for one voice write the same file - confirm this is safe
+            temp_dir = tempfile.gettempdir()
+            temp_path = os.path.join(temp_dir, f"temp_voice_{os.path.basename(voice_path)}")
+            await paths.save_voice_tensor(voice_tensor, temp_path)
+            voice_path = temp_path
+
+            # Generate using pipeline's generate_from_tokens method
+            logger.debug(f"Generating audio from tokens: '{tokens[:100]}...'")
+            for result in self._pipeline.generate_from_tokens(
+                tokens=tokens,
+                voice=voice_path,
+                speed=speed,
+                model=self._model
+            ):
+                if result.audio is not None:
+                    logger.debug(f"Got audio chunk with shape: {result.audio.shape}")
+                    yield result.audio.numpy()
+                else:
+                    logger.warning("No audio in chunk")
+
+        except Exception as e:
+            logger.error(f"Generation failed: {e}")
+            if (
+                self._device == "cuda"
+                and model_config.pytorch_gpu.retry_on_oom
+                and "out of memory" in str(e).lower()
+            ):
+                self._clear_memory()
+                async for chunk in self.generate_from_tokens(tokens, voice, speed):
+                    yield chunk
+                # The retry completed, so stop here: falling through to the bare
+                # raise would re-throw the original OOM after a successful recovery
+                return
+            raise
+
     async def generate(
         self,
         text: str,
diff --git a/api/src/services/text_processing/text_processor.py b/api/src/services/text_processing/text_processor.py
index 8aa8b65..a0d437a 100644
--- a/api/src/services/text_processing/text_processor.py
+++ b/api/src/services/text_processing/text_processor.py
@@ -27,24 +27,12 @@ def process_text_chunk(text: str, language: str = "a", skip_phonemize: bool = Fa
     start_time = time.time()
 
     if skip_phonemize:
-        # Input is already phonemes, just tokenize
-        t0 = time.time()
         tokens = tokenize(text)
-        t1 = time.time()
     else:
         # Normal text processing pipeline
-        t0 = time.time()
         normalized = normalize_text(text)
-        t1 = time.time()
-
-
-        t0 = time.time()
         phonemes = phonemize(normalized, language, normalize=False)  # Already normalized
-        t1 = time.time()
-
-        t0 = time.time()
         tokens = tokenize(phonemes)
-        t1 = time.time()
 
     total_time = time.time() - start_time
     logger.debug(f"Total processing took {total_time*1000:.2f}ms for chunk: '{text[:50]}...'")
@@ -95,13 +83,55 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
     return results
 
 async def smart_split(text: str, max_tokens: int = ABSOLUTE_MAX) -> AsyncGenerator[Tuple[str, List[int]], None]:
-    """Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
+    """Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens.
+ Special symbols: + - <<>> : Forces a break between chunks + """ + CHUNK_BREAK = "<<>>" + start_time = time.time() chunk_count = 0 logger.info(f"Starting smart split for {len(text)} chars") - # Process all sentences - sentences = get_sentence_info(text) + # First split on forced break symbol + forced_chunks = [chunk.strip() for chunk in text.split(CHUNK_BREAK) if chunk.strip()] + + # If no forced breaks, process normally + if len(forced_chunks) <= 1: + sentences = get_sentence_info(text) + else: + # Process each forced chunk separately + for forced_chunk in forced_chunks: + # Process sentences within this forced chunk + chunk_sentences = get_sentence_info(forced_chunk) + + # Process and yield all sentences in this chunk before moving to next + current_chunk = [] + current_tokens = [] + current_count = 0 + + for sentence, tokens, count in chunk_sentences: + if current_count + count <= TARGET_MAX: + current_chunk.append(sentence) + current_tokens.extend(tokens) + current_count += count + else: + if current_chunk: + chunk_text = " ".join(current_chunk) + chunk_count += 1 + yield chunk_text, current_tokens + current_chunk = [sentence] + current_tokens = tokens + current_count = count + + # Yield remaining sentences in this forced chunk + if current_chunk: + chunk_text = " ".join(current_chunk) + chunk_count += 1 + yield chunk_text, current_tokens + + # Skip the rest of the processing since we've handled all chunks + return current_chunk = [] current_tokens = [] diff --git a/api/src/services/tts_service.py b/api/src/services/tts_service.py index 8497cc6..da11cf6 100644 --- a/api/src/services/tts_service.py +++ b/api/src/services/tts_service.py @@ -485,31 +485,29 @@ class TTSService: start_time = time.time() try: # Get backend and voice path - raise ValueError("Not yet implemented") - # linked to https://github.com/hexgrad/kokoro/pull/53 or similiar backend = self.model_manager.get_backend() voice_name, voice_path = await self._get_voice_path(voice) - # if 
isinstance(backend, KokoroV1): - # # For Kokoro V1, pass phonemes directly to pipeline - # result = None - # for r in backend._pipeline( - # phonemes, - # voice=voice_path, - # speed=speed, - # model=backend._model - # ): - # if r.audio is not None: - # result = r - # break + if isinstance(backend, KokoroV1): + # For Kokoro V1, use generate_from_tokens with raw phonemes + result = None + for r in backend._pipeline.generate_from_tokens( + tokens=phonemes, # Pass raw phonemes string + voice=voice_path, + speed=speed, + model=backend._model + ): + if r.audio is not None: + result = r + break - # if result is None or result.audio is None: - # raise ValueError("No audio generated") + if result is None or result.audio is None: + raise ValueError("No audio generated") - # processing_time = time.time() - start_time - # return result.audio.numpy(), processing_time - # else: - pass + processing_time = time.time() - start_time + return result.audio.numpy(), processing_time + else: + raise ValueError("Phoneme generation only supported with Kokoro V1 backend") except Exception as e: logger.error(f"Error in phoneme audio generation: {str(e)}") diff --git a/docker/gpu/.dockerignore b/docker/gpu/.dockerignore index df5f9db..8592ec7 100644 --- a/docker/gpu/.dockerignore +++ b/docker/gpu/.dockerignore @@ -15,7 +15,7 @@ __pycache__ # Environment # .env -.venv +.venv* env/ venv/ ENV/ diff --git a/docker/gpu/docker-compose.yml b/docker/gpu/docker-compose.yml index d78592a..594fb13 100644 --- a/docker/gpu/docker-compose.yml +++ b/docker/gpu/docker-compose.yml @@ -19,7 +19,7 @@ services: reservations: devices: - driver: nvidia - count: 1 + count: all capabilities: [gpu] # # Gradio UI service diff --git a/examples/phoneme_examples/examples/phoneme_examples/output/phoneme_test.wav b/examples/phoneme_examples/examples/phoneme_examples/output/phoneme_test.wav new file mode 100644 index 0000000..857ada9 Binary files /dev/null and 
b/examples/phoneme_examples/examples/phoneme_examples/output/phoneme_test.wav differ
diff --git a/examples/phoneme_examples/test_phoneme_generation.py b/examples/phoneme_examples/test_phoneme_generation.py
new file mode 100644
index 0000000..4700944
--- /dev/null
+++ b/examples/phoneme_examples/test_phoneme_generation.py
@@ -0,0 +1,58 @@
+import requests
+import os
+import json
+
+def main():
+    """Post a phoneme string to the local dev endpoint and save the audio.
+
+    Prints the outcome instead of raising: this is a manual smoke test that
+    needs the API server listening on localhost:8880.
+    """
+    # Test phoneme string
+    phonemes = "hˈɛloʊ wˈɜrld"  # "Hello world" in phonemes
+
+    # Anchor the output next to this script; a cwd-relative path creates a
+    # nested examples/phoneme_examples/... tree when run from this directory
+    output_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "output")
+    output_path = os.path.join(output_dir, "phoneme_test.wav")
+
+    try:
+        print("\nTesting phoneme generation via API...")
+
+        # Create request payload
+        payload = {
+            "phonemes": phonemes,
+            "voice": "af_bella"  # Using bella voice
+        }
+
+        # Make request to the API endpoint
+        response = requests.post(
+            "http://localhost:8880/dev/generate_from_phonemes",
+            json=payload,
+            stream=True,  # Enable streaming for audio data
+            timeout=(5, 120),  # (connect, read) seconds, so a dead server cannot hang the script
+        )
+
+        # Check if request was successful
+        if response.status_code == 200:
+            # Create output directory if it doesn't exist
+            os.makedirs(output_dir, exist_ok=True)
+
+            # Save the audio response
+            with open(output_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        f.write(chunk)
+
+            print(f"\nAudio saved to: {output_path}")
+            print("\nPhoneme test completed successfully!")
+            print(f"\nInput phonemes: {phonemes}")
+        else:
+            print(f"Error: API request failed with status code {response.status_code}")
+            print(f"Response: {response.text}")
+
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 41da5c3..6d09582 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,8 +36,8 @@ dependencies = [
     "matplotlib>=3.10.0",
     "mutagen>=1.47.0",
     "psutil>=6.1.1",
-    "kokoro==0.7.4",
-    'misaki[en,ja,ko,zh,vi]==0.7.4',
+    "kokoro==0.7.6",
+    'misaki[en,ja,ko,zh,vi]==0.7.6',
     "spacy>=3.7.6",
     "en-core-web-sm @
https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" ] diff --git a/web/index.html b/web/index.html index 2fc9e26..a862b76 100644 --- a/web/index.html +++ b/web/index.html @@ -23,6 +23,7 @@
+
HexGrad/Kokoro-82M on Hugging Face @@ -45,64 +46,75 @@
- -
-
- -
-
- -
-
-
- -
-
-
-
- - -
-
- - -
-
-
- - - - - +
+ + + +
+

Tips

+
    +
  • Use <<>> to add an intentional break between chunks
  • +
+ +
+ + + +
- -
+
+
+ +
+
+ +
+
+
+ +
+
+
+
+ + +
+
+ + +
+
+
+ + +
diff --git a/web/src/services/AudioService.js b/web/src/services/AudioService.js index 4edbfae..7109d5e 100644 --- a/web/src/services/AudioService.js +++ b/web/src/services/AudioService.js @@ -137,6 +137,12 @@ export class AudioService { // Signal completion onProgress?.(estimatedChunks, estimatedChunks); this.dispatchEvent('complete'); + + // Check if we should autoplay for small inputs that didn't trigger during streaming + if (this.shouldAutoplay && !hasStartedPlaying && this.sourceBuffer.buffered.length > 0) { + setTimeout(() => this.play(), 100); + } + setTimeout(() => { this.dispatchEvent('downloadReady'); }, 800); diff --git a/web/styles-clean.css b/web/styles-clean.css deleted file mode 100644 index 035fb7c..0000000 --- a/web/styles-clean.css +++ /dev/null @@ -1,266 +0,0 @@ -:root { - --bg-color: #0f172a; - --fg-color: #6366f1; - --surface: rgba(30, 41, 59, 1); /* Made opaque */ - --text: #f8fafc; - --text-light: #cbd5e1; - --border: rgba(148, 163, 184, 0.2); - --error: #ef4444; - --success: #22c55e; - --font-family: 'Inter', system-ui, sans-serif; -} - -* { - margin: 0; - padding: 0; - box-sizing: border-box; -} - -body { - font-family: var(--font-family); - line-height: 1.6; - color: var(--text); - background: radial-gradient(circle at top right, - var(--fg-color) 0%, - var(--bg-color) 100%); - min-height: 100vh; - position: relative; -} - -.container { - max-width: 800px; - margin: 0 auto; - padding: 4rem 1.5rem; -} - -header { - margin-bottom: 3rem; - text-align: center; -} - -h1 { - font-size: 3rem; - font-weight: 700; - color: var(--text); - margin-bottom: 0.5rem; -} - -.subtitle { - color: var(--text-light); - font-size: 1.1rem; -} - -.input-section, .player-section { - background: var(--surface); - padding: 2rem; - border-radius: 1rem; - border: 1px solid var(--border); - backdrop-filter: blur(12px); - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), - 0 2px 4px -1px rgba(0, 0, 0, 0.06); - margin-bottom: 2rem; -} - -textarea { - width: 100%; - 
min-height: 120px; - padding: 1rem; - border: 1px solid var(--border); - border-radius: 0.5rem; - background: rgba(15, 23, 42, 0.3); - color: var(--text); - font-size: 1rem; - transition: border-color 0.2s ease; - font-family: var(--font-family); - resize: vertical; -} - -textarea:focus { - outline: none; - border-color: var(--fg-color); - box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2); -} - -textarea::placeholder { - color: var(--text-light); -} - -.controls { - margin-top: 1.5rem; - display: flex; - flex-direction: column; - gap: 1.5rem; -} - -.voice-select-container { - position: relative; - display: flex; - align-items: center; - gap: 1rem; -} - -.voice-search { - flex: 1; - padding: 0.75rem 1rem; - border: 1px solid var(--border); - border-radius: 0.5rem; - background: rgba(15, 23, 42, 0.3); - color: var(--text); - font-size: 1rem; - transition: all 0.2s ease; -} - -.voice-search:focus { - outline: none; - border-color: var(--fg-color); - box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2); -} - -.voice-search::placeholder { - color: var(--text-light); -} - -.voice-dropdown { - display: none; - position: absolute; - top: 100%; - left: 0; - right: 0; - background: var(--surface); - border: 1px solid var(--border); - border-radius: 0.5rem; - margin-top: 0.5rem; - max-height: 200px; - overflow-y: auto; - z-index: 1000; /* Increased z-index */ - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), - 0 2px 4px -1px rgba(0, 0, 0, 0.06); -} - -.voice-select-container:focus-within .voice-dropdown, -.voice-dropdown:hover { - display: block; -} - -.voice-option { - display: flex; - align-items: center; - padding: 0.75rem; - cursor: pointer; - border-radius: 0.25rem; - transition: background-color 0.2s ease; - color: var(--text); -} - -.voice-option:hover { - background: rgba(99, 102, 241, 0.1); -} - -.selected-voice-tag { - background: rgba(99, 102, 241, 0.2); - padding: 0.25rem 0.75rem; - border-radius: 1rem; - font-size: 0.875rem; - border: 1px solid rgba(99, 102, 241, 0.3); -} - 
-.options { - display: flex; - gap: 2rem; - flex-wrap: wrap; -} - -.options label { - display: flex; - align-items: center; - gap: 0.5rem; - color: var(--text-light); - cursor: pointer; - white-space: nowrap; -} - -button { - background: var(--fg-color); - color: var(--text); - padding: 1rem; - border-radius: 0.5rem; - border: none; - font-weight: 500; - cursor: pointer; - transition: transform 0.2s ease, box-shadow 0.2s ease; - display: flex; - align-items: center; - justify-content: center; - gap: 0.5rem; -} - -button:hover { - transform: translateY(-1px); - box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2); -} - -button:disabled { - opacity: 0.7; - cursor: not-allowed; - transform: none; - box-shadow: none; -} - -.theme-toggle { - position: fixed; - top: 20px; - right: 20px; - width: 60px; - height: 60px; - cursor: pointer; - z-index: 100; - border-radius: 50%; - background: var(--fg-color); - box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2); - transition: transform 0.3s ease; -} - -.theme-toggle:hover { - transform: scale(1.1); -} - -/* Make audio player full width */ -audio { - width: 100%; - margin-top: 1rem; -} - -@media (max-width: 640px) { - .container { - padding: 2rem 1rem; - } - - h1 { - font-size: 2rem; - } - - .subtitle { - font-size: 1rem; - } - - .input-section, .player-section { - padding: 1.5rem; - } - - .voice-select-container { - flex-direction: column; - align-items: stretch; - } - - .options { - flex-direction: column; - gap: 1rem; - } - - .theme-toggle { - width: 50px; - height: 50px; - top: 10px; - right: 10px; - } -} \ No newline at end of file diff --git a/web/styles.css b/web/styles.css deleted file mode 100644 index 86c24c3..0000000 --- a/web/styles.css +++ /dev/null @@ -1,771 +0,0 @@ -:root { - --bg-color: #0f172a; - --fg-color: #6366f1; - --surface: rgba(30, 41, 59, 1); - --text: #f8fafc; - --text-light: #cbd5e1; - --border: rgba(148, 163, 184, 0.2); - --error: #ef4444; - --success: #22c55e; - --font-family: 'Inter', system-ui, sans-serif; 
-} - -* { - margin: 0; - padding: 0; - box-sizing: border-box; -} - -body { - font-family: var(--font-family); - line-height: 1.6; - color: var(--text); - background: radial-gradient(circle at top right, - var(--fg-color) 0%, - var(--bg-color) 100%); - min-height: 100vh; - position: relative; -} - -.overlay { - position: fixed; - inset: 0; - background-image: - repeating-linear-gradient(0deg, - rgba(255,255,255,0.03) 0px, - rgba(255,255,255,0.03) 1px, - transparent 1px, - transparent 20px), - repeating-linear-gradient(90deg, - rgba(255,255,255,0.03) 0px, - rgba(255,255,255,0.03) 1px, - transparent 1px, - transparent 20px); - pointer-events: none; -} - -.sun { - position: fixed; - top: 20px; - right: 20px; - width: 100px; - height: 100px; - border-radius: 50%; - background: radial-gradient(circle at center, - rgba(99, 102, 241, 0.2) 0%, - transparent 70%); - pointer-events: none; - z-index: 0; -} - -.scanline { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 2px; - background: rgba(99, 102, 241, 0.1); - animation: scan 4s linear infinite; -} - -@keyframes scan { - 0% { transform: translateY(0); } - 100% { transform: translateY(100px); } -} - -.container { - max-width: 1200px; - margin: 0 auto; - padding: 4rem 1.5rem; - position: relative; - z-index: 1; -} - -header { - margin-bottom: 3rem; - text-align: center; -} - -.logo-container { - display: flex; - align-items: center; - justify-content: center; - gap: 20px; -} - -h1 { - font-size: 5rem; - font-weight: 700; - margin: 0; - background: linear-gradient(rgba(255,255,255,0.1) 1px, transparent 1px), - linear-gradient(90deg, rgba(255,255,255,0.1) 1px, transparent 1px); - background-size: 10px 10px; - -webkit-background-clip: text; - background-clip: text; - color: var(--text); - text-shadow: - -2px -2px 0 rgba(0,0,0,0.5), - 2px -2px 0 rgba(0,0,0,0.5), - -2px 2px 0 rgba(0,0,0,0.5), - 2px 2px 0 rgba(0,0,0,0.5), - 3px 3px var(--fg-color); -} - -.subtitle { - color: var(--text-light); - font-size: 
1.5rem; - opacity: 0.9; - margin-top: 0.5rem; -} - -.cup { - width: 40px; - height: 50px; - border: 3px solid var(--text); - border-radius: 0 0 20px 20px; - position: relative; - animation: float 3s ease-in-out 2; - animation-fill-mode: forwards; -} - -.handle { - width: 15px; - height: 25px; - border: 3px solid var(--text); - border-radius: 0 10px 10px 0; - position: absolute; - right: -15px; - top: 10px; -} - -.steam { - position: absolute; - top: -15px; - left: 5px; - right: 5px; - height: 15px; - display: flex; - justify-content: space-between; -} - -.steam::before, -.steam::after { - content: ""; - width: 10px; - height: 100%; - background: rgba(255,255,255,0.7); - border-radius: 10px; - animation: steam 2s 2; - animation-fill-mode: forwards; -} - -@keyframes steam { - to { - transform: translateY(-10px) scale(1.5); - opacity: 0; - } -} - -@keyframes float { - 50% { - transform: translateY(-5px); - } -} - -main { - display: flex; - flex-direction: column; - gap: 2rem; - min-height: 600px; -} - -@media (min-width: 1024px) { - main { - display: grid; - grid-template-columns: 1fr 1fr; - gap: 2rem; - align-items: stretch; - } - - .input-section, .player-section { - height: 100%; - display: flex; - flex-direction: column; - } - - .input-section textarea { - flex: 1; - min-height: 200px; - } - - .player-section { - display: flex; - flex-direction: column; - } - - .audio-controls { - flex: 1; - display: flex; - flex-direction: column; - } - - .wave-container { - flex: 1; - min-height: 200px; - } -} - -.input-section, .player-section { - background: var(--surface); - padding: 2rem; - border-radius: 1rem; - border: 1px solid var(--border); - backdrop-filter: blur(12px); - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), - 0 2px 4px -1px rgba(0, 0, 0, 0.06); -} - -textarea { - width: 100%; - min-height: 120px; - padding: 1rem; - border: 1px solid var(--border); - border-radius: 0.5rem; - background: rgba(15, 23, 42, 0.3); - color: var(--text); - font-size: 1rem; - 
transition: border-color 0.2s ease; - font-family: var(--font-family); - resize: vertical; -} - -textarea:focus { - outline: none; - border-color: var(--fg-color); - box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2); -} - -textarea::placeholder { - color: var(--text-light); -} - -.controls { - margin-top: 1.5rem; - display: flex; - flex-direction: column; - gap: 1.5rem; -} - -.voice-select-container { - position: relative; - display: flex; - flex-direction: column; - gap: 1rem; -} - -.voice-search { - width: 100%; - padding: 0.75rem 1rem; - border: 1px solid var(--border); - border-radius: 0.5rem; - background: rgba(15, 23, 42, 0.3); - color: var(--text); - font-size: 1rem; - transition: all 0.2s ease; -} - -.voice-search:focus { - outline: none; - border-color: var(--fg-color); - box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2); -} - -.voice-search::placeholder { - color: var(--text-light); -} - -.voice-dropdown { - display: none; - position: absolute; - top: 100%; - left: 0; - right: 0; - background: var(--surface); - border: 1px solid var(--border); - border-radius: 0.5rem; - margin-top: 0.5rem; - max-height: 200px; - overflow-y: auto; - z-index: 1000; - box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), - 0 2px 4px -1px rgba(0, 0, 0, 0.06); -} - -.voice-select-container:focus-within .voice-dropdown, -.voice-dropdown:hover { - display: block; -} - -.voice-option { - display: flex; - align-items: center; - padding: 0.75rem; - cursor: pointer; - border-radius: 0.25rem; - transition: background-color 0.2s ease; - color: var(--text); -} - -.voice-option:hover { - background: rgba(99, 102, 241, 0.1); -} - -.selected-voices { - display: flex; - flex-wrap: wrap; - gap: 0.5rem; - margin-top: 0.5rem; -} - -.selected-voice-tag { - background: rgba(99, 102, 241, 0.2); - padding: 0.25rem 0.75rem; - border-radius: 1rem; - font-size: 0.875rem; - display: flex; - align-items: center; - gap: 0.5rem; - border: 1px solid rgba(99, 102, 241, 0.3); -} - -.remove-voice { - cursor: pointer; - 
opacity: 0.7; - transition: opacity 0.2s ease; -} - -.remove-voice:hover { - opacity: 1; -} - -.options { - display: flex; - align-items: center; - justify-content: space-between; - gap: 2rem; - flex-wrap: wrap; -} - -.options label { - display: flex; - align-items: center; - gap: 0.5rem; - color: var(--text-light); - cursor: pointer; -} - -.format-select { - background: rgba(15, 23, 42, 0.3); - color: var(--text); - border: 1px solid var(--border); - border-radius: 0.5rem; - padding: 0.5rem 1rem; - font-family: var(--font-family); - font-size: 0.875rem; - cursor: pointer; - transition: all 0.2s ease; - min-width: 100px; -} - -.format-select:hover { - border-color: var(--fg-color); -} - -.format-select:focus { - outline: none; - border-color: var(--fg-color); - box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2); -} - -.format-select option { - background: var(--surface); - color: var(--text); -} - -.button-group { - display: flex; - gap: 1rem; -} - -button { - background: var(--fg-color); - color: var(--text); - padding: 1rem; - border-radius: 0.5rem; - border: none; - font-weight: 500; - cursor: pointer; - transition: transform 0.2s ease, box-shadow 0.2s ease; - display: flex; - align-items: center; - justify-content: center; - gap: 0.5rem; - flex: 1; -} - -button:hover { - transform: translateY(-1px); - box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2); -} - -button:disabled { - opacity: 0.7; - cursor: not-allowed; - transform: none; - box-shadow: none; -} - -.cancel-btn { - background: var(--error); -} - -.loader { - display: none; - width: 16px; - height: 16px; - border: 2px solid var(--text); - border-radius: 50%; - border-top-color: transparent; - animation: spin 1s linear infinite; -} - -@keyframes spin { - to { transform: rotate(360deg); } -} - -.loading .loader { - display: inline-block; -} - -.loading .btn-text { - display: none; -} - -.audio-controls { - display: flex; - flex-direction: column; - gap: 1rem; - margin-top: 1rem; -} - -.player-container { - 
display: flex; - flex-direction: column; - gap: 1rem; - background: rgba(15, 23, 42, 0.3); - padding: 1rem; - border-radius: 0.5rem; - border: 1px solid var(--border); -} - -.player-controls { - display: flex; - align-items: center; - gap: 1rem; - width: 100%; - background: rgba(15, 23, 42, 0.3); - padding: 0.5rem; - border-radius: 0.5rem; -} - -.seek-slider, -.volume-slider { - -webkit-appearance: none; - height: 4px; - border-radius: 2px; - background: rgba(99, 102, 241, 0.2); - outline: none; - cursor: pointer; - transition: height 0.2s ease-in-out; -} - -.seek-slider { - flex: 1; -} - -.volume-slider { - width: 100px; -} - -.seek-slider::-webkit-slider-thumb, -.volume-slider::-webkit-slider-thumb { - -webkit-appearance: none; - width: 12px; - height: 12px; - border-radius: 50%; - background: var(--fg-color); - cursor: pointer; - transition: transform 0.2s ease; -} - -.seek-slider::-webkit-slider-thumb:hover, -.volume-slider::-webkit-slider-thumb:hover { - transform: scale(1.2); -} - -.seek-slider::-moz-range-thumb, -.volume-slider::-moz-range-thumb { - width: 12px; - height: 12px; - border: none; - border-radius: 50%; - background: var(--fg-color); - cursor: pointer; - transition: transform 0.2s ease; -} - -.seek-slider::-moz-range-thumb:hover, -.volume-slider::-moz-range-thumb:hover { - transform: scale(1.2); -} - -.volume-control { - display: flex; - align-items: center; - gap: 0.5rem; - padding-left: 0.5rem; - border-left: 1px solid var(--border); -} - -.volume-icon { - color: var(--fg-color); - opacity: 0.8; - transition: opacity 0.2s ease; -} - -.volume-icon:hover { - opacity: 1; -} - -.player-btn { - background: var(--fg-color); - color: var(--text); - padding: 0.5rem 1rem; - border-radius: 0.5rem; - border: none; - font-weight: 500; - cursor: pointer; - transition: all 0.2s ease; - flex: 0 0 auto; - min-width: 80px; -} - -.player-btn:hover { - transform: translateY(-1px); - box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2); -} - -.wave-container { - width: 
100%; - height: 120px; - background: rgba(15, 23, 42, 0.3); - border-radius: 0.25rem; - overflow: hidden; - position: relative; - margin-top: 0.5rem; -} - -.wave-container canvas { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 100%; -} - -.time-display { - color: var(--text-light); - font-size: 0.875rem; - min-width: 100px; - text-align: right; - font-variant-numeric: tabular-nums; -} - -.download-button { - position: relative; - width: 40px; - height: 40px; - cursor: pointer; - display: flex; - align-items: center; - justify-content: center; - transition: transform 0.2s ease; - align-self: flex-end; -} - -.download-glow { - position: absolute; - inset: -15%; - background: conic-gradient( - from 0deg, - var(--fg-color), - var(--success), - var(--fg-color) - ); - border-radius: 4px; - animation: rotate 4s linear infinite; - filter: blur(8px); - opacity: 0.5; -} - -.download-icon { - width: 40px; - height: 40px; - position: relative; - z-index: 2; - background: var(--surface); - border: 1px solid var(--border); - border-radius: 4px; - display: flex; - align-items: center; - justify-content: center; - color: var(--text); - transition: transform 0.2s ease, box-shadow 0.2s ease; -} - -.download-button:hover { - transform: scale(1.05); -} - -.download-button:hover .download-icon { - box-shadow: 0 0 15px rgba(34, 197, 94, 0.3); -} - -@keyframes rotate { - from { transform: rotate(0deg); } - to { transform: rotate(360deg); } -} - -.status { - padding: 0.75rem 1rem; - border-radius: 0.5rem; - margin-bottom: 1rem; - transition: all 0.3s ease; - opacity: 0; - font-weight: 500; - text-align: center; -} - -.status.info { - background: rgba(99, 102, 241, 0.1); - border: 1px solid rgba(99, 102, 241, 0.2); - opacity: 1; -} - -.status.error { - background: rgba(239, 68, 68, 0.1); - border: 1px solid rgba(239, 68, 68, 0.2); - opacity: 1; -} - -.status.success { - background: rgba(34, 197, 94, 0.1); - border: 1px solid rgba(34, 197, 94, 0.2); - opacity: 1; -} - 
-@media (max-width: 1023px) { - .container { - padding: 2rem 1rem; - } - - h1 { - font-size: 3rem; - } - - .subtitle { - font-size: 1.2rem; - } - - .cup { - width: 30px; - height: 40px; - } - - .handle { - width: 12px; - height: 20px; - right: -12px; - top: 8px; - } - - .steam { - top: -12px; - } - - .steam::before, - .steam::after { - width: 6px; - } - - .input-section, .player-section { - padding: 1.5rem; - } - - .voice-select-container { - flex-direction: column; - align-items: stretch; - } - - .options { - flex-direction: column; - gap: 1rem; - } - - .sun { - width: 80px; - height: 80px; - top: 10px; - right: 10px; - } - - .button-group { - flex-direction: column; - } - - .player-container { - flex-direction: column; - align-items: stretch; - gap: 0.75rem; - } - - .player-controls { - flex-direction: column; - gap: 0.75rem; - } - - .player-btn { - width: 100%; - } - - .volume-control { - border-left: none; - border-top: 1px solid var(--border); - padding-left: 0; - padding-top: 0.75rem; - width: 100%; - } - - .volume-slider { - flex: 1; - width: auto; - } - - .wave-container { - height: 80px; - } - - .time-display { - text-align: center; - } -} \ No newline at end of file diff --git a/web/styles/base.css b/web/styles/base.css index 31bf2df..25ef0db 100644 --- a/web/styles/base.css +++ b/web/styles/base.css @@ -20,15 +20,24 @@ body { font-family: var(--font-family); line-height: 1.6; color: var(--text); - background: radial-gradient(circle at top right, - var(--fg-color) 0%, - var(--bg-color) 80%); - background-attachment: fixed; + background: var(--bg-color); min-height: 100vh; position: relative; + padding: 1rem; } .overlay { + position: fixed; + inset: 0; + background: + radial-gradient(circle at top right, + var(--fg-color) 0%, + var(--bg-color) 100%); + pointer-events: none; + z-index: 0; +} + +.grid-overlay { position: fixed; inset: 0; background-image: @@ -43,38 +52,19 @@ body { transparent 1px, transparent 20px); pointer-events: none; - z-index: 1; + 
z-index: 0; } -.sun { - position: fixed; - bottom: 40px; - right: 40px; - width: 80px; - height: 80px; - border-radius: 50%; - background-color: rgba(99, 102, 241, 0.4); - box-shadow: - 0 0 40px 15px rgba(213, 99, 241, 0.4), - 0 0 80px 25px rgba(99, 102, 241, 0.3), - 0 0 120px 35px rgba(91, 53, 228, 0.2); - pointer-events: none; - z-index: 9999; +.container { + max-width: 900px; + margin: 0 auto; + display: flex; + flex-direction: column; + min-height: calc(100vh - 2rem); } -.scanline { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 2px; - background: rgba(218, 140, 198, 0.375); - animation: scan 4s linear infinite; -} - -@keyframes scan { - 0% { transform: translateY(0); } - 100% { transform: translateY(100px); } +header { + margin-bottom: 1rem; } .status { diff --git a/web/styles/forms.css b/web/styles/forms.css index e2f0bc1..64e479a 100644 --- a/web/styles/forms.css +++ b/web/styles/forms.css @@ -1,6 +1,71 @@ +.textarea-container { + position: relative; + width: 100%; + height: 100%; +} + +.help-icon { + position: absolute; + top: 1rem; + right: 1rem; + color: var(--text-light); + cursor: pointer; + z-index: 2; + opacity: 0.7; + transition: opacity 0.3s ease; +} + +.help-icon:hover { + opacity: 1; +} + +.tooltip-content { + visibility: hidden; + opacity: 0; + position: absolute; + top: calc(100% + 10px); + right: 0; + background: var(--surface); + border: 1px solid var(--border); + border-radius: 0.5rem; + padding: 1rem; + width: 300px; + box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), + 0 2px 4px -1px rgba(0, 0, 0, 0.06); + z-index: 1000; +} + +.help-icon:hover .tooltip-content { + visibility: visible; + opacity: 1; + transition: visibility 0s linear 0.2s, opacity 0.3s ease 0.2s; +} + +.tooltip-content { + transition: visibility 0s linear 0.3s, opacity 0.3s ease; +} + +.tooltip-content h4 { + margin: 0 0 0.5rem 0; + color: var(--text); +} + +.tooltip-content ul { + margin: 0; + padding-left: 1.25rem; + color: var(--text-light); +} + 
+.tooltip-content code { + background: rgba(99, 102, 241, 0.1); + padding: 0.125rem 0.25rem; + border-radius: 0.25rem; + font-family: monospace; +} + textarea { width: 100%; - min-height: 120px; + height: calc(100% - 50px); padding: 1rem; border: 1px solid var(--border); border-radius: 0.5rem; @@ -9,7 +74,8 @@ textarea { font-size: 1rem; transition: border-color 0.2s ease; font-family: var(--font-family); - resize: vertical; + resize: none; + margin-bottom: 0.75rem; } textarea:focus { @@ -22,19 +88,22 @@ textarea::placeholder { color: var(--text-light); } -.controls { - margin-top: 1.5rem; +.text-controls { display: flex; - flex-direction: column; - gap: 1.5rem; + gap: 0.75rem; + margin-top: auto; +} + +.text-controls button { + flex: 1; } .voice-select-container { position: relative; display: flex; flex-direction: column; - gap: 1rem; - z-index: 1001; /* Higher than other elements */ + gap: 0.5rem; + z-index: 1001; } .voice-search { @@ -83,7 +152,7 @@ textarea::placeholder { .voice-option { display: flex; align-items: center; - padding: 0.75rem; + padding: 0.5rem; cursor: pointer; border-radius: 0.25rem; transition: background-color 0.2s ease; @@ -97,21 +166,31 @@ textarea::placeholder { .selected-voices { display: flex; flex-wrap: wrap; - gap: 0.5rem; + gap: 0.25rem; margin-top: 0.5rem; } .selected-voice-tag { background: rgba(99, 102, 241, 0.2); - padding: 0.25rem 0.75rem; + padding: 0.25rem 0.4rem; border-radius: 1rem; font-size: 0.875rem; display: flex; align-items: center; - gap: 0.5rem; + gap: 0.25rem; border: 1px solid rgba(99, 102, 241, 0.3); } +.selected-voice-tag input { + width: 3em; + padding: 0.1rem 0.2rem; + min-height: 1.5em; + background: transparent; + border: none; + color: inherit; + font-size: inherit; +} + .remove-voice { cursor: pointer; opacity: 0.7; @@ -125,15 +204,19 @@ textarea::placeholder { .options { display: flex; align-items: center; - justify-content: space-between; - gap: 2rem; + justify-content: center; + gap: 1rem; flex-wrap: 
wrap; + padding: 0.5rem; + border-top: 1px solid var(--border); + border-bottom: 1px solid var(--border); + margin: 0.5rem 0; } .option-group { display: flex; align-items: center; - gap: 2rem; + gap: 1rem; } .speed-control { @@ -221,14 +304,13 @@ textarea::placeholder { } .button-group { - display: flex; - gap: 1rem; + margin-top: auto; } button { background: var(--fg-color); color: var(--text); - padding: 1rem; + padding: 0.75rem 1.5rem; border-radius: 0.5rem; border: none; font-weight: 500; @@ -238,7 +320,7 @@ button { align-items: center; justify-content: center; gap: 0.5rem; - flex: 1; + width: 100%; } button:hover { @@ -255,6 +337,7 @@ button:disabled { .cancel-btn { background: var(--error); + margin-top: 0.75rem; } .loader { @@ -284,7 +367,6 @@ button:disabled { border: 1px solid var(--border) !important; color: var(--text-light) !important; padding: 0.5rem 1rem !important; - flex: 0 !important; /* Don't expand like other buttons */ } .clear-btn:hover { diff --git a/web/styles/layout.css b/web/styles/layout.css index 23993f5..fed3308 100644 --- a/web/styles/layout.css +++ b/web/styles/layout.css @@ -1,62 +1,60 @@ .container { - max-width: 1200px; + max-width: 900px; margin: 0 auto; - padding: 2rem 1.5rem; + padding: 2rem 1rem; position: relative; z-index: 1; } main { display: flex; - flex-direction: column; - gap: 2rem; - min-height: 600px; + gap: 1rem; + width: 100%; + max-width: 900px; + margin: 0 auto; } -@media (min-width: 1024px) { - main { - display: grid; - grid-template-columns: 1fr 1fr; - gap: 2rem; - align-items: stretch; - } - - .input-section, .player-section { - height: 100%; - display: flex; - flex-direction: column; - } - - .input-section textarea { - flex: 1; - min-height: 200px; - } - - .player-section { - display: flex; - flex-direction: column; - } - - .audio-controls { - flex: 1; - display: flex; - flex-direction: column; - } - - .wave-container { - flex: 1; - min-height: 200px; - } -} - -.input-section, .player-section { +.input-section 
{ background: var(--surface); - padding: 2rem; - border-radius: 1rem; + padding: 1rem; + border-radius: 0.5rem; border: 1px solid var(--border); backdrop-filter: blur(12px); box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); + display: flex; + flex-direction: column; + width: 400px; + height: 400px; +} + +.controls { + background: var(--surface); + padding: 1rem; + border-radius: 0.5rem; + border: 1px solid var(--border); + backdrop-filter: blur(12px); + box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), + 0 2px 4px -1px rgba(0, 0, 0, 0.06); + width: 400px; + height: 400px; + display: flex; + flex-direction: column; +} + #upload-btn { display: inline-block; +} + +@media (max-width: 850px) { + main { + flex-direction: column; + align-items: center; + } + + .input-section, + .controls { + width: 100%; + max-width: 400px; + } } \ No newline at end of file diff --git a/web/styles/player.css b/web/styles/player.css index 214f865..dac9bcf 100644 --- a/web/styles/player.css +++ b/web/styles/player.css @@ -1,28 +1,34 @@ .audio-controls { display: flex; flex-direction: column; - gap: 1rem; + gap: 0.75rem; margin-top: 1rem; + flex: 1; } .player-container { display: flex; flex-direction: column; - gap: 1rem; + gap: 0.75rem; + gap: 0.75rem; + width: 100%; background: rgba(15, 23, 42, 0.3); - padding: 1rem; + padding: 0.75rem; border-radius: 0.5rem; border: 1px solid var(--border); + flex: 1; + position: relative; + min-height: 140px; } .player-controls { display: flex; align-items: center; - gap: 1rem; + gap: 0.75rem; width: 100%; - background: rgba(15, 23, 42, 0.3); padding: 0.5rem; border-radius: 0.5rem; + height: 40px; } .seek-slider, @@ -41,7 +47,7 @@ } .volume-slider { - width: 100px; + width: 80px; } .seek-slider::-webkit-slider-thumb, @@ -79,8 +85,8 @@ .volume-control { display: flex; align-items: center; - gap: 0.5rem; - padding-left: 0.5rem; + gap: 0.75rem; + padding-left: 0.75rem; border-left: 1px solid var(--border); } @@ -104,7 +110,8 @@ 
cursor: pointer; transition: all 0.2s ease; flex: 0 0 auto; - min-width: 80px; + min-width: 60px; + height: 32px; } .player-btn:hover { @@ -114,12 +121,20 @@ .wave-container { width: 100%; - height: 120px; background: rgba(15, 23, 42, 0.3); - border-radius: 0.25rem; + border-radius: 0.5rem; overflow: hidden; position: relative; - margin-top: 0.5rem; + flex: 1; + min-height: 60px; +} + +.time-display { + min-width: 80px; + font-size: 0.875rem; + color: var(--text-light); + text-align: right; + font-variant-numeric: tabular-nums; } /* Progress bar styles */ @@ -169,24 +184,17 @@ height: 100%; } -.time-display { - color: var(--text-light); - font-size: 0.875rem; - min-width: 100px; - text-align: right; - font-variant-numeric: tabular-nums; -} - .download-button { - position: relative; - width: 40px; - height: 40px; + position: absolute; + bottom: 0.75rem; + right: 0.75rem; + width: 32px; + height: 32px; cursor: pointer; display: flex; align-items: center; justify-content: center; transition: transform 0.2s ease; - align-self: flex-end; } .download-glow { @@ -205,8 +213,8 @@ } .download-icon { - width: 40px; - height: 40px; + width: 32px; + height: 32px; position: relative; z-index: 2; background: var(--surface);