Update Docker configuration, enhance audio service autoplay, and improve styling

This commit is contained in:
remsky 2025-02-05 02:45:28 -07:00
parent d3741d0d99
commit fe6c9acaf5
16 changed files with 467 additions and 1253 deletions

View file

@ -64,6 +64,85 @@ class KokoroV1(BaseModelBackend):
except Exception as e:
raise RuntimeError(f"Failed to load Kokoro model: {e}")
async def generate_from_tokens(
    self,
    tokens: str,
    voice: Union[str, Tuple[str, Union[torch.Tensor, str]]],
    speed: float = 1.0
) -> AsyncGenerator[np.ndarray, None]:
    """Generate audio from phoneme tokens.

    Args:
        tokens: Input phoneme tokens to synthesize
        voice: Either a voice path string or a tuple of
            (voice_name, voice_tensor/path)
        speed: Speed multiplier

    Yields:
        Generated audio chunks, one NumPy array per pipeline result that
        carries audio

    Raises:
        RuntimeError: If the model is not loaded
        Exception: Any error raised during generation is logged and
            re-raised unchanged, unless the CUDA out-of-memory retry
            below applies
    """
    import tempfile

    if not self.is_loaded:
        raise RuntimeError("Model not loaded")

    try:
        # Memory management for GPU
        if self._device == "cuda":
            if self._check_memory():
                self._clear_memory()

        # Handle voice input: resolve it to a path on disk
        voice_path: str
        if isinstance(voice, tuple):
            voice_name, voice_data = voice
            if isinstance(voice_data, str):
                voice_path = voice_data
            else:
                # Save tensor to temporary file
                voice_path = os.path.join(
                    tempfile.gettempdir(), f"{voice_name}.pt"
                )
                # Save tensor with CPU mapping for portability
                torch.save(voice_data.cpu(), voice_path)
        else:
            voice_path = voice

        # Load voice tensor with proper device mapping
        voice_tensor = await paths.load_voice_tensor(
            voice_path, device=self._device
        )

        # Save back to a temporary file with proper device mapping; the
        # pipeline below is handed this path rather than the tensor.
        temp_path = os.path.join(
            tempfile.gettempdir(),
            f"temp_voice_{os.path.basename(voice_path)}",
        )
        await paths.save_voice_tensor(voice_tensor, temp_path)
        voice_path = temp_path

        # Generate using pipeline's generate_from_tokens method
        logger.debug(f"Generating audio from tokens: '{tokens[:100]}...'")
        for result in self._pipeline.generate_from_tokens(
            tokens=tokens,
            voice=voice_path,
            speed=speed,
            model=self._model
        ):
            if result.audio is not None:
                logger.debug(f"Got audio chunk with shape: {result.audio.shape}")
                yield result.audio.numpy()
            else:
                logger.warning("No audio in chunk")

    except Exception as e:
        logger.error(f"Generation failed: {e}")
        should_retry = (
            self._device == "cuda"
            and model_config.pytorch_gpu.retry_on_oom
            and "out of memory" in str(e).lower()
        )
        if not should_retry:
            raise

        # Out of memory on CUDA: free memory and run the request again.
        # The generator then ends normally; previously the original error
        # was re-raised even after the retry had produced all its chunks.
        # NOTE(review): chunks yielded before the failure are produced
        # again by the retry - confirm callers tolerate that.
        self._clear_memory()
        async for chunk in self.generate_from_tokens(tokens, voice, speed):
            yield chunk
async def generate(
self,
text: str,

View file

@ -27,24 +27,12 @@ def process_text_chunk(text: str, language: str = "a", skip_phonemize: bool = Fa
start_time = time.time()
if skip_phonemize:
# Input is already phonemes, just tokenize
t0 = time.time()
tokens = tokenize(text)
t1 = time.time()
else:
# Normal text processing pipeline
t0 = time.time()
normalized = normalize_text(text)
t1 = time.time()
t0 = time.time()
phonemes = phonemize(normalized, language, normalize=False) # Already normalized
t1 = time.time()
t0 = time.time()
tokens = tokenize(phonemes)
t1 = time.time()
total_time = time.time() - start_time
logger.debug(f"Total processing took {total_time*1000:.2f}ms for chunk: '{text[:50]}...'")
@ -95,13 +83,55 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
return results
async def smart_split(text: str, max_tokens: int = ABSOLUTE_MAX) -> AsyncGenerator[Tuple[str, List[int]], None]:
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens.
Special symbols:
- <<>> : Forces a break between chunks
"""
CHUNK_BREAK = "<<>>"
start_time = time.time()
chunk_count = 0
logger.info(f"Starting smart split for {len(text)} chars")
# Process all sentences
sentences = get_sentence_info(text)
# First split on forced break symbol
forced_chunks = [chunk.strip() for chunk in text.split(CHUNK_BREAK) if chunk.strip()]
# If no forced breaks, process normally
if len(forced_chunks) <= 1:
sentences = get_sentence_info(text)
else:
# Process each forced chunk separately
for forced_chunk in forced_chunks:
# Process sentences within this forced chunk
chunk_sentences = get_sentence_info(forced_chunk)
# Process and yield all sentences in this chunk before moving to next
current_chunk = []
current_tokens = []
current_count = 0
for sentence, tokens, count in chunk_sentences:
if current_count + count <= TARGET_MAX:
current_chunk.append(sentence)
current_tokens.extend(tokens)
current_count += count
else:
if current_chunk:
chunk_text = " ".join(current_chunk)
chunk_count += 1
yield chunk_text, current_tokens
current_chunk = [sentence]
current_tokens = tokens
current_count = count
# Yield remaining sentences in this forced chunk
if current_chunk:
chunk_text = " ".join(current_chunk)
chunk_count += 1
yield chunk_text, current_tokens
# Skip the rest of the processing since we've handled all chunks
return
current_chunk = []
current_tokens = []

View file

@ -485,31 +485,29 @@ class TTSService:
start_time = time.time()
try:
# Get backend and voice path
raise ValueError("Not yet implemented")
# linked to https://github.com/hexgrad/kokoro/pull/53 or similar
backend = self.model_manager.get_backend()
voice_name, voice_path = await self._get_voice_path(voice)
# if isinstance(backend, KokoroV1):
# # For Kokoro V1, pass phonemes directly to pipeline
# result = None
# for r in backend._pipeline(
# phonemes,
# voice=voice_path,
# speed=speed,
# model=backend._model
# ):
# if r.audio is not None:
# result = r
# break
if isinstance(backend, KokoroV1):
# For Kokoro V1, use generate_from_tokens with raw phonemes
result = None
for r in backend._pipeline.generate_from_tokens(
tokens=phonemes, # Pass raw phonemes string
voice=voice_path,
speed=speed,
model=backend._model
):
if r.audio is not None:
result = r
break
# if result is None or result.audio is None:
# raise ValueError("No audio generated")
if result is None or result.audio is None:
raise ValueError("No audio generated")
# processing_time = time.time() - start_time
# return result.audio.numpy(), processing_time
# else:
pass
processing_time = time.time() - start_time
return result.audio.numpy(), processing_time
else:
raise ValueError("Phoneme generation only supported with Kokoro V1 backend")
except Exception as e:
logger.error(f"Error in phoneme audio generation: {str(e)}")

View file

@ -15,7 +15,7 @@ __pycache__
# Environment
# .env
.venv
.venv*
env/
venv/
ENV/

View file

@ -19,7 +19,7 @@ services:
reservations:
devices:
- driver: nvidia
count: 1
count: all
capabilities: [gpu]
# # Gradio UI service

View file

@ -0,0 +1,48 @@
import requests
import os
import json
def main():
    """Post a sample phoneme string to the local dev endpoint and save the audio.

    Sends the phonemes and a voice name to
    ``http://localhost:8880/dev/generate_from_phonemes`` and, on HTTP 200,
    streams the response body to
    ``examples/phoneme_examples/output/phoneme_test.wav``. Any other status
    code, or any exception, is reported on stdout.
    """
    # Test phoneme string
    phonemes = "hˈɛloʊ wˈɜrld"  # "Hello world" in phonemes

    try:
        print("\nTesting phoneme generation via API...")

        # Create request payload
        payload = {
            "phonemes": phonemes,
            "voice": "af_bella"  # Using bella voice
        }

        # Make request to the API endpoint. The response is used as a
        # context manager so the streamed connection is released on every
        # path, including errors raised while writing the file.
        with requests.post(
            "http://localhost:8880/dev/generate_from_phonemes",
            json=payload,
            stream=True  # Enable streaming for audio data
        ) as response:
            # Check if request was successful
            if response.status_code == 200:
                # Create output directory if it doesn't exist
                os.makedirs("examples/phoneme_examples/output", exist_ok=True)

                # Save the audio response
                output_path = 'examples/phoneme_examples/output/phoneme_test.wav'
                with open(output_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            f.write(chunk)

                print(f"\nAudio saved to: {output_path}")
                print("\nPhoneme test completed successfully!")
                print(f"\nInput phonemes: {phonemes}")
            else:
                print(f"Error: API request failed with status code {response.status_code}")
                print(f"Response: {response.text}")

    except Exception as e:
        # Top-level boundary of an example script: report and exit quietly.
        print(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()

View file

@ -36,8 +36,8 @@ dependencies = [
"matplotlib>=3.10.0",
"mutagen>=1.47.0",
"psutil>=6.1.1",
"kokoro==0.7.4",
'misaki[en,ja,ko,zh,vi]==0.7.4',
"kokoro==0.7.6",
'misaki[en,ja,ko,zh,vi]==0.7.6',
"spacy>=3.7.6",
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl"
]

View file

@ -23,6 +23,7 @@
<div class="scanline"></div>
</div>
<div class="overlay"></div>
<div class="grid-overlay"></div>
<div class="badges-container">
<a href="https://huggingface.co/hexgrad/Kokoro-82M" target="_blank" class="badge">
<img src="https://img.shields.io/badge/Powered--by--HexGrad%2FKokoro--82M-black?logo=huggingface&logoColor=white&labelColor=black&style=for-the-badge" alt="HexGrad/Kokoro-82M on Hugging Face">
@ -45,64 +46,75 @@
<main>
<div class="input-section">
<textarea
id="text-input"
placeholder="Enter text to convert to speech..."
></textarea>
<div class="controls">
<div class="voice-select-container">
<input
type="text"
id="voice-search"
class="voice-search"
placeholder="Search voices..."
>
<div class="voice-dropdown" id="voice-dropdown">
<div class="voice-options" id="voice-options">
<!-- Voice options will be inserted here -->
</div>
</div>
<div class="selected-voices" id="selected-voices">
<!-- Selected voice tags will appear here -->
</div>
</div>
<div class="options">
<div class="option-group">
<label>
<input type="checkbox" id="autoplay-toggle" checked>
Auto-play
</label>
<select id="format-select" class="format-select">
<option value="mp3">MP3</option>
<option value="wav">WAV</option>
<option value="pcm">PCM</option>
</select>
</div>
<div class="speed-control">
<label for="speed-slider">Speed: <span id="speed-value">1.0</span>x</label>
<input type="range" id="speed-slider" min="0.1" max="4" step="0.1" value="1.0">
</div>
</div>
<div class="button-group">
<input type="file" id="file-input" accept=".txt" style="display: none;">
<button id="upload-btn" class="clear-btn">
Upload Text
</button>
<button id="clear-btn" class="clear-btn">
Clear Text
</button>
<button id="generate-btn">
<span class="btn-text">Generate Speech</span>
<span class="loader"></span>
</button>
<button id="cancel-btn" class="cancel-btn" style="display: none;">
Cancel
</button>
<div class="help-icon" title="Tips">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-1-7v2h2v-2h-2zm2-1.645A3.502 3.502 0 0012 6.5a3.501 3.501 0 00-3.433 2.813l1.962.393A1.5 1.5 0 1112 11.5a1 1 0 00-1 1V14h2v-.645z" fill="currentColor"/>
</svg>
<div class="tooltip-content">
<h4>Tips</h4>
<ul>
<li>Use <code>&lt;&lt;&gt;&gt;</code> to add an intentional break between chunks</li>
</ul>
</div>
</div>
<textarea
id="text-input"
placeholder="Enter text to convert to speech..."
></textarea>
<div class="text-controls">
<input type="file" id="file-input" accept=".txt" style="display: none;">
<button id="upload-btn" class="clear-btn">
Upload Text
</button>
<button id="clear-btn" class="clear-btn">
Clear Text
</button>
</div>
</div>
<div class="player-section">
<div class="controls">
<div class="voice-select-container">
<input
type="text"
id="voice-search"
class="voice-search"
placeholder="Search voices..."
autocomplete="off"
>
<div class="voice-dropdown" id="voice-dropdown">
<div class="voice-options" id="voice-options">
<!-- Voice options will be inserted here -->
</div>
</div>
<div class="selected-voices" id="selected-voices">
<!-- Selected voice tags will appear here -->
</div>
</div>
<div class="options">
<div class="option-group">
<label>
<input type="checkbox" id="autoplay-toggle" checked>
Auto-play
</label>
<select id="format-select" class="format-select">
<option value="mp3">MP3</option>
<option value="wav">WAV</option>
<option value="pcm">PCM</option>
</select>
</div>
<div class="speed-control">
<label for="speed-slider">Speed: <span id="speed-value">1.0</span>x</label>
<input type="range" id="speed-slider" min="0.1" max="4" step="0.1" value="1.0">
</div>
</div>
<div class="button-group">
<button id="generate-btn">
<span class="btn-text">Generate Speech</span>
<span class="loader"></span>
</button>
<button id="cancel-btn" class="cancel-btn" style="display: none;">
Cancel
</button>
</div>
<div id="status" class="status"></div>
<div class="audio-controls">
<div class="player-container">

View file

@ -137,6 +137,12 @@ export class AudioService {
// Signal completion
onProgress?.(estimatedChunks, estimatedChunks);
this.dispatchEvent('complete');
// Check if we should autoplay for small inputs that didn't trigger during streaming
if (this.shouldAutoplay && !hasStartedPlaying && this.sourceBuffer.buffered.length > 0) {
setTimeout(() => this.play(), 100);
}
setTimeout(() => {
this.dispatchEvent('downloadReady');
}, 800);

View file

@ -1,266 +0,0 @@
:root {
--bg-color: #0f172a;
--fg-color: #6366f1;
--surface: rgba(30, 41, 59, 1); /* Made opaque */
--text: #f8fafc;
--text-light: #cbd5e1;
--border: rgba(148, 163, 184, 0.2);
--error: #ef4444;
--success: #22c55e;
--font-family: 'Inter', system-ui, sans-serif;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: var(--font-family);
line-height: 1.6;
color: var(--text);
background: radial-gradient(circle at top right,
var(--fg-color) 0%,
var(--bg-color) 100%);
min-height: 100vh;
position: relative;
}
.container {
max-width: 800px;
margin: 0 auto;
padding: 4rem 1.5rem;
}
header {
margin-bottom: 3rem;
text-align: center;
}
h1 {
font-size: 3rem;
font-weight: 700;
color: var(--text);
margin-bottom: 0.5rem;
}
.subtitle {
color: var(--text-light);
font-size: 1.1rem;
}
.input-section, .player-section {
background: var(--surface);
padding: 2rem;
border-radius: 1rem;
border: 1px solid var(--border);
backdrop-filter: blur(12px);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
margin-bottom: 2rem;
}
textarea {
width: 100%;
min-height: 120px;
padding: 1rem;
border: 1px solid var(--border);
border-radius: 0.5rem;
background: rgba(15, 23, 42, 0.3);
color: var(--text);
font-size: 1rem;
transition: border-color 0.2s ease;
font-family: var(--font-family);
resize: vertical;
}
textarea:focus {
outline: none;
border-color: var(--fg-color);
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}
textarea::placeholder {
color: var(--text-light);
}
.controls {
margin-top: 1.5rem;
display: flex;
flex-direction: column;
gap: 1.5rem;
}
.voice-select-container {
position: relative;
display: flex;
align-items: center;
gap: 1rem;
}
.voice-search {
flex: 1;
padding: 0.75rem 1rem;
border: 1px solid var(--border);
border-radius: 0.5rem;
background: rgba(15, 23, 42, 0.3);
color: var(--text);
font-size: 1rem;
transition: all 0.2s ease;
}
.voice-search:focus {
outline: none;
border-color: var(--fg-color);
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}
.voice-search::placeholder {
color: var(--text-light);
}
.voice-dropdown {
display: none;
position: absolute;
top: 100%;
left: 0;
right: 0;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 0.5rem;
margin-top: 0.5rem;
max-height: 200px;
overflow-y: auto;
z-index: 1000; /* Increased z-index */
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
}
.voice-select-container:focus-within .voice-dropdown,
.voice-dropdown:hover {
display: block;
}
.voice-option {
display: flex;
align-items: center;
padding: 0.75rem;
cursor: pointer;
border-radius: 0.25rem;
transition: background-color 0.2s ease;
color: var(--text);
}
.voice-option:hover {
background: rgba(99, 102, 241, 0.1);
}
.selected-voice-tag {
background: rgba(99, 102, 241, 0.2);
padding: 0.25rem 0.75rem;
border-radius: 1rem;
font-size: 0.875rem;
border: 1px solid rgba(99, 102, 241, 0.3);
}
.options {
display: flex;
gap: 2rem;
flex-wrap: wrap;
}
.options label {
display: flex;
align-items: center;
gap: 0.5rem;
color: var(--text-light);
cursor: pointer;
white-space: nowrap;
}
button {
background: var(--fg-color);
color: var(--text);
padding: 1rem;
border-radius: 0.5rem;
border: none;
font-weight: 500;
cursor: pointer;
transition: transform 0.2s ease, box-shadow 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
}
button:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2);
}
button:disabled {
opacity: 0.7;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
.theme-toggle {
position: fixed;
top: 20px;
right: 20px;
width: 60px;
height: 60px;
cursor: pointer;
z-index: 100;
border-radius: 50%;
background: var(--fg-color);
box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2);
transition: transform 0.3s ease;
}
.theme-toggle:hover {
transform: scale(1.1);
}
/* Make audio player full width */
audio {
width: 100%;
margin-top: 1rem;
}
@media (max-width: 640px) {
.container {
padding: 2rem 1rem;
}
h1 {
font-size: 2rem;
}
.subtitle {
font-size: 1rem;
}
.input-section, .player-section {
padding: 1.5rem;
}
.voice-select-container {
flex-direction: column;
align-items: stretch;
}
.options {
flex-direction: column;
gap: 1rem;
}
.theme-toggle {
width: 50px;
height: 50px;
top: 10px;
right: 10px;
}
}

View file

@ -1,771 +0,0 @@
:root {
--bg-color: #0f172a;
--fg-color: #6366f1;
--surface: rgba(30, 41, 59, 1);
--text: #f8fafc;
--text-light: #cbd5e1;
--border: rgba(148, 163, 184, 0.2);
--error: #ef4444;
--success: #22c55e;
--font-family: 'Inter', system-ui, sans-serif;
}
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: var(--font-family);
line-height: 1.6;
color: var(--text);
background: radial-gradient(circle at top right,
var(--fg-color) 0%,
var(--bg-color) 100%);
min-height: 100vh;
position: relative;
}
.overlay {
position: fixed;
inset: 0;
background-image:
repeating-linear-gradient(0deg,
rgba(255,255,255,0.03) 0px,
rgba(255,255,255,0.03) 1px,
transparent 1px,
transparent 20px),
repeating-linear-gradient(90deg,
rgba(255,255,255,0.03) 0px,
rgba(255,255,255,0.03) 1px,
transparent 1px,
transparent 20px);
pointer-events: none;
}
.sun {
position: fixed;
top: 20px;
right: 20px;
width: 100px;
height: 100px;
border-radius: 50%;
background: radial-gradient(circle at center,
rgba(99, 102, 241, 0.2) 0%,
transparent 70%);
pointer-events: none;
z-index: 0;
}
.scanline {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 2px;
background: rgba(99, 102, 241, 0.1);
animation: scan 4s linear infinite;
}
@keyframes scan {
0% { transform: translateY(0); }
100% { transform: translateY(100px); }
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 4rem 1.5rem;
position: relative;
z-index: 1;
}
header {
margin-bottom: 3rem;
text-align: center;
}
.logo-container {
display: flex;
align-items: center;
justify-content: center;
gap: 20px;
}
h1 {
font-size: 5rem;
font-weight: 700;
margin: 0;
background: linear-gradient(rgba(255,255,255,0.1) 1px, transparent 1px),
linear-gradient(90deg, rgba(255,255,255,0.1) 1px, transparent 1px);
background-size: 10px 10px;
-webkit-background-clip: text;
background-clip: text;
color: var(--text);
text-shadow:
-2px -2px 0 rgba(0,0,0,0.5),
2px -2px 0 rgba(0,0,0,0.5),
-2px 2px 0 rgba(0,0,0,0.5),
2px 2px 0 rgba(0,0,0,0.5),
3px 3px var(--fg-color);
}
.subtitle {
color: var(--text-light);
font-size: 1.5rem;
opacity: 0.9;
margin-top: 0.5rem;
}
.cup {
width: 40px;
height: 50px;
border: 3px solid var(--text);
border-radius: 0 0 20px 20px;
position: relative;
animation: float 3s ease-in-out 2;
animation-fill-mode: forwards;
}
.handle {
width: 15px;
height: 25px;
border: 3px solid var(--text);
border-radius: 0 10px 10px 0;
position: absolute;
right: -15px;
top: 10px;
}
.steam {
position: absolute;
top: -15px;
left: 5px;
right: 5px;
height: 15px;
display: flex;
justify-content: space-between;
}
.steam::before,
.steam::after {
content: "";
width: 10px;
height: 100%;
background: rgba(255,255,255,0.7);
border-radius: 10px;
animation: steam 2s 2;
animation-fill-mode: forwards;
}
@keyframes steam {
to {
transform: translateY(-10px) scale(1.5);
opacity: 0;
}
}
@keyframes float {
50% {
transform: translateY(-5px);
}
}
main {
display: flex;
flex-direction: column;
gap: 2rem;
min-height: 600px;
}
@media (min-width: 1024px) {
main {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 2rem;
align-items: stretch;
}
.input-section, .player-section {
height: 100%;
display: flex;
flex-direction: column;
}
.input-section textarea {
flex: 1;
min-height: 200px;
}
.player-section {
display: flex;
flex-direction: column;
}
.audio-controls {
flex: 1;
display: flex;
flex-direction: column;
}
.wave-container {
flex: 1;
min-height: 200px;
}
}
.input-section, .player-section {
background: var(--surface);
padding: 2rem;
border-radius: 1rem;
border: 1px solid var(--border);
backdrop-filter: blur(12px);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
}
textarea {
width: 100%;
min-height: 120px;
padding: 1rem;
border: 1px solid var(--border);
border-radius: 0.5rem;
background: rgba(15, 23, 42, 0.3);
color: var(--text);
font-size: 1rem;
transition: border-color 0.2s ease;
font-family: var(--font-family);
resize: vertical;
}
textarea:focus {
outline: none;
border-color: var(--fg-color);
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}
textarea::placeholder {
color: var(--text-light);
}
.controls {
margin-top: 1.5rem;
display: flex;
flex-direction: column;
gap: 1.5rem;
}
.voice-select-container {
position: relative;
display: flex;
flex-direction: column;
gap: 1rem;
}
.voice-search {
width: 100%;
padding: 0.75rem 1rem;
border: 1px solid var(--border);
border-radius: 0.5rem;
background: rgba(15, 23, 42, 0.3);
color: var(--text);
font-size: 1rem;
transition: all 0.2s ease;
}
.voice-search:focus {
outline: none;
border-color: var(--fg-color);
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}
.voice-search::placeholder {
color: var(--text-light);
}
.voice-dropdown {
display: none;
position: absolute;
top: 100%;
left: 0;
right: 0;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 0.5rem;
margin-top: 0.5rem;
max-height: 200px;
overflow-y: auto;
z-index: 1000;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
}
.voice-select-container:focus-within .voice-dropdown,
.voice-dropdown:hover {
display: block;
}
.voice-option {
display: flex;
align-items: center;
padding: 0.75rem;
cursor: pointer;
border-radius: 0.25rem;
transition: background-color 0.2s ease;
color: var(--text);
}
.voice-option:hover {
background: rgba(99, 102, 241, 0.1);
}
.selected-voices {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
margin-top: 0.5rem;
}
.selected-voice-tag {
background: rgba(99, 102, 241, 0.2);
padding: 0.25rem 0.75rem;
border-radius: 1rem;
font-size: 0.875rem;
display: flex;
align-items: center;
gap: 0.5rem;
border: 1px solid rgba(99, 102, 241, 0.3);
}
.remove-voice {
cursor: pointer;
opacity: 0.7;
transition: opacity 0.2s ease;
}
.remove-voice:hover {
opacity: 1;
}
.options {
display: flex;
align-items: center;
justify-content: space-between;
gap: 2rem;
flex-wrap: wrap;
}
.options label {
display: flex;
align-items: center;
gap: 0.5rem;
color: var(--text-light);
cursor: pointer;
}
.format-select {
background: rgba(15, 23, 42, 0.3);
color: var(--text);
border: 1px solid var(--border);
border-radius: 0.5rem;
padding: 0.5rem 1rem;
font-family: var(--font-family);
font-size: 0.875rem;
cursor: pointer;
transition: all 0.2s ease;
min-width: 100px;
}
.format-select:hover {
border-color: var(--fg-color);
}
.format-select:focus {
outline: none;
border-color: var(--fg-color);
box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2);
}
.format-select option {
background: var(--surface);
color: var(--text);
}
.button-group {
display: flex;
gap: 1rem;
}
button {
background: var(--fg-color);
color: var(--text);
padding: 1rem;
border-radius: 0.5rem;
border: none;
font-weight: 500;
cursor: pointer;
transition: transform 0.2s ease, box-shadow 0.2s ease;
display: flex;
align-items: center;
justify-content: center;
gap: 0.5rem;
flex: 1;
}
button:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2);
}
button:disabled {
opacity: 0.7;
cursor: not-allowed;
transform: none;
box-shadow: none;
}
.cancel-btn {
background: var(--error);
}
.loader {
display: none;
width: 16px;
height: 16px;
border: 2px solid var(--text);
border-radius: 50%;
border-top-color: transparent;
animation: spin 1s linear infinite;
}
@keyframes spin {
to { transform: rotate(360deg); }
}
.loading .loader {
display: inline-block;
}
.loading .btn-text {
display: none;
}
.audio-controls {
display: flex;
flex-direction: column;
gap: 1rem;
margin-top: 1rem;
}
.player-container {
display: flex;
flex-direction: column;
gap: 1rem;
background: rgba(15, 23, 42, 0.3);
padding: 1rem;
border-radius: 0.5rem;
border: 1px solid var(--border);
}
.player-controls {
display: flex;
align-items: center;
gap: 1rem;
width: 100%;
background: rgba(15, 23, 42, 0.3);
padding: 0.5rem;
border-radius: 0.5rem;
}
.seek-slider,
.volume-slider {
-webkit-appearance: none;
height: 4px;
border-radius: 2px;
background: rgba(99, 102, 241, 0.2);
outline: none;
cursor: pointer;
transition: height 0.2s ease-in-out;
}
.seek-slider {
flex: 1;
}
.volume-slider {
width: 100px;
}
.seek-slider::-webkit-slider-thumb,
.volume-slider::-webkit-slider-thumb {
-webkit-appearance: none;
width: 12px;
height: 12px;
border-radius: 50%;
background: var(--fg-color);
cursor: pointer;
transition: transform 0.2s ease;
}
.seek-slider::-webkit-slider-thumb:hover,
.volume-slider::-webkit-slider-thumb:hover {
transform: scale(1.2);
}
.seek-slider::-moz-range-thumb,
.volume-slider::-moz-range-thumb {
width: 12px;
height: 12px;
border: none;
border-radius: 50%;
background: var(--fg-color);
cursor: pointer;
transition: transform 0.2s ease;
}
.seek-slider::-moz-range-thumb:hover,
.volume-slider::-moz-range-thumb:hover {
transform: scale(1.2);
}
.volume-control {
display: flex;
align-items: center;
gap: 0.5rem;
padding-left: 0.5rem;
border-left: 1px solid var(--border);
}
.volume-icon {
color: var(--fg-color);
opacity: 0.8;
transition: opacity 0.2s ease;
}
.volume-icon:hover {
opacity: 1;
}
.player-btn {
background: var(--fg-color);
color: var(--text);
padding: 0.5rem 1rem;
border-radius: 0.5rem;
border: none;
font-weight: 500;
cursor: pointer;
transition: all 0.2s ease;
flex: 0 0 auto;
min-width: 80px;
}
.player-btn:hover {
transform: translateY(-1px);
box-shadow: 0 4px 12px rgba(99, 102, 241, 0.2);
}
.wave-container {
width: 100%;
height: 120px;
background: rgba(15, 23, 42, 0.3);
border-radius: 0.25rem;
overflow: hidden;
position: relative;
margin-top: 0.5rem;
}
.wave-container canvas {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 100%;
}
.time-display {
color: var(--text-light);
font-size: 0.875rem;
min-width: 100px;
text-align: right;
font-variant-numeric: tabular-nums;
}
.download-button {
position: relative;
width: 40px;
height: 40px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: transform 0.2s ease;
align-self: flex-end;
}
.download-glow {
position: absolute;
inset: -15%;
background: conic-gradient(
from 0deg,
var(--fg-color),
var(--success),
var(--fg-color)
);
border-radius: 4px;
animation: rotate 4s linear infinite;
filter: blur(8px);
opacity: 0.5;
}
.download-icon {
width: 40px;
height: 40px;
position: relative;
z-index: 2;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 4px;
display: flex;
align-items: center;
justify-content: center;
color: var(--text);
transition: transform 0.2s ease, box-shadow 0.2s ease;
}
.download-button:hover {
transform: scale(1.05);
}
.download-button:hover .download-icon {
box-shadow: 0 0 15px rgba(34, 197, 94, 0.3);
}
@keyframes rotate {
from { transform: rotate(0deg); }
to { transform: rotate(360deg); }
}
.status {
padding: 0.75rem 1rem;
border-radius: 0.5rem;
margin-bottom: 1rem;
transition: all 0.3s ease;
opacity: 0;
font-weight: 500;
text-align: center;
}
.status.info {
background: rgba(99, 102, 241, 0.1);
border: 1px solid rgba(99, 102, 241, 0.2);
opacity: 1;
}
.status.error {
background: rgba(239, 68, 68, 0.1);
border: 1px solid rgba(239, 68, 68, 0.2);
opacity: 1;
}
.status.success {
background: rgba(34, 197, 94, 0.1);
border: 1px solid rgba(34, 197, 94, 0.2);
opacity: 1;
}
@media (max-width: 1023px) {
.container {
padding: 2rem 1rem;
}
h1 {
font-size: 3rem;
}
.subtitle {
font-size: 1.2rem;
}
.cup {
width: 30px;
height: 40px;
}
.handle {
width: 12px;
height: 20px;
right: -12px;
top: 8px;
}
.steam {
top: -12px;
}
.steam::before,
.steam::after {
width: 6px;
}
.input-section, .player-section {
padding: 1.5rem;
}
.voice-select-container {
flex-direction: column;
align-items: stretch;
}
.options {
flex-direction: column;
gap: 1rem;
}
.sun {
width: 80px;
height: 80px;
top: 10px;
right: 10px;
}
.button-group {
flex-direction: column;
}
.player-container {
flex-direction: column;
align-items: stretch;
gap: 0.75rem;
}
.player-controls {
flex-direction: column;
gap: 0.75rem;
}
.player-btn {
width: 100%;
}
.volume-control {
border-left: none;
border-top: 1px solid var(--border);
padding-left: 0;
padding-top: 0.75rem;
width: 100%;
}
.volume-slider {
flex: 1;
width: auto;
}
.wave-container {
height: 80px;
}
.time-display {
text-align: center;
}
}

View file

@ -20,15 +20,24 @@ body {
font-family: var(--font-family);
line-height: 1.6;
color: var(--text);
background: radial-gradient(circle at top right,
var(--fg-color) 0%,
var(--bg-color) 80%);
background-attachment: fixed;
background: var(--bg-color);
min-height: 100vh;
position: relative;
padding: 1rem;
}
.overlay {
position: fixed;
inset: 0;
background:
radial-gradient(circle at top right,
var(--fg-color) 0%,
var(--bg-color) 100%);
pointer-events: none;
z-index: 0;
}
.grid-overlay {
position: fixed;
inset: 0;
background-image:
@ -43,38 +52,19 @@ body {
transparent 1px,
transparent 20px);
pointer-events: none;
z-index: 1;
z-index: 0;
}
.sun {
position: fixed;
bottom: 40px;
right: 40px;
width: 80px;
height: 80px;
border-radius: 50%;
background-color: rgba(99, 102, 241, 0.4);
box-shadow:
0 0 40px 15px rgba(213, 99, 241, 0.4),
0 0 80px 25px rgba(99, 102, 241, 0.3),
0 0 120px 35px rgba(91, 53, 228, 0.2);
pointer-events: none;
z-index: 9999;
.container {
max-width: 900px;
margin: 0 auto;
display: flex;
flex-direction: column;
min-height: calc(100vh - 2rem);
}
.scanline {
position: absolute;
top: 0;
left: 0;
width: 100%;
height: 2px;
background: rgba(218, 140, 198, 0.375);
animation: scan 4s linear infinite;
}
@keyframes scan {
0% { transform: translateY(0); }
100% { transform: translateY(100px); }
header {
margin-bottom: 1rem;
}
.status {

View file

@ -1,6 +1,71 @@
.textarea-container {
position: relative;
width: 100%;
height: 100%;
}
.help-icon {
position: absolute;
top: 1rem;
right: 1rem;
color: var(--text-light);
cursor: pointer;
z-index: 2;
opacity: 0.7;
transition: opacity 0.3s ease;
}
.help-icon:hover {
opacity: 1;
}
.tooltip-content {
visibility: hidden;
opacity: 0;
position: absolute;
top: calc(100% + 10px);
right: 0;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 0.5rem;
padding: 1rem;
width: 300px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
z-index: 1000;
}
.help-icon:hover .tooltip-content {
visibility: visible;
opacity: 1;
transition: visibility 0s linear 0.2s, opacity 0.3s ease 0.2s;
}
.tooltip-content {
transition: visibility 0s linear 0.3s, opacity 0.3s ease;
}
.tooltip-content h4 {
margin: 0 0 0.5rem 0;
color: var(--text);
}
.tooltip-content ul {
margin: 0;
padding-left: 1.25rem;
color: var(--text-light);
}
.tooltip-content code {
background: rgba(99, 102, 241, 0.1);
padding: 0.125rem 0.25rem;
border-radius: 0.25rem;
font-family: monospace;
}
textarea {
width: 100%;
min-height: 120px;
height: calc(100% - 50px);
padding: 1rem;
border: 1px solid var(--border);
border-radius: 0.5rem;
@ -9,7 +74,8 @@ textarea {
font-size: 1rem;
transition: border-color 0.2s ease;
font-family: var(--font-family);
resize: vertical;
resize: none;
margin-bottom: 0.75rem;
}
textarea:focus {
@ -22,19 +88,22 @@ textarea::placeholder {
color: var(--text-light);
}
/* NOTE(review): two selector lines share a single closing brace here. This
   looks like a rendered diff with the +/- markers stripped, where an old
   `.controls` rule is presumably being replaced by `.text-controls` —
   confirm against the real stylesheet before editing. */
.controls {
margin-top: 1.5rem;
.text-controls {
display: flex;
flex-direction: column;
/* Two `gap` values: presumably old (1.5rem) then new (0.75rem). */
gap: 1.5rem;
gap: 0.75rem;
margin-top: auto;
}
/* Buttons inside .text-controls grow to share the free space of their
   flex container. */
.text-controls button {
  flex: 1;
}
/* Vertical flex stack holding the voice picker; `position: relative` plus
   a z-index of 1001 places it above lower-indexed positioned siblings.
   NOTE(review): this text looks like a rendered diff with the +/- markers
   stripped. The paired `gap` and `z-index` lines are presumably old value
   then new value — confirm against the real stylesheet. In plain CSS the
   later declaration of each property would win. */
.voice-select-container {
position: relative;
display: flex;
flex-direction: column;
gap: 1rem;
z-index: 1001; /* Higher than other elements */
gap: 0.5rem;
z-index: 1001;
}
.voice-search {
@ -83,7 +152,7 @@ textarea::placeholder {
.voice-option {
display: flex;
align-items: center;
padding: 0.75rem;
padding: 0.5rem;
cursor: pointer;
border-radius: 0.25rem;
transition: background-color 0.2s ease;
@ -97,21 +166,31 @@ textarea::placeholder {
/* Wrapping flex row for the chosen-voice tags.
   NOTE(review): two `gap` values appear (0.5rem, then 0.25rem). This text
   looks like a rendered diff with the +/- markers stripped, so they are
   presumably old and new — confirm against the real stylesheet. */
.selected-voices {
display: flex;
flex-wrap: wrap;
gap: 0.5rem;
gap: 0.25rem;
margin-top: 0.5rem;
}
/* Pill-shaped tag for one selected voice: translucent indigo fill and
   border, fully rounded corners, contents centered in a flex row.
   NOTE(review): `padding` and `gap` each appear twice. This text looks like
   a rendered diff with the +/- markers stripped, so each pair is presumably
   old value then new value — confirm against the real stylesheet. */
.selected-voice-tag {
background: rgba(99, 102, 241, 0.2);
padding: 0.25rem 0.75rem;
padding: 0.25rem 0.4rem;
border-radius: 1rem;
font-size: 0.875rem;
display: flex;
align-items: center;
gap: 0.5rem;
gap: 0.25rem;
border: 1px solid rgba(99, 102, 241, 0.3);
}
/* Compact input embedded in a voice tag: 3em wide, no border or
   background, inheriting the tag's text color and font size. */
.selected-voice-tag input {
  width: 3em;
  min-height: 1.5em;
  padding: 0.1rem 0.2rem;
  border: none;
  background: transparent;
  font-size: inherit;
  color: inherit;
}
.remove-voice {
cursor: pointer;
opacity: 0.7;
@ -125,15 +204,19 @@ textarea::placeholder {
/* Options bar: a wrapping flex row with top and bottom rules drawn in the
   border color.
   NOTE(review): `justify-content` and `gap` each appear twice. This text
   looks like a rendered diff with the +/- markers stripped, so
   `space-between` / `2rem` are presumably the old values and `center` /
   `1rem` the new ones — confirm against the real stylesheet. */
.options {
display: flex;
align-items: center;
justify-content: space-between;
gap: 2rem;
justify-content: center;
gap: 1rem;
flex-wrap: wrap;
padding: 0.5rem;
border-top: 1px solid var(--border);
border-bottom: 1px solid var(--border);
margin: 0.5rem 0;
}
/* One group of related options, laid out as a vertically centered flex row.
   NOTE(review): two `gap` values appear (2rem, then 1rem); presumably old
   and new lines of a diff whose +/- markers were stripped — confirm. */
.option-group {
display: flex;
align-items: center;
gap: 2rem;
gap: 1rem;
}
.speed-control {
@ -221,14 +304,13 @@ textarea::placeholder {
}
/* Flex row of buttons. `margin-top: auto` absorbs free space above the
   group when the parent is a flex column (presumably the case — verify).
   NOTE(review): the surrounding hunk header (-221,14 +304,13) indicates one
   line was removed near here; the +/- markers are not visible, so it is
   unclear which of these declarations, if any, is the removed one. */
.button-group {
display: flex;
gap: 1rem;
margin-top: auto;
}
/* Base button style: themed fill and text color, rounded corners, no
   border, medium-weight label.
   NOTE(review): this text looks like a rendered diff with the +/- markers
   stripped. The `@ -238,7 +320,7 @@` line is a hunk header, not CSS, and
   declarations between the two hunks are not shown. */
button {
background: var(--fg-color);
color: var(--text);
/* Two `padding` values: presumably old (1rem) then new (0.75rem 1.5rem). */
padding: 1rem;
padding: 0.75rem 1.5rem;
border-radius: 0.5rem;
border: none;
font-weight: 500;
@ -238,7 +320,7 @@ button {
align-items: center;
justify-content: center;
gap: 0.5rem;
/* Presumably `flex: 1` is the removed line and `width: 100%` replaces it —
   confirm against the real stylesheet. */
flex: 1;
width: 100%;
}
button:hover {
@ -255,6 +337,7 @@ button:disabled {
/* Cancel button: filled with the error color and set 0.75rem below the
   element above it. */
.cancel-btn {
  margin-top: 0.75rem;
  background: var(--error);
}
.loader {
@ -284,7 +367,6 @@ button:disabled {
border: 1px solid var(--border) !important;
color: var(--text-light) !important;
padding: 0.5rem 1rem !important;
flex: 0 !important; /* Don't expand like other buttons */
}
.clear-btn:hover {

View file

@ -1,62 +1,60 @@
/* Centered page container, positioned so that `z-index: 1` applies.
   NOTE(review): `max-width` and `padding` each appear twice. This text
   looks like a rendered diff with the +/- markers stripped, so 1200px /
   `2rem 1.5rem` are presumably the old values and 900px / `2rem 1rem` the
   new ones — confirm against the real stylesheet. */
.container {
max-width: 1200px;
max-width: 900px;
margin: 0 auto;
padding: 2rem 1.5rem;
padding: 2rem 1rem;
position: relative;
z-index: 1;
}
/* Main content area: a flex container centered horizontally and capped at
   900px wide.
   NOTE(review): two `gap` values appear (2rem, then 1rem). This text looks
   like a rendered diff with the +/- markers stripped, so which of the
   surrounding declarations are old versus new cannot be determined here —
   confirm against the real stylesheet. */
main {
display: flex;
flex-direction: column;
gap: 2rem;
min-height: 600px;
gap: 1rem;
width: 100%;
max-width: 900px;
margin: 0 auto;
}
/* Wide-viewport layout (1024px and up): <main> becomes a two-column grid
   with equal tracks, and both panels turn into full-height flex columns
   whose main child (textarea / waveform) stretches to fill them. */
@media (min-width: 1024px) {
  main {
    display: grid;
    grid-template-columns: 1fr 1fr;
    align-items: stretch;
    gap: 2rem;
  }

  /* Both panels: full-height vertical stacks. */
  .input-section, .player-section {
    display: flex;
    flex-direction: column;
    height: 100%;
  }

  /* The textarea takes the remaining height, never less than 200px. */
  .input-section textarea {
    min-height: 200px;
    flex: 1;
  }

  .player-section {
    display: flex;
    flex-direction: column;
  }

  /* Audio controls stretch inside the player panel as a vertical stack. */
  .audio-controls {
    display: flex;
    flex-direction: column;
    flex: 1;
  }

  /* The waveform takes the remaining height, never less than 200px. */
  .wave-container {
    min-height: 200px;
    flex: 1;
  }
}
/* Card panel: themed surface with border, blurred backdrop and a soft
   two-layer drop shadow.
   NOTE(review): two selector lines open this rule and `padding` /
   `border-radius` each appear twice. This text looks like a rendered diff
   with the +/- markers stripped, so the first selector and the 2rem / 1rem
   values are presumably the old lines — confirm against the real
   stylesheet before editing. */
.input-section, .player-section {
.input-section {
background: var(--surface);
padding: 2rem;
border-radius: 1rem;
padding: 1rem;
border-radius: 0.5rem;
border: 1px solid var(--border);
backdrop-filter: blur(12px);
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
display: flex;
flex-direction: column;
width: 400px;
height: 400px;
}
/* Controls card: a fixed 400x400 vertical flex stack on the themed
   surface, with border, blurred backdrop and a soft two-layer shadow. */
.controls {
  display: flex;
  flex-direction: column;
  width: 400px;
  height: 400px;
  padding: 1rem;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 0.5rem;
  backdrop-filter: blur(12px);
  box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
              0 2px 4px -1px rgba(0, 0, 0, 0.06);
}
/* Render the upload button as an inline-level box. */
#upload-btn {
  display: inline-block;
}
/* Narrow-viewport layout (850px and below): stack the panels in one
   centered column and let each shrink with the viewport, up to 400px. */
@media (max-width: 850px) {
  main {
    align-items: center;
    flex-direction: column;
  }

  .input-section,
  .controls {
    max-width: 400px;
    width: 100%;
  }
}

View file

@ -1,28 +1,34 @@
/* Vertical flex stack for the audio player controls; `flex: 1` lets it
   grow inside a flex parent.
   NOTE(review): two `gap` values appear (1rem, then 0.75rem). This text
   looks like a rendered diff with the +/- markers stripped, so they are
   presumably old and new — confirm against the real stylesheet. */
.audio-controls {
display: flex;
flex-direction: column;
gap: 1rem;
gap: 0.75rem;
margin-top: 1rem;
flex: 1;
}
/* Player panel: a bordered, dark translucent vertical flex stack at least
   140px tall. `position: relative` makes it the containing block for
   absolutely positioned descendants.
   NOTE(review): `gap` appears three times and `padding` twice. This text
   looks like a rendered diff with the +/- markers stripped, so 1rem is
   presumably the old value of each. The repeated `gap: 0.75rem` may be a
   real duplicate in the stylesheet — confirm and remove one if so. */
.player-container {
display: flex;
flex-direction: column;
gap: 1rem;
gap: 0.75rem;
gap: 0.75rem;
width: 100%;
background: rgba(15, 23, 42, 0.3);
padding: 1rem;
padding: 0.75rem;
border-radius: 0.5rem;
border: 1px solid var(--border);
flex: 1;
position: relative;
min-height: 140px;
}
/* Transport bar: a 40px-high, full-width flex row with vertically centered
   items on a dark translucent background.
   NOTE(review): two `gap` values appear (1rem, then 0.75rem). This text
   looks like a rendered diff with the +/- markers stripped, so they are
   presumably old and new — confirm against the real stylesheet. */
.player-controls {
display: flex;
align-items: center;
gap: 1rem;
gap: 0.75rem;
width: 100%;
background: rgba(15, 23, 42, 0.3);
padding: 0.5rem;
border-radius: 0.5rem;
height: 40px;
}
.seek-slider,
@ -41,7 +47,7 @@
}
/* Fixed width for the volume slider.
   NOTE(review): two `width` values appear (100px, then 80px); presumably
   old and new lines of a diff whose +/- markers were stripped — confirm. */
.volume-slider {
width: 100px;
width: 80px;
}
.seek-slider::-webkit-slider-thumb,
@ -79,8 +85,8 @@
/* Volume group: a vertically centered flex row set off from whatever
   precedes it by a left border.
   NOTE(review): `gap` and `padding-left` each appear twice (0.5rem, then
   0.75rem); presumably old and new lines of a diff whose +/- markers were
   stripped — confirm against the real stylesheet. */
.volume-control {
display: flex;
align-items: center;
gap: 0.5rem;
padding-left: 0.5rem;
gap: 0.75rem;
padding-left: 0.75rem;
border-left: 1px solid var(--border);
}
@ -104,7 +110,8 @@
cursor: pointer;
transition: all 0.2s ease;
flex: 0 0 auto;
min-width: 80px;
min-width: 60px;
height: 32px;
}
.player-btn:hover {
@ -114,12 +121,20 @@
/* Waveform viewport: full width, dark translucent background, rounded
   corners, overflow clipped. `position: relative` makes it the containing
   block for absolutely positioned descendants.
   NOTE(review): `border-radius` appears twice (0.25rem, then 0.5rem). This
   text looks like a rendered diff with the +/- markers stripped, so the
   first is presumably the old value, and the fixed `height: 120px` may have
   been replaced by `flex: 1` / `min-height: 60px` — confirm against the
   real stylesheet. */
.wave-container {
width: 100%;
height: 120px;
background: rgba(15, 23, 42, 0.3);
border-radius: 0.25rem;
border-radius: 0.5rem;
overflow: hidden;
position: relative;
margin-top: 0.5rem;
flex: 1;
min-height: 60px;
}
/* Elapsed/total time readout: small, light, right-aligned text at least
   80px wide. Tabular numerals keep every digit the same width. */
.time-display {
  color: var(--text-light);
  font-size: 0.875rem;
  font-variant-numeric: tabular-nums;
  text-align: right;
  min-width: 80px;
}
/* Progress bar styles */
@ -169,24 +184,17 @@
height: 100%;
}
/* Time readout, 100px minimum width variant: small, light, right-aligned
   text with tabular numerals so every digit has the same width.
   NOTE(review): this selector also appears earlier with a different
   `min-width`; presumably one of the two is a removed line of a diff whose
   +/- markers were stripped — confirm against the real stylesheet. */
.time-display {
  font-size: 0.875rem;
  color: var(--text-light);
  font-variant-numeric: tabular-nums;
  text-align: right;
  min-width: 100px;
}
/* Download button: a small square hit area with centered content and an
   animated transform.
   NOTE(review): `position`, `width` and `height` each appear twice. This
   text looks like a rendered diff with the +/- markers stripped, so
   `relative` / 40px are presumably the old values and `absolute` / 32px
   (with the `bottom` / `right` offsets) the new ones. `align-self:
   flex-end` may likewise be a removed line — confirm against the real
   stylesheet. */
.download-button {
position: relative;
width: 40px;
height: 40px;
position: absolute;
bottom: 0.75rem;
right: 0.75rem;
width: 32px;
height: 32px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: transform 0.2s ease;
align-self: flex-end;
}
.download-glow {
@ -205,8 +213,8 @@
}
.download-icon {
width: 40px;
height: 40px;
width: 32px;
height: 32px;
position: relative;
z-index: 2;
background: var(--surface);