Refactor audio service chunk size, remove unused help icon, and optimize text processing limits

This commit is contained in:
remsky 2025-02-05 20:31:48 -07:00
parent 5d48688ab0
commit 444491defe
5 changed files with 63 additions and 226 deletions

View file

@ -9,9 +9,9 @@ from .normalizer import normalize_text
from .vocabulary import tokenize
# Target token ranges
TARGET_MIN = 200
TARGET_MAX = 350
ABSOLUTE_MAX = 500
TARGET_MIN = 175
TARGET_MAX = 250
ABSOLUTE_MAX = 450
def process_text_chunk(text: str, language: str = "a", skip_phonemize: bool = False) -> List[int]:
"""Process a chunk of text through normalization, phonemization, and tokenization.
@ -27,12 +27,24 @@ def process_text_chunk(text: str, language: str = "a", skip_phonemize: bool = Fa
start_time = time.time()
if skip_phonemize:
# Input is already phonemes, just tokenize
t0 = time.time()
tokens = tokenize(text)
t1 = time.time()
else:
# Normal text processing pipeline
t0 = time.time()
normalized = normalize_text(text)
t1 = time.time()
t0 = time.time()
phonemes = phonemize(normalized, language, normalize=False) # Already normalized
t1 = time.time()
t0 = time.time()
tokens = tokenize(phonemes)
t1 = time.time()
total_time = time.time() - start_time
logger.debug(f"Total processing took {total_time*1000:.2f}ms for chunk: '{text[:50]}...'")
@ -83,55 +95,13 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
return results
async def smart_split(text: str, max_tokens: int = ABSOLUTE_MAX) -> AsyncGenerator[Tuple[str, List[int]], None]:
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens.
Special symbols:
- <<>> : Forces a break between chunks
"""
CHUNK_BREAK = "<<>>"
"""Build optimal chunks within the target token range (TARGET_MIN to TARGET_MAX), never exceeding max_tokens."""
start_time = time.time()
chunk_count = 0
logger.info(f"Starting smart split for {len(text)} chars")
# First split on forced break symbol
forced_chunks = [chunk.strip() for chunk in text.split(CHUNK_BREAK) if chunk.strip()]
# If no forced breaks, process normally
if len(forced_chunks) <= 1:
sentences = get_sentence_info(text)
else:
# Process each forced chunk separately
for forced_chunk in forced_chunks:
# Process sentences within this forced chunk
chunk_sentences = get_sentence_info(forced_chunk)
# Process and yield all sentences in this chunk before moving to next
current_chunk = []
current_tokens = []
current_count = 0
for sentence, tokens, count in chunk_sentences:
if current_count + count <= TARGET_MAX:
current_chunk.append(sentence)
current_tokens.extend(tokens)
current_count += count
else:
if current_chunk:
chunk_text = " ".join(current_chunk)
chunk_count += 1
yield chunk_text, current_tokens
current_chunk = [sentence]
current_tokens = tokens
current_count = count
# Yield remaining sentences in this forced chunk
if current_chunk:
chunk_text = " ".join(current_chunk)
chunk_count += 1
yield chunk_text, current_tokens
# Skip the rest of the processing since we've handled all chunks
return
# Process all sentences
sentences = get_sentence_info(text)
current_chunk = []
current_tokens = []

View file

@ -45,17 +45,6 @@
</header>
<main>
<div class="help-icon" title="Tips">
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-1-7v2h2v-2h-2zm2-1.645A3.502 3.502 0 0012 6.5a3.501 3.501 0 00-3.433 2.813l1.962.393A1.5 1.5 0 1112 11.5a1 1 0 00-1 1V14h2v-.645z" fill="currentColor"/>
</svg>
<div class="tooltip-content">
<h4>Tips</h4>
<ul>
<li>Use <code><<>></code> to add an intentional break between chunks</li>
</ul>
</div>
</div>
<div id="text-editor"></div>
<div class="controls">
<div class="voice-select-container">

View file

@ -8,7 +8,7 @@ export class AudioService {
this.minimumPlaybackSize = 50000; // 50KB minimum before playback
this.textLength = 0;
this.shouldAutoplay = false;
this.CHARS_PER_CHUNK = 300; // Estimated chars per chunk
this.CHARS_PER_CHUNK = 150; // Estimated chars per chunk
this.serverDownloadPath = null; // Server-side download path
this.pendingOperations = []; // Queue for buffer operations
}

View file

@ -181,111 +181,17 @@
box-shadow: none;
}
.help-icon {
position: absolute;
top: 1rem;
right: 1rem;
color: var(--text-light);
cursor: pointer;
z-index: 2;
opacity: 0.7;
transition: opacity 0.3s ease;
}
.help-icon:hover {
opacity: 1;
}
.tooltip-content {
visibility: hidden;
opacity: 0;
position: absolute;
top: calc(100% + 10px);
right: 0;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 0.5rem;
padding: 1rem;
width: 300px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
z-index: 1000;
transition: visibility 0s linear 0.3s, opacity 0.3s ease;
}
.help-icon:hover .tooltip-content {
visibility: visible;
opacity: 1;
transition: visibility 0s linear 0.2s, opacity 0.3s ease 0.2s;
}
.tooltip-content h4 {
margin: 0 0 0.5rem 0;
color: var(--text);
}
.tooltip-content ul {
margin: 0;
padding-left: 1.25rem;
color: var(--text-light);
}
.tooltip-content code {
background: rgba(99, 102, 241, 0.1);
padding: 0.125rem 0.25rem;
border-radius: 0.25rem;
font-family: monospace;
}
main {
display: grid;
grid-template-columns: 1fr 300px;
grid-template-rows: 1fr auto;
gap: 1.25rem;
height: auto;
min-height: calc(100vh - 3rem);
gap: 1.5rem;
row-gap: 0;
max-width: 1200px;
margin: 0 auto;
padding: 1rem 1rem 2rem 1rem;
align-items: start;
padding: 1.5rem;
}
.text-editor,
.controls,
.player-container {
margin-bottom: 0.5rem;
}
.generation-progress {
width: 100%;
height: 4px;
background: rgba(99, 102, 241, 0.1);
border-radius: 2px;
margin: 0.75rem 0;
overflow: hidden;
position: relative;
}
.generation-progress::after {
content: '';
position: absolute;
top: 0;
left: 0;
height: 100%;
width: 30%;
background: var(--fg-color);
border-radius: 2px;
animation: progress 1.5s ease-in-out infinite;
}
@keyframes progress {
0% {
left: -30%;
}
100% {
left: 100%;
}
}
/* Custom scrollbar styles */
::-webkit-scrollbar {
@ -309,33 +215,30 @@ main {
.text-editor {
grid-column: 1;
grid-row: 1;
min-height: 0;
height: calc(100vh - 14rem);
min-height: 400px;
max-height: 600px;
overflow: auto;
scrollbar-width: thin;
scrollbar-color: rgba(99, 102, 241, 0.2) transparent;
margin: 0;
margin-bottom: 1.5rem;
}
.controls {
grid-column: 2;
grid-row: 1;
min-height: 0;
height: calc(100vh - 14rem);
display: flex;
flex-direction: column;
gap: 1.25rem;
gap: 1rem;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 0.5rem;
padding: 1.25rem;
padding: 1rem;
overflow-y: auto;
overflow-x: hidden;
scrollbar-width: thin;
scrollbar-color: rgba(99, 102, 241, 0.2) transparent;
margin: 0;
position: relative;
height: fit-content;
margin-bottom: 1rem;
max-height: 600px;
}
.voice-select-container {
@ -347,8 +250,8 @@ main {
border-radius: 0.5rem;
padding: 1rem;
height: auto;
min-height: 120px;
max-height: 200px;
min-height: 160px;
max-height: 240px;
flex-shrink: 0;
margin: 0.5rem 0;
overflow: visible;
@ -356,13 +259,13 @@ main {
.selected-voices {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
grid-template-columns: repeat(auto-fill, minmax(120px, 4fr));
gap: 0.5rem;
margin-top: 0.25rem;
height: auto;
min-height: 60px;
max-height: none;
overflow-y: visible;
min-height: 50px;
max-height: 100%;
overflow-y: auto;
padding: 0.75rem;
background: rgba(15, 23, 42, 0.3);
border-radius: 0.25rem;
@ -435,22 +338,36 @@ main {
padding: 0.375rem 0.75rem;
border-radius: 1rem;
font-size: 0.75rem;
display: inline-flex;
display: grid;
grid-template-columns: auto auto 1fr;
align-items: center;
gap: 0.375rem;
gap: 0.75rem;
border: 1px solid rgba(99, 102, 241, 0.3);
white-space: nowrap;
flex-shrink: 0;
min-width: 180px;
max-width: 100%;
}
.remove-voice {
font-size: 1.2em;
}
.selected-voice-tag input {
width: 2.5em;
padding: 0.1rem;
min-height: 1.25em;
width: 100%;
padding: 0.25rem;
min-height: 1.5em;
background: transparent;
border: none;
color: inherit;
font-size: inherit;
text-align: center;
border-radius: 0.25rem;
transition: background-color 0.2s;
}
.selected-voice-tag input:hover,
.selected-voice-tag input:focus {
background: rgba(99, 102, 241, 0.1);
}
.remove-voice {
@ -569,29 +486,23 @@ main {
display: flex;
flex-direction: column;
height: 100vh;
padding: 0.75rem 1.25rem 1.5rem 1.25rem;
padding: 0.75rem 1rem 1rem 1rem;
gap: 1rem;
}
.player-container {
grid-column: 1 / -1;
grid-row: 2;
background: var(--surface);
border: 1px solid var(--border);
border-radius: 0.5rem;
padding: 1.25rem 1.5rem;
height: auto;
min-height: 90px;
display: flex;
flex-direction: column;
justify-content: center;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
0 2px 4px -1px rgba(0, 0, 0, 0.06);
margin: 1rem 0;
align-self: start;
width: 100%;
position: relative;
z-index: 1;
}
.options {
@ -613,7 +524,7 @@ main {
align-items: center;
gap: 1rem;
padding: 0.5rem 0;
margin-top: 0.5rem;
margin-top: auto;
border-top: 1px solid var(--border);
}
@ -687,3 +598,7 @@ button:disabled {
.loading .btn-text {
display: none;
}
.text-editor,
.controls {
height: 600px;
}

View file

@ -133,43 +133,6 @@
border-left: 1px solid rgba(99, 102, 241, 0.2);
}
.generation-progress {
-webkit-appearance: none;
appearance: none;
width: 100%;
height: 6px;
border: none;
background: rgba(99, 102, 241, 0.1);
border-radius: 3px;
margin: 0.5rem 0;
display: block;
}
.generation-progress::-webkit-progress-bar {
background: rgba(99, 102, 241, 0.1);
border-radius: 3px;
}
.generation-progress::-webkit-progress-value {
background: var(--fg-color);
border-radius: 3px;
transition: width 0.2s ease;
box-shadow: 0 0 10px rgba(99, 102, 241, 0.3);
}
.generation-progress::-moz-progress-bar {
background: var(--fg-color);
border-radius: 3px;
transition: width 0.2s ease;
box-shadow: 0 0 10px rgba(99, 102, 241, 0.3);
}
.generation-progress::-ms-fill {
background: var(--fg-color);
border-radius: 3px;
transition: width 0.2s ease;
box-shadow: 0 0 10px rgba(99, 102, 241, 0.3);
}
.wave-container canvas {
position: absolute;