mirror of
https://github.com/remsky/Kokoro-FastAPI.git
synced 2025-08-05 16:48:53 +00:00
Refactor audio service chunk size, remove unused help icon, and optimize text processing limits
This commit is contained in:
parent
5d48688ab0
commit
444491defe
5 changed files with 63 additions and 226 deletions
|
@ -9,9 +9,9 @@ from .normalizer import normalize_text
|
|||
from .vocabulary import tokenize
|
||||
|
||||
# Target token ranges
|
||||
TARGET_MIN = 200
|
||||
TARGET_MAX = 350
|
||||
ABSOLUTE_MAX = 500
|
||||
TARGET_MIN = 175
|
||||
TARGET_MAX = 250
|
||||
ABSOLUTE_MAX = 450
|
||||
|
||||
def process_text_chunk(text: str, language: str = "a", skip_phonemize: bool = False) -> List[int]:
|
||||
"""Process a chunk of text through normalization, phonemization, and tokenization.
|
||||
|
@ -27,12 +27,24 @@ def process_text_chunk(text: str, language: str = "a", skip_phonemize: bool = Fa
|
|||
start_time = time.time()
|
||||
|
||||
if skip_phonemize:
|
||||
# Input is already phonemes, just tokenize
|
||||
t0 = time.time()
|
||||
tokens = tokenize(text)
|
||||
t1 = time.time()
|
||||
else:
|
||||
# Normal text processing pipeline
|
||||
t0 = time.time()
|
||||
normalized = normalize_text(text)
|
||||
t1 = time.time()
|
||||
|
||||
|
||||
t0 = time.time()
|
||||
phonemes = phonemize(normalized, language, normalize=False) # Already normalized
|
||||
t1 = time.time()
|
||||
|
||||
t0 = time.time()
|
||||
tokens = tokenize(phonemes)
|
||||
t1 = time.time()
|
||||
|
||||
total_time = time.time() - start_time
|
||||
logger.debug(f"Total processing took {total_time*1000:.2f}ms for chunk: '{text[:50]}...'")
|
||||
|
@ -83,55 +95,13 @@ def get_sentence_info(text: str) -> List[Tuple[str, List[int], int]]:
|
|||
return results
|
||||
|
||||
async def smart_split(text: str, max_tokens: int = ABSOLUTE_MAX) -> AsyncGenerator[Tuple[str, List[int]], None]:
|
||||
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens.
|
||||
Special symbols:
|
||||
- <<>> : Forces a break between chunks
|
||||
"""
|
||||
CHUNK_BREAK = "<<>>"
|
||||
|
||||
"""Build optimal chunks targeting 300-400 tokens, never exceeding max_tokens."""
|
||||
start_time = time.time()
|
||||
chunk_count = 0
|
||||
logger.info(f"Starting smart split for {len(text)} chars")
|
||||
|
||||
# First split on forced break symbol
|
||||
forced_chunks = [chunk.strip() for chunk in text.split(CHUNK_BREAK) if chunk.strip()]
|
||||
|
||||
# If no forced breaks, process normally
|
||||
if len(forced_chunks) <= 1:
|
||||
sentences = get_sentence_info(text)
|
||||
else:
|
||||
# Process each forced chunk separately
|
||||
for forced_chunk in forced_chunks:
|
||||
# Process sentences within this forced chunk
|
||||
chunk_sentences = get_sentence_info(forced_chunk)
|
||||
|
||||
# Process and yield all sentences in this chunk before moving to next
|
||||
current_chunk = []
|
||||
current_tokens = []
|
||||
current_count = 0
|
||||
|
||||
for sentence, tokens, count in chunk_sentences:
|
||||
if current_count + count <= TARGET_MAX:
|
||||
current_chunk.append(sentence)
|
||||
current_tokens.extend(tokens)
|
||||
current_count += count
|
||||
else:
|
||||
if current_chunk:
|
||||
chunk_text = " ".join(current_chunk)
|
||||
chunk_count += 1
|
||||
yield chunk_text, current_tokens
|
||||
current_chunk = [sentence]
|
||||
current_tokens = tokens
|
||||
current_count = count
|
||||
|
||||
# Yield remaining sentences in this forced chunk
|
||||
if current_chunk:
|
||||
chunk_text = " ".join(current_chunk)
|
||||
chunk_count += 1
|
||||
yield chunk_text, current_tokens
|
||||
|
||||
# Skip the rest of the processing since we've handled all chunks
|
||||
return
|
||||
# Process all sentences
|
||||
sentences = get_sentence_info(text)
|
||||
|
||||
current_chunk = []
|
||||
current_tokens = []
|
||||
|
|
|
@ -45,17 +45,6 @@
|
|||
</header>
|
||||
|
||||
<main>
|
||||
<div class="help-icon" title="Tips">
|
||||
<svg width="20" height="20" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<path d="M12 22C6.477 22 2 17.523 2 12S6.477 2 12 2s10 4.477 10 10-4.477 10-10 10zm-1-7v2h2v-2h-2zm2-1.645A3.502 3.502 0 0012 6.5a3.501 3.501 0 00-3.433 2.813l1.962.393A1.5 1.5 0 1112 11.5a1 1 0 00-1 1V14h2v-.645z" fill="currentColor"/>
|
||||
</svg>
|
||||
<div class="tooltip-content">
|
||||
<h4>Tips</h4>
|
||||
<ul>
|
||||
<li>Use <code><<>></code> to add an intentional break between chunks</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div id="text-editor"></div>
|
||||
<div class="controls">
|
||||
<div class="voice-select-container">
|
||||
|
|
|
@ -8,7 +8,7 @@ export class AudioService {
|
|||
this.minimumPlaybackSize = 50000; // 50KB minimum before playback
|
||||
this.textLength = 0;
|
||||
this.shouldAutoplay = false;
|
||||
this.CHARS_PER_CHUNK = 300; // Estimated chars per chunk
|
||||
this.CHARS_PER_CHUNK = 150; // Estimated chars per chunk
|
||||
this.serverDownloadPath = null; // Server-side download path
|
||||
this.pendingOperations = []; // Queue for buffer operations
|
||||
}
|
||||
|
|
|
@ -181,111 +181,17 @@
|
|||
box-shadow: none;
|
||||
}
|
||||
|
||||
.help-icon {
|
||||
position: absolute;
|
||||
top: 1rem;
|
||||
right: 1rem;
|
||||
color: var(--text-light);
|
||||
cursor: pointer;
|
||||
z-index: 2;
|
||||
opacity: 0.7;
|
||||
transition: opacity 0.3s ease;
|
||||
}
|
||||
|
||||
.help-icon:hover {
|
||||
opacity: 1;
|
||||
}
|
||||
|
||||
.tooltip-content {
|
||||
visibility: hidden;
|
||||
opacity: 0;
|
||||
position: absolute;
|
||||
top: calc(100% + 10px);
|
||||
right: 0;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 0.5rem;
|
||||
padding: 1rem;
|
||||
width: 300px;
|
||||
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
|
||||
0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
||||
z-index: 1000;
|
||||
transition: visibility 0s linear 0.3s, opacity 0.3s ease;
|
||||
}
|
||||
|
||||
.help-icon:hover .tooltip-content {
|
||||
visibility: visible;
|
||||
opacity: 1;
|
||||
transition: visibility 0s linear 0.2s, opacity 0.3s ease 0.2s;
|
||||
}
|
||||
|
||||
.tooltip-content h4 {
|
||||
margin: 0 0 0.5rem 0;
|
||||
color: var(--text);
|
||||
}
|
||||
|
||||
.tooltip-content ul {
|
||||
margin: 0;
|
||||
padding-left: 1.25rem;
|
||||
color: var(--text-light);
|
||||
}
|
||||
|
||||
.tooltip-content code {
|
||||
background: rgba(99, 102, 241, 0.1);
|
||||
padding: 0.125rem 0.25rem;
|
||||
border-radius: 0.25rem;
|
||||
font-family: monospace;
|
||||
}
|
||||
|
||||
main {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 300px;
|
||||
grid-template-rows: 1fr auto;
|
||||
gap: 1.25rem;
|
||||
height: auto;
|
||||
min-height: calc(100vh - 3rem);
|
||||
gap: 1.5rem;
|
||||
row-gap: 0;
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
padding: 1rem 1rem 2rem 1rem;
|
||||
align-items: start;
|
||||
padding: 1.5rem;
|
||||
}
|
||||
|
||||
.text-editor,
|
||||
.controls,
|
||||
.player-container {
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.generation-progress {
|
||||
width: 100%;
|
||||
height: 4px;
|
||||
background: rgba(99, 102, 241, 0.1);
|
||||
border-radius: 2px;
|
||||
margin: 0.75rem 0;
|
||||
overflow: hidden;
|
||||
position: relative;
|
||||
}
|
||||
|
||||
.generation-progress::after {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 0;
|
||||
left: 0;
|
||||
height: 100%;
|
||||
width: 30%;
|
||||
background: var(--fg-color);
|
||||
border-radius: 2px;
|
||||
animation: progress 1.5s ease-in-out infinite;
|
||||
}
|
||||
|
||||
@keyframes progress {
|
||||
0% {
|
||||
left: -30%;
|
||||
}
|
||||
100% {
|
||||
left: 100%;
|
||||
}
|
||||
}
|
||||
|
||||
/* Custom scrollbar styles */
|
||||
::-webkit-scrollbar {
|
||||
|
@ -309,33 +215,30 @@ main {
|
|||
|
||||
.text-editor {
|
||||
grid-column: 1;
|
||||
grid-row: 1;
|
||||
min-height: 0;
|
||||
height: calc(100vh - 14rem);
|
||||
min-height: 400px;
|
||||
max-height: 600px;
|
||||
overflow: auto;
|
||||
scrollbar-width: thin;
|
||||
scrollbar-color: rgba(99, 102, 241, 0.2) transparent;
|
||||
margin: 0;
|
||||
margin-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
.controls {
|
||||
grid-column: 2;
|
||||
grid-row: 1;
|
||||
min-height: 0;
|
||||
height: calc(100vh - 14rem);
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 1.25rem;
|
||||
gap: 1rem;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 0.5rem;
|
||||
padding: 1.25rem;
|
||||
padding: 1rem;
|
||||
overflow-y: auto;
|
||||
overflow-x: hidden;
|
||||
scrollbar-width: thin;
|
||||
scrollbar-color: rgba(99, 102, 241, 0.2) transparent;
|
||||
margin: 0;
|
||||
position: relative;
|
||||
height: fit-content;
|
||||
margin-bottom: 1rem;
|
||||
max-height: 600px;
|
||||
}
|
||||
|
||||
.voice-select-container {
|
||||
|
@ -347,8 +250,8 @@ main {
|
|||
border-radius: 0.5rem;
|
||||
padding: 1rem;
|
||||
height: auto;
|
||||
min-height: 120px;
|
||||
max-height: 200px;
|
||||
min-height: 160px;
|
||||
max-height: 240px;
|
||||
flex-shrink: 0;
|
||||
margin: 0.5rem 0;
|
||||
overflow: visible;
|
||||
|
@ -356,13 +259,13 @@ main {
|
|||
|
||||
.selected-voices {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fill, minmax(120px, 1fr));
|
||||
grid-template-columns: repeat(auto-fill, minmax(120px, 4fr));
|
||||
gap: 0.5rem;
|
||||
margin-top: 0.25rem;
|
||||
height: auto;
|
||||
min-height: 60px;
|
||||
max-height: none;
|
||||
overflow-y: visible;
|
||||
min-height: 50px;
|
||||
max-height: 100%;
|
||||
overflow-y: auto;
|
||||
padding: 0.75rem;
|
||||
background: rgba(15, 23, 42, 0.3);
|
||||
border-radius: 0.25rem;
|
||||
|
@ -435,22 +338,36 @@ main {
|
|||
padding: 0.375rem 0.75rem;
|
||||
border-radius: 1rem;
|
||||
font-size: 0.75rem;
|
||||
display: inline-flex;
|
||||
display: grid;
|
||||
grid-template-columns: auto auto 1fr;
|
||||
align-items: center;
|
||||
gap: 0.375rem;
|
||||
gap: 0.75rem;
|
||||
border: 1px solid rgba(99, 102, 241, 0.3);
|
||||
white-space: nowrap;
|
||||
flex-shrink: 0;
|
||||
min-width: 180px;
|
||||
max-width: 100%;
|
||||
}
|
||||
.remove-voice {
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
||||
.selected-voice-tag input {
|
||||
width: 2.5em;
|
||||
padding: 0.1rem;
|
||||
min-height: 1.25em;
|
||||
width: 100%;
|
||||
padding: 0.25rem;
|
||||
min-height: 1.5em;
|
||||
background: transparent;
|
||||
border: none;
|
||||
color: inherit;
|
||||
font-size: inherit;
|
||||
text-align: center;
|
||||
border-radius: 0.25rem;
|
||||
transition: background-color 0.2s;
|
||||
}
|
||||
|
||||
.selected-voice-tag input:hover,
|
||||
.selected-voice-tag input:focus {
|
||||
background: rgba(99, 102, 241, 0.1);
|
||||
}
|
||||
|
||||
.remove-voice {
|
||||
|
@ -569,29 +486,23 @@ main {
|
|||
display: flex;
|
||||
flex-direction: column;
|
||||
height: 100vh;
|
||||
padding: 0.75rem 1.25rem 1.5rem 1.25rem;
|
||||
padding: 0.75rem 1rem 1rem 1rem;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.player-container {
|
||||
grid-column: 1 / -1;
|
||||
grid-row: 2;
|
||||
background: var(--surface);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 0.5rem;
|
||||
padding: 1.25rem 1.5rem;
|
||||
height: auto;
|
||||
min-height: 90px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1),
|
||||
0 2px 4px -1px rgba(0, 0, 0, 0.06);
|
||||
margin: 1rem 0;
|
||||
align-self: start;
|
||||
width: 100%;
|
||||
position: relative;
|
||||
z-index: 1;
|
||||
}
|
||||
|
||||
.options {
|
||||
|
@ -613,7 +524,7 @@ main {
|
|||
align-items: center;
|
||||
gap: 1rem;
|
||||
padding: 0.5rem 0;
|
||||
margin-top: 0.5rem;
|
||||
margin-top: auto;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
|
@ -687,3 +598,7 @@ button:disabled {
|
|||
.loading .btn-text {
|
||||
display: none;
|
||||
}
|
||||
.text-editor,
|
||||
.controls {
|
||||
height: 600px;
|
||||
}
|
||||
|
|
|
@ -133,43 +133,6 @@
|
|||
border-left: 1px solid rgba(99, 102, 241, 0.2);
|
||||
}
|
||||
|
||||
.generation-progress {
|
||||
-webkit-appearance: none;
|
||||
appearance: none;
|
||||
width: 100%;
|
||||
height: 6px;
|
||||
border: none;
|
||||
background: rgba(99, 102, 241, 0.1);
|
||||
border-radius: 3px;
|
||||
margin: 0.5rem 0;
|
||||
display: block;
|
||||
}
|
||||
|
||||
.generation-progress::-webkit-progress-bar {
|
||||
background: rgba(99, 102, 241, 0.1);
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
.generation-progress::-webkit-progress-value {
|
||||
background: var(--fg-color);
|
||||
border-radius: 3px;
|
||||
transition: width 0.2s ease;
|
||||
box-shadow: 0 0 10px rgba(99, 102, 241, 0.3);
|
||||
}
|
||||
|
||||
.generation-progress::-moz-progress-bar {
|
||||
background: var(--fg-color);
|
||||
border-radius: 3px;
|
||||
transition: width 0.2s ease;
|
||||
box-shadow: 0 0 10px rgba(99, 102, 241, 0.3);
|
||||
}
|
||||
|
||||
.generation-progress::-ms-fill {
|
||||
background: var(--fg-color);
|
||||
border-radius: 3px;
|
||||
transition: width 0.2s ease;
|
||||
box-shadow: 0 0 10px rgba(99, 102, 241, 0.3);
|
||||
}
|
||||
|
||||
.wave-container canvas {
|
||||
position: absolute;
|
||||
|
|
Loading…
Add table
Reference in a new issue