diff --git a/.gitignore b/.gitignore index 03479ec..a36b9f2 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,4 @@ examples/ebook_test/chapters_to_audio.py examples/ebook_test/parse_epub.py examples/ebook_test/River_of_Teet_-_Sarah_Gailey.epub examples/ebook_test/River_of_Teet_-_Sarah_Gailey.txt +api/src/voices/af_jadzia.pt diff --git a/api/src/services/streaming_audio_writer.py b/api/src/services/streaming_audio_writer.py index 54e2ee5..23d63c0 100644 --- a/api/src/services/streaming_audio_writer.py +++ b/api/src/services/streaming_audio_writer.py @@ -34,6 +34,7 @@ class StreamingAudioWriter: # For MP3, we'll use pydub's incremental writer self.buffer = BytesIO() self.segments = [] # Store segments until we have enough data + self.total_duration = 0 # Track total duration in milliseconds # Initialize an empty AudioSegment as our encoder self.encoder = AudioSegment.silent(duration=0, frame_rate=self.sample_rate) @@ -85,7 +86,17 @@ class StreamingAudioWriter: elif self.format == "mp3": # Final export of any remaining audio if hasattr(self, 'encoder') and len(self.encoder) > 0: - self.encoder.export(buffer, format="mp3", bitrate="192k", parameters=["-q:a", "2"]) + # Export with duration metadata + self.encoder.export( + buffer, + format="mp3", + bitrate="192k", + parameters=[ + "-q:a", "2", + "-write_xing", "1", # Force XING/LAME header + "-metadata", f"duration={self.total_duration/1000}" # Duration in seconds + ] + ) self.encoder = None return buffer.getvalue() @@ -119,11 +130,18 @@ class StreamingAudioWriter: channels=self.channels ) + # Track total duration + self.total_duration += len(segment) + # Add segment to encoder self.encoder = self.encoder + segment # Export current state to buffer - self.encoder.export(buffer, format="mp3", bitrate="192k", parameters=["-q:a", "2"]) + self.encoder.export(buffer, format="mp3", bitrate="192k", parameters=[ + "-q:a", "2", + "-write_xing", "1", # Force XING/LAME header + "-metadata", f"duration={self.total_duration/1000}" # Duration in seconds + ]) # Get the encoded data encoded_data = buffer.getvalue() diff --git a/web/app.js b/web/app.js deleted file mode 100644 index 9f699d1..0000000 --- a/web/app.js +++ /dev/null @@ -1,445 +0,0 @@ -class KokoroPlayer { - constructor() { - this.elements = { - textInput: document.getElementById('text-input'), - voiceSearch: document.getElementById('voice-search'), - voiceDropdown: document.getElementById('voice-dropdown'), - voiceOptions: document.getElementById('voice-options'), - selectedVoices: document.getElementById('selected-voices'), - autoplayToggle: document.getElementById('autoplay-toggle'), - formatSelect: document.getElementById('format-select'), - generateBtn: document.getElementById('generate-btn'), - cancelBtn: document.getElementById('cancel-btn'), - playPauseBtn: document.getElementById('play-pause-btn'), - waveContainer: document.getElementById('wave-container'), - timeDisplay: document.getElementById('time-display'), - downloadBtn: document.getElementById('download-btn'), - status: document.getElementById('status'), - speedSlider: document.getElementById('speed-slider'), - speedValue: document.getElementById('speed-value') - }; - - this.isGenerating = false; - this.availableVoices = []; - this.selectedVoiceSet = new Set(); - this.currentController = null; - this.audioChunks = []; - this.sound = null; - this.wave = null; - this.init(); - } - - async init() { - await this.loadVoices(); - this.setupWave(); - this.setupEventListeners(); - this.setupAudioControls(); - } - - setupWave() { - this.wave = new SiriWave({ - container: this.elements.waveContainer, - width: this.elements.waveContainer.clientWidth, - height: 80, - style: 'ios9', - // color: '#6366f1', - speed: 0.02, - amplitude: 0.7, - frequency: 4 - }); - } - - formatTime(secs) { - const minutes = Math.floor(secs / 60); - const seconds = Math.floor(secs % 60); - return `${minutes}:${seconds.toString().padStart(2, '0')}`; - } - - updateTimeDisplay() { - if (!this.sound) return; - const seek = this.sound.seek() || 0; - const duration = this.sound.duration() || 0; - this.elements.timeDisplay.textContent = `${this.formatTime(seek)} / ${this.formatTime(duration)}`; - - // Update seek slider - const seekSlider = document.getElementById('seek-slider'); - seekSlider.value = (seek / duration) * 100 || 0; - - if (this.sound.playing()) { - requestAnimationFrame(() => this.updateTimeDisplay()); - } - } - - setupAudioControls() { - const seekSlider = document.getElementById('seek-slider'); - const volumeSlider = document.getElementById('volume-slider'); - - seekSlider.addEventListener('input', (e) => { - if (!this.sound) return; - const duration = this.sound.duration(); - const seekTime = (duration * e.target.value) / 100; - this.sound.seek(seekTime); - }); - - volumeSlider.addEventListener('input', (e) => { - if (!this.sound) return; - const volume = e.target.value / 100; - this.sound.volume(volume); - }); - } - - async loadVoices() { - try { - const response = await fetch('/v1/audio/voices'); - if (!response.ok) { - const error = await response.json(); - throw new Error(error.detail?.message || 'Failed to load voices'); - } - - const data = await response.json(); - if (!data.voices?.length) { - throw new Error('No voices available'); - } - - this.availableVoices = data.voices; - this.renderVoiceOptions(this.availableVoices); - - if (this.selectedVoiceSet.size === 0) { - const firstVoice = this.availableVoices.find(voice => voice && voice.trim()); - if (firstVoice) { - this.addSelectedVoice(firstVoice); - } - } - - this.showStatus('Voices loaded successfully', 'success'); - } catch (error) { - this.showStatus('Failed to load voices: ' + error.message, 'error'); - this.elements.generateBtn.disabled = true; - } - } - - renderVoiceOptions(voices) { - this.elements.voiceOptions.innerHTML = voices - .map(voice => ` - - `) - .join(''); - this.updateSelectedVoicesDisplay(); - } - - updateSelectedVoicesDisplay() { - this.elements.selectedVoices.innerHTML = Array.from(this.selectedVoiceSet) - .map(voice => ` - - ${voice} - × - - `) - .join(''); - - if (this.selectedVoiceSet.size > 0) { - this.elements.voiceSearch.placeholder = 'Search voices...'; - } else { - this.elements.voiceSearch.placeholder = 'Search and select voices...'; - } - } - - addSelectedVoice(voice) { - this.selectedVoiceSet.add(voice); - this.updateSelectedVoicesDisplay(); - } - - removeSelectedVoice(voice) { - this.selectedVoiceSet.delete(voice); - this.updateSelectedVoicesDisplay(); - const checkbox = this.elements.voiceOptions.querySelector(`input[value="${voice}"]`); - if (checkbox) checkbox.checked = false; - } - - filterVoices(searchTerm) { - const filtered = this.availableVoices.filter(voice => - voice.toLowerCase().includes(searchTerm.toLowerCase()) - ); - this.renderVoiceOptions(filtered); - } - - setupEventListeners() { - window.addEventListener('beforeunload', () => { - if (this.currentController) { - this.currentController.abort(); - } - if (this.sound) { - this.sound.unload(); - } - }); - - this.elements.voiceSearch.addEventListener('input', (e) => { - this.filterVoices(e.target.value); - }); - - this.elements.voiceOptions.addEventListener('change', (e) => { - if (e.target.type === 'checkbox') { - if (e.target.checked) { - this.addSelectedVoice(e.target.value); - } else { - this.removeSelectedVoice(e.target.value); - } - } - }); - - this.elements.selectedVoices.addEventListener('click', (e) => { - if (e.target.classList.contains('remove-voice')) { - const voice = e.target.dataset.voice; - this.removeSelectedVoice(voice); - } - }); - - this.elements.generateBtn.addEventListener('click', () => this.generateSpeech()); - this.elements.cancelBtn.addEventListener('click', () => this.cancelGeneration()); - this.elements.playPauseBtn.addEventListener('click', () => this.togglePlayPause()); - this.elements.downloadBtn.addEventListener('click', () => this.downloadAudio()); - - this.elements.speedSlider.addEventListener('input', (e) => { - const speed = parseFloat(e.target.value); - this.elements.speedValue.textContent = speed.toFixed(1); - }); - - document.addEventListener('click', (e) => { - if (!this.elements.voiceSearch.contains(e.target) && - !this.elements.voiceDropdown.contains(e.target)) { - this.elements.voiceDropdown.style.display = 'none'; - } - }); - - this.elements.voiceSearch.addEventListener('focus', () => { - this.elements.voiceDropdown.style.display = 'block'; - if (!this.elements.voiceSearch.value) { - this.elements.voiceSearch.placeholder = 'Search voices...'; - } - }); - - this.elements.voiceSearch.addEventListener('blur', () => { - if (!this.elements.voiceSearch.value && this.selectedVoiceSet.size === 0) { - this.elements.voiceSearch.placeholder = 'Search and select voices...'; - } - }); - - window.addEventListener('resize', () => { - if (this.wave) { - this.wave.width = this.elements.waveContainer.clientWidth; - } - }); - } - - showStatus(message, type = 'info') { - this.elements.status.textContent = message; - this.elements.status.className = 'status ' + type; - setTimeout(() => { - this.elements.status.className = 'status'; - }, 5000); - } - - setLoading(loading) { - this.isGenerating = loading; - this.elements.generateBtn.disabled = loading; - this.elements.generateBtn.className = loading ? 'loading' : ''; - this.elements.cancelBtn.style.display = loading ? 'block' : 'none'; - } - - validateInput() { - const text = this.elements.textInput.value.trim(); - if (!text) { - this.showStatus('Please enter some text', 'error'); - return false; - } - - if (this.selectedVoiceSet.size === 0) { - this.showStatus('Please select a voice', 'error'); - return false; - } - - return true; - } - - cancelGeneration() { - if (this.currentController) { - this.currentController.abort(); - this.currentController = null; - if (this.sound) { - this.sound.unload(); - this.sound = null; - } - this.wave.stop(); - this.showStatus('Generation cancelled', 'info'); - this.setLoading(false); - } - } - - togglePlayPause() { - if (!this.sound) return; - - if (this.sound.playing()) { - this.sound.pause(); - this.wave.stop(); - this.elements.playPauseBtn.textContent = 'Play'; - } else { - this.sound.play(); - this.wave.start(); - this.elements.playPauseBtn.textContent = 'Pause'; - this.updateTimeDisplay(); - } - } - - async generateSpeech() { - if (this.isGenerating || !this.validateInput()) return; - - if (this.sound) { - this.sound.unload(); - this.sound = null; - } - this.wave.stop(); - - this.elements.downloadBtn.style.display = 'none'; - this.audioChunks = []; - - const text = this.elements.textInput.value.trim(); - const voice = Array.from(this.selectedVoiceSet).join('+'); - - this.setLoading(true); - this.currentController = new AbortController(); - - try { - await this.handleAudio(text, voice); - } catch (error) { - if (error.name === 'AbortError') { - this.showStatus('Generation cancelled', 'info'); - } else { - this.showStatus('Error generating speech: ' + error.message, 'error'); - } - } finally { - this.currentController = null; - this.setLoading(false); - } - } - - async handleAudio(text, voice) { - this.showStatus('Generating audio...', 'info'); - - const response = await fetch('/v1/audio/speech', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - input: text, - voice: voice, - response_format: 'mp3', - stream: true, - speed: parseFloat(this.elements.speedSlider.value) - }), - signal: this.currentController.signal - }); - - if (!response.ok) { - const error = await response.json(); - throw new Error(error.detail?.message || 'Failed to generate speech'); - } - - const chunks = []; - const reader = response.body.getReader(); - let totalChunks = 0; - - try { - while (true) { - const {value, done} = await reader.read(); - - if (done) { - this.showStatus('Processing complete', 'success'); - break; - } - - chunks.push(value); - this.audioChunks.push(value.slice(0)); - totalChunks++; - - if (totalChunks % 5 === 0) { - this.showStatus(`Received ${totalChunks} chunks...`, 'info'); - } - } - - const blob = new Blob(chunks, { type: 'audio/mpeg' }); - const url = URL.createObjectURL(blob); - - if (this.sound) { - this.sound.unload(); - } - - this.sound = new Howl({ - src: [url], - format: ['mp3'], - html5: true, - onplay: () => { - this.elements.playPauseBtn.textContent = 'Pause'; - this.wave.start(); - this.updateTimeDisplay(); - }, - onpause: () => { - this.elements.playPauseBtn.textContent = 'Play'; - this.wave.stop(); - }, - onend: () => { - this.elements.playPauseBtn.textContent = 'Play'; - this.wave.stop(); - this.elements.generateBtn.disabled = false; - }, - onload: () => { - URL.revokeObjectURL(url); - this.showStatus('Audio ready', 'success'); - this.enableDownload(); - if (this.elements.autoplayToggle.checked) { - this.sound.play(); - } - }, - onloaderror: () => { - URL.revokeObjectURL(url); - this.showStatus('Error loading audio', 'error'); - } - }); - - } catch (error) { - if (error.name === 'AbortError') { - throw error; - } - console.error('Streaming error:', error); - this.showStatus('Error during streaming', 'error'); - throw error; - } - } - - enableDownload() { - this.elements.downloadBtn.style.display = 'flex'; - } - - downloadAudio() { - if (this.audioChunks.length === 0) return; - - const format = this.elements.formatSelect.value; - const voice = Array.from(this.selectedVoiceSet).join('+'); - const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); - const blob = new Blob(this.audioChunks, { type: `audio/${format}` }); - const url = URL.createObjectURL(blob); - const a = document.createElement('a'); - a.href = url; - a.download = `${voice}_${timestamp}.${format}`; - document.body.appendChild(a); - a.click(); - document.body.removeChild(a); - URL.revokeObjectURL(url); - } -} - -document.addEventListener('DOMContentLoaded', () => { - new KokoroPlayer(); -}); \ No newline at end of file diff --git a/web/index.html b/web/index.html index 7c6a5b5..2e0ce73 100644 --- a/web/index.html +++ b/web/index.html @@ -15,8 +15,7 @@ - - + @@ -126,6 +125,6 @@ - + diff --git a/web/src/App.js b/web/src/App.js new file mode 100644 index 0000000..41382b0 --- /dev/null +++ b/web/src/App.js @@ -0,0 +1,188 @@ +import AudioService from './services/AudioService.js'; +import VoiceService from './services/VoiceService.js'; +import PlayerState from './state/PlayerState.js'; +import PlayerControls from './components/PlayerControls.js'; +import VoiceSelector from './components/VoiceSelector.js'; +import WaveVisualizer from './components/WaveVisualizer.js'; + +export class App { + constructor() { + this.elements = { + textInput: document.getElementById('text-input'), + generateBtn: document.getElementById('generate-btn'), + generateBtnText: document.querySelector('#generate-btn .btn-text'), + generateBtnLoader: document.querySelector('#generate-btn .loader'), + downloadBtn: document.getElementById('download-btn'), + autoplayToggle: document.getElementById('autoplay-toggle'), + formatSelect: document.getElementById('format-select'), + status: document.getElementById('status'), + cancelBtn: document.getElementById('cancel-btn') + }; + + this.initialize(); + } + + async initialize() { + // Initialize services and state + this.playerState = new PlayerState(); + this.audioService = new AudioService(); + this.voiceService = new VoiceService(); + + // Initialize components + this.playerControls = new PlayerControls(this.audioService, this.playerState); + this.voiceSelector = new VoiceSelector(this.voiceService); + this.waveVisualizer = new WaveVisualizer(this.playerState); + + // Initialize voice selector + const voicesLoaded = await this.voiceSelector.initialize(); + if (!voicesLoaded) { + this.showStatus('Failed to load voices', 'error'); + this.elements.generateBtn.disabled = true; + return; + } + + this.setupEventListeners(); + this.setupAudioEvents(); + } + + setupEventListeners() { + // Generate button + this.elements.generateBtn.addEventListener('click', () => this.generateSpeech()); + + // Download button + this.elements.downloadBtn.addEventListener('click', () => this.downloadAudio()); + + // Cancel button + this.elements.cancelBtn.addEventListener('click', () => { + this.audioService.cancel(); + this.setGenerating(false); + this.elements.downloadBtn.style.display = 'none'; + this.showStatus('Generation cancelled', 'info'); + }); + + // Handle page unload + window.addEventListener('beforeunload', () => { + this.audioService.cleanup(); + this.playerControls.cleanup(); + this.waveVisualizer.cleanup(); + }); + } + + setupAudioEvents() { + // Handle download button visibility + this.audioService.addEventListener('downloadReady', () => { + this.elements.downloadBtn.style.display = 'flex'; + }); + + // Handle completion + this.audioService.addEventListener('complete', () => { + this.setGenerating(false); + this.showStatus('Generation complete', 'success'); + }); + + // Handle errors + this.audioService.addEventListener('error', (error) => { + this.showStatus('Error: ' + error.message, 'error'); + this.setGenerating(false); + this.elements.downloadBtn.style.display = 'none'; + }); + } + + showStatus(message, type = 'info') { + this.elements.status.textContent = message; + this.elements.status.className = 'status ' + type; + setTimeout(() => { + this.elements.status.className = 'status'; + }, 5000); + } + + setGenerating(isGenerating) { + this.playerState.setGenerating(isGenerating); + this.elements.generateBtn.disabled = isGenerating; + this.elements.generateBtn.className = isGenerating ? 'loading' : ''; + this.elements.generateBtnLoader.style.display = isGenerating ? 'block' : 'none'; + this.elements.generateBtnText.style.visibility = isGenerating ? 'hidden' : 'visible'; + this.elements.cancelBtn.style.display = isGenerating ? 'block' : 'none'; + } + + validateInput() { + const text = this.elements.textInput.value.trim(); + if (!text) { + this.showStatus('Please enter some text', 'error'); + return false; + } + + if (!this.voiceService.hasSelectedVoices()) { + this.showStatus('Please select a voice', 'error'); + return false; + } + + return true; + } + + async generateSpeech() { + // Don't check isGenerating state since we want to allow generation after cancel + if (!this.validateInput()) { + return; + } + + const text = this.elements.textInput.value.trim(); + const voice = this.voiceService.getSelectedVoiceString(); + const speed = this.playerState.getState().speed; + + this.setGenerating(true); + this.elements.downloadBtn.style.display = 'none'; + + // Just reset progress bar, don't do full cleanup + this.waveVisualizer.updateProgress(0, 1); + + try { + console.log('Starting audio generation...', { text, voice, speed }); + + // Ensure we have valid input + if (!text || !voice) { + console.error('Invalid input:', { text, voice, speed }); + throw new Error('Invalid input parameters'); + } + + await this.audioService.streamAudio( + text, + voice, + speed, + (loaded, total) => { + console.log('Progress update:', { loaded, total }); + this.waveVisualizer.updateProgress(loaded, total); + } + ); + } catch (error) { + console.error('Generation error:', error); + if (error.name !== 'AbortError') { + this.showStatus('Error generating speech: ' + error.message, 'error'); + this.setGenerating(false); + } + } + } + + downloadAudio() { + const downloadUrl = this.audioService.getDownloadUrl(); + if (!downloadUrl) return; + + const format = this.elements.formatSelect.value; + const voice = this.voiceService.getSelectedVoiceString(); + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); + + // Create download link + const a = document.createElement('a'); + a.href = downloadUrl; + a.download = `${voice}_${timestamp}.${format}`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(downloadUrl); + } +} + +// Initialize app when DOM is loaded +document.addEventListener('DOMContentLoaded', () => { + new App(); +}); \ No newline at end of file diff --git a/web/src/components/PlayerControls.js b/web/src/components/PlayerControls.js new file mode 100644 index 0000000..3daf33c --- /dev/null +++ b/web/src/components/PlayerControls.js @@ -0,0 +1,168 @@ +export class PlayerControls { + constructor(audioService, playerState) { + this.audioService = audioService; + this.playerState = playerState; + this.elements = { + playPauseBtn: document.getElementById('play-pause-btn'), + seekSlider: document.getElementById('seek-slider'), + volumeSlider: document.getElementById('volume-slider'), + speedSlider: document.getElementById('speed-slider'), + speedValue: document.getElementById('speed-value'), + timeDisplay: document.getElementById('time-display'), + cancelBtn: document.getElementById('cancel-btn') + }; + + this.setupEventListeners(); + this.setupAudioEvents(); + this.setupStateSubscription(); + this.timeUpdateInterval = null; + } + + formatTime(secs) { + const minutes = Math.floor(secs / 60); + const seconds = Math.floor(secs % 60); + return `${minutes}:${seconds.toString().padStart(2, '0')}`; + } + + startTimeUpdate() { + this.stopTimeUpdate(); // Clear any existing interval + this.timeUpdateInterval = setInterval(() => { + this.updateTimeDisplay(); + }, 100); // Update every 100ms for smooth tracking + } + + stopTimeUpdate() { + if (this.timeUpdateInterval) { + clearInterval(this.timeUpdateInterval); + this.timeUpdateInterval = null; + } + } + + updateTimeDisplay() { + const currentTime = this.audioService.getCurrentTime(); + const duration = this.audioService.getDuration(); + + // Update time display + this.elements.timeDisplay.textContent = + `${this.formatTime(currentTime)} / ${this.formatTime(duration || 0)}`; + + // Update seek slider + if (duration > 0 && !this.elements.seekSlider.dragging) { + this.elements.seekSlider.value = (currentTime / duration) * 100; + } + + // Update state + this.playerState.setTime(currentTime, duration); + } + + setupEventListeners() { + // Play/Pause button + this.elements.playPauseBtn.addEventListener('click', () => { + if (this.audioService.isPlaying()) { + this.audioService.pause(); + } else { + this.audioService.play(); + } + }); + + // Seek slider + this.elements.seekSlider.addEventListener('mousedown', () => { + this.elements.seekSlider.dragging = true; + }); + + this.elements.seekSlider.addEventListener('mouseup', () => { + this.elements.seekSlider.dragging = false; + }); + + this.elements.seekSlider.addEventListener('input', (e) => { + const duration = this.audioService.getDuration(); + const seekTime = (duration * e.target.value) / 100; + this.audioService.seek(seekTime); + this.updateTimeDisplay(); + }); + + // Volume slider + this.elements.volumeSlider.addEventListener('input', (e) => { + const volume = e.target.value / 100; + this.audioService.setVolume(volume); + this.playerState.setVolume(volume); + }); + + // Speed slider + this.elements.speedSlider.addEventListener('input', (e) => { + const speed = parseFloat(e.target.value); + this.elements.speedValue.textContent = speed.toFixed(1); + this.playerState.setSpeed(speed); + }); + + // Cancel button + this.elements.cancelBtn.addEventListener('click', () => { + this.audioService.cancel(); + this.playerState.reset(); + this.updateControls({ isGenerating: false }); + this.stopTimeUpdate(); + }); + } + + setupAudioEvents() { + this.audioService.addEventListener('play', () => { + this.elements.playPauseBtn.textContent = 'Pause'; + this.playerState.setPlaying(true); + this.startTimeUpdate(); + }); + + this.audioService.addEventListener('pause', () => { + this.elements.playPauseBtn.textContent = 'Play'; + this.playerState.setPlaying(false); + this.stopTimeUpdate(); + }); + + this.audioService.addEventListener('ended', () => { + this.elements.playPauseBtn.textContent = 'Play'; + this.playerState.setPlaying(false); + this.stopTimeUpdate(); + }); + + // Initial time display + this.updateTimeDisplay(); + } + + setupStateSubscription() { + this.playerState.subscribe(state => this.updateControls(state)); + } + + updateControls(state) { + // Update button states + this.elements.playPauseBtn.disabled = !state.duration && !state.isGenerating; + this.elements.seekSlider.disabled = !state.duration; + this.elements.cancelBtn.style.display = state.isGenerating ? 'block' : 'none'; + + // Update volume and speed if changed externally + if (this.elements.volumeSlider.value !== state.volume * 100) { + this.elements.volumeSlider.value = state.volume * 100; + } + + if (this.elements.speedSlider.value !== state.speed.toString()) { + this.elements.speedSlider.value = state.speed; + this.elements.speedValue.textContent = state.speed.toFixed(1); + } + } + + cleanup() { + this.stopTimeUpdate(); + if (this.audioService) { + this.audioService.pause(); + } + if (this.playerState) { + this.playerState.reset(); + } + // Reset UI elements + this.elements.playPauseBtn.textContent = 'Play'; + this.elements.playPauseBtn.disabled = true; + this.elements.seekSlider.value = 0; + this.elements.seekSlider.disabled = true; + this.elements.timeDisplay.textContent = '0:00 / 0:00'; + } +} + +export default PlayerControls; \ No newline at end of file diff --git a/web/src/components/VoiceSelector.js b/web/src/components/VoiceSelector.js new file mode 100644 index 0000000..4209378 --- /dev/null +++ b/web/src/components/VoiceSelector.js @@ -0,0 +1,117 @@ +export class VoiceSelector { + constructor(voiceService) { + this.voiceService = voiceService; + this.elements = { + voiceSearch: document.getElementById('voice-search'), + voiceDropdown: document.getElementById('voice-dropdown'), + voiceOptions: document.getElementById('voice-options'), + selectedVoices: document.getElementById('selected-voices') + }; + + this.setupEventListeners(); + } + + setupEventListeners() { + // Voice search + this.elements.voiceSearch.addEventListener('input', (e) => { + const filteredVoices = this.voiceService.filterVoices(e.target.value); + this.renderVoiceOptions(filteredVoices); + }); + + // Voice selection + this.elements.voiceOptions.addEventListener('change', (e) => { + if (e.target.type === 'checkbox') { + if (e.target.checked) { + this.voiceService.addVoice(e.target.value); + } else { + this.voiceService.removeVoice(e.target.value); + } + this.updateSelectedVoicesDisplay(); + } + }); + + // Remove selected voice + this.elements.selectedVoices.addEventListener('click', (e) => { + if (e.target.classList.contains('remove-voice')) { + const voice = e.target.dataset.voice; + this.voiceService.removeVoice(voice); + this.updateVoiceCheckbox(voice, false); + this.updateSelectedVoicesDisplay(); + } + }); + + // Dropdown visibility + this.elements.voiceSearch.addEventListener('focus', () => { + this.elements.voiceDropdown.style.display = 'block'; + this.updateSearchPlaceholder(); + }); + + document.addEventListener('click', (e) => { + if (!this.elements.voiceSearch.contains(e.target) && + !this.elements.voiceDropdown.contains(e.target)) { + this.elements.voiceDropdown.style.display = 'none'; + } + }); + + this.elements.voiceSearch.addEventListener('blur', () => { + if (!this.elements.voiceSearch.value) { + this.updateSearchPlaceholder(); + } + }); + } + + renderVoiceOptions(voices) { + this.elements.voiceOptions.innerHTML = voices + .map(voice => ` + + `) + .join(''); + } + + updateSelectedVoicesDisplay() { + const selectedVoices = this.voiceService.getSelectedVoices(); + this.elements.selectedVoices.innerHTML = selectedVoices + .map(voice => ` + + ${voice} + × + + `) + .join(''); + + this.updateSearchPlaceholder(); + } + + updateSearchPlaceholder() { + const hasSelected = this.voiceService.hasSelectedVoices(); + this.elements.voiceSearch.placeholder = hasSelected ? + 'Search voices...' : + 'Search and select voices...'; + } + + updateVoiceCheckbox(voice, checked) { + const checkbox = this.elements.voiceOptions + .querySelector(`input[value="${voice}"]`); + if (checkbox) { + checkbox.checked = checked; + } + } + + async initialize() { + try { + await this.voiceService.loadVoices(); + this.renderVoiceOptions(this.voiceService.getAvailableVoices()); + this.updateSelectedVoicesDisplay(); + return true; + } catch (error) { + console.error('Failed to initialize voice selector:', error); + return false; + } + } +} + +export default VoiceSelector; \ No newline at end of file diff --git a/web/src/components/WaveVisualizer.js b/web/src/components/WaveVisualizer.js new file mode 100644 index 0000000..a3eb201 --- /dev/null +++ b/web/src/components/WaveVisualizer.js @@ -0,0 +1,107 @@ +export class WaveVisualizer { + constructor(playerState) { + this.playerState = playerState; + this.wave = null; + this.progressBar = null; + this.container = document.getElementById('wave-container'); + + this.setupWave(); + this.setupProgressBar(); + this.setupStateSubscription(); + } + + setupWave() { + this.wave = new SiriWave({ + container: this.container, + style: 'ios9', + width: this.container.clientWidth, + height: 100, // Increased height + autostart: false, + amplitude: 1, + speed: 0.1 + }); + + // Handle window resize + window.addEventListener('resize', () => { + if (this.wave) { + this.wave.width = this.container.clientWidth; + } + }); + } + + setupProgressBar() { + this.progressBar = document.createElement('progress'); + this.progressBar.style.width = '100%'; + this.progressBar.max = 100; + this.progressBar.value = 0; + this.progressBar.className = 'generation-progress'; + // Insert inside player-container, after wave-container + const playerContainer = this.container.closest('.player-container'); + playerContainer.insertBefore(this.progressBar, playerContainer.lastElementChild); + this.progressBar.style.display = 'none'; + } + + setupStateSubscription() { + this.playerState.subscribe(state => { + // Handle generation progress + if (state.isGenerating) { + this.progressBar.style.display = 'block'; + this.progressBar.value = state.progress; + } else if (state.progress >= 100) { + // Hide progress bar after completion + setTimeout(() => { + this.progressBar.style.display = 'none'; + this.progressBar.value = 0; + }, 500); + } + + // Only animate when playing, stop otherwise + if (state.isPlaying) { + this.wave.start(); + } else { + this.wave.stop(); + } + }); + } + + updateProgress(receivedChunks, totalChunks) { + if (!totalChunks) return; + + // Calculate progress percentage based on chunks + const progress = Math.min((receivedChunks / totalChunks) * 100, 99); + + // Always update on 0 progress or when progress increases + if (receivedChunks === 0 || progress > this.progressBar.value) { + this.progressBar.style.display = 'block'; + this.progressBar.value = progress; + this.playerState.setProgress(receivedChunks, totalChunks); + } + } + + cleanup() { + if (this.wave) { + this.wave.stop(); + this.wave.dispose(); + this.wave = null; + } + + if (this.progressBar) { + this.progressBar.style.display = 'none'; + this.progressBar.value = 0; + if (this.progressBar.parentNode) { + this.progressBar.parentNode.removeChild(this.progressBar); + } + this.progressBar = null; + } + + // Re-setup wave and progress bar + this.setupWave(); + this.setupProgressBar(); + + if (this.playerState) { + this.playerState.setProgress(0, 1); // Reset progress in state + } + } +} + +export default WaveVisualizer; \ No newline at end of file diff --git a/web/src/services/AudioService.js b/web/src/services/AudioService.js new file mode 100644 index 0000000..adbd20a --- /dev/null +++ b/web/src/services/AudioService.js @@ -0,0 +1,295 @@ +export class AudioService { + constructor() { + this.mediaSource = null; + this.sourceBuffer = null; + this.audio = null; + this.controller = null; + this.eventListeners = new Map(); + this.chunks = []; + this.minimumPlaybackSize = 50000; // 50KB minimum before playback + this.textLength = 0; + this.shouldAutoplay = false; + this.CHARS_PER_CHUNK = 600; // Estimated chars per chunk + } + + async streamAudio(text, voice, speed, onProgress) { + try { + console.log('AudioService: Starting stream...', { text, voice, speed }); + + // Only abort if there's an active controller + if (this.controller) { + this.controller.abort(); + this.controller = null; + } + + // Create new controller before cleanup to prevent race conditions + this.controller = new AbortController(); + + // Clean up previous audio state + this.cleanup(); + onProgress?.(0, 1); // Reset progress to 0 + this.chunks = []; + this.textLength = text.length; + this.shouldAutoplay = document.getElementById('autoplay-toggle').checked; + + // Calculate expected number of chunks based on text length + const estimatedChunks = Math.max(1, Math.ceil(this.textLength / this.CHARS_PER_CHUNK)); + + console.log('AudioService: Making API call...', { text, voice, speed }); + + const response = await fetch('/v1/audio/speech', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + input: text, + voice: voice, + response_format: 'mp3', + stream: true, + speed: speed + }), + signal: this.controller.signal + }); + + console.log('AudioService: Got response', { status: response.status }); + + if (!response.ok) { + const error = await response.json(); + console.error('AudioService: API error', error); + throw new Error(error.detail?.message || 'Failed to generate speech'); + } + + await this.setupAudioStream(response, onProgress, estimatedChunks); + return this.audio; + } catch (error) { + this.cleanup(); + throw error; + } + } + + async setupAudioStream(response, onProgress, estimatedTotalSize) { + this.audio = new Audio(); + this.mediaSource = new MediaSource(); + this.audio.src = URL.createObjectURL(this.mediaSource); + + return new Promise((resolve, reject) => { + this.mediaSource.addEventListener('sourceopen', async () => { + try { + this.sourceBuffer = this.mediaSource.addSourceBuffer('audio/mpeg'); + await this.processStream(response.body, onProgress, estimatedTotalSize); + resolve(); + } catch (error) { + reject(error); + } + }); + }); + } + + async processStream(stream, onProgress, estimatedChunks) { + const reader = stream.getReader(); + let hasStartedPlaying = false; + let receivedChunks = 0; + + try { + while (true) { + const {value, done} = await reader.read(); + + if (done) { + if (this.mediaSource.readyState === 'open') { + this.mediaSource.endOfStream(); + } + // Ensure we show 100% at completion + onProgress?.(estimatedChunks, estimatedChunks); + this.dispatchEvent('complete'); + this.dispatchEvent('downloadReady'); + return; + } + + this.chunks.push(value); + receivedChunks++; + + await this.appendChunk(value); + + // Update progress based on received chunks + onProgress?.(receivedChunks, estimatedChunks); + + // Start playback if we have enough chunks + if (!hasStartedPlaying && receivedChunks >= 1) { + hasStartedPlaying = true; + if (this.shouldAutoplay) { + // Small delay to ensure buffer is ready + setTimeout(() => this.play(), 100); + } + } + } + } catch (error) { + if (error.name !== 'AbortError') { + throw error; + } + } + } + + async appendChunk(chunk) { + return new Promise((resolve) => { + const appendChunk = () => { + this.sourceBuffer.appendBuffer(chunk); + this.sourceBuffer.addEventListener('updateend', resolve, { once: true }); + }; + + if (!this.sourceBuffer.updating) { + appendChunk(); + } else { + this.sourceBuffer.addEventListener('updateend', appendChunk, { once: true }); + } + }); + } + + play() { + if (this.audio && this.audio.readyState >= 2) { + const playPromise = this.audio.play(); + if (playPromise) { + playPromise.catch(error => { + if (error.name !== 'AbortError') { + console.error('Playback error:', error); + } + }); + } + this.dispatchEvent('play'); + } + } + + pause() { + if (this.audio) { + this.audio.pause(); + this.dispatchEvent('pause'); + } + } + + seek(time) { + if (this.audio) { + const wasPlaying = !this.audio.paused; + this.audio.currentTime = time; + if (wasPlaying) { + this.play(); + } + } + } + + setVolume(volume) { + if (this.audio) { + this.audio.volume = Math.max(0, Math.min(1, volume)); + } + } + + getCurrentTime() { + return this.audio ? this.audio.currentTime : 0; + } + + getDuration() { + return this.audio ? this.audio.duration : 0; + } + + isPlaying() { + return this.audio ? !this.audio.paused : false; + } + + addEventListener(event, callback) { + if (!this.eventListeners.has(event)) { + this.eventListeners.set(event, new Set()); + } + this.eventListeners.get(event).add(callback); + + if (this.audio && ['play', 'pause', 'ended', 'timeupdate'].includes(event)) { + this.audio.addEventListener(event, callback); + } + } + + removeEventListener(event, callback) { + const listeners = this.eventListeners.get(event); + if (listeners) { + listeners.delete(callback); + } + if (this.audio) { + this.audio.removeEventListener(event, callback); + } + } + + dispatchEvent(event, data) { + const listeners = this.eventListeners.get(event); + if (listeners) { + listeners.forEach(callback => callback(data)); + } + } + + cancel() { + if (this.controller) { + this.controller.abort(); + this.controller = null; + } + + // Full cleanup of all resources + if (this.audio) { + this.audio.pause(); + this.audio.src = ''; + this.audio = null; + } + + if (this.mediaSource && this.mediaSource.readyState === 'open') { + try { + this.mediaSource.endOfStream(); + } catch (e) { + // Ignore errors during cleanup + } + } + + this.mediaSource = null; + this.sourceBuffer = null; + this.chunks = []; + this.textLength = 0; + + // Force a hard refresh of the page to ensure clean state + window.location.reload(); + } + + cleanup() { + // Clean up audio elements + if (this.audio) { + // Remove all event listeners + this.eventListeners.forEach((listeners, event) => { + listeners.forEach(callback => { + this.audio.removeEventListener(event, callback); + }); + }); + + this.audio.pause(); + this.audio.src = ''; + this.audio = null; + } + + if (this.mediaSource && this.mediaSource.readyState === 'open') { + try { + this.mediaSource.endOfStream(); + } catch (e) { + // Ignore errors during cleanup + } + } + + this.mediaSource = null; + this.sourceBuffer = null; + this.chunks = []; + this.textLength = 0; + } + + getDownloadUrl() { + if (!this.audio || !this.sourceBuffer || this.chunks.length === 0) return null; + + // Get the buffered data from MediaSource + const buffered = this.sourceBuffer.buffered; + if (buffered.length === 0) return null; + + // Create blob from the original chunks + const blob = new Blob(this.chunks, { type: 'audio/mpeg' }); + return URL.createObjectURL(blob); + } +} + +export default AudioService; diff --git a/web/src/services/VoiceService.js b/web/src/services/VoiceService.js new file mode 100644 index 0000000..92cd4db --- /dev/null +++ b/web/src/services/VoiceService.js @@ -0,0 +1,81 @@ +export class VoiceService { + constructor() { + this.availableVoices = []; + this.selectedVoices = new Set(); + } + + async loadVoices() { + try { + const response = await fetch('/v1/audio/voices'); + if (!response.ok) { + const error = await response.json(); + throw new Error(error.detail?.message || 'Failed to load voices'); + } + + const data = await response.json(); + if (!data.voices?.length) { + throw new Error('No voices available'); + } + + this.availableVoices = data.voices; + + // Select first voice if none selected + if (this.selectedVoices.size === 0) { + const firstVoice = this.availableVoices.find(voice => voice && voice.trim()); + if (firstVoice) { + this.addVoice(firstVoice); + } + } + + return this.availableVoices; + } catch (error) { + console.error('Failed to load voices:', error); + throw error; + } + } + + getAvailableVoices() { + return this.availableVoices; + } + + getSelectedVoices() { + return Array.from(this.selectedVoices); + } + + getSelectedVoiceString() { + return Array.from(this.selectedVoices).join('+'); + } + + addVoice(voice) { + if (this.availableVoices.includes(voice)) { + this.selectedVoices.add(voice); + return true; + } + return false; + } + + removeVoice(voice) { + return this.selectedVoices.delete(voice); + } + + clearSelectedVoices() { + this.selectedVoices.clear(); + } + + filterVoices(searchTerm) { + if (!searchTerm) { + return this.availableVoices; + } + + const term = searchTerm.toLowerCase(); + return this.availableVoices.filter(voice => + voice.toLowerCase().includes(term) + ); + } + + hasSelectedVoices() { + return this.selectedVoices.size > 0; + } +} + +export default VoiceService; \ No newline at end of file diff --git a/web/src/state/PlayerState.js b/web/src/state/PlayerState.js new file mode 100644 index 0000000..a2ea7b1 --- /dev/null +++ b/web/src/state/PlayerState.js @@ -0,0 +1,88 @@ +export class PlayerState { + constructor() { + this.state = { + isPlaying: false, + isGenerating: false, + currentTime: 0, + duration: 0, + volume: 1, + speed: 1, + progress: 0, + error: null + }; + this.listeners = new Set(); + } + + subscribe(listener) { + this.listeners.add(listener); + return () => this.listeners.delete(listener); + } + + notify() { + this.listeners.forEach(listener => listener(this.state)); + } + + setState(updates) { + this.state = { + ...this.state, + ...updates + }; + this.notify(); + } + + setPlaying(isPlaying) { + this.setState({ isPlaying }); + } + + setGenerating(isGenerating) { + this.setState({ isGenerating }); + } + + setProgress(loaded, total) { + const progress = total > 0 ? (loaded / total) * 100 : 0; + this.setState({ progress }); + } + + setTime(currentTime, duration) { + this.setState({ currentTime, duration }); + } + + setVolume(volume) { + this.setState({ volume }); + } + + setSpeed(speed) { + this.setState({ speed }); + } + + setError(error) { + this.setState({ error }); + } + + clearError() { + this.setState({ error: null }); + } + + reset() { + // Keep current speed setting but reset everything else + const currentSpeed = this.state.speed; + const currentVolume = this.state.volume; + + this.setState({ + isPlaying: false, + isGenerating: false, + currentTime: 0, + duration: 0, + progress: 0, + error: null, + speed: currentSpeed, + volume: currentVolume + }); + } + + getState() { + return { ...this.state }; + } +} + +export default PlayerState; \ No newline at end of file diff --git a/web/styles/player.css b/web/styles/player.css index d70add8..214f865 100644 --- a/web/styles/player.css +++ b/web/styles/player.css @@ -122,6 +122,45 @@ margin-top: 0.5rem; } +/* Progress bar styles */ +.generation-progress { + -webkit-appearance: none; + appearance: none; + width: 100%; + height: 6px; + border: none; + background: rgba(99, 102, 241, 0.1); + border-radius: 3px; + margin: 1rem 0; + display: block; +} + +.generation-progress::-webkit-progress-bar { + background: rgba(99, 102, 241, 0.1); + border-radius: 3px; +} + +.generation-progress::-webkit-progress-value { + background: var(--fg-color); + border-radius: 3px; + transition: width 0.2s ease; + box-shadow: 0 0 10px rgba(99, 102, 241, 0.3); +} + +.generation-progress::-moz-progress-bar { + background: var(--fg-color); + border-radius: 3px; + transition: width 0.2s ease; + box-shadow: 0 0 10px rgba(99, 102, 241, 0.3); +} + +.generation-progress::-ms-fill { + background: var(--fg-color); + border-radius: 3px; + transition: width 0.2s ease; + box-shadow: 0 0 10px rgba(99, 102, 241, 0.3); +} + .wave-container canvas { position: absolute; top: 0;