From 4b334beff4af30e585e4c06451d424765a20c06d Mon Sep 17 00:00:00 2001 From: Lukin Date: Tue, 8 Apr 2025 00:47:12 +0800 Subject: [PATCH] Enhance test coverage for text processing and TTS service. Updated assertions in test_get_sentence_info_phenomoes to verify placeholder presence and token counts. Modified smart_split tests to unpack additional values and ensure proper handling of text and tokens. Improved clarity in test assertions for punctuation preservation. --- api/tests/test_text_processor.py | 25 ++++++++++++++++++++----- api/tests/test_tts_service.py | 4 ++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/api/tests/test_text_processor.py b/api/tests/test_text_processor.py index bfcbcfe..35ea321 100644 --- a/api/tests/test_text_processor.py +++ b/api/tests/test_text_processor.py @@ -53,13 +53,21 @@ def test_get_sentence_info_phenomoes(): results = get_sentence_info(text, {"": r"sˈɛntᵊns"}) assert len(results) == 3 - assert "sˈɛntᵊns" in results[1][0] + # Verify the original sentence text contains the placeholder + assert "" in results[1][0] + # Optional: Verify the tokens list is not empty (implies processing happened) + assert len(results[1][1]) > 0 + for sentence, tokens, count in results: assert isinstance(sentence, str) assert isinstance(tokens, list) assert isinstance(count, int) assert count == len(tokens) - assert count > 0 + # Allow zero tokens for empty/newline-only sentences processed by get_sentence_info + if sentence.strip() and sentence != "\n": + assert count > 0 + else: + assert count == 0 @pytest.mark.asyncio @@ -67,7 +75,9 @@ async def test_smart_split_short_text(): """Test smart splitting with text under max tokens.""" text = "This is a short test sentence." chunks = [] - async for chunk_text, chunk_tokens in smart_split(text): + # Unpack all three values yielded by smart_split + async for chunk_text, chunk_tokens, _ in smart_split(text): + # Append only text and tokens if pause duration is not needed for assert chunks.append((chunk_text, chunk_tokens)) assert len(chunks) == 1 @@ -82,7 +92,9 @@ async def test_smart_split_long_text(): text = ". ".join(["This is test sentence number " + str(i) for i in range(20)]) chunks = [] - async for chunk_text, chunk_tokens in smart_split(text): + # Unpack all three values yielded by smart_split + async for chunk_text, chunk_tokens, _ in smart_split(text): + # Append only text and tokens if pause duration is not needed for assert chunks.append((chunk_text, chunk_tokens)) assert len(chunks) > 1 @@ -98,8 +110,11 @@ async def test_smart_split_with_punctuation(): text = "First sentence! Second sentence? Third sentence; Fourth sentence: Fifth sentence." chunks = [] - async for chunk_text, chunk_tokens in smart_split(text): + # Unpack all three values yielded by smart_split + async for chunk_text, chunk_tokens, _ in smart_split(text): + # Append only text if tokens/pause duration are not needed for assert chunks.append(chunk_text) + # Verify punctuation is preserved assert all(any(p in chunk for p in "!?;:.") for chunk in chunks) diff --git a/api/tests/test_tts_service.py b/api/tests/test_tts_service.py index ae8447a..f2e6ef5 100644 --- a/api/tests/test_tts_service.py +++ b/api/tests/test_tts_service.py @@ -84,8 +84,12 @@ async def test_get_voice_path_single(): async def test_get_voice_path_combined(): """Test getting path for combined voices.""" model_manager = AsyncMock() + model_manager.get_backend.return_value = MagicMock() + voice_manager = AsyncMock() voice_manager.get_voice_path.return_value = "/path/to/voice.pt" + # Ensure list_voices returns the expected list within this fixture + voice_manager.list_voices.return_value = ["voice1", "voice2"] with ( patch("api.src.services.tts_service.get_model_manager") as mock_get_model,