diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py index 6ec3adb..3cc4cc2 100644 --- a/api/src/services/text_processing/normalizer.py +++ b/api/src/services/text_processing/normalizer.py @@ -115,7 +115,9 @@ def handle_url(u: re.Match[str]) -> str: url = url.replace("?", " question-mark ") url = url.replace("=", " equals ") url = url.replace("&", " ampersand ") + url = url.replace("%", " percent ") url = url.replace(":", " colon ") # Handle any remaining colons + url = url.replace("/", " slash ") # Handle any remaining slashes # Clean up extra spaces return re.sub(r'\s+', ' ', url).strip()