diff --git a/api/src/services/text_processing/normalizer.py b/api/src/services/text_processing/normalizer.py index 7c728fb..ca26ffb 100644 --- a/api/src/services/text_processing/normalizer.py +++ b/api/src/services/text_processing/normalizer.py @@ -56,7 +56,7 @@ VALID_UNITS = { "g":"gram", "kg":"kilogram", "mg":"miligram", # Mass "s":"second", "ms":"milisecond", "min":"minutes", "h":"hour", # Time "l":"liter", "ml":"mililiter", "cl":"centiliter", "dl":"deciliter", # Volume - "kph":"kilometer per hour", "mph":"mile per hour","mi/h":"mile per hour", "m/s":"meter per second", "km/h":"kilometer per hour", "mm/s":"milimeter per second","cm/s":"centimeter per second", "ft/s":"feet per second", # Speed + "kph":"kilometer per hour", "mph":"mile per hour","mi/h":"mile per hour", "m/s":"meter per second", "km/h":"kilometer per hour", "mm/s":"milimeter per second","cm/s":"centimeter per second", "ft/s":"feet per second","cm/h":"centimeter per day", # Speed "°c":"degree celsius","c":"degree celsius", "°f":"degree fahrenheit","f":"degree fahrenheit", "k":"kelvin", # Temperature "pa":"pascal", "kpa":"kilopascal", "mpa":"megapascal", "atm":"atmosphere", # Pressure "hz":"hertz", "khz":"kilohertz", "mhz":"megahertz", "ghz":"gigahertz", # Frequency @@ -66,11 +66,12 @@ VALID_UNITS = { "j":"joule", "kj":"kilojoule", "mj":"megajoule", # Energy "Ω":"ohm", "kΩ":"kiloohm", "mΩ":"megaohm", # Resistance (Ohm) "f":"farad", "µf":"microfarad", "nf":"nanofarad", "pf":"picofarad", # Capacitance - "b":"byte", "kb":"kilobyte", "mb":"megabyte", "gb":"gigabyte", "tb":"terabyte", "pb":"petabyte", # Data size - "kbps":"kilobyte per second","mbps":"megabyte per second","gbps":"gigabyte per second", + "b":"bit", "kb":"kilobit", "mb":"megabit", "gb":"gigabit", "tb":"terabit", "pb":"petabit", # Data size + "kbps":"kilobit per second","mbps":"megabit per second","gbps":"gigabit per second","tbps":"terabit per second", "px":"pixel" # CSS units } + # Pre-compiled regex patterns for performance EMAIL_PATTERN = re.compile( r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-z]{2,}\b", re.IGNORECASE @@ -82,7 +83,7 @@ URL_PATTERN = re.compile( re.IGNORECASE, ) -UNIT_PATTERN = re.compile(r"((??@\[\\\]^_`{\|}~ \n]{1})""",re.IGNORECASE) +UNIT_PATTERN = re.compile(r"((? str: return f"{left} {right}{s}" def handle_units(u: re.Match[str]) -> str: - unit=u.group(6).strip() - if unit.lower() in VALID_UNITS: - unit=VALID_UNITS[unit.lower()].split(" ") + unit_string=u.group(6).strip() + unit=unit_string + + print(unit) + if unit_string.lower() in VALID_UNITS: + unit=VALID_UNITS[unit_string.lower()].split(" ") + + # Handles the B vs b case + if unit[0].endswith("bit"): + b_case=unit_string[min(1,len(unit_string) - 1)] + if b_case == "B": + unit[0]=unit[0][:-3] + "byte" + number=u.group(1).strip() unit[0]=INFLECT_ENGINE.no(unit[0],number) return " ".join(unit) diff --git a/docker/cpu/Dockerfile b/docker/cpu/Dockerfile index 369d008..d770a6c 100644 --- a/docker/cpu/Dockerfile +++ b/docker/cpu/Dockerfile @@ -20,7 +20,7 @@ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \ mv /root/.local/bin/uvx /usr/local/bin/ # Create non-root user and set up directories and permissions -RUN useradd -m -u 1001 appuser && \ +RUN useradd -m -u 1000 appuser && \ mkdir -p /app/api/src/models/v1_0 && \ chown -R appuser:appuser /app @@ -32,7 +32,7 @@ COPY --chown=appuser:appuser pyproject.toml ./pyproject.toml # Install dependencies RUN --mount=type=cache,target=/root/.cache/uv \ - uv venv --python 3.11 && \ + uv venv --python 3.10 && \ uv sync --extra cpu # Copy project files including models @@ -40,7 +40,6 @@ COPY --chown=appuser:appuser api ./api COPY --chown=appuser:appuser web ./web COPY --chown=appuser:appuser docker/scripts/ ./ RUN chmod +x ./entrypoint.sh -RUN sed -i 's/\r$//' ./entrypoint.sh # Set environment variables ENV PYTHONUNBUFFERED=1 \ diff --git a/docker/gpu/Dockerfile b/docker/gpu/Dockerfile index 7e4606e..46fe956 100644 --- a/docker/gpu/Dockerfile +++ b/docker/gpu/Dockerfile @@ -39,7 +39,7 @@ ENV PHONEMIZER_ESPEAK_PATH=/usr/bin \ # Install dependencies with GPU extras (using cache mounts) RUN --mount=type=cache,target=/root/.cache/uv \ - uv venv --python 3.11 && \ + uv venv --python 3.10 && \ uv sync --extra gpu # Copy project files including models