import re

from veena3modal.processing.text_normalizer import normalize_text


def test_normalize_text_expands_url_currency_date_time_and_removes_digits():
    """Check a sentence mixing a URL, a price, an ISO date and a clock time.

    The normalized output must drop the URL scheme/prefix, contain the
    spoken-form expansions listed below, and hold no digit characters.
    """
    out = normalize_text(
        "Visit https://www.example.com for $50.99 on 2025-11-13 at 3:45 PM"
    )

    # URL scaffolding that must not survive normalization.
    for unwanted in ("https", "www"):
        assert unwanted not in out

    # Spoken-form fragments expected for the URL, currency, date and time.
    spoken_fragments = (
        "example dot com",
        "fifty point nine nine dollars",
        "thirteen november two thousand twenty five",
        "three forty five p m",
    )
    for fragment in spoken_fragments:
        assert fragment in out

    # Every number in this input has a verbal expansion, so no digit may remain.
    leftover_digit = re.search(r"\d", out)
    assert leftover_digit is None


def test_normalize_text_email_and_ordinal_expansion():
    """Check that an email address and an ordinal number are spelled out."""
    raw = "Email john.doe@example.com. I have 21st apples."
    out = normalize_text(raw)

    # Both the spoken email and the spoken ordinal must appear.
    for expected in ("john dot doe at example dot com", "twenty first"):
        assert expected in out

    # The original numeric ordinal token must be gone.
    assert "21st" not in out


def test_normalize_text_preserves_and_canonicalizes_emotion_tags():
    """Check that bracketed emotion tags survive and map to canonical forms."""
    # NOTE: The normalizer is expected to protect square-bracket emotion tags
    # and rewrite variant spellings to a canonical tag.
    expected_by_input = {
        "Hello [Giggles] world": "Hello [giggle] world",
        "Hello [laugh] world": "Hello [laughs] world",
    }
    for raw, canonical in expected_by_input.items():
        assert normalize_text(raw) == canonical


def test_normalize_text_preserves_indic_text_while_expanding_numbers_and_units():
    """Check that Devanagari words are kept while "32°C" is spelled out."""
    out = normalize_text("तापमान 32°C है")

    # The Devanagari words on either side of the measurement must be untouched.
    for devanagari_word in ("तापमान", "है"):
        assert devanagari_word in out

    # The number and unit become words, and the raw digits disappear.
    assert "thirty two degrees celsius" in out
    assert "32" not in out


def test_normalize_text_is_idempotent_for_common_inputs():
    """Check that normalizing already-normalized text changes nothing."""
    first_pass = normalize_text("I have 5 apples. Hello [laugh]!")
    second_pass = normalize_text(first_pass)

    # A second application must be a no-op on the first result.
    assert second_pass == first_pass