import json

from src.transcript_variant_prompt import (
    InputScriptProfile,
    build_transcript_variant_user_prompt,
    classify_input_script,
    extract_protected_spans,
    get_cacheable_transcript_variant_prompt,
    get_transcript_variant_json_schema,
    romanized_text_is_ascii,
)


def test_classify_fully_roman_hi() -> None:
    """A Latin-script-only input tagged "hi" classifies as ``fully_roman``."""
    expected_profile = InputScriptProfile.fully_roman
    actual_profile = classify_input_script(
        "ya Google par jaakar whatsapp@support.com par click karke",
        "hi",
    )
    assert actual_profile == expected_profile


def test_classify_fully_native_hi() -> None:
    """A Devanagari-only input tagged "hi" classifies as ``fully_native``."""
    expected_profile = InputScriptProfile.fully_native
    actual_profile = classify_input_script(
        "अनुराग ठाकुर जी तमिलनाडु में आकर प्रचार किए हैं",
        "hi",
    )
    assert actual_profile == expected_profile


def test_classify_fully_native_bn_with_danda() -> None:
    """A Bengali sentence ending in a danda still classifies as ``fully_native``."""
    # The trailing "।" is the sentence terminator this case is named after.
    sentence = "আজকের আলোচনায় আমরা শিক্ষার মান উন্নয়নের কয়েকটি গুরুত্বপূর্ণ দিক তুলে ধরব।"
    language_code = "bn"
    actual_profile = classify_input_script(sentence, language_code)
    assert actual_profile == InputScriptProfile.fully_native


def test_classify_mixed_native_latin_hi() -> None:
    """Devanagari interleaved with Latin words classifies as ``mixed_native_latin``."""
    expected_profile = InputScriptProfile.mixed_native_latin
    actual_profile = classify_input_script(
        "आज meeting 12:30 बजे Zoom पर है",
        "hi",
    )
    assert actual_profile == expected_profile


def test_extract_protected_spans() -> None:
    """The URL, email address and clock time come back as spans, in input order."""
    expected_spans = [
        "www.zuco.com",
        "whatsapp@support.com",
        "12:30",
    ]
    found_spans = extract_protected_spans(
        "website www.zuco.com aur email whatsapp@support.com 12:30"
    )
    assert found_spans == expected_spans


def test_romanized_ascii_check() -> None:
    """The ASCII check accepts a Latin-only string and rejects one with Devanagari."""
    ascii_only = "Ya Google par jaakar 12:30 baje Zoom par hai"
    with_devanagari = "या Google par jaakar"

    assert romanized_text_is_ascii(ascii_only)
    assert not romanized_text_is_ascii(with_devanagari)


def test_user_prompt_contains_items_json() -> None:
    """The user prompt contains the INPUT marker plus the item's profile and text."""
    item = {
        "id": "x1",
        "language_code": "hi",
        "input_script_profile": "mixed_native_latin",
        "text": "आज meeting 12:30 बजे Zoom पर है",
    }
    rendered = build_transcript_variant_user_prompt([item])

    # Checked in this order so the first missing fragment is the one reported.
    expected_fragments = (
        "INPUT:",
        "mixed_native_latin",
        "आज meeting 12:30 बजे Zoom पर है",
    )
    for fragment in expected_fragments:
        assert fragment in rendered


def test_variant_prompt_contains_critical_rules() -> None:
    """The cacheable prompt includes the two rule sentences and the example email."""
    system_prompt = get_cacheable_transcript_variant_prompt()
    required_snippets = (
        "Preserve protected spans exactly",
        "Return a single JSON object matching the schema exactly.",
        "whatsapp@support.com",
    )
    for snippet in required_snippets:
        assert snippet in system_prompt


def test_variant_json_schema_structure() -> None:
    """The schema is an object whose ``results`` items expose exactly five fields."""
    expected_fields = {
        "id",
        "language_code",
        "input_script_profile",
        "native_script_text",
        "romanized_text",
    }

    root = get_transcript_variant_json_schema()
    assert root["type"] == "object"

    top_level_props = root["properties"]
    assert "results" in top_level_props

    result_item_props = top_level_props["results"]["items"]["properties"]
    assert set(result_item_props) == expected_fields


def test_variant_json_schema_has_no_refs() -> None:
    """The serialized schema contains no ``$ref`` anywhere."""
    schema = get_transcript_variant_json_schema()
    # Searching the JSON dump covers every nesting level in one check.
    serialized = json.dumps(schema)
    assert "$ref" not in serialized
