"""Unit tests for manifest schema — no data dependency."""

import json
import tempfile
from pathlib import Path

# Keys that a manifest row must contain; test_sample_row_has_all_required_fields
# checks each of them for presence in SAMPLE_ROW.
REQUIRED_FIELDS = [
    "audio_filepath",
    "text",
    "duration",
    "lang",
    "taskname",
    "source_lang",
    "target_lang",
]

# Reference manifest row shared by the tests in this module.
# It carries every key in REQUIRED_FIELDS plus one extra key, "tar_member".
# The "text" value is non-ASCII, so the JSON tests (which dump with
# ensure_ascii=False) exercise non-ASCII output.
SAMPLE_ROW = {
    "audio_filepath": "/data/shards/lang=hi/shard_001/audio.tar",
    "tar_member": "SPEAKER_00_0000_0.03-8.89.flac",
    "text": "नमस्ते दुनिया",
    "duration": 5.2,
    "lang": "hi",
    "taskname": "asr",
    "source_lang": "hi",
    "target_lang": "hi",
}


def test_sample_row_has_all_required_fields():
    """Every name in REQUIRED_FIELDS must appear as a key of SAMPLE_ROW."""
    absent = [name for name in REQUIRED_FIELDS if name not in SAMPLE_ROW]
    # Report the first absent field, in REQUIRED_FIELDS order.
    assert not absent, f"Missing field: {absent[0]}"


def test_sample_row_text_not_empty():
    """The sample transcript must be a string with non-whitespace content."""
    transcript = SAMPLE_ROW["text"]
    assert isinstance(transcript, str)
    assert transcript.strip() != ""


def test_sample_row_duration_positive():
    """The sample duration must be an int or float strictly greater than zero."""
    duration = SAMPLE_ROW["duration"]
    assert isinstance(duration, (int, float))
    assert 0 < duration


def test_sample_row_taskname_is_asr():
    """The sample row's task name must be exactly "asr"."""
    task = SAMPLE_ROW["taskname"]
    assert task == "asr"


def test_sample_row_lang_matches_source_target():
    """The row's source and target language must both equal its ``lang``."""
    for key in ("source_lang", "target_lang"):
        assert SAMPLE_ROW["lang"] == SAMPLE_ROW[key]


def test_sample_row_serializes_to_json():
    """Dumping SAMPLE_ROW to JSON (non-ASCII kept as-is) and loading it is lossless."""
    encoded = json.dumps(SAMPLE_ROW, ensure_ascii=False)
    decoded = json.loads(encoded)
    assert decoded == SAMPLE_ROW


def test_manifest_roundtrip():
    """Write sample rows to a JSONL manifest and read them back unchanged.

    The manifest is written and read explicitly as UTF-8: rows are dumped with
    ``ensure_ascii=False`` and SAMPLE_ROW contains non-ASCII text, so relying
    on the locale's default encoding can fail where that default is not UTF-8.
    The file lives inside a temporary directory so it is removed even when an
    assertion fails.
    """
    en_row = {
        **SAMPLE_ROW,
        "lang": "en",
        "source_lang": "en",
        "target_lang": "en",
        "text": "hello world",
    }
    rows = [SAMPLE_ROW, en_row]

    with tempfile.TemporaryDirectory() as tmp_dir:
        manifest_path = Path(tmp_dir) / "manifest.jsonl"
        with manifest_path.open("w", encoding="utf-8") as f:
            f.writelines(json.dumps(row, ensure_ascii=False) + "\n" for row in rows)
        with manifest_path.open(encoding="utf-8") as f:
            # Blank lines are skipped rather than parsed.
            loaded = [json.loads(line) for line in f if line.strip()]

    assert len(loaded) == 2
    assert loaded[0]["lang"] == "hi"
    assert loaded[1]["lang"] == "en"
    # The whole content must survive the round trip, not just the "lang" key.
    assert loaded == rows


def test_invalid_row_missing_text():
    """A row without "text" must be flagged by the required-field check.

    Dropping the key and only asserting that it is gone can never fail, so the
    row is also run through the same presence check used for SAMPLE_ROW
    (membership of each REQUIRED_FIELDS entry) to confirm "text" is reported.
    """
    row = {**SAMPLE_ROW}
    del row["text"]
    assert "text" not in row

    missing = [field for field in REQUIRED_FIELDS if field not in row]
    assert "text" in missing, "Row without 'text' was not flagged as invalid"


def test_invalid_row_negative_duration():
    """A copy of SAMPLE_ROW overridden with duration -1.0 has a negative duration."""
    row = dict(SAMPLE_ROW)
    row["duration"] = -1.0
    assert 0 > row["duration"]


def test_makefile_uses_python3():
    """Check that the Makefile invokes Python through $(PYTHON), not bare 'python'.

    The Makefile is looked up three directories above this file. It must
    contain a ``PYTHON ?=`` definition, and any line containing ``python ``
    (other than comments and ``@echo`` lines) must also contain ``$(PYTHON)``
    or the ``PYTHON ?=`` definition itself.
    """
    tests_dir = Path(__file__).resolve().parent
    makefile_path = tests_dir.parent.parent / "Makefile"
    contents = makefile_path.read_text()
    assert "PYTHON ?=" in contents, "Makefile should define PYTHON variable"

    for raw_line in contents.split("\n"):
        command = raw_line.strip()
        # Comments and echoed text may mention python freely.
        if command.startswith(("#", "@echo")):
            continue
        if "python " not in command:
            continue
        if "$(PYTHON)" in command or "PYTHON ?=" in command:
            continue
        raise AssertionError(f"Makefile has bare 'python' call: {command}")
