import numpy as np

from veena3modal.processing.text_chunker import IndicSentenceChunker, crossfade_audio


def _pcm_bytes(value: int, samples: int) -> bytes:
    arr = (np.ones(samples, dtype=np.int16) * value).astype(np.int16)
    return arr.tobytes()


def test_indic_sentence_chunker_returns_single_chunk_when_under_limit():
    """A sentence shorter than the limit should come back untouched as one chunk."""
    short_sentence = "नमस्ते।"
    result = IndicSentenceChunker(max_chunk_length=100).chunk_text(short_sentence)
    assert result == [short_sentence]


def test_indic_sentence_chunker_splits_long_text_on_indic_boundaries():
    """Text above the limit is split into multiple chunks, none over the limit."""
    limit = 30
    text = "यह वाक्य एक है। यह वाक्य दो है। यह वाक्य तीन है।"
    pieces = IndicSentenceChunker(max_chunk_length=limit).chunk_text(text)

    assert len(pieces) >= 2
    for piece in pieces:
        assert len(piece) <= limit
    # Content markers must survive the split
    assert any("एक" in piece for piece in pieces)
    assert any("दो" in piece for piece in pieces)

def test_detect_language_mix_basic():
    """detect_language_mix reports a primary language plus per-language entries."""
    mix = IndicSentenceChunker(max_chunk_length=100).detect_language_mix("Hello नमस्ते")
    assert mix["primary"] in ("hindi", "english", "mixed")
    assert sorted(mix.keys()) == sorted(["primary", "hindi", "telugu", "english"])


def test_crossfade_audio_reduces_total_length_by_overlap_samples():
    """Crossfading overlaps the seam, shrinking total samples by the overlap size."""
    n_samples, overlap = 100, 10
    first = _pcm_bytes(100, samples=n_samples)
    second = _pcm_bytes(200, samples=n_samples)

    mixed = crossfade_audio(first, second, crossfade_samples=overlap, sample_rate=1000)

    # int16 PCM -> 2 bytes per sample; total samples = n + n - overlap
    expected_samples = n_samples + n_samples - overlap
    assert len(mixed) == expected_samples * 2


def test_crossfade_audio_zero_overlap_is_concat():
    """With a zero-sample overlap the crossfade degenerates to plain concatenation."""
    head = _pcm_bytes(100, samples=10)
    tail = _pcm_bytes(200, samples=10)
    joined = crossfade_audio(head, tail, crossfade_samples=0, sample_rate=1000)
    assert joined == head + tail


