from __future__ import annotations

import gzip
import json

from validations.config import ValidationConfig
from validations.recover_replay_ledger import (
    REPLAY_STATUS_EXTRA_NO_TX,
    REPLAY_STATUS_VALIDATED_EXISTING,
    REPLAY_STATUS_VALIDATED_NEW,
    ReplayLedgerWriter,
    build_replay_ledger_payload,
)


def test_build_replay_ledger_payload_classifies_segments():
    """Check per-segment replay statuses and summary counts in the payload.

    Three segments are replayed for one video: one listed in
    ``validated_segment_ids``, one listed in ``matched_tx_ids``, and one
    listed in ``extra_regen_ids`` that has no row in ``tx_rows``. The test
    expects each to receive a distinct replay status and each summary
    counter to equal one.
    """
    existing_row = {
        "segment_file": "seg-existing",
        "detected_language": "hi",
        "transcription": "old",
        "tagged": "old-tagged",
        "quality_score": 0.9,
    }
    new_row = {
        "segment_file": "seg-new",
        "expected_language_hint": "te",
        "transcription": "new",
        "tagged": "new-tagged",
        "quality_score": 0.8,
    }
    flags = {"timeout": 1, "error": 0, "rate_limited": 0, "flagged_total": 1}

    payload = build_replay_ledger_payload(
        video_id="vid-a",
        tx_rows=[existing_row, new_row],
        replayed_segment_ids=["seg-new", "extra-1", "seg-existing"],
        matched_tx_ids=["seg-new"],
        validated_segment_ids={"seg-existing"},
        extra_regen_ids=["extra-1"],
        flag_summary=flags,
        worker_id="worker-test",
    )

    # Index the ledger entries by segment so each one can be checked by name.
    entries_by_segment = {}
    for ledger_entry in payload["entries"]:
        entries_by_segment[ledger_entry["segment_file"]] = ledger_entry

    expected_statuses = (
        ("seg-existing", REPLAY_STATUS_VALIDATED_EXISTING),
        ("seg-new", REPLAY_STATUS_VALIDATED_NEW),
        ("extra-1", REPLAY_STATUS_EXTRA_NO_TX),
    )
    for segment_id, expected_status in expected_statuses:
        assert entries_by_segment[segment_id]["replay_status"] == expected_status

    # Exactly one segment falls into each category.
    summary_keys = (
        "validated_existing_segments",
        "validated_new_segments",
        "extra_no_tx_segments",
    )
    for summary_key in summary_keys:
        assert payload["summary"][summary_key] == 1


def test_replay_ledger_writer_uploads_gzip_json():
    """Check that ``ReplayLedgerWriter.upload`` stores a gzip-compressed JSON object.

    The writer's ``_s3`` client is replaced with an in-memory stand-in that
    records every ``put_object`` call, so the test can inspect the bucket,
    key, content encoding and body without touching real storage.
    """

    class RecordingS3Client:
        """Stand-in client that captures the keyword arguments of each put."""

        def __init__(self):
            self.calls = []

        def put_object(self, **kwargs):
            self.calls.append(kwargs)

    expected_key = "replay-ledgers/vid-a.json.gz"
    ledger_payload = {"video_id": "vid-a", "entries": [{"segment_file": "seg-1"}]}

    writer = ReplayLedgerWriter(
        ValidationConfig(
            mock_mode=False,
            r2_endpoint_url="https://example.invalid",
            r2_access_key_id="key",
            r2_secret_access_key="secret",
            r2_bucket_output="validation-results",
            recover_replay_ledger_prefix="replay-ledgers",
        )
    )
    # Swap in the recording client so no network request is attempted.
    writer._s3 = RecordingS3Client()

    artifact = writer.upload("vid-a", ledger_payload)

    assert artifact.key == expected_key

    recorded_calls = writer._s3.calls
    assert len(recorded_calls) == 1

    put_kwargs = recorded_calls[0]
    assert put_kwargs["Bucket"] == "validation-results"
    assert put_kwargs["Key"] == expected_key
    assert put_kwargs["ContentEncoding"] == "gzip"

    # The body must round-trip: gunzip, decode as UTF-8, then parse as JSON.
    raw_json = gzip.decompress(put_kwargs["Body"]).decode("utf-8")
    round_tripped = json.loads(raw_json)
    assert round_tripped["video_id"] == "vid-a"
