from __future__ import annotations

import argparse
import asyncio
import sys
from pathlib import Path

import duckdb
from dotenv import load_dotenv

# Repository root; added to sys.path so the src.* imports below resolve,
# and used as the base for the .env file and the default parquet input.
ROOT = Path("/home/ubuntu/transcripts")
sys.path.insert(0, str(ROOT))
load_dotenv(ROOT / ".env")

# NOTE: these imports must stay below the sys.path insertion above.
from src.final_export_config import FinalExportConfig
from src.final_export_db import FinalExportPostgresDB


def parse_args() -> argparse.Namespace:
    """Parse CLI options for seeding the final export video queue."""
    default_input = (
        ROOT
        / "final_data"
        / "final_cleaned_segments_with_variants_rerouted_repetition_filtered.parquet"
    )
    parser = argparse.ArgumentParser(
        description="Seed final export video queue from canonical final parquet"
    )
    # Canonical final parquet to aggregate into queue rows.
    parser.add_argument("--input", type=Path, default=default_input)
    # Optional run id override, exported to the environment by main().
    parser.add_argument("--run-id", default=None)
    # 0 means "no limit" on the number of videos seeded.
    parser.add_argument("--limit-videos", type=int, default=0)
    # May be given multiple times to seed only specific videos.
    parser.add_argument("--video-id", action="append", default=[])
    parser.add_argument("--priority", type=int, default=0)
    return parser.parse_args()


def build_jobs(args: argparse.Namespace) -> list[dict]:
    """Aggregate the input parquet into one pending queue-job dict per video.

    Groups segments by ``video_id``, optionally filtered by ``--video-id``
    and capped by ``--limit-videos``, and returns rows shaped for
    ``FinalExportPostgresDB.seed_video_jobs``.

    Args:
        args: Parsed CLI namespace (``input``, ``video_id``, ``limit_videos``,
            ``priority``).

    Returns:
        One dict per video with status ``"pending"`` and segment counts.
    """
    video_filter = ""
    # First ? is the parquet path — passed as a bound parameter instead of
    # being interpolated into the SQL string (avoids quoting/injection issues).
    params: list[object] = [args.input.as_posix()]
    if args.video_id:
        placeholders = ", ".join("?" for _ in args.video_id)
        video_filter = f"WHERE video_id IN ({placeholders})"
        params.extend(args.video_id)
    # limit_videos is argparse-coerced to int, so f-string interpolation is safe.
    limit_sql = f"LIMIT {args.limit_videos}" if args.limit_videos > 0 else ""
    con = duckdb.connect()
    try:
        rows = con.execute(
            f"""
            SELECT
                video_id,
                count(*) AS total_segments,
                min(corrected_language) AS corrected_language
            FROM read_parquet(?)
            {video_filter}
            GROUP BY video_id
            ORDER BY video_id
            {limit_sql}
            """,
            params,
        ).fetchall()
    finally:
        # Previously leaked: close the in-memory connection even on query failure.
        con.close()
    return [
        {
            "video_id": str(video_id),
            "status": "pending",
            "priority": args.priority,
            "total_segments": int(total_segments or 0),
            "claimed_by": None,
            "claimed_at": None,
            "spooled_at": None,
            "completed_at": None,
            "attempt_count": 0,
            "error_message": None,
            "metadata_json": {
                "corrected_language": corrected_language,
                "source_input": args.input.as_posix(),
            },
        }
        for video_id, total_segments, corrected_language in rows
    ]


async def seed_jobs(jobs: list[dict]) -> None:
    """Connect to Postgres, ensure the schema exists, and insert *jobs*.

    Args:
        jobs: Queue-row dicts as produced by ``build_jobs``.
    """
    config = FinalExportConfig.from_env()
    db = FinalExportPostgresDB(config.database_url)
    await db.connect()
    try:
        await db.init_schema()
        await db.seed_video_jobs(jobs)
    finally:
        # Previously leaked on failure: always release the DB connection.
        await db.close()


def main() -> None:
    """CLI entry point: build queue rows from the parquet, then seed Postgres."""
    args = parse_args()
    if args.run_id:
        import os

        # Exported so FinalExportConfig.from_env() picks up the override.
        os.environ["FINAL_EXPORT_RUN_ID"] = args.run_id
    queue_rows = build_jobs(args)
    asyncio.run(seed_jobs(queue_rows))
    print(f"Seeded {len(queue_rows)} final export queue rows")


if __name__ == "__main__":  # script entry point
    main()
