import subprocess
import json
import os
from datetime import datetime, timezone
from pathlib import Path

# Directory that receives every downloaded .wav file and its *_metadata.json.
OUT = Path("/home/ubuntu/modi")
# parents=True so a missing parent directory is created instead of raising
# FileNotFoundError; exist_ok=True keeps re-runs from failing.
OUT.mkdir(parents=True, exist_ok=True)

# Common prefix of every watch URL in URLS.
_WATCH_URL_PREFIX = "https://www.youtube.com/watch?v="

# Video IDs in download order; the order is significant because the 1-based
# position of each URL is used as its index when the list is enumerated.
_VIDEO_IDS = (
    "ERW9i1lwnBw",
    "rPf56BcBTdo",
    "xD64_6cgK64",
    "nnTOz3EHR7w",
    "UE2tlpaxbIA",
    "X5DLhrF6YPA",
    "Snlq1jwyOxg",
    "bcpxVRrI40Y",
    "9p75x0UZUCg",
    "hS7gTBmxMS8",
    "Lk17v9ljYh4",
    "QooiAFBuc9c",
    "ID0AbWp8Oks",
    "HOZ6-QlRak8",
    "DbKL7YAV0Vw",
    "UUgR3U0qhY0",
    "oYPtkzjF9fM",
    "VH2ZIZx29j8",
    "hEZ5uTcH5CI",
    "f3JvoR_-H6M",
    "q4-jfcIox8o",
    "MYms8GfcKsI",
    "Ytzlp3Umct8",
    "u2ZZ4c5rHD8",
    "7mjvqLfckWM",
    "0kMELqcLv4A",
    "GxfX6kZCSRU",
    "JdF1AOEYr-8",
    "JmLhbrr8w7c",
    "YzV_JANRm2M",
    "Fb2v5rlwtNg",
    "ux9hRMHvN-k",
    "hPyE079CAKY",
    "l3348bK9HnE",
    "0AUmYfkJhvo",
    "8PMOaqlfVc8",
    "uju2cVQ5jco",
    "OKDjyJUW_L0",
    "HN5HlLdMy0A",
    "FDUVR-QqYvQ",
    "_B340mkjiiI",
    "kxkLGWBlIIw",
    "Y5_wacGRA2o",
    "e1NmbRqoCnc",
    "O1OmvDy8FKk",
    "qaKVFwk6sU4",
    "WdprihSHmmQ",
    "tRdyASp4zQw",
    "x8aRnmiGMLE",
    "Yf7Krbu6Z8g",
    "reFo3Ex7XA0",
    "WC5TmwjMVXI",
    "YdI8JeVOcYE",
    "BtcqEVW2UNA",
    "44XU-2FIq9s",
    "K8zeo3ID6OY",
    "Nv1CQ2LhlyY",
    "H1ogjsNT23I",
    "UXJRtq2ZYrs",
    "p5ziBc4kT_0",
    "tc0JIvSpVGY",
    "pqozg9Tchc4",
    "qSCxH_pjQos",
    "OgafNvdpGHI",
    "EU6CfwqqEqk",
    "jhJGNRd4ZM4",
    "oEB5RnaDZmE",
    "KfSawaMJLWE",
    "nedl4B4foZ4",
    "lQzo-QhR6VY",
    "FjXoZTkkHNA",
    "gy-DJYDMaGo",
    "OCKQkllsWro",
    "AqQQPttZ8n4",
    "n8THtt-mQ4E",
    "1PLzwByBA-g",
    "LMeIlNfDClw",
    "AbhrpckFsoE",
    "_1LkUMfJzzU",
    "sPa6CoJsbAQ",
    "MQh7n9e6RPs",
    "8fSdjyyfkj4",
    "rzbwxsaMJEY",
    "AJO1iyrPI4M",
    "BF2gzqeGr3c",
    "aDrCj9Fp-a8",
    "SfnxbRL7-W8",
    "z7tNK92DSsU",
    "bBBz2xzKtf8",
    "kEy3gILUK6g",
    "5UJX9OabZk0",
    "yt7HSAzE-Ic",
    "RUZK2yElCts",
    "XDh4q3zKYEY",
    "x9UAD-0cpkI",
    "9jGwJH1H7lw",
    "Nj8OAEt4DKU",
    "dVW722GEb_o",
    "O-onSX28erM",
    "Akp_e0ScP2o",
    "9_WsmodRndQ",
    "3QsHg8Qp02U",
    "iwFDnf2heTo",
    "IS2KBjMUVy4",
    "iagwBs8KvtM",
    "eqsGRZfEZJE",
    "IRY0On1Gh3Y",
    "nNURyUAmcvI",
    "gWCJQBpYDRY",
    "3nZkxCsv-mY",
    "u-yHKClIFcM",
    "W8Fow2P92Tw",
    "7HeMRXIGWtE",
    "5d-21prdt1k",
    "_EXRUAOFCqg",
    "7xEF1hUdagU",
    "_gP1mPkVP6g",
    "crUbQyWPRp0",
    "HtAGgc8E4P8",
    "8_PMruGFw44",
    "ZFHa06SCwkY",
    "kpnFP7cKIKU",
    "3BwZz_X0Cxc",
    "0Lc_11QFxMM",
    "bEcYg8Q4aWs",
    "9g6XsVfL6aQ",
    "9M2Qlh1hbRI",
    "XcuZm4fEM6U",
    "dWIGbpKqMY4",
    "g2bNCHFcSlE",
    "-FuVIwO2PwQ",
)

# Full watch URLs, one per video ID, in the same order as _VIDEO_IDS.
URLS = [_WATCH_URL_PREFIX + video_id for video_id in _VIDEO_IDS]

def get_ffprobe_info(filepath):
    """Probe a media file with ffprobe and return its basic audio properties.

    Args:
        filepath: Path of the file to inspect; passed to ffprobe unchanged.

    Returns:
        On success, a dict with the keys ``audio_sample_rate`` (int),
        ``audio_channels`` (int), ``audio_duration_sec`` (float, rounded to
        two decimals) and ``audio_size_bytes`` (int). A field that ffprobe
        does not report is returned as 0.
        On any failure (ffprobe missing, timeout, non-zero exit, unparsable
        output, no audio stream) a dict with the single key ``error``.
        This function never raises.
    """
    try:
        result = subprocess.run(
            ["ffprobe", "-v", "quiet", "-print_format", "json", "-show_format", "-show_streams", filepath],
            capture_output=True, text=True, timeout=30
        )
        # A failed probe must be reported as an error rather than parsed:
        # otherwise whatever JSON was printed would be turned into all-zero
        # "properties" that look like a valid result.
        if result.returncode != 0:
            return {"error": f"ffprobe exited with code {result.returncode}"}

        info = json.loads(result.stdout)
        streams = info.get("streams") or []
        # Pick the first audio stream explicitly instead of trusting that
        # stream 0 is audio.
        stream = next((s for s in streams if s.get("codec_type") == "audio"), None)
        if stream is None:
            return {"error": "no audio stream found"}
        fmt = info.get("format") or {}
        return {
            "audio_sample_rate": int(stream.get("sample_rate") or 0),
            "audio_channels": int(stream.get("channels") or 0),
            "audio_duration_sec": round(float(fmt.get("duration") or 0), 2),
            "audio_size_bytes": int(fmt.get("size") or 0),
        }
    except Exception as e:
        # Deliberate best-effort boundary: probing is optional metadata, so
        # every failure is turned into an error dict for the caller.
        return {"error": str(e)}

def _stderr_tail(text):
    """Return the last non-blank line of a subprocess's stderr, or "" if none."""
    lines = [line.strip() for line in (text or "").splitlines() if line.strip()]
    return lines[-1] if lines else ""


def download_one(idx, url):
    """Download one video's audio as WAV and write its metadata JSON.

    Files are written into ``OUT`` as ``{idx:03d}_{video_id}.wav`` and
    ``{idx:03d}_{video_id}_metadata.json``. The metadata file is written last
    and doubles as the "already done" marker checked on later runs.

    Args:
        idx: 1-based position of the video; used in file names, progress
            output and the ``playlist_index`` metadata field.
        url: YouTube watch URL carrying the video ID in its ``v`` parameter.

    Returns:
        True if the video was downloaded or was already done, False if any
        step failed (a FAIL line is printed describing which one).
    """
    # Local import so this function does not rely on a file-level import.
    from urllib.parse import parse_qs, urlparse

    tag = f"[{idx}/{len(URLS)}]"

    # Parse the query string instead of splitting on "v=", so a URL without
    # a video ID is reported as a failure instead of raising IndexError.
    id_values = parse_qs(urlparse(url).query).get("v")
    if not id_values:
        print(f"  {tag} FAIL bad url (no video id): {url}")
        return False
    vid_id = id_values[0]

    prefix = f"{idx:03d}_{vid_id}"
    wav_path = OUT / f"{prefix}.wav"
    meta_path = OUT / f"{prefix}_metadata.json"

    if meta_path.exists():
        print(f"  {tag} SKIP (already done): {vid_id}")
        return True

    # Get YouTube metadata
    try:
        result = subprocess.run(
            ["yt-dlp", "--dump-json", "--no-download", url],
            capture_output=True, text=True, timeout=60
        )
        if result.returncode != 0:
            # Surface yt-dlp's own message rather than the opaque JSON
            # decode error that parsing its empty stdout would produce.
            raise RuntimeError(
                _stderr_tail(result.stderr) or f"yt-dlp exited with code {result.returncode}"
            )
        yt_info = json.loads(result.stdout)
    except Exception as e:
        print(f"  {tag} FAIL metadata: {vid_id} - {e}")
        return False

    # Download audio as wav. The %(ext)s template lets the downloaded source
    # keep its own extension, so only the extracted audio is named
    # "<prefix>.wav" (which equals wav_path).
    try:
        result = subprocess.run(
            ["yt-dlp", "-x", "--audio-format", "wav", "-o", str(OUT / f"{prefix}.%(ext)s"), url],
            capture_output=True, text=True, timeout=300
        )
        if result.returncode != 0:
            raise RuntimeError(
                _stderr_tail(result.stderr) or f"yt-dlp exited with code {result.returncode}"
            )
    except Exception as e:
        print(f"  {tag} FAIL download: {vid_id} - {e}")
        return False

    if not wav_path.exists():
        print(f"  {tag} FAIL no file: {vid_id}")
        return False

    # Get audio file properties
    audio_info = get_ffprobe_info(str(wav_path))
    if "error" in audio_info:
        # Best-effort: the download itself succeeded, so keep going, but do
        # not silently record null audio properties.
        print(f"  {tag} WARN ffprobe: {vid_id} - {audio_info['error']}")

    metadata = {
        "id": yt_info.get("id"),
        "url": url,
        "title": yt_info.get("title"),
        "channel": yt_info.get("channel"),
        "channel_id": yt_info.get("channel_id"),
        "upload_date": yt_info.get("upload_date"),
        "duration_sec": yt_info.get("duration"),
        "description": yt_info.get("description"),
        "view_count": yt_info.get("view_count"),
        "like_count": yt_info.get("like_count"),
        "tags": yt_info.get("tags", []),
        "categories": yt_info.get("categories", []),
        "language": yt_info.get("language"),
        "playlist_index": idx,
        "audio_file": f"{prefix}.wav",
        "audio_format": "wav",
        "audio_sample_rate": audio_info.get("audio_sample_rate"),
        "audio_channels": audio_info.get("audio_channels"),
        "audio_duration_sec": audio_info.get("audio_duration_sec"),
        "audio_size_bytes": audio_info.get("audio_size_bytes"),
        "downloaded_at": datetime.now(timezone.utc).isoformat(),
    }

    # Write to a temporary file and rename it into place: the metadata file
    # is the "already done" marker, so a half-written one must never be left
    # under its final name. UTF-8 is explicit because ensure_ascii=False
    # emits non-ASCII characters verbatim.
    tmp_path = meta_path.with_name(meta_path.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(metadata, f, indent=2, ensure_ascii=False)
    tmp_path.replace(meta_path)

    size_mb = wav_path.stat().st_size / 1024 / 1024
    # "or" rather than a .get() default: the title key may be present but None.
    title = yt_info.get("title") or vid_id
    print(f"  {tag} OK: {title[:60]} ({size_mb:.1f}MB, {yt_info.get('duration', '?')}s)")
    return True

from concurrent.futures import ThreadPoolExecutor, as_completed

# Number of downloads run concurrently (one worker thread per download).
WORKERS = 10

print(f"=== Downloading {len(URLS)} videos to {OUT} ({WORKERS} parallel) ===\n")

success = 0
fail = 0

with ThreadPoolExecutor(max_workers=WORKERS) as pool:
    # Map each future back to its 1-based index so failures can be attributed.
    futures = {pool.submit(download_one, idx, url): idx for idx, url in enumerate(URLS, 1)}
    for future in as_completed(futures):
        try:
            ok = future.result()
        except Exception as e:
            # future.result() re-raises anything the worker raised. Count it
            # as a failure so one unexpected error does not abort the tally
            # and the final summary for all the other downloads.
            print(f"  [{futures[future]}/{len(URLS)}] FAIL unexpected error: {e}")
            ok = False
        if ok:
            success += 1
        else:
            fail += 1

print(f"\n=== DONE: {success} ok, {fail} failed out of {len(URLS)} ===")
