#!/usr/bin/env python3
"""Find COMPLETED videos that have R2 source and local TAR files for validation."""

import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from dotenv import load_dotenv
load_dotenv()

from supabase import create_client
from src.r2_client import R2Client

# Supabase client: the URL and admin key are read from the environment
# (a missing variable raises KeyError right here).
supabase_url = os.environ['URL']
supabase_key = os.environ['SUPABASE_ADMIN']
sb = create_client(supabase_url, supabase_key)

# R2 client configured for the 'source' bucket type.
r2 = R2Client(bucket_type='source')

# Collect video IDs from the R2 source bucket.
#
# A single list_objects_v2 call returns only one page of keys, so every
# object past the first page would be silently ignored and the
# cross-reference below would miss videos. Follow the continuation token
# until the listing is exhausted.
# NOTE(review): assumes r2.client is an S3-compatible (boto3-style) client
# that reports IsTruncated / NextContinuationToken -- confirm in R2Client.
print("Getting videos from R2 source bucket...")
r2_prefix = 'podcasts/'
r2_suffix = '.webm'
list_kwargs = {'Bucket': r2.bucket, 'Prefix': r2_prefix, 'MaxKeys': 1000}

r2_ids = set()
while True:
    response = r2.client.list_objects_v2(**list_kwargs)
    for obj in response.get('Contents', []):
        key = obj['Key']
        if key.startswith(r2_prefix) and key.endswith(r2_suffix):
            # Slice off exactly the leading prefix and trailing extension;
            # str.replace would also remove matches in the middle of a key.
            r2_ids.add(key[len(r2_prefix):-len(r2_suffix)])

    next_token = response.get('NextContinuationToken')
    if not response.get('IsTruncated') or not next_token:
        break
    list_kwargs['ContinuationToken'] = next_token

print(f"Found {len(r2_ids)} videos in R2")

# Fetch up to 100 rows from the 'videos' table whose status is COMPLETED,
# ordered by usable_percentage descending.
columns = (
    'youtube_id, title, source_duration_min, usable_percentage, num_speakers, audio_duration_sec'
)
query = sb.table('videos').select(columns)
query = query.eq('status', 'COMPLETED')
query = query.order('usable_percentage', desc=True)
result = query.limit(100).execute()
videos = result.data or []

# Keep only the rows whose youtube_id also appeared in the R2 listing.
good_videos = [row for row in videos if row['youtube_id'] in r2_ids]

print(f"\nCOMPLETED videos with R2 source: {len(good_videos)}")

# Report COMPLETED videos with an R2 source that also have a local TAR file.
print("\n" + "=" * 80)
print("VIDEOS READY FOR VALIDATION (COMPLETED + R2 source + local TAR)")
print("=" * 80)

# Cap the number of rows printed, not the number of candidates examined:
# slicing good_videos before the TAR check could report "none found" even
# though a lower-ranked video has its TAR on disk.
max_listed = 20
valid_count = 0
for v in good_videos:
    if valid_count >= max_listed:
        break
    vid = v['youtube_id']
    tar_path = f"./output/{vid}.tar"
    try:
        # Stat once instead of exists() + getsize(), so a file removed in
        # between cannot raise; a missing or unreadable TAR is skipped.
        size_mb = os.path.getsize(tar_path) / (1024 * 1024)
    except OSError:
        continue
    # `or` fallbacks cover columns that are present but NULL in the row.
    dur = v.get('source_duration_min') or 0
    pct = v.get('usable_percentage') or 0
    spk = v.get('num_speakers') or 0
    title = (v.get('title') or '')[:45]
    print(f"  {vid} | {dur:5.1f}min | {pct:4.0f}% | {spk:2}spk | {size_mb:6.1f}MB | {title}")
    valid_count += 1

if valid_count == 0:
    print("  No videos found with all criteria met")
else:
    print(f"\nTotal: {valid_count} videos ready for manual validation")

# Also list the TAR files present locally, largest first.
print("\n" + "=" * 80)
print("ALL LOCAL TAR FILES (top 10 by size)")
print("=" * 80)

output_dir = './output'
tar_files = []
# isdir rather than exists: a regular file named 'output' would pass an
# exists() check and then make os.listdir raise NotADirectoryError.
if os.path.isdir(output_dir):
    # (file name, size in bytes) for every *.tar entry in the directory.
    tar_files = [
        (name, os.path.getsize(os.path.join(output_dir, name)))
        for name in os.listdir(output_dir)
        if name.endswith('.tar')
    ]

# Descending by size; the sort is stable, so equal sizes keep listing order.
tar_files.sort(key=lambda entry: entry[1], reverse=True)
for name, size in tar_files[:10]:
    print(f"  {name}: {size/(1024*1024):.1f}MB")
