"""
R2 Cloud Storage module for downloading tar files containing audio segments.
Handles searching, downloading, and extracting audio data from Cloudflare R2.
"""
import os
import re
import tarfile
import json
import shutil
from pathlib import Path
from typing import Optional, Tuple
import boto3
from botocore.config import Config

from config import (
    R2_ENDPOINT_URL, R2_BUCKET, R2_ACCESS_KEY_ID, R2_SECRET_ACCESS_KEY,
    DEFAULT_SETTINGS
)


class R2StorageClient:
    """Client for interacting with R2 cloud storage via the S3-compatible API."""

    def __init__(self):
        """Create a boto3 S3 client pointed at the configured R2 endpoint."""
        # R2 requires SigV4 request signing.
        self.s3_client = boto3.client(
            's3',
            endpoint_url=R2_ENDPOINT_URL,
            aws_access_key_id=R2_ACCESS_KEY_ID,
            aws_secret_access_key=R2_SECRET_ACCESS_KEY,
            config=Config(signature_version='s3v4')
        )
        self.bucket = R2_BUCKET

    def find_tar_file(self, video_id: str, use_regex: bool = False) -> Optional[str]:
        """
        Find a tar file for the given video ID in R2.

        Search order: exact key "<video_id>.tar" (single HEAD request), then
        any ".tar" key with video_id as prefix, then (optionally) a
        case-insensitive substring regex scan over the whole bucket.

        Args:
            video_id: The YouTube video ID to search for
            use_regex: If True, use regex matching (fallback for non-exact matches)

        Returns:
            The S3 key of the found tar file, or None if not found
        """
        paginator = self.s3_client.get_paginator('list_objects_v2')

        # Try exact prefix match first -- cheapest possible check.
        exact_key = f"{video_id}.tar"
        try:
            self.s3_client.head_object(Bucket=self.bucket, Key=exact_key)
            print(f"[R2] Found exact match: {exact_key}")
            return exact_key
        except Exception:
            # head_object raises ClientError (404) when the key is absent;
            # fall through to the prefix search. Catch Exception rather than
            # a bare `except:` so KeyboardInterrupt/SystemExit still propagate.
            pass

        # Search with video_id as prefix
        for page in paginator.paginate(Bucket=self.bucket, Prefix=video_id):
            for obj in page.get('Contents', []):
                key = obj['Key']
                if key.endswith('.tar'):
                    print(f"[R2] Found tar file with prefix: {key}")
                    return key

        # Fallback: regex search if enabled. Note this lists every object in
        # the bucket, so it is only attempted on request.
        if use_regex:
            pattern = re.compile(rf".*{re.escape(video_id)}.*\.tar$", re.IGNORECASE)
            for page in paginator.paginate(Bucket=self.bucket):
                for obj in page.get('Contents', []):
                    key = obj['Key']
                    if pattern.match(key):
                        print(f"[R2] Found via regex: {key}")
                        return key

        print(f"[R2] No tar file found for video_id: {video_id}")
        return None

    def download_tar(self, s3_key: str, local_path: str) -> str:
        """
        Download a tar file from R2 to local storage.

        Args:
            s3_key: The S3 key of the tar file
            local_path: Local directory to download to (created if missing)

        Returns:
            Path to the downloaded tar file
        """
        os.makedirs(local_path, exist_ok=True)
        filename = os.path.basename(s3_key)
        local_file = os.path.join(local_path, filename)

        print(f"[R2] Downloading {s3_key} to {local_file}...")
        self.s3_client.download_file(self.bucket, s3_key, local_file)
        print(f"[R2] Download complete: {os.path.getsize(local_file) / 1024 / 1024:.2f} MB")

        return local_file

    def extract_tar(self, tar_path: str, extract_to: str) -> Tuple[Optional[str], dict]:
        """
        Extract a tar file and return paths to metadata and segments.

        Args:
            tar_path: Path to the tar file
            extract_to: Directory to extract to (created if missing)

        Returns:
            Tuple of (segments_dir, metadata_dict). segments_dir is None when
            neither a "segments" directory nor any FLAC file is found;
            metadata_dict is {} when metadata.json is missing.
        """
        os.makedirs(extract_to, exist_ok=True)

        print(f"[R2] Extracting {tar_path}...")
        with tarfile.open(tar_path, 'r') as tar:
            if hasattr(tarfile, 'data_filter'):
                # Python 3.12+ (PEP 706): reject absolute paths, "../"
                # traversal and special files coming from the archive.
                tar.extractall(extract_to, filter='data')
            else:
                tar.extractall(extract_to)

        # Locate metadata.json and the segments/ directory; take the first
        # (shallowest) match and stop walking once both are found.
        metadata_path = None
        segments_dir = None

        for root, dirs, files in os.walk(extract_to):
            if metadata_path is None and 'metadata.json' in files:
                metadata_path = os.path.join(root, 'metadata.json')
            if segments_dir is None and 'segments' in dirs:
                segments_dir = os.path.join(root, 'segments')
            if metadata_path and segments_dir:
                break

        # Load metadata
        metadata = {}
        if metadata_path:
            with open(metadata_path, 'r') as f:
                metadata = json.load(f)
            print(f"[R2] Loaded metadata: {len(metadata.get('segments', []))} segments defined")
        else:
            print("[R2] Warning: metadata.json not found")

        if segments_dir and os.path.exists(segments_dir):
            flac_files = list(Path(segments_dir).glob('*.flac'))
            print(f"[R2] Found {len(flac_files)} FLAC segment files")
        else:
            print("[R2] Warning: segments directory not found")
            # Fall back to locating FLAC files anywhere under the extract root.
            flac_files = list(Path(extract_to).glob('**/*.flac'))
            if flac_files:
                segments_dir = str(flac_files[0].parent)
                print(f"[R2] Found segments at: {segments_dir}")

        return segments_dir, metadata


def download_video_segments(
    video_id: str,
    work_dir: Optional[str] = None,
    use_regex_fallback: bool = True,
    cleanup_tar: bool = True
) -> Tuple[str, dict]:
    """
    Download and extract the audio segments for a single video.

    Locates the video's tar archive in R2, downloads it into a per-video
    working directory, unpacks it, and optionally deletes the archive.

    Args:
        video_id: YouTube video ID
        work_dir: Working directory for downloads (default from config)
        use_regex_fallback: Try regex matching if exact match fails
        cleanup_tar: Remove tar file after extraction

    Returns:
        Tuple of (segments_directory, metadata_dict)

    Raises:
        FileNotFoundError: if no tar archive exists for this video in R2
    """
    base_dir = DEFAULT_SETTINGS["work_dir"] if work_dir is None else work_dir
    root = os.path.join(base_dir, video_id)

    # Start from a clean slate: drop leftovers from any previous run.
    if os.path.exists(root):
        shutil.rmtree(root)

    storage = R2StorageClient()

    key = storage.find_tar_file(video_id, use_regex=use_regex_fallback)
    if not key:
        raise FileNotFoundError(f"No tar file found for video_id: {video_id}")

    archive = storage.download_tar(key, os.path.join(root, "download"))
    segments_dir, metadata = storage.extract_tar(archive, os.path.join(root, "extracted"))

    # Discard the archive once its contents are on disk, if requested.
    if cleanup_tar and os.path.exists(archive):
        os.remove(archive)
        print("[R2] Cleaned up tar file")

    return segments_dir, metadata


if __name__ == "__main__":
    # Smoke-test the download pipeline against a known sample video.
    test_video_id = "pF_BQpHaIdU"
    try:
        seg_dir, meta = download_video_segments(test_video_id)
    except Exception as exc:
        print(f"Error: {exc}")
    else:
        print(f"\nSegments directory: {seg_dir}")
        print(f"Metadata keys: {list(meta.keys())}")
