#!/usr/bin/env python3
"""
Cloudflare R2 Storage Client for Worker

Handles uploading processed results (metadata.json + audio) to R2.
S3-compatible API with Cloudflare-specific optimizations.
"""

import base64
import hashlib
import json
import logging
import os
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional, Dict, Any

import boto3
from botocore.config import Config as BotoConfig
from botocore.exceptions import ClientError

logger = logging.getLogger("Worker.Storage")


@dataclass
class R2Config:
    """Connection settings for a Cloudflare R2 bucket.

    Attributes:
        access_key_id: R2 API token access key.
        secret_access_key: R2 API token secret.
        account_id: Cloudflare account id (determines the endpoint host).
        bucket: Target bucket name.
    """
    access_key_id: str
    secret_access_key: str
    account_id: str
    bucket: str = "diarization-output"

    @property
    def endpoint_url(self) -> str:
        """Account-scoped S3-compatible endpoint for Cloudflare R2."""
        return f"https://{self.account_id}.r2.cloudflarestorage.com"

    @classmethod
    def from_env(cls) -> "R2Config":
        """Build a config from R2_* environment variables.

        Missing credential variables default to empty strings; the bucket
        falls back to "diarization-output".
        """
        env = os.environ.get
        return cls(
            access_key_id=env("R2_ACCESS_ID", ""),
            secret_access_key=env("R2_SECRET_KEY", ""),
            account_id=env("R2_ACCOUNT_ID", ""),
            bucket=env("R2_BUCKET", "diarization-output"),
        )


class R2Client:
    """
    Cloudflare R2 client for uploading diarization results.

    Usage:
        client = R2Client.from_env()

        # Upload metadata
        client.upload_metadata(video_id, metadata_dict)

        # Upload audio file
        client.upload_file(video_id, local_path, "original.wav")

        # Upload entire result directory
        client.upload_result(video_id, local_dir)
    """

    # Files larger than this go through boto3's managed multipart upload.
    _MULTIPART_THRESHOLD = 100 * 1024 * 1024  # 100 MB

    def __init__(self, config: R2Config):
        self.config = config
        self._client = None  # boto3 S3 client, created lazily on first use

    @classmethod
    def from_env(cls) -> "R2Client":
        """Create client from environment variables (see R2Config.from_env)."""
        return cls(R2Config.from_env())

    @property
    def client(self):
        """Lazy-initialize the underlying boto3 S3 client."""
        if self._client is None:
            self._client = boto3.client(
                's3',
                endpoint_url=self.config.endpoint_url,
                aws_access_key_id=self.config.access_key_id,
                aws_secret_access_key=self.config.secret_access_key,
                config=BotoConfig(
                    signature_version='s3v4',
                    retries={'max_attempts': 3, 'mode': 'adaptive'}
                ),
                # R2 ignores regions; 'auto' is the value Cloudflare documents.
                region_name='auto'
            )
        return self._client

    def ensure_bucket(self):
        """Create the configured bucket if it doesn't exist (idempotent).

        Raises:
            ClientError: if creation fails for any reason other than the
                bucket already being owned by this account.
        """
        try:
            self.client.head_bucket(Bucket=self.config.bucket)
            logger.debug(f"Bucket {self.config.bucket} exists")
        except ClientError:
            try:
                self.client.create_bucket(Bucket=self.config.bucket)
                logger.info(f"Created bucket: {self.config.bucket}")
            except ClientError as e:
                # A concurrent/previous creation by us is fine; check the
                # structured error code instead of substring-matching str(e).
                if e.response.get('Error', {}).get('Code') != 'BucketAlreadyOwnedByYou':
                    raise

    def upload_metadata(self, video_id: str, metadata: Dict[str, Any]) -> str:
        """
        Upload metadata.json for a video.

        Note: mutates `metadata` in place by adding an '_uploaded_at' key.

        Args:
            video_id: YouTube video ID
            metadata: Diarization result dict

        Returns:
            R2 key path
        """
        key = f"{video_id}/metadata.json"

        # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated
        # and returns a naive datetime.
        metadata['_uploaded_at'] = datetime.now(timezone.utc).isoformat()

        body = json.dumps(metadata, indent=2, default=str)

        self.client.put_object(
            Bucket=self.config.bucket,
            Key=key,
            Body=body.encode('utf-8'),
            ContentType='application/json'
        )

        logger.info(f"📤 Uploaded metadata: {key}")
        return key

    @staticmethod
    def _content_type(filename: str) -> str:
        """Map a destination filename to its MIME type (octet-stream fallback)."""
        for suffix, ctype in (('.wav', 'audio/wav'),
                              ('.flac', 'audio/flac'),
                              ('.json', 'application/json')):
            if filename.endswith(suffix):
                return ctype
        return 'application/octet-stream'

    def upload_file(self, video_id: str, local_path: str, filename: Optional[str] = None) -> str:
        """
        Upload a file to R2.

        Args:
            video_id: YouTube video ID
            local_path: Local file path
            filename: Optional override for destination filename

        Returns:
            R2 key path

        Raises:
            FileNotFoundError: if local_path does not exist.
        """
        path = Path(local_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {local_path}")

        dest_filename = filename or path.name
        key = f"{video_id}/{dest_filename}"
        content_type = self._content_type(dest_filename)

        file_size = path.stat().st_size
        if file_size > self._MULTIPART_THRESHOLD:
            # boto3's managed upload chunks and checksums each part itself.
            logger.info(f"📤 Uploading large file ({file_size / 1024 / 1024:.1f} MB): {key}")
            self._multipart_upload(key, local_path, content_type)
        else:
            # MD5 integrity check for single-shot uploads only. (Previously
            # the digest was computed unconditionally, wasting a full read
            # of every >100 MB file on the multipart path where it was unused.)
            md5_hash = hashlib.md5()
            with open(path, 'rb') as f:
                for chunk in iter(lambda: f.read(8192), b''):
                    md5_hash.update(chunk)

            with open(path, 'rb') as f:
                self.client.put_object(
                    Bucket=self.config.bucket,
                    Key=key,
                    Body=f,
                    ContentType=content_type,
                    ContentMD5=base64.b64encode(md5_hash.digest()).decode()
                )
            logger.info(f"📤 Uploaded file: {key} ({file_size / 1024 / 1024:.1f} MB)")

        return key

    def _multipart_upload(self, key: str, local_path: str, content_type: str):
        """Upload a large file; boto3's managed transfer handles multipart."""
        self.client.upload_file(
            local_path,
            self.config.bucket,
            key,
            ExtraArgs={'ContentType': content_type}
        )

    def upload_result(self, video_id: str, local_dir: str) -> Dict[str, str]:
        """
        Upload all files from a result directory.

        Expected structure:
            local_dir/
            ├── metadata.json
            ├── {video_id}_original.wav
            └── (optional other files)

        Intermediate processing artifacts (*_16k.wav, *_trimmed.wav) are
        skipped. Individual upload failures are logged and skipped so one
        bad file doesn't abort the whole result.

        Args:
            video_id: YouTube video ID
            local_dir: Local directory containing results

        Returns:
            Dict mapping filename to R2 key

        Raises:
            FileNotFoundError: if local_dir does not exist.
        """
        uploaded: Dict[str, str] = {}
        dir_path = Path(local_dir)

        if not dir_path.exists():
            raise FileNotFoundError(f"Directory not found: {local_dir}")

        for file_path in dir_path.iterdir():
            if not file_path.is_file():
                continue
            # Skip intermediate processing files
            if file_path.name.endswith(('_16k.wav', '_trimmed.wav')):
                continue

            try:
                key = self.upload_file(video_id, str(file_path))
                uploaded[file_path.name] = key
            except Exception as e:
                # Best-effort: keep uploading the remaining files.
                logger.error(f"Failed to upload {file_path.name}: {e}")

        return uploaded

    def check_exists(self, video_id: str) -> bool:
        """Check if video has already been processed (metadata.json exists)."""
        try:
            self.client.head_object(
                Bucket=self.config.bucket,
                Key=f"{video_id}/metadata.json"
            )
            return True
        except ClientError:
            # 404 (missing) and access errors both mean "not usable here".
            return False

    def download_metadata(self, video_id: str) -> Optional[Dict[str, Any]]:
        """Download and parse metadata.json for a video; None if absent/invalid."""
        try:
            response = self.client.get_object(
                Bucket=self.config.bucket,
                Key=f"{video_id}/metadata.json"
            )
            return json.loads(response['Body'].read().decode('utf-8'))
        except (ClientError, ValueError):
            # ValueError covers json.JSONDecodeError for corrupt objects.
            return None

    def list_videos(self, prefix: str = "", max_keys: int = 1000) -> list:
        """List processed video IDs (top-level 'directory' prefixes).

        Args:
            prefix: Optional key prefix filter.
            max_keys: Page size passed to the paginator.

        Returns:
            List of video-id strings.
        """
        videos = []
        paginator = self.client.get_paginator('list_objects_v2')

        # Delimiter='/' collapses each video's objects into one CommonPrefix.
        for page in paginator.paginate(
            Bucket=self.config.bucket,
            Prefix=prefix,
            Delimiter='/',
            MaxKeys=max_keys
        ):
            for prefix_obj in page.get('CommonPrefixes', []):
                videos.append(prefix_obj['Prefix'].rstrip('/'))

        return videos


# Quick connectivity smoke test.
# Requires R2_ACCESS_ID / R2_SECRET_KEY / R2_ACCOUNT_ID (and optionally
# R2_BUCKET) in the environment.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # SECURITY FIX: credentials must come from the environment. A previous
    # revision hard-coded a live access key, secret key, and account id in
    # source — those keys should be considered leaked and rotated.
    client = R2Client.from_env()

    # List existing buckets to verify the credentials and endpoint work.
    try:
        response = client.client.list_buckets()
        print("Buckets:", [b['Name'] for b in response['Buckets']])
    except Exception as e:
        print(f"Error: {e}")















