#!/usr/bin/env python3
"""
R2 Client Utilities for High-Throughput Pipeline Migration

Manages Cloudflare R2 buckets for:
- Production outputs (1-cleaned-data)
- Test outputs during migration (1-cleaned-data-test)
- Baseline snapshots for comparison (1-cleaned-data-baseline)

Usage:
    from src.r2_client import R2Client

    client = R2Client()
    client.upload_json({"title": "example"}, "video_id/metadata.json")
    data = client.download_json("video_id/metadata.json")
"""

import os
import json
import logging
from pathlib import Path
from typing import Dict, Any, Optional, List
from dataclasses import dataclass

import boto3
from botocore.config import Config as BotoConfig

logger = logging.getLogger("R2Client")


@dataclass
class R2Config:
    """R2 bucket configuration, typically populated from R2_* env vars."""
    # S3-compatible endpoint URL for the Cloudflare R2 account
    endpoint_url: str
    # R2 API token credentials (S3-style access key pair)
    access_key_id: str
    secret_access_key: str
    # Default bucket name; R2Client may override it via bucket_type
    bucket: str
    # Optional key prefix prepended to every remote key ("" = no prefix)
    prefix: str = ""


# Bucket configurations: maps a logical bucket_type (as accepted by
# R2Client.__init__) to the actual R2 bucket name.
BUCKETS = {
    'production': '1-cleaned-data',
    'test': '1-cleaned-data-test',
    'baseline': '1-cleaned-data-baseline',
    # Source/original videos (DO NOT DELETE). This bucket is expected to contain
    # the original YouTube files re-uploaded to R2 to avoid rate limits.
    # NOTE(review): 'source' maps to bucket 'test', which looks like a
    # placeholder and conflicts with the "DO NOT DELETE" intent — confirm
    # the real source bucket name before relying on this entry.
    'source': 'test',
}


def get_r2_config_from_env() -> R2Config:
    """Build an R2Config from the R2_* environment variables.

    Missing credentials default to empty strings; the bucket defaults
    to 'test' and the prefix to "" (no prefix).
    """
    env = os.environ.get
    return R2Config(
        endpoint_url=env('R2_ENDPOINT_URL', ''),
        access_key_id=env('R2_ACCESS_KEY_ID', ''),
        secret_access_key=env('R2_SECRET_ACCESS_KEY', ''),
        bucket=env('R2_BUCKET', 'test'),
        prefix=env('R2_PREFIX', ''),
    )


class R2Client:
    """
    Cloudflare R2 client for pipeline data management.

    Supports multiple bucket targets for A/B testing during migration.
    The underlying boto3 S3 client is created lazily on first access, so
    constructing an R2Client performs no network or credential work.
    """

    def __init__(
        self,
        config: Optional[R2Config] = None,
        bucket_type: str = 'test'
    ):
        """
        Initialize R2 client.

        Args:
            config: R2 configuration (loads from env if None)
            bucket_type: One of 'production', 'test', 'baseline', 'source'
                (see BUCKETS). Any other value falls back to config.bucket.
        """
        self.config = config or get_r2_config_from_env()

        # A recognized bucket_type overrides the configured bucket;
        # unknown types fall back to the config value (R2_BUCKET).
        self.bucket = BUCKETS.get(bucket_type, self.config.bucket)

        self.prefix = self.config.prefix

        # Created lazily in the `client` property.
        self._client = None

    @property
    def client(self):
        """Lazy-initialize and cache the boto3 S3 client."""
        if self._client is None:
            self._client = boto3.client(
                's3',
                endpoint_url=self.config.endpoint_url,
                aws_access_key_id=self.config.access_key_id,
                aws_secret_access_key=self.config.secret_access_key,
                config=BotoConfig(
                    # R2 requires SigV4; adaptive retry mode backs off
                    # automatically on throttling/transient errors.
                    signature_version='s3v4',
                    retries={'max_attempts': 3, 'mode': 'adaptive'}
                )
            )
        return self._client

    def _full_key(self, key: str) -> str:
        """Return the full object key, joined with the configured prefix."""
        if self.prefix:
            return f"{self.prefix}/{key}"
        return key

    def upload_file(self, local_path: str, remote_key: str) -> bool:
        """
        Upload a file to R2.

        Args:
            local_path: Path to local file
            remote_key: Remote key (path in bucket)

        Returns:
            True if successful
        """
        try:
            full_key = self._full_key(remote_key)
            self.client.upload_file(local_path, self.bucket, full_key)
            logger.info("Uploaded %s -> s3://%s/%s", local_path, self.bucket, full_key)
            return True
        except Exception as e:
            # Best-effort API: callers branch on the bool rather than catch.
            logger.error("Upload failed: %s", e)
            return False

    def download_file(self, remote_key: str, local_path: str) -> bool:
        """
        Download a file from R2.

        Args:
            remote_key: Remote key (path in bucket)
            local_path: Path to save locally

        Returns:
            True if successful
        """
        try:
            full_key = self._full_key(remote_key)
            # Ensure the destination directory exists before writing.
            Path(local_path).parent.mkdir(parents=True, exist_ok=True)
            self.client.download_file(self.bucket, full_key, local_path)
            logger.info("Downloaded s3://%s/%s -> %s", self.bucket, full_key, local_path)
            return True
        except Exception as e:
            logger.error("Download failed: %s", e)
            return False

    def upload_json(self, data: Dict[str, Any], remote_key: str) -> bool:
        """Upload JSON data directly to R2 (no intermediate local file)."""
        try:
            full_key = self._full_key(remote_key)
            body = json.dumps(data, indent=2).encode('utf-8')
            self.client.put_object(
                Bucket=self.bucket,
                Key=full_key,
                Body=body,
                ContentType='application/json'
            )
            logger.info("Uploaded JSON -> s3://%s/%s", self.bucket, full_key)
            return True
        except Exception as e:
            logger.error("JSON upload failed: %s", e)
            return False

    def download_json(self, remote_key: str) -> Optional[Dict[str, Any]]:
        """Download and parse JSON data from R2; None on any failure."""
        try:
            full_key = self._full_key(remote_key)
            response = self.client.get_object(Bucket=self.bucket, Key=full_key)
            data = json.loads(response['Body'].read().decode('utf-8'))
            logger.info("Downloaded JSON <- s3://%s/%s", self.bucket, full_key)
            return data
        except Exception as e:
            logger.error("JSON download failed: %s", e)
            return None

    def head_object(self, remote_key: str) -> Optional[Dict[str, Any]]:
        """Return S3 head_object response for a key, or None if missing/unreadable."""
        try:
            full_key = self._full_key(remote_key)
            return self.client.head_object(Bucket=self.bucket, Key=full_key)
        except Exception:
            # Missing key surfaces as a client error; treat any failure as absent.
            return None

    def generate_presigned_get_url(self, remote_key: str, expires_in_sec: int = 3600) -> str:
        """
        Generate a presigned GET URL for an object.

        Note: This is a Class B request pattern (no bucket listing).
        """
        full_key = self._full_key(remote_key)
        return self.client.generate_presigned_url(
            'get_object',
            Params={'Bucket': self.bucket, 'Key': full_key},
            ExpiresIn=expires_in_sec,
        )

    def list_keys(self, prefix: str = "", limit: int = 1000) -> List[str]:
        """
        List keys in bucket with optional prefix.

        Args:
            prefix: Additional prefix to filter by
            limit: Maximum keys to return (single page; no pagination)

        Returns:
            List of keys, with the configured client prefix stripped
        """
        try:
            full_prefix = self._full_key(prefix) if prefix else self.prefix
            response = self.client.list_objects_v2(
                Bucket=self.bucket,
                Prefix=full_prefix,
                MaxKeys=limit
            )

            keys = []
            for obj in response.get('Contents', []):
                key = obj['Key']
                # Remove the client-level prefix for cleaner output
                if self.prefix and key.startswith(self.prefix):
                    key = key[len(self.prefix):].lstrip('/')
                keys.append(key)

            return keys
        except Exception as e:
            logger.error("List failed: %s", e)
            return []

    def exists(self, remote_key: str) -> bool:
        """Check if a key exists in the bucket."""
        # Fix: previously used a bare `except:` around a duplicate HEAD call,
        # which also swallowed KeyboardInterrupt/SystemExit. head_object
        # already converts any failure into None.
        return self.head_object(remote_key) is not None

    def delete(self, remote_key: str) -> bool:
        """Delete a key from the bucket (only permitted on '1-cleaned-data')."""
        try:
            # Safety guard: user explicitly forbids deletions from any bucket other
            # than 1-cleaned-data. Keep this invariant even if callers misuse API.
            if self.bucket != '1-cleaned-data':
                raise PermissionError(
                    f"Refusing to delete from bucket '{self.bucket}'. "
                    f"Only '1-cleaned-data' deletions are permitted."
                )
            full_key = self._full_key(remote_key)
            self.client.delete_object(Bucket=self.bucket, Key=full_key)
            logger.info("Deleted s3://%s/%s", self.bucket, full_key)
            return True
        except Exception as e:
            # The PermissionError guard above is also caught here and
            # reported as a failed delete (returns False), by design.
            logger.error("Delete failed: %s", e)
            return False


def copy_to_baseline(video_ids: List[str], source_type: str = 'production') -> int:
    """
    Copy video outputs from source bucket to baseline bucket.

    Args:
        video_ids: List of video IDs to copy
        source_type: Source bucket type ('production' or 'test')

    Returns:
        Number of successfully copied videos
    """
    src_client = R2Client(bucket_type=source_type)
    dst_client = R2Client(bucket_type='baseline')

    copied_count = 0
    for video_id in video_ids:
        # Each video's metadata.json is fetched and re-uploaded verbatim.
        key = f"{video_id}/metadata.json"
        payload = src_client.download_json(key)
        # An empty/missing payload or a failed upload counts as a failure.
        if not payload or not dst_client.upload_json(payload, key):
            logger.warning(f"Failed to copy {video_id}")
            continue
        copied_count += 1
        logger.info(f"Copied {video_id} to baseline")

    return copied_count


if __name__ == "__main__":
    # Quick smoke test: connect to the test bucket and list a few keys.
    # (Removed an unused `import sys` that served no purpose here.)
    print("R2 Client Test")
    print("-" * 40)

    client = R2Client(bucket_type='test')

    print(f"Bucket: {client.bucket}")
    print(f"Prefix: {client.prefix}")

    keys = client.list_keys(limit=5)
    print(f"Sample keys: {keys[:5]}")