o
    ~i                     @   s4  d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
 ddlZddlmZ ddlmZmZmZmZmZ G dd	 d	Z		
	
ddede	e dedede
eef f
ddZedkrdZzee\ZZede  edee   W dS  e y Z! zede!  W Y dZ![!dS dZ![!ww dS )z
R2 Cloud Storage module for downloading tar files containing audio segments.
Handles searching, downloading, and extracting audio data from Cloudflare R2.
    N)Path)OptionalTuple)Config   )R2_ENDPOINT_URL	R2_BUCKETR2_ACCESS_KEY_IDR2_SECRET_ACCESS_KEYDEFAULT_SETTINGSc                   @   sh   e Zd ZdZdd Zddededee fdd	Zd
ededefddZ	dedede
eef fddZdS )R2StorageClientz-Client for interacting with R2 cloud storage.c                 C   s&   t jdttttddd| _t| _d S )Ns3s3v4)signature_version)endpoint_urlaws_access_key_idaws_secret_access_keyconfig)	boto3clientr   r	   r
   r   	s3_clientr   bucket)self r   7/home/ubuntu/maya3_transcribe/src/backend/r2_storage.py__init__   s   
zR2StorageClient.__init__Fvideo_id	use_regexreturnc           	      C   s  | j d}| d}z| j j| j|d td|  |W S    Y |j| j|dD ]!}|dg D ]}|d }|drKtd|  |    S q3q+|rt	d	t
| d
tj}|j| jdD ]!}|dg D ]}|d }||rtd|  |    S qlqdtd|  dS )aD  
        Find a tar file for the given video ID in R2.
        
        Args:
            video_id: The YouTube video ID to search for
            use_regex: If True, use regex matching (fallback for non-exact matches)
            
        Returns:
            The S3 key of the found tar file, or None if not found
        list_objects_v2z.tar)BucketKeyz[R2] Found exact match: )r    PrefixContentsr!   z![R2] Found tar file with prefix: z.*z.*\.tar$)r    z[R2] Found via regex: z%[R2] No tar file found for video_id: N)r   get_paginatorhead_objectr   printpaginategetendswithrecompileescape
IGNORECASEmatch)	r   r   r   	paginator	exact_keypageobjkeypatternr   r   r   find_tar_file"   s8   


zR2StorageClient.find_tar_files3_key
local_pathc                 C   sv   t j|dd t j|}t j||}td| d| d | j| j|| tdt j	|d d dd	 |S )
a  
        Download a tar file from R2 to local storage.
        
        Args:
            s3_key: The S3 key of the tar file
            local_path: Local directory to download to
            
        Returns:
            Path to the downloaded tar file
        Texist_okz[R2] Downloading z to ...z[R2] Download complete: i   z.2fz MB)
osmakedirspathbasenamejoinr&   r   download_filer   getsize)r   r6   r7   filename
local_filer   r   r   download_tarN   s   "zR2StorageClient.download_tartar_path
extract_toc                 C   s  t j|dd td| d t|d}|| W d   n1 s%w   Y  d}d}t |D ]\}}}d|v rCt j|d}d|v rNt j|d}q3i }	|r|t|d}
t	
|
}	W d   n1 shw   Y  td	t|	dg  d
 ntd |rt j|rtt|d}tdt| d ||	fS td tt|d}|rt|d j}td|  ||	fS )a  
        Extract a tar file and return paths to metadata and segments.
        
        Args:
            tar_path: Path to the tar file
            extract_to: Directory to extract to
            
        Returns:
            Tuple of (segments_dir, metadata_dict)
        Tr8   z[R2] Extracting r:   rNzmetadata.jsonsegmentsz[R2] Loaded metadata: z segments definedz%[R2] Warning: metadata.json not foundz*.flacz[R2] Found z FLAC segment filesz*[R2] Warning: segments directory not foundz	**/*.flacr   z[R2] Found segments at: )r;   r<   r&   tarfileopen
extractallwalkr=   r?   jsonloadlenr(   existslistr   globstrparent)r   rE   rF   tarmetadata_pathsegments_dirrootdirsfilesmetadataf
flac_filesr   r   r   extract_tarc   s<   	zR2StorageClient.extract_tarN)F)__name__
__module____qualname____doc__r   rS   boolr   r5   rD   r   dictr^   r   r   r   r   r      s    
,"r   Tr   work_diruse_regex_fallbackcleanup_tarr   c                 C   s   |du rt d }tj|| }tj|d}tj|d}tj|r(t| t }|j| |d}|s;t	d|  |
||}	||	|\}
}|rZtj|	rZt|	 td |
|fS )a  
    Main function to download and extract audio segments for a video.
    
    Args:
        video_id: YouTube video ID
        work_dir: Working directory for downloads (default from config)
        use_regex_fallback: Try regex matching if exact match fails
        cleanup_tar: Remove tar file after extraction
        
    Returns:
        Tuple of (segments_directory, metadata_dict)
    Nre   download	extracted)r   z No tar file found for video_id: z[R2] Cleaned up tar file)r   r;   r=   r?   rP   shutilrmtreer   r5   FileNotFoundErrorrD   r^   remover&   )r   re   rf   rg   video_work_dirdownload_dirextract_dirr   r6   rE   rW   r[   r   r   r   download_video_segments   s"   

rq   __main__pF_BQpHaIdUz
Segments directory: zMetadata keys: zError: )NTT)"rb   r;   r*   rI   rM   rj   pathlibr   typingr   r   r   botocore.configr   r   r   r   r	   r
   r   r   rS   rc   rd   rq   r_   test_video_idrW   r[   r&   rQ   keys	Exceptioner   r   r   r   <module>   sL     

2