o
    5i-                     @   s   d Z ddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ eG dd dZG d	d
 d
Zdede	fddZedkrrddlZeejdkrtejd ZeeZed e D ]\ZZede de  qcdS dS dS )aV  
Audio processing module for handling segment duration and chunking.

Splitting strategy: VAD-aware energy-based cutting.
When segments >max_duration_sec, finds the lowest-energy point in a window
around the target split time (8-13s) to avoid cutting mid-speech.
Sequential chunks track their original segment + cut points for later merging.
    N)Path)ListDictOptional)	dataclass)AudioSegmentc                   @   s\   e Zd ZU dZeed< eed< eed< eed< eed< eed< eed< ed	e	fd
dZ
dS )
AudioChunkz2Represents an audio chunk ready for transcription.	file_pathoriginal_segmentchunk_indextotal_chunksstart_msend_msduration_secreturnc                 C   s
   | j dkS )N   )r   )self r   </home/ubuntu/maya3_transcribe/src/backend/audio_processor.pyis_split   s   
zAudioChunk.is_splitN)__name__
__module____qualname____doc__str__annotations__intfloatpropertyboolr   r   r   r   r   r      s   
 r   c                   @   s   e Zd ZdZ				d$dededed	efd
dZdedefddZdedefddZ		d%de
jdedededef
ddZ	d&dededee dee fddZ			d'dedee d ee d!edee f
d"d#ZdS )(AudioProcessorz8Handles audio segment processing with duration controls.      $@       @        flacmax_duration_secmin_duration_secoverlap_secoutput_formatc                 C   s   || _ || _|| _|| _dS )a}  
        Initialize the audio processor.
        
        Args:
            max_duration_sec: Maximum allowed segment duration (hard limit)
            min_duration_sec: Minimum segment duration to process
            overlap_sec: Overlap between chunks when splitting (helps with word boundaries)
            output_format: Output format for split chunks (flac, wav, mp3)
        N)r%   r&   r'   r(   )r   r%   r&   r'   r(   r   r   r   __init__%   s   
zAudioProcessor.__init__r	   r   c                 C   s   t |}t|d S )z)Get duration of an audio file in seconds.     @@)r   	from_filelen)r   r	   audior   r   r   get_audio_duration:   s   
z!AudioProcessor.get_audio_durationc                 C   s   |  |}|| jkS )z4Check if a file needs to be split based on duration.)r.   r%   )r   r	   durationr   r   r   needs_splitting?   s   

zAudioProcessor.needs_splitting  samplessr	target_mssearch_window_msc                 C   s   d}t || d }t t|| d }td|| }t||| }	td}
|}t||	|D ]6}t || d }||||  }t||d k rIq.tt|d d }dt	| }||
k rd|}
|}q.|S )z
        Find the lowest-energy point near target_ms for VAD-aware splitting.
        Searches +/- search_window_ms around target for a silence/low-energy valley.
        Returns the best cut point in milliseconds.
             r   inf   g|=)
r   r,   maxminr   rangenpsqrtmeanlog10)r   r2   r3   r4   r5   frame_msframe_samplestotal_mssearch_start
search_endbest_energybest_mst_msidxframerms	energy_dbr   r   r   _find_energy_valleyD   s&   	z"AudioProcessor._find_energy_valleyN
output_dirchunk_duration_secc                 C   s  |du r| j }t|}t|}|d }tj|dd t|j}|| j kr4t|t|j	ddd||dgS t
j| t
jd}|jdkrN|d	|jjdd
}|d }|j}	t|d }
dg}d}||
 |k r||
 }| ||	|}|| }|dk r~|d }n|dkr|t|d  }|| |}||
 |k sf|| g }t|d }t|D ]N}|| }||d  }||| }t|d }|| jk r|dkrq| d|dd| j }tj||}|j|| jd |t|t|j	|||||d q|S )a  
        Split audio using VAD-aware energy-based cutting.

        Instead of chopping at fixed intervals, finds the lowest-energy point
        in a window around the target split time to avoid cutting mid-speech.
        Target: 8-13s chunks. Searches +/- 3s around target for silence valleys.

        Sequential chunks track their original segment + cut points so they
        can be merged back later (transcripts appended, original audio retained).

        Args:
            file_path: Path to the audio file
            output_dir: Directory to save chunks
            chunk_duration_sec: Target chunk duration (defaults to max_duration_sec)

        Returns:
            List of AudioChunk objects with sequential chunk indices
        Nr*   T)exist_okr   r   r	   r
   r   r   r   r   r   )dtype)axisi   r7   i  i:  _chunk03d.)format)r%   r   r+   r,   osmakedirsr   stemr   namer=   arrayget_array_of_samplesfloat32channelsreshaper?   
frame_rater   rM   appendr<   r&   r(   pathjoinexport)r   r	   rN   rO   r-   total_duration_mstotal_duration_secoriginal_namer2   r3   r4   split_pointscursor	candidatecut_ms	chunk_lenchunks
num_chunksir   r   chunk_audiochunk_durationchunk_filename
chunk_pathr   r   r   split_audiod   sv   








zAudioProcessor.split_audioTsegments_dirmax_segments
skip_shortc              
   C   sd  |du rt j|d}h d}g }|D ]}|t|d|  qt|dd d}|r3|d| }tdt| d	 g }d
}	d
}
|D ]Q}| 	t
|}|rZ|| jk rZ|
d7 }
qE|| jkr~| t
||}|	d7 }	td|j d|ddt| d ntt
||jd
dd
t|d |dg}|| qEtdt| dt| d td|	 d|
  |S )a  
        Process all segments in a directory, splitting as needed.
        
        Args:
            segments_dir: Directory containing audio segments
            output_dir: Directory for split chunks (default: segments_dir/chunks)
            max_segments: Maximum number of segments to process (for testing)
            skip_short: Skip segments shorter than min_duration_sec
            
        Returns:
            List of AudioChunk objects ready for transcription
        Nro      .m4a.mp3.ogg.wav.flac*c                 S   s   | j S N)r\   )xr   r   r   <lambda>   s    z;AudioProcessor.process_segments_directory.<locals>.<lambda>)keyz[Audio] Processing z segments...r   r   z[Audio] Split : z.1fzs -> z chunksr7   rQ   z[Audio] Processed z segments -> z[Audio] Split: z, Skipped (too short): )rY   rd   re   extendr   globsortedprintr,   r.   r   r&   r%   rv   r\   r   r   )r   rw   rN   rx   ry   audio_extensionssegment_filesext
all_chunkssplit_countskipped_countsegment_pathr/   ro   r   r   r   process_segments_directory   sF   
&

z)AudioProcessor.process_segments_directory)r!   r"   r#   r$   )r1   r   )NNT)r   r   r   r   r   r   r)   r.   r   r0   r=   ndarrayr   rM   r   r   r   rv   r   r   r   r   r   r    "   sb    

$
jr    rw   r   c                 C   s   t  }h d}g }|D ]7}t| d| D ]*}z||t| W q tyA } ztd| d|  W Y d}~qd}~ww q|sIddiS t|t	|t	|t| t
|t|t	dd	 |D t	d
d	 |D dS )z
    Get statistics about segments in a directory.
    
    Args:
        segments_dir: Directory containing audio segments
        
    Returns:
        Dictionary with duration statistics
    rz   r   zError reading r   NerrorzNo audio files foundc                 s   s    | ]	}|d krdV  qdS )
   r   Nr   .0dr   r   r   	<genexpr>4      z$get_segment_stats.<locals>.<genexpr>c                 s   s    | ]	}|d k rd V  qdS )r   Nr   r   r   r   r   r   5  r   )total_filesrh   avg_duration_secr&   r%   over_10s_countunder_1s_count)r    r   r   rc   r.   r   	Exceptionr   r,   sumr;   r:   )rw   	processorr   	durationsr   fer   r   r   get_segment_stats  s,   
 r   __main__r   z
Segment Statistics:z  r   )r   rY   mathnumpyr=   pathlibr   typingr   r   r   dataclassesr   pydubr   r   r    r   r   r   sysr,   argvrw   statsr   itemsr   valuer   r   r   r   <module>   s0     t$
