o
    3NPi$
                     @   s~   d Z ddlZddlZddlmZ ddlmZmZ ddlZe	dZ
dedee ded	eeee f fd
dZdefddZdS )z#Utility functions for the pipeline.    N)Path)ListDictzFastPipelineV5.Utils
audio_pathsegments
output_dirreturnc                 C   s  t d t }|d }|jdd t| \}}i }|D ]"}	|	ddkr@|	d dvr@|	d }
|
|vr9g ||
< ||
 |	 qi }| D ]\}}t	|d	d
 dd}t
|}|dkrfd|d |d g}ntt|}g ||< t|d|j D ]Q\}}|| }	tdtd|	d }t|	d | }tt|	d | |	d | }|dd||f }|| d|d  d }tt||| || t| qyqGt | }tdd | D }t d|dd| d |S )z
    Generate sample clips for each speaker for validation.
    
    Strategy: Pick 3 segments per speaker (longest, median, shortest).
    u   🎧 Generating samples...speaker_samplesT)exist_okstatususablespeaker)OVERLAP
NON_SPEECHc                 S   s   | d S )Nduration )xr   r   O/home/ubuntu/.cursor/worktrees/maya3data__SSH__216.81.248.184_/zxg/src/utils.py<lambda>-   s    z"generate_samples.<locals>.<lambda>)keyreverse   r         Ng      @g       @r   startend_sample_z.wavc                 s   s    | ]}t |V  qd S )N)len).0vr   r   r   	<genexpr>F   s    z#generate_samples.<locals>.<genexpr>u   ✅ Samples: z.1fzs | z clips)loggerinfotimemkdir
torchaudioloadgetappenditemssortedr   listrange	enumerateclips_per_speakerminmaxintsavestrsumvalues)r   r   configr   r   samples_dirwaveformsrspeaker_segmentssegspksample_pathsr   segssorted_segsnindicesidxseg_idxclip_durstart_sample
end_sampleclip	clip_pathelapsedtotalr   r   r   generate_samples   sF   
rK   c                 C   sF   t | jd }| r!| D ]}|  q|  td dS dS )zRemove temporary chunk files.chunksz   Cleaned up chunks directoryN)r   parentexistsiterdirunlinkrmdirr!   r"   )r   	chunk_dirfr   r   r   cleanup_chunksL   s   
rT   )__doc__r#   loggingpathlibr   typingr   r   r%   	getLoggerr!   r3   rK   rT   r   r   r   r   <module>   s"   

?