o
    lQi//                     @  s4  d Z ddlmZ ddlZddlZddlZddlmZmZ ddl	m
Z
 ddlmZ ddlZddlZddlmZmZmZmZmZmZmZmZ eeZeG d	d
 d
ZeG dd dZd?d@ddZ		dAdBddZ d?dCddZ!dDd!d"Z"dEd&d'Z#dFd*d+Z$dFd,d-Z%dGd/d0Z&dHd4d5Z'dId7d8Z(dJdKd=d>Z)dS )Lz
Audio polishing pipeline: STEP 1 length-split -> STEP 2 boundary-trim -> STEP 3 silence-pad -> STEP 4 encode.
All trim metadata is tracked for downstream use.
    )annotationsN)	dataclassfield)Path)Optional   )MIN_SEGMENT_DURATION_SMAX_SEGMENT_DURATION_SPREFERRED_MAX_DURATION_SSPLIT_SEARCH_START_SFORCE_CUT_RANGEBOUNDARY_CHECK_MSSILENCE_PAD_MSBOUNDARY_TRIM_MAX_PCTc                   @  s   e Zd ZU ded< ded< dZded< dZded< dZded< dZded	< eZ	ded
< eZ
ded< dZded< dZded< dZded< dZded< dZded< dZded< dZded< dS )TrimMetadatastroriginal_filefloatoriginal_duration_msg        original_start_msoriginal_end_mstrimmed_start_mstrimmed_end_msleading_pad_mstrailing_pad_msfinal_duration_msFbool	was_splitr   intsplit_indexabrupt_start
abrupt_end	discarded discard_reasonN)__name__
__module____qualname____annotations__r   r   r   r   r   r   r   r   r   r   r    r!   r"   r$    r)   r)   ,/home/ubuntu/transcripts/src/audio_polish.pyr      s    
 r   c                   @  s>   e Zd ZU ded< ded< ded< dZded	< d
Zded< dS )PolishedSegment
np.ndarrayaudior   srr   	trim_meta    bytes
flac_bytesr#   r   base64_audioN)r%   r&   r'   r(   r2   r3   r)   r)   r)   r*   r+   -   s   
 r+   
   r-   r,   r.   r   frame_msreturnc                 C  sp   t || d }t| | }|dkr ttt| d gS | d||  ||}ttj|d ddS )z6Compute RMS energy profile with `frame_ms` resolution.  r      Nr   axis)r   lennparraysqrtmeanreshape)r-   r.   r5   	frame_lenn_framesframesr)   r)   r*   _compute_rms_profile6   s   rD         .@rmsthreshold_percentiler   	list[int]c                 C  s&   t | |}t | |k d }| S )zBFind frame indices where RMS is below threshold (silence valleys).r   )r<   
percentilewheretolist)rF   r5   rG   	thresholdvalleysr)   r)   r*   _find_silence_valleys@   s   rN   	frame_idxc                 C  s   | t || d  S Nr7   )r   )rO   r.   r5   r)   r)   r*   _frame_to_sampleH   s   rQ   samplec                 C  s   | | d S rP   r)   )rR   r.   r)   r)   r*   _sample_to_msL   s   rS   r   r   %list[tuple[np.ndarray, TrimMetadata]]c              	   C  s  t | | }|tk r!t||d dd|ddt dd}| |fgS |tkr5t||d |d d}| |fgS t| |}t|}d	}g }d
}	d
}
|	t | k r2t | |	 }|| }|tkr| |	d }t || }|tkrt|t | | d t|	|tt | ||
d
k|
d}|||f nt	t|	|t
d  | }t	t|	|td  | }t|t |}d}|D ]}||  kr|krn q|} nq|du r||k r|t |kr||| }t |d
kr|t| }|du r|}t||}t|t | }| |	| }t || }|tkr*t|t | | d t|	|t||d|
d}|||f |
d7 }
|}	|	t | k sM|r7|S | t|t | | d dddfgS )z4Split segments >10s at silence valleys. Discard <2s.r7   TzToo short: .1fzs < sr   r   r"   r$   )r   r   r   r4   r   N)r   r   r   r   r   r   r   zNo valid split points found)r;   r   r   r
   rD   rN   r	   rS   appendr   r   minr<   argminrQ   )r-   r.   r   
duration_smetarF   rM   r5   resultscurrent_start_sample	split_idx	remainingremaining_schunk	chunk_dursearch_start_framesearch_end_frame	cut_framevforce_range
cut_sampler)   r)   r*   step1_length_splitP   s   




<
rj   r\   tuple[np.ndarray, TrimMetadata]c                 C  s*  |j r| |fS t|t d }tt| t }t| |dd}t|dkr)t|nd}|d }d}t| }	| dt|t|  }
t|
dkrPt	t
|
d nd}||krt| |d krd}t|t|}t|d| |dd	}|r|d }t|||d}t|||_nd
|_| tdt| | d }t|dkrt	t
|d nd}||krt| |d krd}tdt|t||| d   }t||d |dd	}|r||d  }t|||d}	tt| |	 ||_nd
|_| ||	 }t|| tk rd
|_ dt|| dd|_||fS ||fS )zMTrim dirty edges: check first/last 50ms RMS, scan for silence valley to trim.r7      )r5   r   g333333?Nr8      )r5   rG   TzPost-trim too short: rU   rV   )r"   r   r   r;   r   rD   r<   medianrY   r>   r?   rN   rQ   rS   r   r    maxr   r!   r   r$   )r-   r.   r\   check_samplesmax_trim_samplesrF   
median_rmsdirty_threshold
trim_starttrim_endstart_chunk	start_rmsr5   search_limitrM   
trim_frame	end_chunkend_rmssearch_startvalleys_endtrimmedr)   r)   r*   step2_boundary_trim   sJ   $$"r   c                 C  s   |j r| |fS t|t d }tj|| jd}t|| |g}t|_t|_t	|| d |_
t	|| tk rEd|_ dt	|| dd|_||fS )z!Prepend and append 150ms silence.r7   dtypeTzPost-pad too short: rU   rV   )r"   r   r   r<   zerosr   concatenater   r   r;   r   r   r$   )r-   r.   r\   pad_samplessilencepaddedr)   r)   r*   step3_silence_pad   s   r   tuple[bytes, str]c                 C  s:   t  }tj|| |dd | }t|d}||fS )z'Encode to FLAC bytes and base64 string.FLAC)formatascii)ioBytesIOsfwritegetvaluebase64	b64encodedecode)r-   r.   bufr2   b64r)   r)   r*   step4_encode   s
   r   
audio_pathr   list[PolishedSegment]c              
   C  s   t jt| dd\}}|jdkr|jdd}t||| jd}g }|D ]k\}}|jrAt	d| j d|j
  |t|||d q"t|||\}}|jrdt	d	| j d|j
  |t|||d q"t|||\}}|jrz|t|||d q"t||\}	}
|t||||	|
d
 q"|S )zYFull pipeline: load -> split -> trim -> pad -> encode. Returns list (may be >1 if split).float32r   r   r9   )r   z
Discarded : r-   r.   r/   zDiscarded after trim )r-   r.   r/   r2   r3   )r   readr   ndimr?   rj   namer"   loggerdebugr$   rX   r+   r   r   r   )r   r-   r.   splitsr]   rb   r\   r   r   r2   r   r)   r)   r*   polish_segment	  s2   

r   pathc                 C  sr   zt | W S  ty8 } z'td| j d|  ttg dt| jddd| ddgW  Y d	}~S d	}~ww )
z6Wrapper that catches errors for thread pool execution.zFailed to polish r   i>  r   TzPolish error: rW   r   N)	r   	Exceptionr   errorr   r+   r<   r=   r   )r   er)   r)   r*   _safe_polish_segment3  s   

r   segment_paths
list[Path]max_workers
int | Nonec                 C  s   ddl m} ddl}| sg S |pt| pdt| d}tdt|t| }g }||d}|t| D ]}|	| q4W d   |S 1 sGw   Y  |S )z^Polish all segments in parallel. soundfile/numpy release the GIL so threads give real speedup.r   )ThreadPoolExecutorN      r   )r   )
concurrent.futuresr   osrY   	cpu_countr;   rp   mapr   extend)r   r   r   r   resolved_workersall_polishedpoolr]   r)   r)   r*   polish_all_segmentsD  s   
r   )r4   )r-   r,   r.   r   r5   r   r6   r,   )r4   rE   )rF   r,   r5   r   rG   r   r6   rH   )rO   r   r.   r   r5   r   r6   r   )rR   r   r.   r   r6   r   )r-   r,   r.   r   r   r   r6   rT   )r-   r,   r.   r   r\   r   r6   rk   )r-   r,   r.   r   r6   r   )r   r   r6   r   )r   r   r6   r   )N)r   r   r   r   r6   r   )*__doc__
__future__r   r   r   loggingdataclassesr   r   pathlibr   typingr   numpyr<   	soundfiler   configr   r	   r
   r   r   r   r   r   	getLoggerr%   r   r   r+   rD   rN   rQ   rS   rj   r   r   r   r   r   r   r)   r)   r)   r*   <module>   s:    (




c
7

	
*