o
    3NPit5                     @   sJ  d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 ddlZddlZddlZddlmZ edZd!dee d	ed
edefddZ		d"dedee dedeee
eef eef  fddZdd Z			d#de
eef dededededeee ee f fddZ	d$deeeeef  dedeee ee f fdd ZdS )%a  
Speaker diarization with GPU optimization and compute monitoring.

=== OPTIMIZATION (v6.2) ===
Key improvements:
- In-memory chunk processing: Pass waveform tensors directly to pyannote
- No disk writes: Eliminates chunk WAV file I/O
- VAD-aware chunking (cut at silence)
- Compute utilization tracking
- Memory-efficient processing

=== EDGE CASE HANDLING (v6.7) ===
- Retry logic for transient GPU failures
- OOM recovery with cache clearing
- Graceful fallback on persistent failures
    N)Path)ListDictTupleOptionalUnion)MODELSzFastPipelineV6.Diarization      >@vad_segmentstarget_timewindowreturnc           
      C   sr   |}d}t t| d D ]*}| | d }| |d  d }|| }|| d }	t|	| |kr6||kr6|}|	}q|S )u   
    Find silence point near target time for chunk boundary.
    
    Strategy: Find largest gap within ±window of target_time.
    This ensures we don't cut in the middle of speech.
               endstart   )rangelenabs)
r
   r   r   best_cutbest_gapi	gap_startgap_endgap_size
gap_center r   U/home/ubuntu/.cursor/worktrees/maya3data__SSH__216.81.248.184_/zxg/src/diarization.pyfind_vad_safe_cut   s   r   T
audio_path	in_memoryc                 C   s  t d|rdnd d |dur |j}|j}t|d}nt| \}}|	d
 }|jd | }|sOt| jd }	|	jd	d
 ddlm}
 |
|	 g }d}d}||k rt||j |}||k rzt||}|| |jk ryt||j |}n|}t|| }t|| }|dd||f }|r||d}||||| f n|	d|dd }tt||| |t|||| f |}|d7 }||k sY|rdnd}t dt| d| d |S )an  
    Create VAD-aware audio chunks for diarization.
    
    === OPTIMIZATION (v6.2) ===
    - in_memory=True (default): Return waveform tensors directly (no disk I/O!)
    - in_memory=False: Legacy disk-based WAV files
    
    Strategy:
    1. Target chunk size from config
    2. Find optimal cut points at silence boundaries
    3. Never cut in the middle of speech
    
    Args:
        audio_path: Path to audio file
        vad_segments: Speech segments from VAD
        config: Pipeline configuration
        audio_buffer: Optional AudioBuffer (avoids file re-read)
        in_memory: If True, return dicts with waveform tensors instead of file paths
    
    Returns:
        List of (chunk_data, start_time, duration) where chunk_data is either:
        - str: file path (in_memory=False)
        - dict: {"waveform": tensor, "sample_rate": int} (in_memory=True)
    u"   ✂️ Creating VAD-aware chunks (z	in-memorydisk)...Nr   r   chunksT)exist_ok)TEMP_MANAGERr   )waveformsample_ratechunk_03dz.wavzto disku   ✅ Created z	 chunks (z, VAD-aware boundaries))loggerinfowaveform_npr(   torch
from_numpy	unsqueeze
torchaudioloadsqueezenumpyshaper   parentmkdirsrc.audio_bufferr&   registerminchunk_durationr   min_chunk_durationintappendsavestrr   )r    r
   configaudio_bufferr!   r-   srr'   total_duration	chunk_dirr&   r$   r   idx
target_endr   start_sample
end_samplechunk_waveform
chunk_data
chunk_pathmode_strr   r   r   create_chunks6   sR   

"rN   c                 C   s   t | dr| jS | S )z
    Extract Annotation object from diarization result.
    
    === PYANNOTE 4.x COMPATIBILITY ===
    - pyannote 3.x: Returns Annotation directly
    - pyannote 4.x: Returns DiarizeOutput with .speaker_diarization attribute
    speaker_diarization)hasattrrO   )resultr   r   r   _get_annotation_from_result   s   
	rR   Fr   rK   chunk_startextract_overlaps	chunk_idxmax_retriesc                 C   s  d}t |d D ]S}ztj| |j|jd}t|}	t| tr%t| j	}
nd|d}
g }|rddl
m} ||	|\}}|D ]}|||d  ||d	  |d
 dd q>g }|D ](}||d  }||d	  }|| }||jkr|||||
 d|d  d qYn0g }|	jddD ]'\}}}||j }||j }|| }||jkr|||||
 d| d q||fW   S  ty' } za|}t| }d|v sd|v rtd| d|d  d tjdd tj rtj  tj  ||k rtd W Y d}~qntd| d|  ||k rW Y d}~qW Y d}~qd}~w ty\ } z)|}td| d|d  d|  ||k rRtjdd W Y d}~qW Y d}~qd}~ww td| d|d  d|  g g fS )a  
    Diarize a single chunk using the hot model.
    
    === OPTIMIZATION (v6.2) ===
    Supports both file paths AND in-memory waveform dicts.
    No more disk I/O for chunks!
    
    === PYANNOTE 4.x COMPATIBILITY ===
    Handles both pyannote 3.x (returns Annotation) and 4.x (returns DiarizeOutput).
    
    === EDGE CASE HANDLING (v6.7) ===
    - Retry logic for transient GPU failures
    - OOM recovery with cache clearing
    - Graceful fallback on persistent failures
    
    Uses global MODELS singleton to avoid reloading.
    Returns segments with global timestamps, and optionally overlap regions.
    
    Args:
        chunk_data: Either file path (str) OR dict {"waveform": tensor, "sample_rate": int}
        chunk_start: Start time offset for global timestamps
        config: Pipeline configuration
        extract_overlaps: If True, also extract overlap regions (for unified OSD+diarization)
        chunk_idx: Chunk index for speaker naming (used for in-memory chunks)
        max_retries: Number of retries on failure (default: 2)
    
    Returns:
        (segments, overlaps): segments list, and overlaps list (empty if extract_overlaps=False)
    Nr   )min_speakersmax_speakersr)   r*   r   )!extract_overlaps_from_diarizationr   r   durationoverlap)r   r   rZ   type_speaker)r   r   rZ   r^   T)yield_labelzout of memorycuda	   Chunk z	 attempt z(: GPU error, clearing cache and retrying
aggressivez RuntimeError: z	 failed: zChunk z diarization failed after z attempts: )r   r   diarization_pipelinerW   rX   rR   
isinstancer@   r   stemsrc.overlap_detectionrY   r>   min_segment_duration
itertracksr   r   RuntimeErrorlowerr+   warningclear_cacher.   r`   is_availableempty_cachesynchronizetimesleeperror	Exception)rK   rS   rA   rT   rU   rV   
last_errorattempt
raw_resultrQ   chunk_idoverlapsrY   chunk_overlapsclean_segmentsovlsegmentssegglobal_start
global_endrZ   turnr]   r^   e	error_strr   r   r   diarize_chunk   s   %













 
 r   r$   c                 C   s  |rdnd}t d| dt|  dtj d t }g }g }t| D ]a\}\}}	}
t }t||	|||d\}}|| || t | }|rTdt| d	nd}t d
|d  dt|  dt| d| d|dd|
dd |d |j	 dkrtj
dd q$tj
dd |jdd d |jdd d t | }ttdd |D }|rdt| d	nd}t d| d|ddt| d| d| d  ||fS )!ae  
    Run diarization on all chunks with progress tracking.
    
    === OPTIMIZATION (v6.1) ===
    When extract_overlaps=True, this function extracts overlap regions
    from the diarization results in a SINGLE PASS, eliminating the need
    for a separate OSD stage that would run diarization again.
    
    Args:
        chunks: List of (chunk_path, chunk_start, chunk_duration)
        config: Pipeline configuration
        extract_overlaps: If True, also extract and return overlap regions
    
    Returns:
        (all_segments, all_overlaps): segments and overlaps (overlaps empty if not extracted)
    z + OSD u   🎙️ Running diarizationz (z chunks, model: r#   )rT   rU   z, z	 overlapsra   r   /z: z	 segmentsz.1fzs, z.0fzs audio)r   Trb   c                 S      | d S Nr   r   xr   r   r   <lambda>b      z!run_diarization.<locals>.<lambda>)keyc                 S   r   r   r   r   r   r   r   r   c  r   c                 s   s    | ]}|d  V  qdS )r^   Nr   ).0sr   r   r   	<genexpr>f  s    z"run_diarization.<locals>.<genexpr>z | u   ✅ Diarizationzs | z | ~z
 fragments)r+   r,   r   r   _model_namerq   	enumerater   extendclear_cache_every_n_chunksrm   sortset)r$   rA   rT   rM   r   all_segmentsall_overlapsr   rK   rS   	chunk_durt0r}   ry   elapsedoverlap_strunique_speakersr   r   r   run_diarization.  s8   $


@0r   )r	   )NT)Fr   r   )F)__doc__rq   loggingpathlibr   typingr   r   r   r   r   r4   npr.   r1   
src.modelsr   	getLoggerr+   floatr   r@   boolrN   rR   r=   r   r   r   r   r   r   <module>   s`   
 
_

 