o
    q^i<                     @   s>   d Z ddlZddlmZmZ ddlmZ eG dd dZdS )a  
Configuration for Fast Pipeline v6.0 - Adaptive Compute-Aware

Key improvements:
- Auto-detects system resources (nproc, vCPUs, GPU vRAM)
- Adapts worker counts and batch sizes to available compute
- Optimized for micro-level speaker detection (0.4s events)
    N)	dataclassfield)Optionalc                   @   s  e Zd ZU dZejddZee	d< dZ
ee	d< dZee	d< dZee	d< d	Zee	d
< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d< dZee	d < d!Z ee	d"< d!Z!ee	d#< dZ"ee	d$< dZ#ee	d%< d&Z$ee	d'< d(Z%ee	d)< d*Z&ee	d+< d,Z'ee	d-< d,Z(ee	d.< d/Z)ee	d0< d1Z*ee	d2< d3Z+ee	d4< dZ,ee	d5< d6Z-ee	d7< d8Z.ee	d9< d:Z/ee	d;< dZ0ee	d<< d=Z1ee	d>< d?Z2ee	d@< dAZ3ee	dB< dZ4ee	dC< dDZ5ee	dE< dFZ6ee	dG< dZ7ee	dH< dIZ8ee	dJ< dKZ9ee	dL< dDZ:ee	dM< dNZ;ee	dO< dPZ<ee	dQ< dRZ=ee	dS< dTZ>ee	dU< dVZ?ee	dW< dXZ@ee	dY< dZAee	dZ< dKZBee	d[< d\ZCee	d]< d^ZDee	d_< dDZEee	d`< daZFee	db< dNZGee	dc< ddZHee	de< dZIee	df< dZJee	dg< dhZKee	di< djZLee	dk< d:ZMee	dl< dmZNee	dn< dZOee	do< d\ZPee	dp< d?ZQee	dq< dmZRee	dr< dZSee	ds< d:ZTee	dt< d?ZUee	du< dKZVee	dv< dZWee	dw< dZXee	dx< d?ZYee	dy< dzZZee	d{< d|Z[ee	d}< dZ\ee	d~< dZ]ee	d< d*Z^ee	d< dZ_ee	d< dZ`ee	d< d&Zaee	d< dDZbee	d< dZcee	d< dZdee	d< dZeee	d< dZfee	d< dVZgee	d< dehfddZidd ZjdS )Configa  
    Pipeline configuration with adaptive compute settings.
    
    Flow (per instructions.md):
    1. Download (max parallel workers)
    2. Quick VAD (parallel, get speech outline)
    3. Chunk at silence boundaries (VAD-aware)
    4. OSD (mark overlaps as unusable FIRST)
    5. Frame-level segmentation (17ms resolution for micro-changes)
    6. Embeddings + Conservative clustering
    7. Output metadata JSON only
    HF_TOKEN hf_tokenyoutubeinput_sourcer2_source_prefixr2_source_extensionsi  r2_presign_ttl_secTfetch_youtube_metadata_for_r2Fr2_upload_enabled
productionr2_upload_bucket_typer2_upload_prefixr2_upload_skip_if_existsr2_upload_delete_local_tarsegment_audiobackground_export   background_export_workersi>  sample_rateg        intro_skip_secondsoutro_skip_secondsauto_intro_skippreserve_original_audior   original_audio_sample_rate    vad_workersmax_workersdownload_workerschunk_workersg     r@chunk_durationg      >@min_chunk_durationg      ?vad_threshold   vad_min_speech_msvad_min_silence_msi   vad_window_size_samples   vad_speech_pad_msg      N@vad_chunk_sizeuse_community_modelg      @segmentation_stepg      @segmentation_durationg?min_segment_durationdetect_overlapgffffff?overlap_threshold皙?overlap_min_durationd   overlap_padding_msoverlap_density_filter      ?overlap_density_max_gapg      @overlap_density_max_durationfilter_by_qualityg      .@
min_snr_dbg333333?min_quality_scoremin_tts_duration   min_speakers
   max_speakers   embedding_batch_sizei q max_embedding_lengthg?cluster_merge_threshold   min_segments_for_mergeenable_chunk_reassignmentchunk_reassignment_thresholdg333333?chunk_reassignment_severeg333333?chunk_reassignment_min_speechchunk_reassignment_min_portiong       @chunk_reassignment_min_analysischunk_reassignment_persistenceg?%chunk_reassignment_centroid_thresholdenable_music_detectionuse_hybrid_detectiong      ?music_chunk_duration   music_batch_sizemusic_prob_thresholdg      ?noise_prob_thresholdmusic_ratio_cleanmusic_ratio_demucsmusic_mean_cleanmusic_mean_demucsnoise_ratio_cleannoise_ratio_demucsnoise_mean_cleannoise_mean_demucsstrict_tts_modemusic_early_exitmusic_early_exit_sample_ratiog?music_early_exit_thresholdzdata/fast_output_v6
output_dirgenerate_sample_clips   clips_per_speakermax_silence_gapenable_boundary_refinementg      Y@ boundary_refinement_tolerance_msboundary_refinement_min_gap_ms!boundary_refinement_min_remainingclear_cache_every_n_chunksmonitor_computelog_compute_statsauto_tune_resourcesmax_utilizationcompute_configc                 C   s,   |  D ]\}}t| |rt| || qdS )z
        Apply auto-detected optimal settings from ComputeMonitor.
        
        Called after COMPUTE.get_optimal_config()
        N)itemshasattrsetattr)selfrs   keyvalue rz   L/home/ubuntu/.cursor/worktrees/maya3__SSH__216.81.248.184_/nmo/src/config.pyapply_adaptive_settings  s
   
zConfig.apply_adaptive_settingsc                 C   s   t j| jdd | jr/zddlm} | j|jjkr|| j |	|  W n	 t
y.   Y nw d| j  k r=dksBJ d J dd| j  k rPdksUJ d J d| jdks^J d	d
S )z/Validate configuration and auto-tune resources.T)exist_okr   )COMPUTEr9   zmerge threshold must be (0, 1]zVAD threshold must be (0, 1]r4   zmin_segment too smallN)osmakedirsre   rq   src.computer~   rr   	resourcesset_max_utilizationapply_to_configImportErrorrG   r&   r1   )rw   r~   rz   rz   r{   __post_init__   s   &&zConfig.__post_init__N)k__name__
__module____qualname____doc__r   environgetr   str__annotations__r
   r   r   r   intr   boolr   r   r   r   r   r   r   r   r   r   floatr   r   r   r   r    r!   r"   r#   r$   r%   r&   r(   r)   r*   r,   r-   r.   r/   r0   r1   r2   r3   r5   r7   r8   r:   r;   r<   r=   r>   r?   rA   rC   rE   rF   rG   rI   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rV   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rb   rc   rd   re   rf   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   dictr|   r   rz   rz   rz   r{   r      s   
 		
r   )r   r   dataclassesr   r   typingr   r   rz   rz   rz   r{   <module>   s   	