o
    lQi                     @  s   d Z ddlmZ ddlZddlZddlZddlZddlmZm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZmZmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddl m!Z! e"e#Z$eG dd dZ%G dd dZ&dS )z
Batch cycle engine: 1-minute cadence loop.
Fire batch -> collect -> validate -> report.
Handles provider switching on 429 flood.
    )annotationsN)	dataclassfield)Optional   )	WORKER_BATCH_SIZEBATCH_INTERVAL_SECONDSFLOOD_THRESHOLD_PCTPROMPT_VERSIONSCHEMA_VERSIONTRIMMER_VERSIONVALIDATOR_VERSIONTEMPERATURETHINKING_LEVEL)BaseProviderTranscriptionRequestTranscriptionResponseRequestStatus)log_api_stats)validate_transcriptionValidationResult)WorkerStatsc                   @  s   e Zd ZU ded< dZded< dZded< dZded< dZded< d	Zded
< dZ	ded< dZ
ded< dZded< eedZded< eedZded< eedZded< eedZded< dS )BatchResultstrbatch_idr   intsegments_sentsegments_returnedsegments_429segments_error provider_used
cache_hitsg        floatavg_latency_msbatch_duration_ms)default_factoryzlist[TranscriptionResponse]	responseszlist[ValidationResult]validationsz
list[dict]transcription_recordsflag_recordsN)__name__
__module____qualname____annotations__r   r   r   r   r!   r"   r$   r%   r   listr'   r(   r)   r*    r0   r0   +/home/ubuntu/transcripts/src/batch_cycle.pyr      s   
 r   c                   @  s*   e Zd Zddd	ZdddZd ddZdS )!BatchCycleEngineprimary_providerr   fallback_providerOptional[BaseProvider]	worker_idr   video_idc                 C  s   || _ || _|| _|| _d S )N)primaryfallbackr6   r7   )selfr3   r4   r6   r7   r0   r0   r1   __init__1   s   
zBatchCycleEngine.__init__requestslist[TranscriptionRequest]expected_languageaudio_durationsdict[str, float]
trim_metasdict[str, dict]returnr   c                   s  dt  jd d  }t }t|t|| j d}t	
d| dt| d| j   | j|I d H }dd |D }	t|	|_| jrt|	tt|d	 tkrt	d| d
t|	 dt| d dd |	D   fdd|D }
|
r| j|
I d H }dd |D }|D ]}|||j< qt| }d|_dd |D }dd |D }t||_t||_tdd |D |_||_dd |D }|rt|t| nd|_|D ]E}|jrt|j|j|||jd||jd}|j| |  |||}|j!| |j"D ]}|j#|j|$dd |dd qq|D ]}|j%t&j'kr8|j#|j|j%j(|j)dd q t | d |_*t	
d| d|j d|j+ d |j d!|j d"|j d#|jd$d%|j*d$d& t,  |S )'Nbatch_   )r   r   r!   [z	] Firing z requests to c                 S     g | ]
}|j tjkr|qS r0   )statusr   RATE_LIMITED.0rr0   r0   r1   
<listcomp>Q       z.BatchCycleEngine.run_batch.<locals>.<listcomp>r   z] 429 flood (/z), switching to fallbackc                 S  s   h | ]}|j qS r0   
segment_idrJ   r0   r0   r1   	<setcomp>W   s    z-BatchCycleEngine.run_batch.<locals>.<setcomp>c                   s   g | ]	}|j  v r|qS r0   rP   )rK   req	retry_idsr0   r1   rM   X   s    c                 S  s    i | ]}|j tjkr|j|qS r0   )rH   r   rI   rQ   rJ   r0   r0   r1   
<dictcomp>\   s     z.BatchCycleEngine.run_batch.<locals>.<dictcomp>mixedc                 S  rG   r0   )rH   r   SUCCESSrJ   r0   r0   r1   rM   c   rN   c                 S  rG   r0   )rH   r   ERRORrJ   r0   r0   r1   rM   d   rN   c                 s  s    | ]	}|j jrd V  qdS )r   N)token_usage	cache_hitrJ   r0   r0   r1   	<genexpr>g   s    z-BatchCycleEngine.run_batch.<locals>.<genexpr>c                 S  s   g | ]
}|j d kr|j qS )r   )
latency_msrJ   r0   r0   r1   rM   j   rN   r   g      @)rQ   transcription_datar>   audio_duration_s	trim_meta:F)rQ   	flag_typedetailsresolvedi  z] Done: z ok, z 429s, z	 errors, z cache hits, z.0fzms avg latency, zms total)-uuiduuid4hextime	monotonicr   lenr8   get_provider_nameloggerinfo
send_batchr   r9   maxr	   warningrQ   r/   valuesr!   r   r   sumr"   r'   r$   r^   r   getr(   append_build_result_recordr)   flagsr*   splitrH   r   rX   valueerror_messager%   r   r   )r:   r<   r>   r?   rA   r   batch_startresultr'   rate_limited
retry_reqsfallback_responsesresp_mapfr	successeserrors	latenciesrespvrrecordflagr0   rT   r1   	run_batch=   s   &
 &



zBatchCycleEngine.run_batchr   r   r   r   expected_langdictc                 C  s  |j pi }|di }i dtt d| jd|jdd|jv r)|jdd ndd	|d
|d
dd|jd|ddd|ddd|ddd|ddd|ddd|ddd|j	d|j
d|jd|ji d|jd|jd |jd!|jd"|jd#|jd$td%td&td'td(d)d*td+td,|jjrd-p| j d.| jd/|jjd0|jj|jj|jj d1S )2Nspeakeridr7   segment_file
speaker_id_r   speaker0expected_language_hintdetected_languager    lang_mismatch_flagtranscriptiontaggedspeaker_emotionemotionneutralspeaker_stylespeaking_styleconversationalspeaker_pacepacenormalspeaker_accentaccentnum_unknum_inaudiblenum_event_tagsboundary_scoretext_length_per_secoverlap_suspectedquality_scoreasr_eligibletts_clean_eligibletts_expressive_eligibleprompt_versionschema_versiontrimmer_versionvalidator_versionmodel_idzgemini-3-flash-previewtemperaturethinking_levelprovidercachedr6   r[   token_usage_json)input_tokensoutput_tokenscached_tokens)!r^   rs   r   re   rf   r7   rQ   rw   lang_mismatchr   r   r   r   chars_per_secondr   r   r   r   r   r
   r   r   r   r   r   rZ   r[   r8   rk   r6   r   r   r   )r:   r   r   r   datar   r0   r0   r1   ru      s   
	
 !"z%BatchCycleEngine._build_result_recordN)r3   r   r4   r5   r6   r   r7   r   )
r<   r=   r>   r   r?   r@   rA   rB   rC   r   )r   r   r   r   r   r   rC   r   )r+   r,   r-   r;   r   ru   r0   r0   r0   r1   r2   /   s    

^r2   )'__doc__
__future__r   asynciologgingrh   re   dataclassesr   r   typingr   configr   r   r	   r
   r   r   r   r   r   providers.baser   r   r   r   providers.aistudior   	validatorr   r   dbr   	getLoggerr+   rl   r   r2   r0   r0   r0   r1   <module>   s"    ,
