o
    i
<                     @   s  d Z ddlZddlm  mZ ddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZ ddlmZ ejdeee jj e
jdd ddlmZ eee jjd  dd	lmZmZ dd
lmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z* e	j+e	j,dd e	-dZ.eejd Z/eejd Z0dddddddddddd d!Z1d"Z2d#Z3d$Z4d%ed&e5fd'd(Z6d)ed*e5ee7f fd+d,Z8e3fd-e5ee7f d.ed/e9d*e5fd0d1Z:e4fd-e5ee7f d.ed2e9d*e5fd3d4Z;d5d6 Z<e=d7kre>e<  dS dS )8uD  
12-language canary test: determinism + concurrency on both AI Studio and OpenRouter.
Phase 1: Download 1 video per language, extract 5 segments each
Phase 2: Determinism test — 3 runs per provider, compare exact matches
Phase 3: 500-concurrency stress test on AI Studio
Phase 4: 500-concurrency stress test on OpenRouter
    N)defaultdict)Path	MOCK_MODEfalse)load_dotenvz.env)	EnvConfigLANGUAGE_MAP)polish_all_segments)AIStudioProvider)OpenRouterProvider)TranscriptionRequestRequestStatus)CacheManager)R2Clientz1%(asctime)s [%(levelname)s] %(name)s: %(message)s)levelformatcanarycanary_resultscanary_dataefw1HydOOfID7iL59a39BsnM2KMwb86IUX7tqVj0IuOIBFU9eSKO_t4zo-zn8bAjkBMdlo3pvcyqFYmqyv2attoI0zIYKHdR3-w_Ul5eQd1E_Huo8ozSnoys7JE5MDwLvubhR8)asbnenguhiknmlmrorpatate      i  namedatac                 C   sn   t jddd t |  d }t|d}tj||ddtd W d    n1 s(w   Y  td|  d S )	NTparentsexist_okz.jsonw   F)indentensure_asciidefaultz	Saved -> )RESULTS_DIRmkdiropenjsondumpstrloggerinfo)r-   r.   pathf rA   1/home/ubuntu/transcripts/preflight/canary_test.pysave_result9   s   rC   configreturnc                    s  t | }i }t D ]\}}t| }|jddd ztd| d| d |||}|||}|j	sCt
d| d|  W qt|j	dtd  }d	d
 |D dt }	|	sft
d| d W qg }
|	D ]#}|jj}|jjr|jj d|jj }|
t||j||jjd qj|
||< td| dt|
 d|  W q ty } ztd| d| d|  W Y d}~qd}~ww |S )zFDownload tars, extract, polish, return {lang: [TranscriptionRequest]}.Tr/   [z] Downloading z...z] No segments in Nr,   c                 S   s   g | ]}|j js|qS rA   )	trim_meta	discarded).0prA   rA   rB   
<listcomp>U       z(download_and_extract.<locals>.<listcomp>z] All segments discarded_split
segment_idaudio_base64language_codeoriginal_filez	] Ready: z segments from z] Failed to prepare : )r   CANARY_VIDEOSitemsWORK_DIRr8   r=   r>   download_tarextract_tarsegment_pathswarningr	   SEGMENTS_PER_LANGrG   rR   	was_splitsplit_indexappendr   base64_audiolen	Exceptionerror)rD   r2all_requests	lang_codevideo_idlang_dirtar_path	extractedpolishedvalidrequestssegseg_iderA   rA   rB   download_and_extractA   sH   
$(rp   requests_by_langprovider_namenum_runsc                    s  t dd  t d| d| d t d  tt}g }t|D ]}t d|d  d| d	 t }i }t|  D ]r\}	}
t d
|	 dt	|
 d |
|
I dH }|D ]U}|jtjkr|jr|	 d|j }|| |j |jdd|jdd|j|jj|jj|jj|jjd||< q]|jtjkrt d
|	 d|j d|j d|j  q]qAt | }||d t	|t|d|d t d|d  dt	| d|dd ||d k rtdI dH  q%d}d}tdd }g }| D ]p\}}|dd }|| d   d7  < t	||k r|d7 }qd!d" |D  t fd#d$ D rA|d7 }|| d%  d7  < q|d7 }|| dd& td'd$ t t! d  d D t"t	 d t	 d d( q|| }|dkr~|| d) nd}|||||t|dt#||dd* d+}t d,| d- t d.| d| d/|dd0 t| D ]\}}t d.| d|d%  d|d    q|rt d1 |dd2 D ]}t d3|d4  d5|d6   q||d7S )8zIRun same segments N times, compare transcription outputs for exact match.
F======================================================================zDETERMINISM TEST:  (z runs)z	
--- Run    /z ---z  [z
] Sending z segments...N:transcription detected_language)rz   r|   
latency_ms	cache_hitinput_tokenscached_tokensoutput_tokensz] rS   z - r3   )runsegments_oktime_sresultsz  Run z done: z OK in .1fsr   c                   S   s
   dddS )Nr   )totaldeterministicrA   rA   rA   rA   rB   <lambda>   s   
 z&run_determinism_test.<locals>.<lambda>r   c                 S   s   g | ]}| d dqS )rz   r{   )getrI   rrA   rA   rB   rK      rL   z(run_determinism_test.<locals>.<listcomp>c                 3   s    | ]	}| d  kV  qdS )r   NrA   )rI   ttextsrA   rB   	<genexpr>       z'run_determinism_test.<locals>.<genexpr>r      c                 s   s$    | ]\}\}}||kr|V  qd S NrA   )rI   iabrA   rA   rB   r      s   " )segmentr   first_diff_chard      )providerrs   total_segmentsr   non_deterministicdeterminism_pctby_languagediffsz
=== DETERMINISM RESULTS () ===  z segments deterministic (%)z  Non-deterministic segments:r+       r   z: first diff at char r   )summaryrun_details)$r=   r>   r   listrangetime	monotonicsortedrU   r`   
send_batchstatusr   SUCCESStranscription_datarO   r^   r   r}   token_usager~   r   r   r   rb   error_messageroundasynciosleepsplitallnext	enumeratezipmindict)rq   r   rr   rs   all_runsrun_summariesrun_idx	run_startrun_resultsre   rl   	responsesrespkeyrun_timer   r   
lang_statsr   runslangr   pctr   statsdrA   r   rB   run_determinism_testp   s   	((

	
 (
"r   target_concurrencyc                    s@  t dd  t d| d| d t d  g }t|  D ]	\}}|| q#t||k rc|t| d }g }t|D ]}	|D ]}
|t|
j	 d|	 |
j
|
j|
jd qEqA|d	| }t d
t| d t }||I d	H }t | }dd |D }dd |D }dd |D }dd |D }dd |D }tdd |D }tdd |D }tdd |D }tdd |D }i d|d|dt|dt|dt|dt|dt|dt|dd |d!krtt|| dnd!d"|rtt|t| dnd!d#|rtt|t|d  dnd!d$|r4tt|tt|d%  dnd!d&|rAtt|dnd!d'|d(|d)|d*|d+t|tt|d d,i}g }|d	d- D ]}||j	|jd	d. d/ qf|d	d0 D ]}||j	d1|jd	d. d2 q~t d3| d4 t d5t| d6t| d7t| d8t| d9t| d: t d;|d<d=|d   d> t d?|d"  d@|d#  dA|d$  dB t dC| dD| dE|  t dF| d6t| d|d+ dG d<dH |rt dI |d	d, D ]}t dJ|  q||dKS )LzDFire target_concurrency requests simultaneously, measure throughput.rt   ru   zCONCURRENCY TEST: rv   z concurrent)rw   _reprN   NzSending z requests concurrently...c                 S      g | ]
}|j tjkr|qS rA   )r   r   r   r   rA   rA   rB   rK          z(run_concurrency_test.<locals>.<listcomp>c                 S   r   rA   )r   r   ERRORr   rA   rA   rB   rK     r   c                 S   r   rA   )r   r   RATE_LIMITEDr   rA   rA   rB   rK     r   c                 S   r   rA   )r   r   TIMEOUTr   rA   rA   rB   rK     r   c                 S   s   g | ]
}|j d kr|j qS )r   )r}   r   rA   rA   rB   rK     r   c                 s       | ]}|j jV  qd S r   )r   r   r   rA   rA   rB   r         z'run_concurrency_test.<locals>.<genexpr>c                 s   r   r   )r   r   r   rA   rA   rB   r     r   c                 s   r   r   )r   r   r   rA   rA   rB   r     r   c                 s   s    | ]	}|j jrd V  qdS )rw   N)r   r~   r   rA   rA   rB   r   	  r   r   r   actual_sent	successeserrorsrate_limitedtimeoutswall_time_sr3   throughput_rpsr   avg_latency_msp50_latency_msp95_latency_msgffffff?max_latency_mstotal_input_tokenstotal_output_tokenstotal_cached_tokens
cache_hitscache_hit_rater,   
      )r   rb   r+   429)r   r   rb   z
=== CONCURRENCY RESULTS (r   r   rx   z OK, z	 errors, z 429s, z	 timeoutsz  Wall time: r   zs, Throughput: z req/sz  Latency: avg=zms, p50=zms, p95=msz  Tokens: input=z	, output=z	, cached=z  Cache hits: r   r   z  Sample errors:r   )r   r   )r=   r>   r   rU   extendr`   r   r^   r   rO   rP   rQ   rR   r   r   r   sumr   intmaxr   )rq   r   rr   r   rd   re   rl   
multiplierexpandedr   reqstartr   	wall_timer   r   r   r   	latenciestotal_inputtotal_outputtotal_cachedr   r   error_detailsr   ro   rA   rA   rB   run_concurrency_test   s   

	 
$( ">*,

r   c                     sf  t  } tjddd td td td t| I d H }tdd | D }td| dt| d	 t	d
dd |
 D |d |sRtd d S td t| j}| I d H }t| j|d}t||dtI d H }t	d| td t| jd}t||dtI d H }t	d| t||dtI d H }	t	d|	 t||dtI d H }
t	d|
 t| dd |
 D |d |d d|	d |
d dd}t	d| td td td td t|  td!|d d"  d# td$|d d"  d# td%|	d d&  d'|	d d(  d) td*|
d d&  d'|
d d(  d) d S )+NTr/   ru   z7PHASE 1: Downloading canary videos for all 12 languagesc                 s   s    | ]}t |V  qd S r   r`   r   rA   rA   rB   r   ?  r   zmain.<locals>.<genexpr>z

Prepared z segments across z
 languagesphase1_preparationc                 S      i | ]	\}}|t |qS rA   r   rI   kvrA   rA   rB   
<dictcomp>B      zmain.<locals>.<dictcomp>)	languagesr   z No segments available. Aborting.z'

Setting up AI Studio with V2 cache...)api_keycached_content_nameaistudiophase2a_determinism_aistudioz

Setting up OpenRouter...)r  
openrouterphase2b_determinism_openrouterphase3_concurrency_aistudiophase4_concurrency_openrouterc                 S   r   rA   r   r   rA   rA   rB   r   e  r   r   )r  r  )languages_testedsegments_per_languagedeterminismconcurrencycombined_summaryzG
======================================================================zCANARY TEST COMPLETEz  Languages: z  AI Studio determinism: r   %z  OpenRouter determinism: z  AI Studio concurrency: r   rx   r   z OKz  OpenRouter concurrency: )r   r7   r8   r=   r>   rp   r   valuesr`   rC   rU   rb   r   
gemini_keyensure_cacher
   r   DETERMINISM_RUNSr   openrouter_api_keyr   CONCURRENCY_TARGETr   keys)rD   rq   r   cm
cache_namer  det_aistudior  det_openrouterconc_aistudioconc_openroutercombinedrA   rA   rB   main5  s`   















(,r  __main__)?__doc__builtins@py_builtins_pytest.assertion.rewrite	assertionrewrite
@pytest_arr   r:   loggingossysr   collectionsr   pathlibr   r?   insertr<   __file__resolveparentenviron
setdefaultdotenvr   
src.configr   r   src.audio_polishr	   src.providers.aistudior
   src.providers.openrouterr   src.providers.baser   r   src.cache_managerr   src.r2_clientr   basicConfigINFO	getLoggerr=   r7   rV   rT   r[   r  r  r   rC   r   rp   r   r   r   r  __name__r   rA   rA   rA   rB   <module>   s    "
3

q

X
F