o
    ôyÇig  ã                   @   s  d Z ddlZddlZddlZddlZddlmZmZ ddlmZm	Z	 ddl
mZ ddlZddlmZmZ dZdZd	Zd
ZdZdhZe d¡ZedƒZh d£ZddhZdddddddddœZi dd“dd“dd“dd“dd “d!d"“d#d$“d%d&“d'd(“d)d*“d+d,“d-d.“d/d0“d1d2“d3d4“d5d6“d7d8“d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHœ¥Zi dId“dJd“dKd“dLd“dMd “dNd"“dOd$“dPd&“dQd(“dRd*“dSd,“dTd.“dUd0“dVd2“dWd4“dXd6“dYd8“d9d:d;d<dDdEdEdFdGdZœ	¥Zd[e d\e fd]d^„Z!d…d[e d`e d\e fdadb„Z"d…d[e d`e d\e fdcdd„Z#dee dfe fdgdh„Z$d…d[e d`e d\e fdidj„Z%dke dle d\e&fdmdn„Z'dke dle doe dpe dqe dre dse&d\e(fdtdu„Z)dve*d\e*fdwdx„Z+dye dze d{e fd|d}„Z,e-d~kr‚g d¢Z.d€Z/dZ0e1d‚ƒ e.D ]\Z2Z3e/› dƒe3› Z4e0› dƒe2› Z5e,e4e2e5ƒ qbe1d„ƒ dS dS )†a?  
Generate benchmark_outputs conforming to BENCHMARK_SCHEMA.md v1.

Reads raw predictions from benchmark_results/ and produces:
  - metrics.json       (4 metric tiers + normalization_delta + meta)
  - sample_analysis.json  (per-sample with flags)
  - error_analysis.json   (top subs/ins/del + error buckets + diagnosis)
é    N)ÚCounterÚdefaultdict)ÚdatetimeÚtimezone)ÚPath)ÚcerÚwerÚv1zNVIDIA H200 80GBz	qwen3-asrzQwen3-ASR-1.7Bz"BayAreaBoys/indic-asr-benchmark-6kÚenglishz([\u200b-\u200f\u202a-\u202e\ufeff\u00ad]z !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~>   õ   à¥¤õ   à¥¥õ   à¥°r   r   ú'ú"ú-z...)u   â€˜u   â€™u   â€œu   â€u   â€“u   â€”u   â€•u   â€¦ÚzeroÚ0ÚoneÚ1ÚtwoÚ2ÚthreeÚ3ÚfourÚ4ÚfiveÚ5ÚsixÚ6ÚsevenÚ7ÚeightÚ8ÚnineÚ9ÚtenÚ10ÚelevenÚ11ÚtwelveÚ12ÚthirteenÚ13ÚfourteenÚ14ÚfifteenÚ15ÚsixteenÚ16Ú17Ú18Ú19Ú20Ú30Ú40Ú50Ú60Ú70Ú80Ú90Ú100Ú1000Ú100000Ú10000000)Ú	seventeenÚeighteenÚnineteenÚtwentyÚthirtyÚfortyÚfiftyÚsixtyÚseventyÚeightyÚninetyÚhundredÚthousandÚlakhÚcroreu   à¤¶à¥‚à¤¨à¥à¤¯u   à¤à¤•u   à¤¦à¥‹u	   à¤¤à¥€à¤¨u	   à¤šà¤¾à¤°u   à¤ªà¤¾à¤‚à¤šu   à¤›à¤¹u	   à¤¸à¤¾à¤¤u   à¤†à¤ u   à¤¨à¥Œu   à¤¦à¤¸u   à¤—à¥à¤¯à¤¾à¤°à¤¹u   à¤¬à¤¾à¤°à¤¹u   à¤¤à¥‡à¤°à¤¹u   à¤šà¥Œà¤¦à¤¹u   à¤ªà¤‚à¤¦à¥à¤°à¤¹u   à¤¸à¥‹à¤²à¤¹)	u   à¤¸à¤¤à¥à¤°à¤¹u   à¤…à¤ à¤¾à¤°à¤¹u   à¤‰à¤¨à¥à¤¨à¥€à¤¸u	   à¤¬à¥€à¤¸u   à¤¸à¥Œu   à¤¹à¤œà¤¼à¤¾à¤°u   à¤¹à¤œà¤¾à¤°u	   à¤²à¤¾à¤–u   à¤•à¤°à¥‹à¤¡à¤¼ÚtextÚreturnc                 C   s   t  d| ¡} |  ¡ } | S )z Tier 1: NFC unicode + trim only.ÚNFC)ÚunicodedataÚ	normalizeÚstrip)rQ   © rW   ú2/home/ubuntu/training/./generate_schema_outputs.pyÚnorm_rawP   s   rY   Ú Úlanguagec                 C   s¬   t  d| ¡} t d| ¡} t dd| ¡ ¡ } t ¡ D ]
\}}|  ||¡} q|  dd¡} d 	dd„ | D ƒ¡} d 	d	d„ | D ƒ¡} |t
v rG|  ¡ } n|  ¡ } t dd| ¡ ¡ } | S )
ziTier 2 (wer_norm): NFKC + strip ZW + normalize whitespace + standardize punct + remove punct + case fold.ÚNFKCrZ   z\s+ú r   r   c                 s   s    | ]	}|t vr|V  qd S ©N)Ú
BASE_PUNCT©Ú.0ÚcrW   rW   rX   Ú	<genexpr>h   s   € z norm_standard.<locals>.<genexpr>c                 s   s$    | ]}|t vr|tvr|V  qd S r^   )ÚINDIC_PUNCTÚINDIC_REMOVE_PUNCTr`   rW   rW   rX   rc   j   s   €" )rT   rU   ÚZW_CHARSÚsubÚrerV   Ú	QUOTE_MAPÚitemsÚreplaceÚjoinÚLATIN_SCRIPT_LANGSÚlower)rQ   r[   ÚoldÚnewrW   rW   rX   Únorm_standardW   s   
rq   c                 C   s   t | |ƒ} |  dd¡} | S )z0Tier 4 (MER): norm_standard + remove ALL spaces.r]   rZ   )rq   rk   )rQ   r[   rW   rW   rX   Únorm_meru   s   
rr   Úref_normÚhyp_normc              	      s  |   ¡ }|sdS |  dd¡}| dd¡}t|ƒ}||kr d|fS g }t|ƒD ]\}}|D ]}	| |¡ q,q&t|ƒ}
t|ƒ‰ ‡ fdd„t|
d ƒD ƒ}t|
d ƒD ]}||| d< qPtˆ d ƒD ]}||d |< q_td|
d ƒD ]G}tdˆ d ƒD ]=}||d  ||d  kr•||d  |d  || |< qxdt||d  |d  ||d  | || |d  ƒ || |< qxqotƒ }|
ˆ }}|dksÈ|dkr‡|dkrõ|dkrõ||d  ||d  krõ|| | ||d  |d  krõ|d8 }|d8 }nŠ|dkr$|dkr$|| | ||d  |d  d kr$| ||d  ¡ |d8 }|d8 }n[|dkrH|| | ||d  | d krH| ||d  ¡ |d8 }n7|dkr~|| | || |d  d kr~|dkrm| ||d  ¡ n||
k ry| || ¡ |d8 }nn|dksÈ|dksÈt|ƒ|fS )zn
    Word error rate after space-insensitive character alignment.
    Returns (error_words, total_words).
    ©r   r   r]   rZ   r   c                    s   g | ]	}d gˆ d  ‘qS )r   é   rW   )ra   Ú_©ÚmrW   rX   Ú
<listcomp>˜   s    z)space_norm_wer_sample.<locals>.<listcomp>rv   )	Úsplitrk   ÚlenÚ	enumerateÚappendÚrangeÚminÚsetÚadd)rs   rt   Ú	ref_wordsÚref_nospaceÚhyp_nospaceÚtotal_wordsÚchar_to_wordÚword_idxÚwordrw   ÚnÚdpÚiÚjÚtouchedrW   rx   rX   Úspace_norm_wer_sample~   sX   ÿBü
H:$,,


òr   c                 C   s¾   t | |ƒ} t dd| ¡} t dd| ¡} |dkr8|  ¡ }g }|D ]}|tv r-| t| ¡ q| |¡ qd |¡} |dkr]|  ¡ }g }|D ]}|tv rR| t| ¡ qD| |¡ qDd |¡} | S )z?Tier 3 (wer_numcanon): norm_standard + number canonicalization.z	(\d),(\d)z\1\2r
   r]   Úhindi)rq   rh   rg   r{   ÚEN_NUMBER_WORDSr~   rl   ÚHI_NUMBER_WORDS)rQ   r[   ÚwordsÚoutÚwrW   rW   rX   Únorm_numcanonº   s(   


r–   ÚrefÚhypc                 C   s8   |   ¡ s|  ¡ s
dS dS |  ¡ sdS tt| |ƒd dƒS )z5Compute WER for a single sample. Returns 0-100 float.ç        ç      Y@éd   é   )rV   Úroundr   )r—   r˜   rW   rW   rX   Ú
sample_werÙ   s
   rž   Úref_nÚhyp_nÚdetected_langÚexpected_langÚwer_norm_valc                 C   sž  g }| |kr|  d¡ ||kr|  d¡ | ¡ s|  d¡ | |kr*||kr*|  d¡ t d|¡}t d|¡}	||	krC|s>|	rC|  d¡ | ¡ r³|  ¡ r³ttdƒrYtdd	„ | D ƒƒntƒ }
tt d
| ¡ƒ}tt d
|¡ƒ}tt d| ¡ƒ}tt d|¡ƒ}tt d| ¡ƒ}tt d|¡ƒ}tt d| ¡ƒ}tt d|¡ƒ}|r¢|s¢|s®|r¨|s¨|s®|r³|s³|r³|  d¡ |rÄ|rÄ| 	¡ | 	¡ krÄ|  d¡ |dkrÍ|  d¡ |S )NÚexact_matchÚexact_match_normÚempty_hypothesisÚpunctuation_only_diffz\d+Únumeric_mismatchÚscriptc                 s   s*    | ]}|  ¡ s| ¡ rt |¡V  qd S r^   )ÚisspaceÚisalpharT   r©   r`   rW   rW   rX   rc   ö   s   €( zdetect_flags.<locals>.<genexpr>z[\u0900-\u097F]z[\u0980-\u09FF]z[\u0A80-\u0AFF]z[\u0A00-\u0A7F]Úscript_mismatchÚlang_confusionéP   Úhigh_wer)
r~   rV   rh   ÚfindallÚhasattrrT   r   ÚboolÚsearchrn   )r—   r˜   rŸ   r    r¡   r¢   r£   ÚflagsÚ
ref_digitsÚ
hyp_digitsÚref_scriptsÚref_devanagariÚhyp_devanagariÚref_bengaliÚhyp_bengaliÚref_gujaratiÚhyp_gujaratiÚref_gurmukhiÚhyp_gurmukhirW   rW   rX   Údetect_flagsä   sV   




"ÿÿÿþþþ


rÀ   Úsamples_by_langc                  C   s¾  i }t |  ¡ ƒD ]B\}}tƒ }tƒ }tƒ }d}d}d}	d}
d}d}g }g }|D ]À}|d  ¡ }|d  ¡ }| dg ¡}d|v rD|d7 }d|v rL|d7 }d|v rT|d7 }d	|v r\|d7 }| |d
 | dd¡f¡ t|ƒ}t|ƒ}||  ¡ D ]\}}||  |7  < qw||  ¡ D ]\}}||  |7  < qŠtt|ƒt|ƒƒ}t|ƒD ]}|| || kr¼||| || f  d7  < q¤t	|d|… |d|… ƒD ]\}}||krç|dd…  
¡ sãtdd„ |D ƒƒrç|
d7 }
qÊq(|jdd„ d dd„ |dd… D ƒ}dd„ |dd… D ƒ}dd„ |D ƒdd… }dd„ |D ƒdd… }dd„ | d¡D ƒdd„ | d¡D ƒdd„ | d¡D ƒ|||	|
||dœ||||dœd œ||< qt |  ¡ ƒ}d!d"d#d$g g d%œ|d&< |S )'z<Compute top substitutions/insertions/deletions per language.r   rs   rt   r´   r¦   rv   r¨   r§   r¬   ÚidÚwer_normrš   Nc                 s   s    | ]}|  ¡ V  qd S r^   )Úisupperr`   rW   rW   rX   rc   G  s   € z)compute_error_analysis.<locals>.<genexpr>c                 S   s   | d S )Nrv   rW   )ÚxrW   rW   rX   Ú<lambda>K  s    z(compute_error_analysis.<locals>.<lambda>©Úkeyc                 S   ó   g | ]\}}|‘qS rW   rW   ©ra   Úsidrw   rW   rW   rX   rz   L  ó    z*compute_error_analysis.<locals>.<listcomp>é   c                 S   rÉ   rW   rW   rÊ   rW   rW   rX   rz   M  rÌ   éýÿÿÿc                 S   ó$   g | ]}d |  dg ¡v r|d ‘qS )r¨   r´   rÂ   ©Úget©ra   ÚsrW   rW   rX   rz   N  ó   $ c                 S   rÏ   )Úentity_mismatchr´   rÂ   rÐ   rÒ   rW   rW   rX   rz   O  rÔ   c                 S   s    g | ]\\}}}|||d œ‘qS ))r—   r˜   ÚcountrW   )ra   ÚrÚhrb   rW   rW   rX   rz   R  s    

ÿÿé   c                 S   ó   g | ]	\}}||d œ‘qS ©)r‰   rÖ   rW   ©ra   r•   rb   rW   rW   rX   rz   V  ó    ÿÿc                 S   rÚ   rÛ   rW   rÜ   rW   rW   rX   rz   Z  rÝ   )Únumeric_mismatch_countÚpunctuation_only_countÚspacing_tokenization_countÚentity_mismatch_countÚscript_confusion_countÚempty_hypothesis_count)Úworst_samplesÚbest_samplesÚnumeric_mismatch_samplesÚentity_mismatch_samples)Útop_substitutionsÚtop_insertionsÚtop_deletionsÚerror_bucketsÚexamplesúrecognition-limitedÚrecognitionÚmoderateÚlow)Úmodel_diagnosisÚprimary_error_sourceÚnumeric_verbalization_impactÚformatting_impactÚworst_languagesÚbest_languagesÚ__summary__)Úsortedrj   r   r{   rÑ   r~   r€   r|   r   ÚziprÄ   ÚanyÚsortÚmost_commonÚkeys) rÁ   ÚanalysisÚlangÚsamplesÚsubsÚ
insertionsÚ	deletionsrÞ   Úpunct_only_countÚspacing_countÚentity_countrâ   Úempty_countÚ
wer_scoresÚ
sample_idsrÓ   rƒ   Ú	hyp_wordsr´   Úref_counterÚhyp_counterr‰   rÖ   Úmin_lenrŒ   ÚrwÚhwÚbestÚworstÚnumeric_examplesÚentity_examplesÚ	all_langsrW   rW   rX   Úcompute_error_analysis  sœ   €"*€þþþþú	üë
ú	r  Úsrc_dirÚ	ckpt_nameÚout_dirc           H         s‚	  t | ƒd }t | ƒd }t|dd}t |¡}W d  ƒ n1 s"w   Y  t|ƒ}t |¡ di ¡}W d  ƒ n1 s?w   Y  t |ƒ}|jddd g }	|D ]Ï}
|
d	 }|
d
 }|
 dd¡}t|ƒ}t|ƒ}t||ƒ}t||ƒ}t||ƒ}t||ƒ}t	||ƒ}t	||ƒ}|
 dd¡}| 
¡ }t||ƒ}t||ƒ}|s¦|s£dnd}n|s«d}n
tt||ƒd dƒ}t||ƒ\}}|dkrÉt|| d dƒnd}t|||||||ƒ}||krâ|dk râ| d¡ |	 i d|
d “d	|“d
|“d|“d|“d|“d|“d|“d|“d|“d|“d|“d|“d|“d|“d|“d |“|||d!œ¥¡ qSttƒ}|	D ]} || d	   | ¡ q)i ‰ g g }!}"g g }#}$g g }%}&g g }'}(d"\})}*t| ¡ ƒD ]}|| }+t|+ƒ},d#d$„ |+D ƒ}-d%d$„ |+D ƒ}.d&d$„ |+D ƒ}/d'd$„ |+D ƒ}0d(d$„ |+D ƒ}1d)d$„ |+D ƒ}2d*d$„ |+D ƒ}3d+d$„ |+D ƒ}4|-r¦tt|-|.ƒd dƒnd}5|/rµtt|/|0ƒd dƒnd}6|1rÄtt|1|2ƒd dƒnd}7|3rÓtt|3|4ƒd dƒnd}8|/râtt|/|0ƒd dƒnd}9td,d-„ |+D ƒƒ}:td.d-„ |+D ƒƒ};td/d-„ |+D ƒƒ}<|<dkrt|;|< d dƒnd}=|,|5|6|7|=|8|9|:t|5|6 dƒt|6|7 dƒt|6|= dƒt|6|8 dƒd0œd1œ	ˆ |< |! |-¡ |" |.¡ |# |/¡ |$ |0¡ |% |1¡ |& |2¡ |' |3¡ |( |4¡ |)|;7 })|*|<7 }*qVt|!ƒtt|!|"ƒd dƒtt|#|$ƒd dƒtt|%|&ƒd dƒ|*dkr”t|)|* d dƒndtt|'|(ƒd dƒtt|#|$ƒd dƒd2œˆ d3< td4d-„ ˆ D ƒƒ}>t|>ƒtt ‡ fd5d$„|>D ƒ¡dƒtt ‡ fd6d$„|>D ƒ¡dƒtt ‡ fd7d$„|>D ƒ¡dƒtt ‡ fd8d$„|>D ƒ¡dƒtt ‡ fd9d$„|>D ƒ¡dƒtt ‡ fd:d$„|>D ƒ¡dƒd;œˆ d<< ddl}?|?j d=¡}@| d>d¡|ttt | d?d@¡| dAd¡| dBdC¡| dDd¡t! "t#j$¡ %dE¡t&dFt'|@dGœˆ d< g }A|	D ]X} |A i d| d “d	| d	 “d
| d
 “d| d “d| d “d| d “d| d “d| d “d| d “d| d “d| d “d| d “d| d “d| d “dH| dH “dI| dI “¡ qMttƒ}B|AD ]} |B| d	   | ¡ q¬t(|Bƒ}Ct|>‡ fdJdK„dL}D|DddM… |CdN dO< |DdPd… |CdN dQ< t ‡ fdRd$„|>D ƒ¡}Et ‡ fdSd$„|>D ƒ¡}F|EdTkrdU|CdN dV< dW|CdN dX< n2|FdYkrdZ|CdN dV< dW|CdN d[< n d\|CdN dV< |EdYk r$d]nd^|CdN dX< |Fd_k r1d]nd^|CdN d[< t|d d`dd}tj)ˆ |ddadb W d  ƒ n	1 sUw   Y  t|dc d`dd}tj)|A|ddadb W d  ƒ n	1 sxw   Y  t|dd d`dd}tj)|C|ddadb W d  ƒ n	1 s›w   Y  ˆ d3 }Gt*de|› df|Gd › dg|Gd › dh|Gdi › dj|Gdk › 
ƒ ˆ S )lzKProcess one checkpoint: read predictions, compute all tiers, write 3 files.zpredictions.jsonzmetrics.jsonzutf-8)ÚencodingNÚ__meta__T)ÚparentsÚexist_okr[   Ú	referenceÚ
hypothesisrZ   Údetected_languager™   rš   r›   rœ   r   Úspacing_errorrÂ   Úref_rawÚhyp_rawrs   rt   Úref_numcanonÚhyp_numcanonÚref_merÚhyp_merÚwer_rawrÃ   Úspace_norm_werÚsnw_err)Ú	snw_totalÚmerr´   ru   c                 S   ó   g | ]
}|d  r|d ‘qS )rs   r!  rW   rÒ   rW   rW   rX   rz   Ü  ó    z&process_checkpoint.<locals>.<listcomp>c                 S   r,  )rs   r"  rW   rÒ   rW   rW   rX   rz   Ý  r-  c                 S   s   g | ]
}|d  r|d  ‘qS )rs   rW   rÒ   rW   rW   rX   rz   Þ  r-  c                 S   r,  )rs   rt   rW   rÒ   rW   rW   rX   rz   ß  r-  c                 S   r,  )rs   r#  rW   rÒ   rW   rW   rX   rz   à  r-  c                 S   r,  )rs   r$  rW   rÒ   rW   rW   rX   rz   á  r-  c                 S   r,  )rs   r%  rW   rÒ   rW   rW   rX   rz   â  r-  c                 S   r,  )rs   r&  rW   rÒ   rW   rW   rX   rz   ã  r-  c                 s   s     | ]}d |d v rdV  qdS )r¦   r´   rv   NrW   rÒ   rW   rW   rX   rc   ë  ó   € z%process_checkpoint.<locals>.<genexpr>c                 s   ó     | ]}|d  r|d V  qdS )rs   r)  NrW   rÒ   rW   rW   rX   rc   î  r.  c                 s   r/  )rs   r*  NrW   rÒ   rW   rW   rX   rc   ï  r.  )Úraw_to_normÚnorm_to_numcanonÚnorm_to_space_normÚnorm_to_mer)	Ú	n_samplesr'  rÃ   Úwer_numcanonr(  r+  Úcer_normÚempty_hypothesesÚnormalization_delta)r4  r'  rÃ   r5  r(  r+  r6  Ú__overall__c                 s   s    | ]
}|  d ¡s|V  qdS )Ú__N)Ú
startswith©ra   ÚkrW   rW   rX   rc     s   € c                    ó   g | ]}ˆ | d  ‘qS )r'  rW   r<  ©ÚmetricsrW   rX   rz     ó    c                    r>  )rÃ   rW   r<  r?  rW   rX   rz     rA  c                    r>  )r5  rW   r<  r?  rW   rX   rz     rA  c                    r>  )r(  rW   r<  r?  rW   rX   rz     rA  c                    r>  )r+  rW   r<  r?  rW   rX   rz      rA  c                    r>  )r6  rW   r<  r?  rW   rX   rz   !  rA  )Ún_languagesr'  rÃ   r5  r(  r+  r6  Ú__macro_avg__ÚjiwerÚ
checkpointÚ
batch_sizeé@   Úinference_time_secÚtotal_audio_secg
×£pM´ã@Úrtfz%Y-%m-%dT%H:%M:%SZÚvllm)rE  Úcheckpoint_nameÚmodel_idÚ
model_typeÚdatasetrF  rH  rI  rJ  Ú	timestampÚgpuÚ	frameworkÚnormalization_versionÚjiwer_versionr+  r´   c                    s   ˆ |  d S )NrÃ   rW   )r=  r?  rW   rX   rÆ   U  s    z$process_checkpoint.<locals>.<lambda>rÇ   rÍ   r÷   rö   rÎ   rõ   c                    ó   g | ]
}ˆ | d  d ‘qS )r8  r0  rW   r<  r?  rW   rX   rz   Z  r-  c                    rU  )r8  r1  rW   r<  r?  rW   rX   rz   [  r-  é
   zformatting-limitedrñ   Úhighrô   é   znumeric-limitedró   rí   rð   rï   rv   r•   F)ÚindentÚensure_asciizsample_analysis.jsonzerror_analysis.jsonz  z
: wer_raw=z  wer_norm=z  wer_numcanon=r5  z  cer_norm=r6  )+r   ÚopenÚjsonÚloadrÑ   ÚmkdirrY   rq   r–   rr   Ú
capitalizerž   r   r   r   rÀ   r~   r   Úlistrø   rý   r|   r   ÚsumÚextendÚnpÚmeanÚimportlib.metadataÚmetadataÚversionÚMODEL_IDÚ
MODEL_TYPEÚDATASETr   Únowr   ÚutcÚstrftimeÚGPU_NAMEÚNORMALIZATION_VERSIONr  ÚdumpÚprint)Hr  r  r  Ú
preds_pathÚold_metrics_pathÚfÚpredsÚold_metaÚout_pathÚenrichedÚprÿ   r—   r˜   r!  r"  rŸ   r    Úref_ncÚhyp_ncÚref_mÚhyp_mÚdetectedÚexpected_lang_nameÚwer_raw_valr£   Úmer_valr)  r*  Úsnw_valr´   Úby_langrÓ   Úall_ref_rawÚall_hyp_rawÚ	all_ref_nÚ	all_hyp_nÚ
all_ref_ncÚ
all_hyp_ncÚ	all_ref_mÚ	all_hyp_mÚall_snw_errÚall_snw_totalr   rŠ   Úrefs_rawÚhyps_rawÚrefs_nÚhyps_nÚrefs_ncÚhyps_ncÚrefs_mÚhyps_mÚw_rawÚw_normÚw_ncÚw_merÚc_normÚemptyÚlang_snw_errÚlang_snw_totalÚw_snwÚ	lang_keysÚ	importlibÚ	jiwer_verÚsample_analysisÚsamples_for_errorsÚerror_analysisÚsorted_langsÚavg_raw_to_normÚavg_norm_to_ncÚovrW   r?  rX   Úprocess_checkpoint€  sÔ  ÿ
ÿ








ÿ
ÿþýüûúùø	÷
öõôóòñðïì



 ü
÷








ù

ù






òÿþýüûúùø	÷
öõôóòñ
ð

ÿÿÿÿÿr©  Ú__main__))z
ckpt-24000zckpt-24000-vllm)z
ckpt-72000zckpt-72000-vllm)zckpt-100000zckpt-100000-vllm)zckpt-170000zckpt-170000-vllm)zckpt-200000zckpt-200000-vllm)zckpt-250000zckpt-250000-vllm)zckpt-300000zckpt-300000-vllmz'/home/ubuntu/training/benchmark_resultsz1/home/ubuntu/training/benchmark_outputs/qwen3-asrz9Generating schema-v1 outputs for qwen3-asr checkpoints...ú/zH
Done. All outputs in /home/ubuntu/training/benchmark_outputs/qwen3-asr/)rZ   )6Ú__doc__r\  rh   ÚsysrT   Úcollectionsr   r   r   r   Úpathlibr   Únumpyrc  rD  r   r   ro  rn  rh  ri  rj  rm   Úcompilerf   r   r_   rd   re   ri   r‘   r’   ÚstrrY   rq   rr   r   r–   Úfloatrž   r`  rÀ   Údictr  r©  Ú__name__ÚCHECKPOINTSÚbase_resultsÚbase_outputrq  r  Úsrc_dir_namer  r  rW   rW   rW   rX   Ú<module>   s  	
ýÿÿÿÿÿþþþþþýýýýüüüøÿÿÿÿÿþþþþþýýýýüüüù	<ÿÿÿ
ÿ-o 
{
ì