o
    Ni'                     @   s   d dl Z d dlm  mZ d dlmZ d dlmZ dd Z	dd Z
dd	 Zd
d Zdd Zdd Zdd ZdZdd Zdd Zdd ZedkrLe  dS dS )    N)data_filenameparse_registryc                 C   sZ   t d}t | d|  d}tt|dd}| dkr%|d d d	 }|S |d |  }|S )
Nz*cldr-json/cldr-json/cldr-core/supplemental/z.jsonutf-8encodingaliasessupplementalmetadataalias)r   jsonloadopen)datanamecldr_supp_pathfilenamefulldatadata r   H/home/ubuntu/.local/lib/python3.10/site-packages/langcodes/build_data.pyread_cldr_supplemental   s   r   c                  C   8   i } t  D ]}|d dkrd|v r|d | |d < q| S )NTypelanguagezSuppress-ScriptSubtagr   scriptsentryr   r   r   #read_iana_registry_suppress_scripts      
r   c                  C   s0   t  } t D ]}|d dkr| |d  q| S )Nr   scriptr   )setr   addr   r   r   r   read_iana_registry_scripts   s   
r$   c                  C   r   )Nr   r   Macrolanguager   r   )macrosr   r   r   r   !read_iana_registry_macrolanguages"   r    r'   c                  C   s^   i } t  D ]'}|d dkrd|v r|d | |d < qd|v r,d|v r,|d | |d  < q| S )Nr   r   zPreferred-Valuer   Tag)r   lower)replacementsr   r   r   r   read_iana_registry_replacements*   s   
r+   c                 C   sR   t | d| d t|D ]}|| }t d|d|d| d qt d| d d S )N = {file    z: ,})printsorted)outfilenamedkeyvaluer   r   r   write_python_dict6   s
   r9   c                 C   sH   t | d| d tt|D ]}t d|d| d qt d| d d S )Nr,   r-   r/   r0   r1   )r2   r3   r"   )r4   r5   sr7   r   r   r   write_python_set>   s   r;   z*# This file is generated by build_data.py.c               	   C   s   g } dD ]W}t d| d}tt| }|d}|D ]<}|j  D ]2}d|v rT|d dks5J |d d }|d }|d }	| d	| d
|	 d}
| 	|
 q'| 	| q'qqd
| }d| dS )N)r   regionr!   variantzcldr/common/validity/z.xmlz./idValidity/id~[-]|z^(z)$)r   ET
fromstringr   readfindalltextstripsplitappendjoin)validity_optionscodetypevalidity_pathrootmatchesmatchitemprefixrange_start	range_endoptionoptionsr   r   r   read_validity_regexH   s&   

r[   c                  C   s  t d} tt|  }|d}i }|D ]p}|j}|d dd }|dk r|ddkr9|d |d	 fg}n|d |d	 f|d	 |d fg}|D ]:\}}	|	|i }
t
|d
 |
|	< |dkse|	dkr|dkrkd}|	dkrqd}	||	kr|	|i }
t
|d
 d |
|	< qKq|S )Nz)cldr/common/supplemental/languageInfo.xmlzE./languageMatching/languageMatches[@type="written_new"]/languageMatchdesired_      onewaytrue	supporteddistanceshsr)r   rF   rG   r   rH   rI   attribcountget
setdefaultint)language_info_pathrR   rS   tag_distancesrT   attribsn_partspairsr\   rb   desired_distancer   r   r   read_language_distances]   s8   rq   c                  C   sP  t  } t }t }t }t }td}td}i }ddddddd	}i }	i }
d
D ]}|| }|dkr=|||< d|| d< ni ||< | D ]e\}}| }|d  d }|d dkr`||
|< qE|dkrgd}n|dkrnd}n|dkrud}n|dkr{d}||| |< |dkr|d dkr||v rt	dj
|||| d|||< qE|d dkr||	|< qEq(t }tddd d!g}tt|d" td#|d" t|d$|  t|d%|d  t|d&| t|d'|	 t|d(|d)  t|d*| t|d+|d,  t|d-| t|d.|
 t|d/| t|d0| td1|d2|d" W d    d S 1 s!w   Y  d S )3Nr	   likelySubtagstglindhebyidjavhbs)tliniwjijwrd   )languageAliasscriptAliasterritoryAliasr~   undrR   _replacementr   _reasonmacrolanguagenornomolmotwitwbihbhoverlongzP{code!r} is an alpha3 for {replacement!r}, which already has an alpha3: {orig!r})codereplacementorigbibliographiczdata_dicts.pywr   r   r-   z
import re
DEFAULT_SCRIPTSLANGUAGE_REPLACEMENTSLANGUAGE_ALPHA3LANGUAGE_ALPHA3_BIBLIOGRAPHICSCRIPT_REPLACEMENTSr   ALL_SCRIPTSTERRITORY_REPLACEMENTSr   MACROLANGUAGESNORMALIZED_MACROLANGUAGESLIKELY_SUBTAGSLANGUAGE_DISTANCESzVALIDITY = re.compile())r   r$   r'   r+   rq   r   itemsr)   rL   
ValueErrorformatr[   r   r2   GENERATED_HEADERr9   r;   )lang_scriptsall_scriptsmacrolanguagesiana_replacementslanguage_distances
alias_datalikely_subtagsr*   alpha3_mappingalpha3_biblionorm_macrolanguages
alias_typer	   r   r8   r   validity_regexr4   r   r   r   
build_data   s   
	
-

$r   __main__)r   xml.etree.ElementTreeetreeElementTreerF   langcodes.utilr   langcodes.registry_parserr   r   r   r$   r'   r+   r9   r;   r   r[   rq   r   __name__r   r   r   r   <module>   s$    .f
