o
    Ni(M                     @   s  d dl Z d dlZd dlm  mZ d dlZd dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ i ddhd	d
hddhddhddhddhddhddhddhddhddhddhdd hd!h d"d#d$hd%d&hd'd(hd)d*hiZd+d,d-d.d.d/d0d1d2d3dd4d5d6d7d8Zd^d:d;Zd^d<d=Zd>d? Zd@dA ZdBdC ZdDdE ZdFdG ZdHdI ZdJdK ZdLdM ZdNdO ZdPdQ ZdRdS Z dTdU Z!dVdW Z"dXdY Z#dZZ$d[d\ Z%e&d]kre%  dS dS )_    N)Path)defaultdictCounter)normalize_name)data_filenameparse_registryFM057ZA018003021005419019USgswzde-CHCDCGHaniHanbtlfililohiltzmzghznds-NLndscmnzhzfa-AF>   fagbzprsKoreHangmszsmlvltLVLTBreatannaisTagaloga
FilipineraDargwau   интерлингваzSamaritan HebrewzMongol (New Guinea)Romangz	Kalam-Taiu   ምዕራባዊ አፍሪካu   ምስራቃዊ አፍሪካu   ምዕራባዊ አውሮፓu   ምስራቃዊ አውሮፓ))gdbr)eur   )r3   r   )afdar)zaf-NAr5   )zaz-Cyrlia)ensmp)r7   mgt)r7   rmm)r7   taw)furlad)am011)r>   014)r>   155)r>   151Fc                    s  t dd |D  t fdd|D }t|dkr%|d}|d d S |D ]%}|tv rLt|}|| ||t| @ krL|rHtd| | |  S q'|r dkrt	t
}|D ]\}}	}
|
 kri||	 | qYtd|  t| D ]\}	}td|	d	| qwd
S )a  
    Given a name, and a number of possible values it could resolve to,
    find the single value it should resolve to, in the following way:

    - Apply the priority order
    - If names with the highest priority all agree, use that name
    - If there is disagreement that can be resolved by AMBIGUOUS_PREFERENCES,
      use that
    - Otherwise, don't resolve the name (and possibly show a debugging message
      when building the data)
    c                 S   s   g | ]}|d  qS )    .0valrD   rD   L/home/ubuntu/.local/lib/python3.10/site-packages/language_data/build_data.py
<listcomp>   s    z resolve_name.<locals>.<listcomp>c                    s    g | ]}|d   kr|d qS )rC      rD   rE   max_priorityrD   rH   rI           rJ   r   zResolved: {} -> {}z{}:z	{}: {} N)maxr   lenmost_commonAMBIGUOUS_PREFERENCESsetremoveprintformatr   listappendsorteditemsjoin)keyvalsdebug	val_count	unanimouspkeyothersvotesvoterrG   priovotersrD   rK   rH   resolve_name   s0   

rg   c                 C   s<   i }t |  D ]\}}t|||d}|d ur|||< q|S )Nr^   )rY   rZ   rg   )	name_dictr^   resolvedr\   r]   resolved_namerD   rD   rH   resolve_names   s   rl   c                 C   s:   t d| |}tt|dd}|d |  d | }|S )z@
    Read CLDR's names for things in a particular language.
    z9cldr-json/cldr-json/cldr-localenames-full/main/{}/{}.jsonutf-8encodingmainlocaleDisplayNames)r   rV   jsonloadopen)languagecategoryfilenamefulldatadatarD   rD   rH   read_cldr_names   s   
rz   c                 C   s   t | |}g }t| D ]F\}}| |ftv r"t| |f }|d u r"q||kr'qd}|dr3|dkr3qd|v rA|dd\}}d}t|t|krJq|| |||f q|S )N   z	-alt-menumandarinz-alt-rJ   )rz   rY   rZ   	OVERRIDESendswithsplitr   rX   )langcoderv   ry   
name_quadssubtagnamepriority_rD   rD   rH   read_cldr_name_file   s&   
r   c                  C   s   g } g }g }t  D ]R}d }|d dkr| }n|d dkr|}n|d dkr'|}|d ur[|d }d}d|v r7d}d	|ftv rK|d	|td	|f |f q	|d
 D ]}|d	|||f qOq	| ||fS )NTyperu   scriptregionSubtagrC   
Deprecatedr   r7   Description)r   r}   rX   )language_quadsscript_quadsterritory_quadsentrytargetr   r   descrD   rD   rH   read_iana_registry_names   s,   

r   c                  C   s8   i } t  D ]}|d dkrd|v r|d | |d < q| S )Nr   ru   Macrolanguager   r   )macrosr   rD   rD   rH   !read_iana_registry_macrolanguages  s   
r   c                  C   s^   i } t  D ]'}|d dkrd|v r|d | |d < qd|v r,d|v r,|d | |d  < q| S )Nr   ru   zPreferred-Valuer   Tag)r   lower)replacementsr   rD   rD   rH   read_iana_registry_replacements  s   
r   c                 C   sB   t | dd}g }|D ]}| dddg }|t| q
|S )Nrm   rn   ,r{   T)rt   rstripr   rX   tuple)rw   ry   quadslinequadrD   rD   rH   read_csv_names  s   r   c           	      C   s   t | dd}g }|D ]<}| d}|d }||||d df |d g}t|dkrF|d rF|d d}|D ]}||||d	f q:q
|S )
Nrm   rn   	r   rJ      z, )rt   r   r   rX   rP   )	rw   ru   ry   r   r   partscodenamesr   rD   rD   rH   read_wiktionary_names(  s   
r   c                 C   s   |D ]<\}}}}| dd }|di }||i }	||	fD ]}
|
t|g |||f q| |i }||vr>|||< qd S )N-r   und)r   
setdefaultr   rX   )	names_fwd	names_revr   name_languagereferentr   r   short_languagerev_allrev_languagerev_dictnames_for_referentrD   rD   rH   update_names7  s   r   c                 C   s*   t dd t|  D }|| d S )Nc                 s   s"    | ]\}}|| d fV  qdS )rm   N)encode)rF   r\   valuerD   rD   rH   	<genexpr>H  s    
zsave_trie.<locals>.<genexpr>)marisa_trie	BytesTrierY   rZ   save)mappingrw   trierD   rD   rH   	save_trieG  s   

r   c              	   C   sR   |  D ]"\}}tjtd| dd tt|ddtd| d|  d qd S )Nztrie/T)exist_okrh   z	/name_to_z.marisa)rZ   osmakedirsr   r   rl   )rv   r   ru   	lang_dictrD   rD   rH   save_reverse_name_tablesN  s   
r   c                  C   s0   t td} dd t|  D }dd |D S )Nz.cldr-json/cldr-json/cldr-localenames-full/mainc                 S   s(   g | ]}|j d kr|d  r|j qS )rootzlanguages.json)r   exists)rF   subpathrD   rD   rH   rI   Y  s
    z&get_name_languages.<locals>.<listcomp>c                 S   s,   g | ]}d |d   krdkrn n|qS )ar   zrD   )rF   ru   rD   rD   rH   rI   ^  s   , )r   r   rY   iterdir)cldr_main_path	languagesrD   rD   rH   get_name_languagesW  s
   
r   c               	   C   s  dd l } td}tt| }|d}tt}tt}|D ]}|j	d }t
|j	d }t
|j	d d }	|D ]}
|
j	}|d dd	}t
|d
dd }d|v r]t
|d d }nd|v rjt
|d d }n|	}|| }|| | }| | ddg}|d|i}|ddg}|dg}|ddg}|dg}t||gD ]}|t|  tt|7  < qt||||gD ]}|t|  tt|7  < qq9q ||fS )Nr   zsupplementalData.xmlz./territoryInfo/territorytype
populationliteracyPercentd   r   r   populationPercentwritingPercentru   r   	territory)	langcodesr   ET
fromstringrt   readfindallr   intattribfloatreplacegetmaximize_filter_attributesupdate_dictrS   strround)r   rw   r   territorieslanguage_populationlanguage_writing_populationr   t_codet_populationt_literacy_rateru   attrsl_codel_proportionwriting_propl_population	l_writing
written_lswritten_lst	spoken_ltspoken_l
written_lt	written_llangrD   rD   rH   get_population_dataa  sF   


&r   c                 C   sR   t | d| d t| D ]\}}t d|d|d| d qt d| d dS )za
    Write Python code that initializes a given dictionary, with one value on
    each line.
     = {file    z: r   }N)rU   rY   rZ   )outfiler   dr\   r   rD   rD   rH   write_python_dict  s   r   c                 C   sH   t | d| d tt|D ]}t d|d| d qt d| d d S )Nr   r   r   r   r   )rU   rY   rS   )r   r   sr\   rD   rD   rH   write_python_set  s   r  z*# This file is generated by build_data.py.c               	      s  i } i }i }i  t td}t | | t D ]:}t|d}t | | zt|d}t || W n	 ty:   Y nw zt|d}t || W q tyQ   Y qw t \}}	}
t | | t ||	 t ||
 ttdd}t | | t td}t | | td|  td	| td
|  fddt D }|dd |D 7 }t	dddd}t
t|d t|d| t
|d t|d  W d    n1 sw   Y  t \}}t	dddd}t
t|d t|d| t|d| W d    d S 1 sw   Y  d S )Nzoverride_language_names.csvr   scriptsr   zwiktionary/codes-en.csvr7   zextra_language_names.csvru   r   r   c                    s,   g | ]}d |vr| v r| | v r|qS )r   rD   )rF   r   r   rD   rH   rI     s
    zbuild_data.<locals>.<listcomp>c                 S   s    g | ]\}}}}||kr|qS rD   rD   )rF   lang1lang2r   rD   rD   rH   rI     rM   zname_data.pywrm   rn   r   LANGUAGES_WITH_NAME_DATACODE_TO_NAMESzpopulation_data.pyLANGUAGE_SPEAKING_POPULATIONLANGUAGE_WRITING_POPULATION)r   r   r   r   r   FileNotFoundErrorr   r   r   rt   rU   GENERATED_HEADERr  r   r   )language_names_revterritory_names_revscript_names_revoverride_language_datar   language_datascript_dataterritory_dataiana_languagesiana_scriptsiana_territorieswiktionary_dataextra_language_dataname_languagesr   r   r   rD   r  rH   
build_data  s`   









"r  __main__)F)'r   rr   xml.etree.ElementTreeetreeElementTreer   r   pathlibr   collectionsr   r   language_data.namesr   language_data.utilr   language_data.registry_parserr   rR   r}   rg   rl   rz   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  __name__rD   rD   rD   rH   <module>   s    /
!#%(+,4
%
*	%		
8C
