o
    i                     @   sz   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZ G dd dZdS )    N)
TableGroupColumn)Tree)grapheme_patternc                   @   s   e Zd ZdZdZdZddddded	dd
geddgiZed defddZ	defddZ
dejeddf fddZed d!ddZed"dedd fddZed"d!ddZdd ZdS )#ProfilezG
    An Orthography Profile as specified by Moran and Cysouw 2018.
    GraphemeNULLtables	Tutf-8)	delimiterheaderencodingstring)namedatatyperequired)columns
primaryKey)dialecttableSchemaNreturnc                 C   s(   t  | j}t|p
d|d d d< |S )N r	   r   url)copyMDstr)clsfnamemd r    D/home/ubuntu/.local/lib/python3.10/site-packages/segments/profile.pydefault_metadata,   s   zProfile.default_metadataspecsc                    s   t   _t  _|dd _|dd _| _t	
t}t|D ]F\}} j|vr0td jr> fdd| D }| j}|sJtd j|  _| jvr^| j|< q#|d|d	 | q#tt j  _dS )
a  

        Parameters
        ----------
        specs : list of dict
            A list of grapheme specifications.
        kw :
            The following keyword arguments are recognized:
            - fname: Path of the profile or profile metadata.
            - form: Unicode normalization to apply to the data in the profile before use.
            - remaining keyword arguments are assigned as dict to `Profile.metadata`.
        r   Nformzinvalid grapheme specificationc                    s6   i | ]\}}t  j||d u rd nt  j|qS N)unicodedata	normalizer$   .0kvselfr    r!   
<dictcomp>K   s
    z$Profile.__init__.<locals>.<dictcomp>zGrapheme must not be emptyz+line {0}:duplicate grapheme in profile: {1}   )collectionsOrderedDict	graphemessetcolumn_labelspopr   r$   metadatalogging	getLogger__name__	enumerateGRAPHEME_COL
ValueErroritemsunionkeyswarningformatr   listtree)r-   r#   kwlogispecgraphemer    r,   r!   __init__2   s.   




zProfile.__init__c                 c   sX    | j  D ]#\}}| j|i}|dd | jD  |dd | D  |V  qd S )Nc                 S   s   i | ]}|d qS r%   r    )r)   r*   r    r    r!   r.   a   s    z%Profile.iteritems.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r    r    r(   r    r    r!   r.   b   s    )r2   r=   r;   updater4   )r-   rH   rG   resr    r    r!   	iteritems^   s   
zProfile.iteritemsc                    s   z	t |}d}W n tjjy   t  |}|}Y nw t|jdkr*t	d|j
}|jt||d t # td   fdd|jd j|d	D i |}W d   |S 1 saw   Y  |S )
zk
        Read an orthography profile from a metadata file or a default tab-separated profile file.
        N   z2profile description must contain exactly one table)r   r$   ignorec                    s"   g | ]} fd d|  D qS )c                    s.   i | ]\}}|| j kr| jkrd n|qS r%   )r;   r   r(   r   r    r!   r.   w   s    "z0Profile.from_file.<locals>.<listcomp>.<dictcomp>)r=   )r)   drO   r    r!   
<listcomp>w   s
    

z%Profile.from_file.<locals>.<listcomp>r   r   )r   	from_filejsondecoderJSONDecodeError	fromvaluer"   lenr	   r<   common_propsrJ   pathlibPathwarningscatch_warningssimplefilter	iterdicts)r   r   r$   tgopfnamer6   rK   r    rO   r!   rS   e   s0   




zProfile.from_filemappingtextc                    s0   t t|} fdd| D } | S )z
        Create a Profile instance from the Unicode graphemes found in `text`.

        Parameters
        ----------
        text
        mapping

        Returns
        -------
        A Profile instance.

        c                    s.   g | ]\}}t  j|fd |f|fgqS )	frequency)r0   r1   r;   )r)   rH   rd   r   rb   r    r!   rQ      s    z%Profile.from_text.<locals>.<listcomp>)r0   Counterr   findallmost_common)r   rc   rb   r2   r#   r    re   r!   	from_text}   s
   zProfile.from_textc                 C   sR   t |jdd}| }| jd||dW  d    S 1 s"w   Y  d S )Nr   )r    rb   )rZ   r[   open	readlinesri   join)r   r   rb   fplinesr    r    r!   from_textfile   s   $zProfile.from_textfilec                 C   sh   t |  }| jD ]}|| jkr"|jd jjt	|| j
d q
|jd j|  ddd S )z]
        A Profile is represented as tab-separated lines of grapheme specifications.
        r   )r   nullNrR   utf8)r   rW   r"   r4   r;   r	   r   r   appendr   r   writerL   decodestrip)r-   r`   colr    r    r!   __str__   s   

"zProfile.__str__r%   )r   r   rk   )r9   
__module____qualname____doc__r;   r   r   classmethoddictr"   rI   typing	GeneratorrL   rS   r   ri   rq   ry   r    r    r    r!   r      s<    ,r   )r   r   r7   rZ   r\   r0   r&   json.decoderrT   csvwr   r   segments.treer   segments.utilr   r   r    r    r    r!   <module>   s    