o
    i!5                  	   @   s  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlZddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddl m!Z! ddlm"Z"m#Z# ddl$m%Z% ddl&m'Z' e	eee(e)f ef  Z*ee)ee*ee(f f Z+ee)eee(e)f ee(e)f f f Z,ee)ee)eee(e)f ee(e)f f f f Z-dee! dee)ef fddZ.dd Z/G dd de'Z0de1dee1e1f fddZ2dd Z3dS )    N)Path)AnyCallableDictIterableListOptionalTupleUnion   )util)Errors)Language)Matcher)Scorer)IDS)DocSpan)normalize_token_attrsset_token_attrs)Example)SimpleFrozenListregistry)Vocab   )Pipeexamplesreturnc                 K   s   dd }i }| tj| dfi | | tj| dfi | | tj| dfd|i| | tj| dfd|i| | tj| dfi | |S )Nc                 S   s   t | |jS N)getattrkey)tokenattr r#   Q/home/ubuntu/.local/lib/python3.10/site-packages/spacy/pipeline/attributeruler.pymorph_key_getter   s   z/attribute_ruler_score.<locals>.morph_key_gettertagposmorphgetterlemma)updater   score_token_attrscore_token_attr_per_feat)r   kwargsr%   resultsr#   r#   r$   attribute_ruler_score   s$   r0   c                   C   s   t S r   )r0   r#   r#   r#   r$   make_attribute_ruler_scorer.   s   r1   c                   @   s  e Zd ZdZ	d6deddedededee	 d	d
f
ddZ
d7ddZd
d
d
d
ddee	g ee f  dee deee  dee dee d	d
fddZded	efddZdefddZdd Zdeeeeeef eeef f f d	d
fddZdeeeeeeeef eeef f f f d	d
fd d!Z	"d8dee d#ed$ed	d
fd%d&Zdee d	d
fd'd(Zed	e e fd)d*Z!e" fd+ee d	e#fd,d-Z$e" fd.e#d+ee d	d fd/d0Z%e" fd1ee&ef d+ee d	d
fd2d3Z'e" fd1ee&ef d+ee d	d fd4d5Z(d
S )9AttributeRulerzSet token-level attributes for tokens matched by Matcher patterns.
    Additionally supports importing patterns from tag maps and morph rules.

    DOCS: https://spacy.io/api/attributeruler
    attribute_rulerF)validatescorervocabnamer4   r5   r   Nc                C   s>   || _ || _t| j|d| _|| _g | _g | _g | _|| _dS )a  Create the AttributeRuler. After creation, you can add patterns
        with the `.initialize()` or `.add_patterns()` methods, or load patterns
        with `.from_bytes()` or `.from_disk()`. Loading patterns will remove
        any patterns you've added previously.

        vocab (Vocab): The vocab.
        name (str): The pipe name. Defaults to "attribute_ruler".
        scorer (Optional[Callable]): The scoring method. Defaults to
            Scorer.score_token_attr for the attributes "tag", "pos", "morph" and
            "lemma" and Scorer.score_token_attr_per_feat for the attribute
            "morph".

        RETURNS (AttributeRuler): The AttributeRuler component.

        DOCS: https://spacy.io/api/attributeruler#init
        r4   N)	r7   r6   r   matcherr4   attrs_attrs_unnormedindicesr5   )selfr6   r7   r4   r5   r#   r#   r$   __init__9   s   
zAttributeRuler.__init__c                 C   s(   t | j| jd| _g | _g | _g | _dS )zReset all patterns.r8   N)r   r6   r4   r9   r:   r;   r<   r=   r#   r#   r$   clearZ   s   
zAttributeRuler.clear)nlppatternstag_mapmorph_rulesget_examplesrA   rB   rC   rD   c                C   s:   |    |r| | |r| | |r| | dS dS )a  Initialize the attribute ruler by adding zero or more patterns.

        Rules can be specified as a sequence of dicts using the `patterns`
        keyword argument. You can also provide rules using the "tag map" or
        "morph rules" formats supported by spaCy prior to v3.
        N)r@   add_patternsload_from_tag_mapload_from_morph_rules)r=   rE   rA   rB   rC   rD   r#   r#   r$   
initializea   s   

zAttributeRuler.initializedocc              
   C   s\   |   }z| |}| || |W S  ty- } z|| j| |g|W  Y d}~S d}~ww )zApply the AttributeRuler to a Doc and set all attribute exceptions.

        doc (Doc): The document to process.
        RETURNS (Doc): The processed Doc.

        DOCS: https://spacy.io/api/attributeruler#call
        N)get_error_handlermatchset_annotations	Exceptionr7   )r=   rJ   error_handlermatcheser#   r#   r$   __call__x   s   
zAttributeRuler.__call__c                    s.    j |ddd} fdd|D }|  |S )NTF)allow_missingas_spansc                    s*   g | ]\}}}t  jj| |||fqS r#   )intr6   strings).0m_idsrQ   r?   r#   r$   
<listcomp>   s    z(AttributeRuler.match.<locals>.<listcomp>)r9   sort)r=   rJ   rP   r#   r?   r$   rL      s   
zAttributeRuler.matchc              
   C   s   |D ]C\}}}}t ||||d}| j| }| j| }	z||	 }
W n ty=   ttjj| j	|j
dd |D |	ddw t||	 | qdS )zModify the document in place)labelc                 S   s   g | ]}|j qS r#   )text)rW   tr#   r#   r$   rZ      s    z2AttributeRuler.set_annotations.<locals>.<listcomp>)rB   spanindexN)r   r:   r<   
IndexError
ValueErrorr   E1001formatr9   getr\   r   )r=   rJ   rP   attr_idmatch_idstartendr_   r:   r`   r!   r#   r#   r$   rM      s&   


zAttributeRuler.set_annotationsc                 C   s   |  D ];\}}d|ig}t|\}}d|vr'| jj|}| jj| |d< n| jj|d }| jj| |d< | |g| qdS )zLoad attribute ruler patterns from a tag map.

        tag_map (dict): The tag map that maps fine-grained tags to
            coarse-grained tags and morphological features.

        DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
        TAGMORPHN)items_split_morph_attrsr6   
morphologyaddrV   )r=   rC   r&   r:   patternmorph_attrsr(   r#   r#   r$   rG      s   

z AttributeRuler.load_from_tag_mapc                 C   s   |D ]I}|| D ]B}||dg}|| | }t |\}}d|v r2| jj|d }| jj| |d< n|rC| jj|}| jj| |d< | |g| qqdS )a+  Load attribute ruler patterns from morph rules.

        morph_rules (dict): The morph rules that map token text and
            fine-grained tags to coarse-grained tags, lemmas and morphological
            features.

        DOCS: https://spacy.io/api/attributeruler#load_from_morph_rules
        )ORTHrj   rk   N)rm   r6   rn   ro   rV   )r=   rD   r&   wordrp   r:   rq   r(   r#   r#   r$   rH      s   z$AttributeRuler.load_from_morph_rulesr   r:   r`   c                 C   sZ   t t| j}| j| jj|| | j| t	| j|}| j| | j
| dS )aB  Add Matcher patterns for tokens that should be modified with the
        provided attributes. The token at the specified index within the
        matched span will be assigned the attributes.

        patterns (Iterable[List[Dict]]): A list of Matcher patterns.
        attrs (Dict): The attributes to assign to the target token in the
            matched span.
        index (int): The index of the token in the matched span to modify. May
            be negative to index from the end of the span. Defaults to 0.

        DOCS: https://spacy.io/api/attributeruler#add
        N)strlenr:   r9   ro   r6   rV   r;   appendr   r<   )r=   rB   r:   r`   r    r#   r#   r$   ro      s   zAttributeRuler.addc                 C   s   |D ]
}| j di | qdS )ac  Add patterns from a list of pattern dicts with the keys as the
        arguments to AttributeRuler.add.
        patterns (Iterable[dict]): A list of pattern dicts with the keys
            as the arguments to AttributeRuler.add (patterns/attrs/index) to
            add as patterns.

        DOCS: https://spacy.io/api/attributeruler#add_patterns
        Nr#   )ro   )r=   rB   pr#   r#   r$   rF      s   	zAttributeRuler.add_patternsc                 C   s^   g }t t| jD ]#}i }| jt|d |d< | j| |d< | j| |d< || q	|S )zAll the added patterns.r   rB   r:   r`   )	rangeru   r:   r9   re   rt   r;   r<   rv   )r=   all_patternsirw   r#   r#   r$   rB      s   zAttributeRuler.patternsexcludec                    s2   i } fdd|d< fdd|d< t | S )zSerialize the AttributeRuler to a bytestring.

        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (bytes): The serialized object.

        DOCS: https://spacy.io/api/attributeruler#to_bytes
        c                      s   j j dS N)r{   )r6   to_bytesr#   r{   r=   r#   r$   <lambda>      z)AttributeRuler.to_bytes.<locals>.<lambda>r6   c                      s   t  jS r   )srslymsgpack_dumpsrB   r#   r?   r#   r$   r     s    rB   )r   r}   )r=   r{   	serializer#   r~   r$   r}     s   zAttributeRuler.to_bytes
bytes_datac                    2   fdd} fdd|d}t ||  S )a'  Load the AttributeRuler from a bytestring.

        bytes_data (bytes): The data to load.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        returns (AttributeRuler): The loaded object.

        DOCS: https://spacy.io/api/attributeruler#from_bytes
        c                         t|  d S r   )rF   r   msgpack_loadsbr?   r#   r$   load_patterns     z0AttributeRuler.from_bytes.<locals>.load_patternsc                       j j|  dS r|   )r6   
from_bytesr   r~   r#   r$   r   !      z+AttributeRuler.from_bytes.<locals>.<lambda>r6   rB   )r   r   )r=   r   r{   r   deserializer#   r~   r$   r        zAttributeRuler.from_bytespathc                    s.    fddfddd}t ||  dS )zSerialize the AttributeRuler to disk.

        path (Union[Path, str]): A path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.

        DOCS: https://spacy.io/api/attributeruler#to_disk
        c                    r   r|   )r6   to_diskrw   r~   r#   r$   r   2  r   z(AttributeRuler.to_disk.<locals>.<lambda>c                    s   t |  jS r   )r   write_msgpackrB   r   r?   r#   r$   r   3  r   r   N)r   r   )r=   r   r{   r   r#   r~   r$   r   '  s   
zAttributeRuler.to_diskc                    r   )a(  Load the AttributeRuler from disk.

        path (Union[Path, str]): A path to a directory.
        exclude (Iterable[str]): String names of serialization fields to exclude.
        RETURNS (AttributeRuler): The loaded object.

        DOCS: https://spacy.io/api/attributeruler#from_disk
        c                    r   r   )rF   r   read_msgpackr   r?   r#   r$   r   C  r   z/AttributeRuler.from_disk.<locals>.load_patternsc                    r   r|   )r6   	from_diskr   r~   r#   r$   r   G  r   z*AttributeRuler.from_disk.<locals>.<lambda>r   )r   r   )r=   r   r{   r   r   r#   r~   r$   r   7  r   zAttributeRuler.from_disk)r3   )r   N)r   ))__name__
__module____qualname____doc__r0   r   rt   boolr   r   r>   r@   r   r   r   AttributeRulerPatternType
TagMapTypeMorphRulesTyperI   r   rR   rL   rM   r   r
   rU   rG   rH   MatcherPatternTypero   rF   propertyr   rB   r   bytesr}   r   r   r   r   r#   r#   r#   r$   r2   2   s    	

!

	"
*





r2   r:   c                 C   sT   i }i }|   D ]\}}|dv s|t v s|t v r!|||< q|||< q||fS )zSplit entries from a tag map or morph rules dict into to two dicts, one
    with the token-level features (POS, LEMMA) and one with the remaining
    features, which are presumed to be individual MORPH features._)rl   r   keysvalues)r:   other_attrsrq   kvr#   r#   r$   rm   N  s    

rm   c                 C   s,   | dkrt d}|jS tdt d|  )Nmake_attribute_rulerzspacy.pipeline.factorieszmodule z has no attribute )	importlibimport_moduler   AttributeErrorr   )r7   moduler#   r#   r$   __getattr__]  s   
r   )4r   syspathlibr   typingr   r   r   r   r   r   r	   r
   r    r   errorsr   languager   r9   r   r5   r   symbolsr   tokensr   r   tokens._retokenizer   r   trainingr   r   r   r6   r   piper   rU   rt   r   r   r   r   r0   r1   r2   dictrm   r   r#   r#   r#   r$   <module>   s8    ($,  