o
    iE                     @   sR  d dl mZ d dlmZmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZmZmZ d d
lm Z  d dl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z) dZ*e +e*Z,dZ-ej.ddge,d dddddede/de de0de0ddfdd Z1G d!d deZ2d%d#d$Z3d"S )&    )islice)	AnyCallableDictIterableIteratorListOptionalSequenceTuple)Errors)LanguageVocab)TrainablePipe)Doc)Examplevalidate_examplesvalidate_get_examples)	minibatch)Config	Optimizerset_dropout_rate)Model)Ragged   )ListenerStateUtils)DocTransformerOutputTransformerModelOutput)TransformerListenerModelTah  
    [transformer]

    [transformer.model]
    @architectures = "spacy-curated-transformers.XlmrTransformer.v1"
    vocab_size = 250002

    [transformer.model.piece_encoder]
    @architectures = "spacy-curated-transformers.XlmrSentencepieceEncoder.v1"

    [transformer.model.with_spans]
    @architectures = "spacy-curated-transformers.WithStridedSpans.v1"
trf_datacurated_transformerzdoc._.trf_datatransformer)assignsdefault_configF)frozenall_layer_outputsnlpnamemodelr$   r%   returnCuratedTransformerc                C   s   t | j||||dS )au  Construct a CuratedTransformer component, which lets you plug a pre-trained
    transformer model into spaCy so you can use it in your pipeline. One or
    more subsequent spaCy components can use the transformer outputs as features
    in its model, with gradients backpropagated to the single shared weights.

    nlp (Language):
        The pipeline.
    name (str):
        The component instance name.
    model (Model):
        One of the supported pre-trained transformer models.
    frozen (bool):
        If `True`, the model's weights are frozen and no backpropagation is performed.
    all_layer_outputs (bool):
        If `True`, the model returns the outputs of all the layers. Otherwise, only the
        output of the last layer is returned. This must be set to `True` if any of the pipe's
        downstream listeners require the outputs of all transformer layers.
    r'   r$   r%   )r*   vocab)r&   r'   r(   r$   r%    r-   c/home/ubuntu/.local/lib/python3.10/site-packages/spacy_curated_transformers/pipeline/transformer.pymake_transformer/   s   r/   c                   @   s  e Zd ZdZdddddedededed	ed
dfddZe	d
e
e fddZe	d
e
e fddZdeded
dfddZdeded
efddZded
dfddZdddee ded
ee fdd Zd!ee d
efd"d#Zd!ee d$ed
dfd%d&Zd'ddd(d)ee d*ed+ee d,eeeef  d
eeef f
d-d.Z d)ee d/ed
dfd0d1Z!dddd2d3e"g ee f d4ee# d5ee" d6ee" fd7d8Z$d9efd:d;Z%d+ed
dfd<d=Z&dd>d!ee d,eeef d+ee d
e'ee"e"f fd?d@Z(dAefdBdCZ)dS )Dr*   au  spaCy pipeline component that provides access to a pre-trained transformer
    model from. Downstream components are connected to this pip using transformer
    listener layers. This works similarly to spaCy's Transformer component and
    TransformerListener sublayer.

    The activations from the transformer are saved in the doc._.trf_data extension
    attribute.
    r!   Fr+   r,   r(   r'   r$   r%   r)   Nc                C   s>   || _ || _|| _i | _i | _t  || _|| _| | dS )az  
        vocab (Vocab):
            The shared vocabulary.
        model (Model):
            One of the supported pre-trained transformer models.
        name (str):
            The component instance name.
        frozen (bool):
            If `True`, the model's weights are frozen and no backpropagation is performed.
        all_layer_outputs (bool):
            If `True`, the model returns the outputs of all the layers. Otherwise, only the
            output of the last layer is returned. This must be set to `True` if any of the pipe's
            downstream listeners require the outputs of all transformer layers.
        N)	r,   r(   r'   listener_mapcfg_install_extensionsr$   r%   _set_model_all_layer_outputs)selfr,   r(   r'   r$   r%   r-   r-   r.   __init__a   s   zCuratedTransformer.__init__c                    s    fdd j D S )z
        RETURNS (List[TransformerListenerModelT]):
            The listener models listening to this component. Usually internals.
        c                    s    g | ]} j | D ]}|q	qS r-   )r0   ).0cmr4   r-   r.   
<listcomp>   s     z0CuratedTransformer.listeners.<locals>.<listcomp>)listening_componentsr9   r-   r9   r.   	listeners   s   zCuratedTransformer.listenersc                 C   s   t | j S )z|
        RETURNS (List[str]):
            The downstream components listening to this component. Usually internals.
        )listr0   keysr9   r-   r-   r.   r;      s   z'CuratedTransformer.listening_componentslistenercomponent_namec                 C   sP   t |std|j d| j|g  || j| vr&| j| | dS dS )z=Add a listener for a downstream component. Usually internals.z!Attempting to register a model ('z=') with the transformer pipethat isn't a transformer listenerN)r   is_listener
ValueErrorr'   r0   
setdefaultappendr4   r?   r@   r-   r-   r.   add_listener   s   
zCuratedTransformer.add_listenerc                 C   sB   || j v r|| j | v r| j | | | j | s| j |= dS dS )zRemove a listener for a downstream component. Usually internals.

        RETURNS (bool):
            `True` if successful, `False` otherwise.
        TF)r0   removerE   r-   r-   r.   remove_listener   s   

z"CuratedTransformer.remove_listener	componentc                 C   sZ   d| j f}tt|ddtr)|j D ]}t|r(t||v r(| 	||j  qdS dS )a  Walk over a model of a processing component, looking for layers that
        are transformer listeners that have an upstream_name that matches
        this component. Listeners can also set their upstream_name attribute to
        the wildcard string '*' to match any `Transformer`.
        You're unlikely to ever need multiple `Transformer` components, so it's
        fine to leave your listeners upstream_name on '*'.
        *r(   N)
r'   
isinstancegetattrr   r(   walkr   rA   get_upstream_namerF   )r4   rI   namesnoder-   r-   r.   find_listeners   s   
z!CuratedTransformer.find_listeners   )
batch_sizestreamrS   c                c   s<    t   t||D ]}| |}| || |E dH  q	dS )a  Apply the pipe to a stream of documents. This usually happens under
        the hood when the nlp object is called on a text and all components are
        applied to the Doc.

        stream (Iterable[Doc]):
            A stream of documents.
        batch_size (int):
            The number of documents to buffer.

        YIELDS (Doc):
            Processed documents in order.
        N)r2   r   predictset_annotations)r4   rT   rS   batchpredsr-   r-   r.   pipe   s   
zCuratedTransformer.pipedocsc                    sX   t   tdd |D s  jdt fdd|D ddS   j  j|S )zApply the pipeline's model to a batch of docs, without modifying them.

        docs (Iterable[Doc]):
            The documents to predict.

        RETURNS (TransformerModelOutput):
            The extracted features of each document.
        c                 s   s    | ]}t |V  qd S N)len)r6   docr-   r-   r.   	<genexpr>   s    z-CuratedTransformer.predict.<locals>.<genexpr>nOc                    s0   g | ]}t  jjd  jjd dgqS )r   )datalengths)r   r(   opsalloc2falloc1i)r6   _r4   widthr-   r.   r:      s    z.CuratedTransformer.predict.<locals>.<listcomp>T)outputslast_layer_only)r2   anyr(   get_dimr   r3   r%   rU   )r4   rZ   r-   rf   r.   rU      s   		zCuratedTransformer.predict
trf_outputc                 C   s,   t ||jD ]\}}t||jd|j_qdS )a;  Assign the extracted features to the Doc objects. By default, a
        DocTransformerOutput object is written to the doc._.trf_data attribute.

        docs (Iterable[Doc]):
            The documents to modify.
        trf_output (TransformerModelOutput):
            The outputs of the transformer model.
        )all_outputsri   N)ziprm   r   ri   re   r   )r4   rZ   rl   r]   tokvecsr-   r-   r.   rV      s
   z"CuratedTransformer.set_annotations        )dropsgdlossesexamplesrq   rr   rs   c                C   s   |du ri }t |d dd |D }t| j| || jd | | j | j|||d\}}}t	|}	| j
dd D ]
}
t|
|	|| q<| j
rUt| j
d |	|| |S )a  Prepare for an update to the transformer.

        Like the `Tok2Vec` component, the `Transformer` component is unusual
        in that it does not receive "gold standard" annotations to calculate
        a weight update. The optimal output of the transformer data is unknown;
        it's a hidden layer inside the network that is updated by backpropagating
        from output layers.

        The `Transformer` component therefore does not perform a weight update
        during its own `update` method. Instead, it runs its transformer model
        and communicates the output and the backpropagation callback to any
        downstream components that have been connected to it via the
        transformer listener sublayer. If there are multiple listeners, the last
        layer will actually backprop to the transformer and call the optimizer,
        while the others simply increment the gradients.

        examples (Iterable[Example]):
            A batch of Example objects. Only the `predicted` doc object is used,
            the reference doc is ignored.
        drop (float):
            The dropout rate.
        sgd (thinc.api.Optimizer):
            The optimizer.
        losses (Dict[str, float]):
            Optional record of the loss during training. Updated using the component
            name as the key.

        RETURNS (Dict[str, float]):
            The updated losses dictionary.
        NzTransformer.updatec                 S   s   g | ]}|j qS r-   )	predicted)r6   egr-   r-   r.   r:   -  s    z-CuratedTransformer.update.<locals>.<listcomp>rp   rr   )r   r   r(   rC   r'   r3   r%   _create_backpropsr   calculate_batch_idr<   receive)r4   rt   rq   rr   rs   rZ   rh   
accum_funcbackprop_funcbatch_idr?   r-   r-   r.   update  s$   &

zCuratedTransformer.updatescoresc                 C   s   dS )zA noop function, for compatibility with the Pipe API. See the `update`
        method for an explanation of the loss mechanics of the component.
        Nr-   )r4   rt   r   r-   r-   r.   get_lossA  s   zCuratedTransformer.get_loss)r&   encoder_loaderpiecer_loaderget_examplesr&   r   r   c                C   sz   t |d |r|| jd_|r|| jd_g }t| dD ]}||j q|s4J tjj	| j
d| jj|d dS )a  Initialize the pipe for training, using data examples if available.

        get_examples (Callable[[], Iterable[Example]]):
            Optional function that returns gold-standard Example objects.
        nlp (Language):
            The current nlp object.
        encoder_loader (Optional[Callable]):
            Initialization callback for the transformer model.
        piecer_loader (Optional[Callable]):
            Initialization callback for the input piece encoder.
        zTransformer.initializer!   piece_encoder
   )r'   )XN)r   r(   get_refinitr   rD   xSpacyErrorsE923formatr'   
initialize)r4   r   r&   r   r   
doc_sampleexampler-   r-   r.   r   G  s   
zCuratedTransformer.initializelabelc                 C   s   t r[   )NotImplementedError)r4   r   r-   r-   r.   	add_labelg  s   zCuratedTransformer.add_labelc                 C   s   | j s| j| dS dS )aQ  Update parameters using the current parameter gradients.
        The Optimizer instance contains the functionality to perform
        the stochastic gradient descent.

        This method is a noop when the pipe is frozen.

        sgd (thinc.api.Optimizer): The optimizer.

        DOCS: https://spacy.io/api/pipe#finish_update
        N)r$   r(   finish_update)r4   rr   r-   r-   r.   r   j  s   z CuratedTransformer.finish_updaterw   c                   s  j jjdj jrj |}d nj |\}fdd|jD dt	t	t
  dttdf dd ffdd	 fd
ddt	t	t
  dttdf dtf fdd}dt	t	t
  dttdf dtf fdd}| jr|fS |fS )N   c                    s   g | ]} fd d|D qS )c                    s"   g | ]}t  |jj|jqS r-   )r   alloc_fdataXdshapera   )r6   t2vrb   r-   r.   r:     s   " zCCuratedTransformer._create_backprops.<locals>.<listcomp>.<listcomp>r-   )r6   
doc_layersr   r-   r.   r:     s    z8CuratedTransformer._create_backprops.<locals>.<listcomp>one_d_outputsoutputs_to_backprop.r)   c                    s@   t t| D ]}|D ]} | |  j| | | j7  _q
qdS )a.  Accumulate transformer loss and gradient. This is passed as a callback
            to all but the last listener. Only the last one does the backprop.

            `outputs_to_backprop` is a tuple of indices indicating to which layers/outputs
            the gradients are to be propagated.
            N)ranger\   r`   )r   r   ij)	d_outputsr-   r.   accumulate_gradient  s
   
"zACuratedTransformer._create_backprops.<locals>.accumulate_gradientc                     sT   t tD ]} t t|  D ]} |  | jd  7  qqt j< dS )z6Reduce the gradient buffer and update the losses dict.r   N)r   r\   r`   sumfloatr'   )r   r   )cum_lossr   rs   r4   r-   r.   update_loss  s
   z9CuratedTransformer._create_backprops.<locals>.update_lossc                    s<   dusJ  | |d   }dur  |S )z>Callback to actually do the backprop. Passed to last listener.Nr   r   )r   r   d_docs)r   
bp_outputsr   r4   rr   r   r-   r.   backprop  s   
z6CuratedTransformer._create_backprops.<locals>.backpropc                    s(    | |d   d ur  g S )Nr   r   )r   r   )r   r4   rr   r   r-   r.   backprop_noop  s
   
z;CuratedTransformer._create_backprops.<locals>.backprop_noop)r(   rb   xpfullr'   r$   rU   begin_updaterm   r   r   r   intr   )r4   rZ   rs   rr   rh   r   r   r-   )	r   r   r   r   rs   rb   r4   rr   r   r.   ry   x  s@   






	z$CuratedTransformer._create_backprops	new_valuec                 C   s   || j djd< d S )Nr!   _all_layer_outputs)r(   r   attrs)r4   r   r-   r-   r.   r3     s   z/CuratedTransformer._set_model_all_layer_outputs)*__name__
__module____qualname____doc__r   r   strboolr5   propertyr   r   r<   r;   rF   rH   r   rQ   r   r   r   r   rY   r   rU   r
   rV   r   r   r	   r   r   r   r   r   r   r   r   r   r   ry   r3   r-   r-   r-   r.   r*   W   s    
"

$


=

 

ONc                   C   s    t tst jtd d d S d S )N)default)r   has_extensionDOC_EXT_ATTRset_extensionr-   r-   r-   r.   r2     s   
r2   )r)   N)4	itertoolsr   typingr   r   r   r   r   r   r	   r
   r   spacyr   r   r   r   spacy.pipeliner   spacy.tokensr   spacy.trainingr   r   r   
spacy.utilr   	thinc.apir   r   r   thinc.modelr   thinc.typesr   models.listenersr   models.outputr   r   models.typesr   DEFAULT_CONFIG_STRfrom_strDEFAULT_CONFIGr   factoryr   r   r/   r*   r2   r-   r-   r-   r.   <module>   sR    ,
#  v