o
    eiX                     @   s   d Z ddlZddlmZmZmZ ddlmZmZm	Z	m
Z
mZmZmZ ddlmZ ddlmZmZ ddlmZ d	d
lmZmZmZmZmZmZmZmZ G dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G dd deZ#G dd deZ$G dd deZ%g dZ&dS )zPyTorch CamemBERT model.    N)BCEWithLogitsLossCrossEntropyLossMSELoss   ),BaseModelOutputWithPoolingAndCrossAttentions!CausalLMOutputWithCrossAttentionsMaskedLMOutputMultipleChoiceModelOutputQuestionAnsweringModelOutputSequenceClassifierOutputTokenClassifierOutput)Unpack)TransformersKwargsauto_docstring)can_return_tuple   )RobertaForCausalLMRobertaForMaskedLMRobertaForMultipleChoiceRobertaForQuestionAnswering RobertaForSequenceClassificationRobertaForTokenClassificationRobertaModelRobertaPreTrainedModelc                   @   s   e Zd ZdZdS )CamembertPreTrainedModelrobertaN)__name__
__module____qualname__base_model_prefix r    r    m/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/camembert/modular_camembert.pyr   ,   s    r   c                   @   s   e Zd ZdS )CamembertModelN)r   r   r   r    r    r    r!   r"   0   s    r"   c                       s   e Zd ZdddZ fddZee								ddejdB dej	dB d	ejdB d
ejdB dej	dB dej	dB dej	dB dejdB de
e deej eB fddZ  ZS )CamembertForMaskedLMz)roberta.embeddings.word_embeddings.weightzlm_head.bias)zlm_head.decoder.weightzlm_head.decoder.biasc                    "   t  | | `t|dd| _d S NF)add_pooling_layersuper__init__	camembertr"   r   selfconfig	__class__r    r!   r)   :      zCamembertForMaskedLM.__init__N	input_idsattention_masktoken_type_idsposition_idsinputs_embedsencoder_hidden_statesencoder_attention_masklabelskwargsreturnc	              
   K   s   | j |f||||||dd|	}
|
d }| |}d}|dur7||j}t }||d| jj|d}t|||
j	|
j
dS )a  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the masked language modeling loss. Indices should be in `[-100, 0, ...,
            config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are ignored (masked), the
            loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`
        T)r2   r3   r4   r5   r6   r7   return_dictr   Nlosslogitshidden_states
attentions)r   lm_headtodevicer   viewr-   
vocab_sizer   r@   rA   )r,   r1   r2   r3   r4   r5   r6   r7   r8   r9   outputssequence_outputprediction_scoresmasked_lm_lossloss_fctr    r    r!   forward@   s4   	
zCamembertForMaskedLM.forward)NNNNNNNN)r   r   r   _tied_weights_keysr)   r   r   torch
LongTensorFloatTensorr   r   tupleTensorr   rL   __classcell__r    r    r.   r!   r#   4   sH    	
r#   c                          e Zd Z fddZee						ddejdB dejdB dejdB dejdB dejdB d	ejdB d
e	e
 deej eB fddZ  ZS )"CamembertForSequenceClassificationc                    r$   r%   r'   r+   r.   r    r!   r)   {   r0   z+CamembertForSequenceClassification.__init__Nr1   r2   r3   r4   r5   r8   r9   r:   c                 K   s6  | j |f||||dd|}|d }	| |	}
d}|dur||
j}| jjdu rN| jdkr4d| j_n| jdkrJ|jtj	ksE|jtj
krJd| j_nd| j_| jjdkrlt }| jdkrf||
 | }n+||
|}n%| jjdkrt }||
d	| j|d	}n| jjdkrt }||
|}t||
|j|jd
S )a  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the sequence classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Tr2   r3   r4   r5   r;   r   N   
regressionsingle_label_classificationmulti_label_classificationr<   r=   )r   
classifierrC   rD   r-   problem_type
num_labelsdtyperN   longintr   squeezer   rE   r   r   r@   rA   r,   r1   r2   r3   r4   r5   r8   r9   rG   rH   r?   r>   rK   r    r    r!   rL      sN   	


"


z*CamembertForSequenceClassification.forwardNNNNNN)r   r   r   r)   r   r   rN   rO   rP   r   r   rQ   rR   r   rL   rS   r    r    r.   r!   rU   z   6    	rU   c                       s   e Zd Z fddZee						ddejdB dejdB dejdB dejdB dejdB d	ejdB d
e	e
 deej eB fddZ  ZS )CamembertForMultipleChoicec                    r$   r%   r'   r+   r.   r    r!   r)      r0   z#CamembertForMultipleChoice.__init__Nr1   r3   r2   r8   r4   r5   r9   r:   c                 K   s<  |dur	|j d n|j d }|dur|d|dnd}	|dur*|d|dnd}
|dur9|d|dnd}|durH|d|dnd}|dur[|d|d|dnd}| j|	f|
|||dd|}|d }| |}| |}|d|}d}|dur||j}t }|||}t	|||j
|jdS )a  
        input_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`):
            Indices of input sequence tokens in the vocabulary.

            Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and
            [`PreTrainedTokenizer.__call__`] for details.

            [What are input IDs?](../glossary#input-ids)
        token_type_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the multiple choice classification loss. Indices should be in `[0, ...,
            num_choices-1]` where `num_choices` is the size of the second dimension of the input tensors. (See
            `input_ids` above)
        position_ids (`torch.LongTensor` of shape `(batch_size, num_choices, sequence_length)`, *optional*):
            Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0,
            config.max_position_embeddings - 1]`.

            [What are position IDs?](../glossary#position-ids)
        inputs_embeds (`torch.FloatTensor` of shape `(batch_size, num_choices, sequence_length, hidden_size)`, *optional*):
            Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This
            is useful if you want more control over how to convert `input_ids` indices into associated vectors than the
            model's internal embedding lookup matrix.
        NrW   r<   T)r4   r3   r2   r5   r;   r=   )shaperE   sizer   dropoutr[   rC   rD   r   r	   r@   rA   )r,   r1   r3   r2   r8   r4   r5   r9   num_choicesflat_input_idsflat_position_idsflat_token_type_idsflat_attention_maskflat_inputs_embedsrG   pooled_outputr?   reshaped_logitsr>   rK   r    r    r!   rL      sF   +	


z"CamembertForMultipleChoice.forwardrc   )r   r   r   r)   r   r   rN   rO   rP   r   r   rQ   rR   r	   rL   rS   r    r    r.   r!   re      rd   re   c                       rT   )CamembertForTokenClassificationc                    r$   r%   r'   r+   r.   r    r!   r)   &  r0   z(CamembertForTokenClassification.__init__Nr1   r2   r3   r4   r5   r8   r9   r:   c                 K   s   | j |f||||dd|}|d }	| |	}	| |	}
d}|dur9||
j}t }||
d| j|d}t||
|j	|j
dS )a-  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the token classification loss. Indices should be in `[0, ..., config.num_labels - 1]`.
        TrV   r   Nr<   r=   )r   ri   r[   rC   rD   r   rE   r]   r   r@   rA   rb   r    r    r!   rL   ,  s2   


z'CamembertForTokenClassification.forwardrc   )r   r   r   r)   r   r   rN   rO   rP   r   r   rQ   rR   r   rL   rS   r    r    r.   r!   rr   %  rd   rr   c                       s   e Zd Z fddZee							ddejdB dejdB dejdB dejdB dejdB d	ejdB d
ejdB de	e
 deej eB fddZ  ZS )CamembertForQuestionAnsweringc                    r$   r%   r'   r+   r.   r    r!   r)   d  r0   z&CamembertForQuestionAnswering.__init__Nr1   r2   r3   r4   r5   start_positionsend_positionsr9   r:   c                 K   s  | j |f||||dd|}	|	d }
| |
}|jddd\}}|d }|d }d}|dury|duryt| dkrF|d}t| dkrS|d}|d}|d|}|d|}t|d}|||}|||}|| d	 }t	||||	j
|	jd
S )a[  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        TrV   r   rW   r<   )dimN)ignore_indexr   )r>   start_logits
end_logitsr@   rA   )r   
qa_outputssplitra   
contiguouslenrh   clampr   r
   r@   rA   )r,   r1   r2   r3   r4   r5   rt   ru   r9   rG   rH   r?   rx   ry   
total_lossignored_indexrK   
start_lossend_lossr    r    r!   rL   j  sH   







z%CamembertForQuestionAnswering.forward)NNNNNNN)r   r   r   r)   r   r   rN   rO   rP   r   r   rQ   rR   r
   rL   rS   r    r    r.   r!   rs   c  s<    	
rs   c                        s   e Zd Z fddZee												ddejdB dejdB dejdB dejdB d	ejdB d
ejdB dejdB dejdB de	e	ej  dB de
dB dejdB deejB dee de	ej eB fddZ  ZS )CamembertForCausalLMc                    r$   r%   r'   r+   r.   r    r!   r)     r0   zCamembertForCausalLM.__init__Nr   r1   r2   r3   r4   r5   r6   r7   r8   past_key_values	use_cachecache_positionlogits_to_keepr9   r:   c                 K   s   |durd}
| j |f|||||||	|
|dd
|}|j}t|tr(t| dn|}| |dd|ddf }d}|durL| jd||| jjd|}t	|||j
|j|j|jdS )aq  
        token_type_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Segment token indices to indicate first and second portions of the inputs. Indices are selected in `[0,1]`:

            - 0 corresponds to a *sentence A* token,
            - 1 corresponds to a *sentence B* token.
            This parameter can only be used when the model is initialized with `type_vocab_size` parameter with value
            >= 2. All the value in this tensor should be always < type_vocab_size.

            [What are token type IDs?](../glossary#token-type-ids)
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the left-to-right language modeling loss (next word prediction). Indices should be in
            `[-100, 0, ..., config.vocab_size]` (see `input_ids` docstring) Tokens with indices set to `-100` are
            ignored (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`

        Example:

        ```python
        >>> from transformers import AutoTokenizer, CamembertForCausalLM, AutoConfig
        >>> import torch

        >>> tokenizer = AutoTokenizer.from_pretrained("almanach/camembert-base")
        >>> config = AutoConfig.from_pretrained("almanach/camembert-base")
        >>> config.is_decoder = True
        >>> model = CamembertForCausalLM.from_pretrained("almanach/camembert-base", config=config)

        >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
        >>> outputs = model(**inputs)

        >>> prediction_logits = outputs.logits
        ```NFT)
r2   r3   r4   r5   r6   r7   r   r   r   r;   )r?   r8   rF   )r>   r?   r   r@   rA   cross_attentionsr    )r   last_hidden_state
isinstancer`   slicerB   loss_functionr-   rF   r   r   r@   rA   r   )r,   r1   r2   r3   r4   r5   r6   r7   r8   r   r   r   r   r9   rG   r@   slice_indicesr?   r>   r    r    r!   rL     s@   1zCamembertForCausalLM.forward)NNNNNNNNNNNr   )r   r   r   r)   r   r   rN   rO   rP   rQ   boolrR   r`   r   r   r   rL   rS   r    r    r.   r!   r     sZ    	
r   )r   r#   re   rs   rU   rr   r"   r   )'__doc__rN   torch.nnr   r   r   modeling_outputsr   r   r   r	   r
   r   r   processing_utilsr   utilsr   r   utils.genericr   roberta.modeling_robertar   r   r   r   r   r   r   r   r   r"   r#   rU   re   rr   rs   r   __all__r    r    r    r!   <module>   s"   $	(FO\>J]