o
    eixS                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
mZ eeZd	Z				d,d
ee
 dededB fddZ				d,d
ee
 dededB fddZG dd deZG dd de	ZG dd de	ZG dd deZG dd de	ZG dd de	ZG dd de	ZG dd  d e	ZG d!d" d"e	ZG d#d$ d$e	ZG d%d& d&e	Zd'dd'd'dd'd'd'd'd(	Zeeeeeeeeeed)
Z d*d*d*d*d*d+d*d*d*d*d)
Z!dS )-zGLUE processors and helpers    N)Enum   )PreTrainedTokenizer)logging   )DataProcessorInputExampleInputFeaturesa  This {0} will be removed from the library soon, preprocessing should be handled with the Hugging Face Datasets library. You can have a look at this example script for pointers: https://github.com/huggingface/transformers/blob/main/examples/pytorch/text-classification/run_glue.pyexamples	tokenizer
max_lengthc                 C   s&   t tdt t| |||||dS )av  
    Loads a data file into a list of `InputFeatures`

    Args:
        examples: List of `InputExamples` containing the examples.
        tokenizer: Instance of a tokenizer that will tokenize the examples
        max_length: Maximum example length. Defaults to the tokenizer's max_len
        task: GLUE task
        label_list: List of labels. Can be obtained from the processor using the `processor.get_labels()` method
        output_mode: String indicating the output mode. Either `regression` or `classification`

    Returns:
        Will return a list of task-specific `InputFeatures` which can be fed to the model.

    function)r   task
label_listoutput_mode)warningswarnDEPRECATION_WARNINGformatFutureWarning"_glue_convert_examples_to_features)r
   r   r   r   r   r    r   _/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/data/processors/glue.py!glue_convert_examples_to_features#   s   r   c                    sf  |d u r|j }|d ur6t|  }|d u r#| }td| d|  d u r6t| td d|  dd t|D dtdtt	B d B ffdd	fd
d| D }|dd | D |ddd g }t
t| D ] fdd D }	tdi |	d| i}
||
 qmt| d d D ]\}td td|j  td|   q|S )NzUsing label list z
 for task zUsing output mode c                 S   s   i | ]\}}||qS r   r   ).0ilabelr   r   r   
<dictcomp>T   s    z6_glue_convert_examples_to_features.<locals>.<dictcomp>examplereturnc                    s:   | j d u rd S dkr | j  S dkrt| j S t)Nclassification
regression)r   floatKeyError)r   )	label_mapr   r   r   label_from_exampleV   s   


z>_glue_convert_examples_to_features.<locals>.label_from_examplec                    s   g | ]} |qS r   r   r   r   )r%   r   r   
<listcomp>_   s    z6_glue_convert_examples_to_features.<locals>.<listcomp>c                 S   s   g | ]}|j |jfqS r   )text_atext_br&   r   r   r   r'   b   s    r   T)r   padding
truncationc                    s   i | ]	}| |  qS r   r   )r   k)batch_encodingr   r   r   r   j   s    r      z*** Example ***zguid: z
features: r   )model_max_lengthglue_processors
get_labelsloggerinfoglue_output_modes	enumerater   intr"   rangelenr	   appendguid)r
   r   r   r   r   r   	processorlabelsfeaturesinputsfeaturer   r   )r-   r   r%   r$   r   r   r   @   s:   
 	
r   c                   @   s   e Zd ZdZdZdS )
OutputModer    r!   N)__name__
__module____qualname__r    r!   r   r   r   r   r@   w   s    r@   c                       P   e Zd ZdZ fddZdd Zdd Zdd	 Zd
d Zdd Z	dd Z
  ZS )MrpcProcessorz/Processor for the MRPC data set (GLUE version).c                    (   t  j|i | ttdt d S Nr;   super__init__r   r   r   r   r   selfargskwargs	__class__r   r   rJ         zMrpcProcessor.__init__c                 C   >   t |d  |d  d|d  dt|d  S See base class.idx	sentence1utf-8	sentence2r   r   numpydecodestrrL   tensor_dictr   r   r   get_example_from_tensor_dict      
z*MrpcProcessor.get_example_from_tensor_dictc                 C   s6   t dtj|d  | | tj|ddS )rT   zLOOKING AT 	train.tsvtrain)r2   r3   ospathjoin_create_examples	_read_tsvrL   data_dirr   r   r   get_train_examples   s   z MrpcProcessor.get_train_examplesc                 C      |  | tj|ddS rT   zdev.tsvdevrf   rg   rc   rd   re   rh   r   r   r   get_dev_examples      zMrpcProcessor.get_dev_examplesc                 C   rk   rT   ztest.tsvtestrn   rh   r   r   r   get_test_examples   rp   zMrpcProcessor.get_test_examplesc                 C      ddgS rT   01r   rL   r   r   r   r1         zMrpcProcessor.get_labelsc           
   	   C   sl   g }t |D ]-\}}|dkrq| d| }|d }|d }|dkr$dn|d }	|t||||	d q|S )5Creates examples for the training, dev and test sets.r   -r      rr   Nr:   r(   r)   r   r5   r9   r   
rL   linesset_typer
   r   liner:   r(   r)   r   r   r   r   rf      s   zMrpcProcessor._create_examplesrA   rB   rC   __doc__rJ   r_   rj   ro   rs   r1   rf   __classcell__r   r   rO   r   rE   |   s    	rE   c                       rD   )MnliProcessorz3Processor for the MultiNLI data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ      rQ   zMnliProcessor.__init__c                 C   rR   )rT   rU   premiserW   
hypothesisr   rY   r]   r   r   r   r_      r`   z*MnliProcessor.get_example_from_tensor_dictc                 C   rk   rT   ra   rb   rn   rh   r   r   r   rj      rp   z MnliProcessor.get_train_examplesc                 C   rk   )rT   zdev_matched.tsvdev_matchedrn   rh   r   r   r   ro      rp   zMnliProcessor.get_dev_examplesc                 C   rk   )rT   ztest_matched.tsvtest_matchedrn   rh   r   r   r   rs      rp   zMnliProcessor.get_test_examplesc                 C   s   g dS )rT   )contradiction
entailmentneutralr   rx   r   r   r   r1      ry   zMnliProcessor.get_labelsc           
   	   C   sr   g }t |D ]0\}}|dkrq| d|d  }|d }|d }|dr'dn|d }	|t||||	d q|S )	rz   r   r{      	   rr   Nr}   )r5   
startswithr9   r   r   r   r   r   rf      s   zMnliProcessor._create_examplesr   r   r   rO   r   r          	r   c                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )MnliMismatchedProcessorz>Processor for the MultiNLI Mismatched data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ      rQ   z MnliMismatchedProcessor.__init__c                 C   rk   )rT   zdev_mismatched.tsvdev_mismatchedrn   rh   r   r   r   ro      rp   z(MnliMismatchedProcessor.get_dev_examplesc                 C   rk   )rT   ztest_mismatched.tsvtest_mismatchedrn   rh   r   r   r   rs      rp   z)MnliMismatchedProcessor.get_test_examples)rA   rB   rC   r   rJ   ro   rs   r   r   r   rO   r   r      s
    r   c                       rD   )ColaProcessorz/Processor for the CoLA data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ      rQ   zColaProcessor.__init__c                 C   0   t |d  |d  ddt|d  S rT   rU   sentencerW   Nr   rY   r]   r   r   r   r_         
z*ColaProcessor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj      rp   z ColaProcessor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro      rp   zColaProcessor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs     rp   zColaProcessor.get_test_examplesc                 C   rt   ru   r   rx   r   r   r   r1     ry   zColaProcessor.get_labelsc              	   C   sz   |dk}|r|dd }|rdnd}g }t |D ]"\}}| d| }|| }	|r+dn|d }
|t||	d|
d q|S )rz   rr   r   Nr   r{   r}   r~   )rL   r   r   	test_mode
text_indexr
   r   r   r:   r(   r   r   r   r   rf   	  s   zColaProcessor._create_examplesr   r   r   rO   r   r      r   r   c                       rD   )Sst2Processorz0Processor for the SST-2 data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ     rQ   zSst2Processor.__init__c                 C   r   r   rY   r]   r   r   r   r_     r   z*Sst2Processor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj   (  rp   z Sst2Processor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro   ,  rp   zSst2Processor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs   0  rp   zSst2Processor.get_test_examplesc                 C   rt   ru   r   rx   r   r   r   r1   4  ry   zSst2Processor.get_labelsc           
   	   C   st   g }|dkrdnd}t |D ])\}}|dkrq| d| }|| }|dkr(dn|d }	|t||d|	d q|S )rz   rr   r   r   r{   Nr}   r~   )
rL   r   r   r
   r   r   r   r:   r(   r   r   r   r   rf   8  s   zSst2Processor._create_examplesr   r   r   rO   r   r     r   r   c                       rD   )StsbProcessorz0Processor for the STS-B data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ   I  rQ   zStsbProcessor.__init__c                 C   rR   rS   rY   r]   r   r   r   r_   M  r`   z*StsbProcessor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj   V  rp   z StsbProcessor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro   Z  rp   zStsbProcessor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs   ^  rp   zStsbProcessor.get_test_examplesc                 C   s   dgS )rT   Nr   rx   r   r   r   r1   b  s   zStsbProcessor.get_labelsc           
   	   C   p   g }t |D ]/\}}|dkrq| d|d  }|d }|d }|dkr&dn|d }	|t||||	d q|S )	rz   r   r{      r   rr   Nr   r}   r~   r   r   r   r   rf   f     zStsbProcessor._create_examplesr   r   r   rO   r   r   F  r   r   c                       rD   )QqpProcessorz.Processor for the QQP data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ   w  rQ   zQqpProcessor.__init__c                 C   rR   )rT   rU   	question1rW   	question2r   rY   r]   r   r   r   r_   {  r`   z)QqpProcessor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj     rp   zQqpProcessor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro     rp   zQqpProcessor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs     rp   zQqpProcessor.get_test_examplesc                 C   rt   ru   r   rx   r   r   r   r1     ry   zQqpProcessor.get_labelsc              	   C   s   |dk}|rdnd}|rdnd}g }t |D ]9\}}|dkrq| d|d  }	z|| }
|| }|r5dn|d	 }W n	 tyC   Y qw |t|	|
||d
 q|S )rz   rr   r   r      r|   r   r{   Nr.   r}   )r5   
IndexErrorr9   r   )rL   r   r   r   q1_indexq2_indexr
   r   r   r:   r(   r)   r   r   r   r   rf     s"   zQqpProcessor._create_examplesr   r   r   rO   r   r   t  r   r   c                       rD   )QnliProcessorz/Processor for the QNLI data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ     rQ   zQnliProcessor.__init__c                 C   rR   )rT   rU   questionrW   r   r   rY   r]   r   r   r   r_     r`   z*QnliProcessor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj     rp   z QnliProcessor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro     rp   zQnliProcessor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs     rp   zQnliProcessor.get_test_examplesc                 C   rt   rT   r   not_entailmentr   rx   r   r   r   r1     ry   zQnliProcessor.get_labelsc           
   	   C   r   	rz   r   r{   r   r   rr   Nr   r}   r~   r   r   r   r   rf     r   zQnliProcessor._create_examplesr   r   r   rO   r   r     r   r   c                       rD   )RteProcessorz.Processor for the RTE data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ     rQ   zRteProcessor.__init__c                 C   rR   rS   rY   r]   r   r   r   r_     r`   z)RteProcessor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj     rp   zRteProcessor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro     rp   zRteProcessor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs     rp   zRteProcessor.get_test_examplesc                 C   rt   r   r   rx   r   r   r   r1     ry   zRteProcessor.get_labelsc           
   	   C   r   r   r~   r   r   r   r   rf     r   zRteProcessor._create_examplesr   r   r   rO   r   r     r   r   c                       rD   )WnliProcessorz/Processor for the WNLI data set (GLUE version).c                    rF   rG   rH   rK   rO   r   r   rJ     rQ   zWnliProcessor.__init__c                 C   rR   rS   rY   r]   r   r   r   r_     r`   z*WnliProcessor.get_example_from_tensor_dictc                 C   rk   r   rn   rh   r   r   r   rj     rp   z WnliProcessor.get_train_examplesc                 C   rk   rl   rn   rh   r   r   r   ro     rp   zWnliProcessor.get_dev_examplesc                 C   rk   rq   rn   rh   r   r   r   rs     rp   zWnliProcessor.get_test_examplesc                 C   rt   ru   r   rx   r   r   r   r1      ry   zWnliProcessor.get_labelsc           
   	   C   r   r   r~   r   r   r   r   rf   $  r   zWnliProcessor._create_examplesr   r   r   rO   r   r     r   r   r   )	colamnlimrpcsst-2sts-bqqpqnlirtewnli)
r   r   zmnli-mmr   r   r   r   r   r   r   r    r!   )NNNN)"r   rc   r   enumr   tokenization_pythonr   utilsr   r   r   r	   
get_loggerrA   r2   r   listr6   r   r   r@   rE   r   r   r   r   r   r   r   r   r   glue_tasks_num_labelsr0   r4   r   r   r   r   <module>   s   


 
7/./..4../
