o
    3wi8                     @   s   d dl Zd dlZd dlmZ d dlmZ G dd dZG dd dejj	Z
G dd	 d	ejj	ZddefddZddefddZdS )    N)
DataLoader)DistributedTypec                   @   s&   e Zd ZdddZdd Zd	d
 ZdS )RegressionDataset      @   Nc                 C   sV   t j|}|| _|j|fdt j| _|| j | |jd|fdt j | _d S )N)sizeg?)scaler   )	nprandomdefault_rnglengthnormalastypefloat32xy)selfabr   seedrng r   [/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/accelerate/test_utils/training.py__init__   s   ,zRegressionDataset.__init__c                 C   s   | j S N)r   )r   r   r   r   __len__   s   zRegressionDataset.__len__c                 C   s   | j | | j| dS )Nr   r   r   )r   ir   r   r   __getitem__    s   zRegressionDataset.__getitem__)r   r   r   N)__name__
__module____qualname__r   r   r   r   r   r   r   r      s    
r   c                       (   e Zd Zd fdd	Zd	ddZ  ZS )
RegressionModel4XPUr   Fc                    sL   t    tjtddg | _tjtddg | _d| _	d S )Nr   r   T
superr   torchnn	Parametertensorfloatr   r   first_batchr   r   r   double_output	__class__r   r   r   %   s   

zRegressionModel4XPU.__init__Nc                 C   sH   | j rtd| jj d| jj d|j  d| _ || jd  | jd  S )NModel dtype: , . Input dtype: Fr   r,   printr   dtyper   r   r   r   r   r   forward+   s   $zRegressionModel4XPU.forwardr   r   Fr   r    r!   r"   r   r8   __classcell__r   r   r/   r   r$   $       r$   c                       r#   )
RegressionModelr   Fc                    sD   t    tjt| | _tjt| | _d| _	d S )NTr%   r-   r/   r   r   r   3   s   

zRegressionModel.__init__Nc                 C   s@   | j rtd| jj d| jj d|j  d| _ || j | j S )Nr1   r2   r3   Fr4   r7   r   r   r   r8   9   s   $zRegressionModel.forwardr9   r   r:   r   r   r/   r   r=   2   r<   r=      
batch_sizec                    s   ddl m} ddlm} |dddd}|d|d	}|d
 d}dd t|D fdd}|j|dg dd} fdd}	t|d
 d|	dd}
t|d d|	dd}|
|fS )Nr   load_datasetAutoTokenizerzbert-base-cased!tests/test_samples/MRPC/train.csvtests/test_samples/MRPC/dev.csvtrain
validationcsv
data_filesrG   labelc                 S   s   i | ]\}}||qS r   r   ).0r   vr   r   r   
<dictcomp>I   s    z&mocked_dataloaders.<locals>.<dictcomp>c                    s@   | d | d dd dd}d| v r fdd| d D |d	< |S )
N	sentence1	sentence2T
max_length)
truncationrR   paddingrL   c                    s   g | ]} | qS r   r   )rM   l)label_to_idr   r   
<listcomp>Q   s    zAmocked_dataloaders.<locals>.tokenize_function.<locals>.<listcomp>labelsr   examplesoutputs)rV   	tokenizerr   r   tokenize_functionK   s   z-mocked_dataloaders.<locals>.tokenize_functionTrP   rQ   rL   batchedremove_columnsc                    s.    j tjkrj| ddddS j| dddS )NrR      pt)rT   rR   return_tensorslongest)rT   rd   )distributed_typer   XLApad)rZ   acceleratorr\   r   r   
collate_fn[   s   z&mocked_dataloaders.<locals>.collate_fnr   shufflerk   r?   rH   F   )	datasetsrA   transformersrC   from_pretrainedunique	enumeratemapr   )rj   r?   rA   rC   rK   ro   
label_listr]   tokenized_datasetsrk   train_dataloadereval_dataloaderr   )rj   rV   r\   r   mocked_dataloaders@   s"   


ry   c                    s   ddl m} ddlm} |dj_ddd}|d|d	}fd
d}   |j|dg dd}W d    n1 s?w   Y   fdd}t	|d d|dd}	t	|d d|dd}
|	|
fS )Nr   r@   rB   zHuggingFaceTB/SmolLM-360MrD   rE   rF   rI   rJ   c                    s    | d dd dd}|S )NrP   TF)rS   rR   return_attention_maskr   rY   )r\   r   r   r]   r   s   zGmocked_dataloaders_for_autoregressive_models.<locals>.tokenize_functionTr^   r_   c                    s    j tjkrdntdd | D } jdkrd}n
 jdkr!d}nd }j| d|d	 |d
d}|d d d d	d f |d< |d d d d df |d< t|d jkd|d |d< |S )Nrb   c                 S   s   g | ]}t |d  qS )	input_ids)len)rM   er   r   r   rW      s    zTmocked_dataloaders_for_autoregressive_models.<locals>.collate_fn.<locals>.<listcomp>fp8r>   no   rR   rn   rc   )rT   rR   pad_to_multiple_ofrd   r{   rX   i)	rf   r   rg   maxmixed_precisionrh   r'   wherepad_token_id)rZ   rR   r   batchri   r   r   rk      s(   

 z@mocked_dataloaders_for_autoregressive_models.<locals>.collate_fnrG   Fr   rl   rH   rn   )
ro   rA   rp   rC   rq   	eos_token	pad_tokenmain_process_firstrt   r   )rj   r?   rA   rC   rK   ro   r]   rv   rk   rw   rx   r   ri   r   ,mocked_dataloaders_for_autoregressive_modelsh   s$   


r   )r>   )numpyr
   r'   torch.utils.datar   accelerate.utils.dataclassesr   r   r(   Moduler$   r=   intry   r   r   r   r   r   <module>   s   (