o
     i1&                     @   s  d dl mZ d dlmZmZ d dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ d dlmZ eeeeejf f ZG dd	 d	eeZd
efddZdd Zdd ZG dd de	jZG dd de	jZG dd dejZ G dd de Z!G dd de Z"dS )    )Enum)DictUnionN)build_attention)MultiHeadDispatchConfig)xFormerxFormerConfigxFormerEncoderConfig)generate_matching_configc                   @   s   e Zd ZdZdZdS )PoolingmeanclsN)__name__
__module____qualname__MEANCLS r   r   ^/home/ubuntu/.local/lib/python3.10/site-packages/xformers/benchmarks/LRA/code/model_wrapper.pyr      s    r   modec                 C   s$   dd }dd }t j|t j|i|  S )Nc                 S   s   | d d dd d f S )Nr   r   inpr   r   r   pool_cls   s   zpooling.<locals>.pool_clsc                 S   s   | j ddS )N   dim)r   r   r   r   r   	pool_mean"   s   zpooling.<locals>.pool_mean)r   r   r   )r   r   r   r   r   r   pooling   s   r   c                 C   s   |  d}|d tj|tj| jd  }tj|tj|jd}tj|d d d f | d d d df gdd} tj|d d d f |d d d df gdd}| |fS )Nr   r   )dtypedevicer   )sizetorchoneslongr   floatcat)r   mask
vocab_size
batch_sizecls_idcls_maskr   r   r   
append_cls(   s   
..r,   c                 C   s  | d }z
| d d | }W n t y   d }Y nw | d D ]a}|d |d< |d | |d | |d | |d d | ||d d d	< |d |d
  |d d d< |d urg|d d | t|d t|d< t|d j|d _t|t}q| S )Ncommonextra_settings	attentionxformer	dim_modelposition_encoding_configfeedforward_configmulti_head_configname	num_headsdim_head)KeyErrorupdater
   r   r   r/   r	   )configattention_namecommonsextra_attention_settingsbcr   r   r   patch_model_config3   s2   
r?   c                       s,   e Zd Z fddZdejfddZ  ZS )SCHeadc              	      sL   t    tt|d | _tt||t t||d d | _d S )Npooling_moder-   num_classes	super__init__r   r   nn
SequentialLinearReLUmlpblockselfr:   dim_embeddingdim_mlp	__class__r   r   rE   T   s   


zSCHead.__init__r   c                 C   s   |  | |}|S N)rJ   r   )rL   r   	seq_scorer   r   r   forward^   s   zSCHead.forwardr   r   r   rE   r"   TensorrS   __classcell__r   r   rO   r   r@   S   s    
r@   c                       s2   e Zd Z fddZdejdejfddZ  ZS )
SCHeadDualc              	      sP   t    tt|d | _tt|d |t t||d d | _d S )NrA      r-   rB   rC   rK   rO   r   r   rE   d   s   

zSCHeadDual.__init__inp_0inp_1c                 C   s<   |  |}|  |}| tj|||| || gdd}|S )Nr    r   )r   rJ   r"   r&   )rL   rY   rZ   X_0X_1rR   r   r   r   rS   q   s   

$zSCHeadDual.forwardrT   r   r   rO   r   rW   c   s    rW   c                       s   e Zd Z fddZdeeejf dede	fddZ
dd	 Zd
d Zdeeejf dede	fddZddefddZdeeejf dede	fddZdd Zdeeejf dede	fddZdd Z  ZS )
ModelTrunkc                    s   t    |d }|d | _|d d | _t|d | _|d d | _t||| _t	
t|d | _t| jd d | _| jd d	 d
 }| jd d |d  | _d S )Nmodeltrainingmixed_precisionrA   r-   r(   r0   r1   r   r3   hidden_layer_multiplier)rD   rE   config_training
enable_ampr   rA   r(   r?   config_modelr   from_configr   r^   rF   	LayerNormnormrN   )rL   r:   
model_namerd   	ff_configrO   r   r   rE   y   s   

zModelTrunk.__init__batch	batch_idxreturnc                 C   s@   | di |}| j dd | D  | jd|d dd |S )Nc                 S      i | ]
\}}d | |qS )train_r   .0kvr   r   r   
<dictcomp>       z,ModelTrunk.training_step.<locals>.<dictcomp>
train_accuaccuT	sync_distr   )loggerlog_metricsitemslogrL   rj   rk   outputsr   r   r   training_step   s   zModelTrunk.training_stepc                 C   s"   |  |}| jd|d dd d S )Ntrain_accu_meanrv   Trw   )eval_epoch_endr|   )rL   r~   logsr   r   r   training_epoch_end   s   
zModelTrunk.training_epoch_endc                 C   sl   t jj|  | jd dd| jd d}t jjj|| jd | jd | jd  | jd | jd d	}|g|gfS )
Nlearning_rate)g?g+?gư>weight_decay)lrbetasepsr   warmupnum_train_stepslr_decay)	optimizermax_lr	pct_startanneal_strategytotal_steps)r"   optimAdamW
parametersrb   lr_scheduler
OneCycleLR)rL   r   r   r   r   r   configure_optimizers   s"   	zModelTrunk.configure_optimizersc                 C   s   | di |}|S )Nr   r   r}   r   r   r   	eval_step   s   zModelTrunk.eval_steptrainprefixc                    s   i }t dd |D  }| |d< dD ]( t  fdd|D |  |d  | < | j| d  d|  dd	 q|S )
Nc                 S   s   g | ]}|d  qS )countr   rp   xr   r   r   
<listcomp>       z-ModelTrunk.eval_epoch_end.<locals>.<listcomp>r   )rv   lossc                    s   g | ]}|  qS r   r   r   rq   r   r   r      r   __meanTrw   )r"   tensorr%   sumr|   )rL   r~   r   r   countsr   r   r   r      s    
"zModelTrunk.eval_epoch_endc                 C   s@   |  ||}| jdd | D  | jd|d ddd |S )Nc                 S   rm   )val_r   ro   r   r   r   rs      rt   z.ModelTrunk.validation_step.<locals>.<dictcomp>val_accurv   T)rx   prog_bar)r   ry   rz   r{   r|   r}   r   r   r   validation_step   s   zModelTrunk.validation_stepc                 C      | j |dd d S )Nvalr   r   rL   r~   r   r   r   validation_epoch_end      zModelTrunk.validation_epoch_endc                 C   s   |  ||S rQ   )r   )rL   rj   rk   r   r   r   	test_step   s   zModelTrunk.test_stepc                 C   r   )Ntestr   r   r   r   r   r   test_epoch_end   r   zModelTrunk.test_epoch_end)r   )r   r   r   rE   r   strr"   rU   intPLOutputr   r   r   r   r   r   r   r   r   rV   r   r   rO   r   r]   x   s:    
 

r]   c                       s8   e Zd Z fddZdejdejdejfddZ  ZS )
ModelForSCc                    0   t  || t| j| jd d | jd| _d S Nr-   r1   )rM   rN   )rD   rE   r@   rd   rN   seq_classiferrL   r:   rh   rO   r   r   rE         zModelForSC.__init__input_ids_0mask_0labelc           	      C   s   | j tjkrt||| j\}}| | j||d|d }| |}t	j
jdd||}|jdd|kt	j}| | |dd}|S )Nencoder_input_maskr    none	reductionr   r   r   rv   r   )rA   r   r   r,   r(   rg   r^   	unsqueezer   r"   rF   CrossEntropyLossargmaxtofloat32r   r!   )	rL   r   r   r   	token_out
seq_scoresseq_lossseq_accur~   r   r   r   rS      s   
zModelForSC.forwardrT   r   r   rO   r   r      s    
r   c                
       sD   e Zd Z fddZdejdejdejdejdejf
dd	Z  ZS )
ModelForSCDualc                    r   r   )rD   rE   rW   rd   rN   r   r   rO   r   r   rE      r   zModelForSCDual.__init__r   input_ids_1r   mask_1r   c                 C   s   |  |  }}| jtjkr!t||| j\}}t||| j\}}tj||gdd}tj||gdd}| | j	||d|
d }| jtj|ddd }	tjjdd|	|}
|	jdd|ktj}|
 | |dd}|S )	Nr   r   r   r       r   r   r   )r$   rA   r   r   r,   r(   r"   r&   rg   r^   r   r   chunkrF   r   r   r   r   r   r!   )rL   r   r   r   r   r   	input_idsmasks
tokens_outr   r   r   r~   r   r   r   rS      s&   	zModelForSCDual.forwardrT   r   r   rO   r   r      s    
r   )#enumr   typingr   r   pytorch_lightningplr"   torch.nnrF   xformers.componentsr   'xformers.components.multi_head_dispatchr   xformers.factoryr   r   r	   xformers.utilsr
   r   r%   rU   r   r   r   r,   r?   Moduler@   rW   LightningModuler]   r   r   r   r   r   r   <module>   s&   	
 Y#