o
    i?'                     @   s   d dl Z d dlZd dlmZ d dlm  mZ G dd dejjZG dd dejjZ	G dd dejZ
G dd	 d	ejZG d
d dejZG dd dejjZejddfdededededejdedefddZG dd dejZdS )    Nc                       2   e Zd Z	d	 fdd	Zd
ddZdd Z  ZS )ToySingleLinearModelFc                    s2   t    || _|| _tjj|||||d| _d S N)biasdtypedevice)super__init__r   r   torchnnLinearlinear1)self	input_dim
output_dimr   r   has_bias	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/torchao/testing/model_architectures.pyr	      s   

zToySingleLinearModel.__init__   c                 C      t j|| jj| j| jdfS N)r   r   r
   randnr   in_featuresr   r   r   
batch_sizer   r   r   example_inputs      z#ToySingleLinearModel.example_inputsc                 C   s   |  |}|S N)r   r   xr   r   r   forward(   s   
zToySingleLinearModel.forwardFr   __name__
__module____qualname__r	   r   r#   __classcell__r   r   r   r   r      s
    

r   c                       r   )ToyTwoLinearModelFc                    sJ   t    || _|| _tjj|||||d| _tjj|||||d| _d S r   )	r   r	   r   r   r
   r   r   r   linear2)r   r   
hidden_dimr   r   r   r   r   r   r   r	   /   s   
	

zToyTwoLinearModel.__init__r   c                 C   r   r   r   r   r   r   r   r   C   r   z ToyTwoLinearModel.example_inputsc                 C   s   |  |}| |}|S r    )r   r,   r!   r   r   r   r#   M   s   

zToyTwoLinearModel.forwardr$   r%   r&   r   r   r   r   r+   .   s
    

r+   c                       s:   e Zd Z				d
	d fddZdejfdd	Z  ZS )#ConvWithSharedWeightInExportedModel   r   TreturnNc                    sF   t    || _tj||||||d| _t|| _tjdd| _	d S )Nr   T)inplace)
r   r	   n_chunksr   Conv2dconvBatchNorm2dbnReLUrelu)r   r3   in_channelsout_channelskernel_sizestridepaddingr   r   r   r   r	   T   s   

z,ConvWithSharedWeightInExportedModel.__init__c                 C   sV   t j|| jdd}g }|D ]}| |}| |}| |}|| qt j|ddS )Nr   dim)r
   chunkr3   r5   r7   r9   appendcat)r   r"   chunksoutputsrA   outr   r   r   r#   f   s   


z+ConvWithSharedWeightInExportedModel.forward)r/   r   r   T)r0   N)r'   r(   r)   r	   r
   Tensorr#   r*   r   r   r   r   r.   S   s    	r.   c                       s,   e Zd Zejdf fdd	Zdd Z  ZS )LNLinearActivationModelsigmoidc                    s   t    | }t t t t t t	 t
 d}||vr,td| tj|dd| _tj||ddj|d| _|| | _d S )N)r9   rI   	leakyrelurelu6gelusilu	hardswishzUnsupported activation: F)elementwise_affiner1   )r   )r   r	   lowerr   r8   Sigmoid	LeakyReLUReLU6GELUSiLU	Hardswish
ValueError	LayerNormlnr   tofc
activation)r   fc_dim1fc_dim2r   r\   activation_mapr   r   r   r	   r   s   

z LNLinearActivationModel.__init__c                 C   s   |  |}| |}| |S r    )rY   r[   r\   r!   r   r   r   r#      s   


zLNLinearActivationModel.forwardr'   r(   r)   r
   bfloat16r	   r#   r*   r   r   r   r   rH   q   s    rH   c                       sF   e Zd Zddedef fddZdd Zdejd	ejfd
dZ	  Z
S )RMSNormh㈵>r@   epsc                    s&   t    || _tt|| _d S r    )r   r	   rd   r   	Parameterr
   onesweight)r   r@   rd   r   r   r   r	      s   
zRMSNorm.__init__c                 C   s$   |t t j|| ddd| j  S )NT)r@   keepdim)r
   rsqrtmeanrd   r!   r   r   r   _norm   s   $zRMSNorm._normr"   r0   c                 C   s   |  | |}|| j S r    )rl   floattype_asrg   )r   r"   outputr   r   r   r#      s   
zRMSNorm.forward)rc   )r'   r(   r)   intrm   r	   rl   r
   rG   r#   r*   r   r   r   r   rb      s    rb   c                       s.   e Zd Zddejf fdd	Zdd Z  ZS )TransformerBlock      c                    s   t    || _|| _|| | _tjj|d| dd|| _	tjj||dd|| _
|| _t|| | _tjj|| jdd|| _tjj| j|dd|| _t||| _t||| _tj | _d S )Nr/   Fr1   )r   r	   r-   	num_headshead_dimr
   r   r   rZ   qkvproj	mlp_ratiorp   mlp_hidden_dimmlp_fc1mlp_fc2rb   norm1norm2rT   r\   )r   r-   rt   rx   r   r   r   r   r	      s"   

zTransformerBlock.__init__c                 C   sB  |j \}}}|}| |}| |}|||d| j| j}|ddddd}|\}}}	||| j || j}||| j || j}|	|| j || j}	||dd d| jd	   }
tj	|
dd
}
|
|	 }||| j|| j}|dd||| j
}| |}|| }|}| |}| |}| |}| |}|| }|S )Nr/      r   r   rs   rh   g      ?g      ?r?   )shaper|   rv   reshapert   ru   permute	transposer
   softmaxr-   rw   r}   rz   r\   r{   )r   r"   r   seq_len_residualrv   qkvattnr   r   r   r#      s4   








zTransformerBlock.forwardr`   r   r   r   r   rq      s    rq   cudar9   
model_typemr   nhigh_precision_dtyper   r\   c           
      C   s   | dkrt ||||d}|j|dd }||fS d| v rHtd| }	|	r.|	dr.|	dnd}t||||d	|}tj||||d}||fS | d
kret	|dd|d|}tj|d|||d}||fS t
d|  )ab  Create a model and input data for benchmarking.

    Args:
        model_type (str): type of the model to be created
        batch_size (int): batch size of the input data
        device (str): device to run the model on
        high_precision_dtype (torch.dtype): data type of the model
        m, k, n (int): dimensions of the model and input data
    linear)r   r   )r   r   	ln_linearzln_linear_?(\w+)?r   r9   )r\   transformer_blockrr   rs   )rt   rx   r      zUnknown model type: )r   r   researchgrouprH   rZ   r
   r   rq   rW   )
r   r   r   r   r   r   r\   model
input_datamatchr   r   r   create_model_and_input_data   s.   
r   c                       sP   e Zd Zdedededejdejddf fdd	Zd
ejdejfddZ	  Z
S )LlamaModelsLlama4Expertsnum_local_expertsr@   r-   r   r   r0   Nc              	      sn   t    || _|| _ttj|||||d| _ttj|||||d| _	ttj|||||d| _
d S r   )r   r	   r   r@   r   re   r
   r   w1w2w3)r   r   r@   r-   r   r   r   r   r   r	     s<   



z!LlamaModelsLlama4Experts.__init__routed_in_egDc                 C   sZ   | j }| j}||d|}tt|| jt|| j }t|| j	}|d|}|S )Nrh   )
r   r@   viewFrM   r
   bmmr   r   r   )r   r   eDx_egDmiddle_out_egFout_egDr   r   r   r#   <  s   "z LlamaModelsLlama4Experts.forward)r'   r(   r)   rp   r
   r   r   r	   rG   r#   r*   r   r   r   r   r     s&    +r   )r   r
   torch.nnr   torch.nn.functional
functionalr   Moduler   r+   r.   rH   rb   rq   ra   strrp   r   r   r   r   r   r   r   <module>   s:    %P
*