o
    c۷i5                     @   s  d dl mZ d dlZd dlmZ d dlm  mZ d dlmZ d dl	m
Z
mZmZmZ d dl	mZmZ d dl	mZmZ dd Zd	d
 Zdd Zdd Zdd Zdd Zdd ZG dd dZG dd deZG dd deZG dd deZG dd deZG dd  d ZG d!d" d"eZG d#d$ d$eZ G d%d& d&eZ!G d'd( d(eZ"G d)d* d*ej#j$Z%d=d-d.Z&G d/d0 d0ej#j$Z'	,d>d1d2Z(	,d>d3d4Z)G d5d6 d6ej#j$Z*d?d7d8Z+d?d9d:Z,G d;d< d<ej-Z-dS )@    )partialN)Tensor)gemmgemm_add_inplacegemm_act	gemm_dact)
gemm_gatedgemm_dgated)act_to_pytorch_fn_mapgated_to_pytorch_fn_mapc                 C   s,   t j r	|  S | ddkr| S |  S )zEnsure last-dim stride is 1. Under torch.compile use unconditional .contiguous()
    (dynamo can't inspect strides on fake tensors); otherwise check first to avoid copies.
       )torchcompileris_compiling
contiguousstride)t r   B/home/ubuntu/vllm_env/lib/python3.10/site-packages/quack/linear.py_ensure_contiguous   s   
r   c                     s,   t d t  rt fdd| D } | S )Ncudac                 3   s    | ]	}|j  d V  qdS )dtypeN)to).0r   autocast_dtyper   r   	<genexpr>   s    z*linear_fwd_convert_type.<locals>.<genexpr>)r   get_autocast_dtypeis_autocast_enabledtuple)tensorsr   r   r   linear_fwd_convert_type   s   
r#   c                 C   s>   |\}}|s
d\}}|sd }|  ||| jr| d S d  d S )NNN)save_for_backwardfuse_grad_accum)ctxxweight	weight_ogneeds_x_w_gradneeds_input_gradneeds_weight_gradr   r   r   linear_fwd_postprocess   s   "r.   c                 C   s$   | j d r|d usJ |||S d S )Nr   )r,   )r'   doutr)   	matmul_fnr   r   r   linear_bwd_compute_input_grad(   s   

r1   c                 C   s   | j d r<|d usJ |d|jd }| jr!|jd u s!tj r,||j|| j	d}|S ||j||j |j}d |_|S d }|S )Nr   r   )	out_dtype)
r,   reshapeshaper&   gradr   r   r   Tweight_dtype)r'   r/   r(   r*   r0   matmul_inplace_fndweightr   r   r   linear_bwd_compute_weight_grad0   s   
r:   c                 C   s   t | | S )zFRecompute postact from preact using the activation function (no GEMM).)r
   preact
activationr   r   r   _recompute_act_postactA   s   r>   c                 C   s*   t | | ddddf | ddddf S )z:Recompute gated postact from interleaved preact (no GEMM)..N   r   )r   r;   r   r   r   _recompute_gated_postactF   s   *r@   c                   @   s4   e Zd ZeZeeddZeeddZeeddZ	dS )
_LinearOpsTdynamic_schedulerN)
__name__
__module____qualname__r   matmul_fwd_fnr   matmul_bwd_dxmatmul_bwd_dwr   matmul_bwd_dw_inplacer   r   r   r   rA   P   s
    rA   c                   @   s4   e Zd ZeeddZeedddZeedddZdS )_LinearUntunedOpsFtunedTrC   rM   N)rD   rE   rF   r   r   rG   rH   rI   r   r   r   r   rK   W   s    rK   c                   @      e Zd ZeZdS )_LinearActOpsN)rD   rE   rF   r   rG   r   r   r   r   rP   ]       rP   c                   @   s   e Zd ZeeddZdS )_LinearActUntunedOpsFrL   N)rD   rE   rF   r   r   rG   r   r   r   r   rR   a   s    rR   c                   @   rO   )_LinearGatedOpsN)rD   rE   rF   r   rG   r   r   r   r   rS   e   rQ   rS   c                   @   sB   e Zd ZeeddZeedddZeedddZee	dddZ
dS )_LinearGatedUntunedOpsFrL   TrN   N)rD   rE   rF   r   r   rG   r   rH   rI   r   rJ   r   r   r   r   rT   i   s
    rT   c                   @       e Zd ZeeddZeeZdS )_DActLinearOpsTrB   N	rD   rE   rF   r   r   rH   staticmethodr>   recompute_postactr   r   r   r   rV   p       rV   c                   @   "   e Zd ZeedddZeeZdS )_DActLinearUntunedOpsTFrN   NrW   r   r   r   r   r\   u       r\   c                   @   rU   )_DGatedLinearOpsTrB   N	rD   rE   rF   r   r	   rH   rX   r@   rY   r   r   r   r   r^   z   rZ   r^   c                   @   r[   )_DGatedLinearUntunedOpsTFrN   Nr_   r   r   r   r   r`      r]   r`   c                   @   $   e Zd Zedd Zedd ZdS )
LinearFuncc           	   	   C   s   t ||\}}tjjddd\ |j| _|| _|| _|}|jdd }|	d|jd }|j
||j|d}t| |||| jdd d |durJ|jnd| _|duoU| jd | _|j	g ||jd R  W  d   S 1 snw   Y  dS )	z
        x: (..., in_features)
        weight: (out_features, in_features)
        bias: (out_features,) or None
        out: (..., out_features)
        r   FenabledNr   )biasr?   r+   )r#   r   ampautocastr   r7   r&   opsr4   r3   rG   r6   r.   r,   
bias_dtypecompute_dbias)	r'   r(   r)   re   r&   ri   r*   batch_shapeoutr   r   r   forward   s    	$zLinearFunc.forwardc           
      C   s   t jjddd] | j}| j\}}}|jdd }t|d|jd }| jr/|j	d| j
dnd}t| |||j}|durJ|jg ||jd R  nd}t| ||||j|j}	||	|ddfW  d   S 1 shw   Y  dS )+
        dout: (..., out_features)
        r   Frc   Nr   r   r   r   rg   rh   ri   saved_tensorsr4   r   r3   rk   sumrj   r1   rH   r:   rI   rJ   )
r'   r/   ri   r(   r)   r*   rl   dbiasdxr9   r   r   r   backward   s   &$zLinearFunc.backwardNrD   rE   rF   rX   rn   ru   r   r   r   r   rb      s
    
rb   FTc                 C   s   |rt nt}t| ||||S N)rA   rK   rb   apply)r(   r)   re   r&   rM   ri   r   r   r   linear_func   s   ry   c                   @   ra   )LinearActFuncc              	   C   s,  t ||\}}tjjddd} |j| _|| _|| _|}|jdd }	|	d|jd }|j
||j|||d\}
}t| |||| jdd d |
durX|
j	g |	|
jd R  }
|dur_|jnd| _|duoj| jd	 | _| | | d |
|j	g |	|jd R  fW  d   S 1 sw   Y  dS )
z
        x: (..., in_features)
        weight: (out_features, in_features)
        bias: (out_features,) or None
        out: (..., out_features)
        Return both out and post-activation, but only out is differentiable.
        r   Frc   Nr   )re   r=   store_preactr?   rf      )r#   r   rg   rh   r   r7   r&   ri   r4   r3   rG   r6   r.   r,   rj   rk   mark_non_differentiableset_materialize_grads)r'   r(   r)   r=   re   r{   r&   ri   r*   rl   rm   postactr   r   r   rn      s,   	


$zLinearActFunc.forwardc                 G   s   t jjddd_ | j}| j\}}}|jd d }t|d|jd }| jr/|j	d| j
dnd }t| |||j}	|	d urJ|	jg ||	jd R  nd }	t| ||||j|j}
|	|
d |d d d fW  d    S 1 sjw   Y  d S )Nr   Frc   r   r   r   rp   )r'   r/   argsri   r(   r)   r*   rl   rs   rt   r9   r   r   r   ru      s   &$zLinearActFunc.backwardNrv   r   r   r   r   rz      s
    
rz   c              	   C   "   |rt nt}t| ||||||S rw   )rP   rR   rz   rx   r(   r)   r=   re   r{   r&   rM   ri   r   r   r   linear_act_func      r   c              	   C   r   rw   )rS   rT   rz   rx   r   r   r   r   linear_gated_func   r   r   c                   @   ra   )DActLinearFuncc                 C   s   t ||\}}tjjdddU |j| _|| _|| _|}|jdd }|	d|jd }|
||j}	| jd p<| jd }
| jd }t| |||||
fd || _|	j	g ||	jd R  W  d   S 1 sgw   Y  dS )	z
        x: (..., in_features)
        weight: (out_features, in_features)
        out: (..., out_features)
        Takes in an extra preact argument which is the pre-activation, to be used in the backward pass.
        r   Frc   Nr   r   r   rf   )r#   r   rg   rh   r   r7   r&   ri   r4   r3   rG   r6   r,   r.   r=   )r'   r<   r)   r(   r=   r&   ri   r*   rl   rm   need_preactneed_weightr   r   r   rn      s"   
$zDActLinearFunc.forwardc           
      C   s0  t jjddd | j}| j\}}}|jdd }t|d|jd }| jd rE|d|jd }|dus8J |j	|||| j
d\}}n| jd r]|d|jd }||| j
}d}nd	\}}|durr|jg ||jd R  nd}t| ||||j|j}	||	ddddfW  d   S 1 sw   Y  dS )
ro   r   Frc   Nr   r   )r=   r   r$   )r   rg   rh   ri   rq   r4   r   r3   r,   rH   r=   rY   r:   rI   rJ   )
r'   r/   ri   r<   r)   r*   rl   dpreactr(   r9   r   r   r   ru     s*   

$$zDActLinearFunc.backwardNrv   r   r   r   r   r      s
    
r   c                 C       |rt nt}t| |||||S rw   )rV   r\   r   rx   r<   r)   r(   r=   r&   rM   ri   r   r   r   act_linear_func5     r   c                 C   r   rw   )r^   r`   r   rx   r   r   r   r   gated_linear_func:  r   r   c                       sN   e Zd Z				ddededededdf
 fdd	Zd
edefddZ  ZS )LinearFNin_featuresout_featuresre   r&   returnc                    s    t  j|||||d || _d S )N)re   devicer   )super__init__r&   )selfr   r   re   r   r   r&   	__class__r   r   r   @  s   	
zLinear.__init__inputc                 C   sJ   |j r| jd dkr| jd dkrt|| j| j| jdS t|| j| jS )N   r   )r&   )	is_cudar   r   ry   r)   re   r&   Flinear)r   r   r   r   r   rn   L  s   "zLinear.forward)FNNF)	rD   rE   rF   intboolr   r   rn   __classcell__r   r   r   r   r   ?  s"    r   )NFT)NTFT)FT).	functoolsr   r   torch.nnnntorch.nn.functional
functionalr   r   quack.gemm_interfacer   r   r   r   r   r	   r
   r   r   r#   r.   r1   r:   r>   r@   rA   rK   rP   rR   rS   rT   rV   r\   r^   r`   autogradFunctionrb   ry   rz   r   r   r   r   r   r   r   r   r   r   <module>   sH   		

-1


>
