o
    ‚o™i8  ã                   @   sF  d dl mZ d dlZd dlmZ d dlZd dlZd dlmZm	Z	 d dl
mZ dd„ ZG dd„ dejjƒZG d	d
„ d
ejjƒZG dd„ dejjƒZdd„ Z			ddejdejdejdejdeej dedefdd„Z			ddejdejdejdejdeej dedefdd„Z				d dejdejdeej dee	 fdd„ZdS )!é    )ÚreduceN)ÚOptional)ÚGlobalOutlierPoolerÚMatmulLtStatec                 C   s   t tj| dƒS )Né   )r   ÚoperatorÚmul)Úiterable© r
   ú]/home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/research/autograd/_functions.pyÚprod   s   r   c                   @   ó&   e Zd Zeddd„ƒZedd„ ƒZdS )ÚMatMulFP8MixedNé   c                 C   sH  d| _ t|jƒdkrId| _ || _|| _|j}|jd |d kr4tj|jd d… |dd …  |j|jdS tj|jd d… |d d…  |j|jdS t	j
|||d\}	}
t	j|	|
|d |j¡}t	j| ¡ |d	\}}
t	 ||
¡ |j¡}t ||¡}|| _|| _|| _|| _|j|j| _| _t| jd d
… ƒrŸ||f| _|S d| _|S )NFr   Téÿÿÿÿr   ©ÚdtypeÚdevice©ÚcodeÚ	blocksize©r   ©r   é   ©NN)Úis_emptyr   ÚshapeÚAÚBÚtorchÚemptyr   r   ÚFÚquantize_blockwiseÚdequantize_blockwiseÚtoÚquantizeÚfloatÚ
dequantizeÚmatmulÚfw_codeÚbw_codeÚbszÚbsz2Údtype_AÚdtype_BÚanyÚneeds_input_gradÚtensors©Úctxr   r   Úoutr)   r*   r+   r,   ÚB_shapeÚcAÚstateÚfp8AÚcBÚfp8BÚoutputr
   r
   r   Úforward   s0   **
þzMatMulFP8Mixed.forwardc                 C   s  | j rt | j¡t | j¡d d d d d fS | j\}}}}}}}| j\}}d\}}tj|| j	| j
d\}	}
tj|	|
| j
d |j¡}|rRt || ¡  |j¡¡ |j¡}|rzt|jƒdkrd| dd¡ ¡ }n| dd¡ ¡ }t | |j¡|¡ |j¡}||d d d d d fS )Nr   r   r   é   r   r   r   )r   r   Ú
zeros_liker   r   r0   r1   r!   r"   r*   r,   r#   r$   r   r(   ÚtÚlenr   Ú	transposeÚ
contiguous)r3   Úgrad_outputÚ	req_gradAÚ	req_gradBÚ_r   r   Úgrad_AÚgrad_BÚ	cgrad_outr7   Úfp8outÚAtr
   r
   r   Úbackward?   s   "
 zMatMulFP8Mixed.backward©NNNr   r   ©Ú__name__Ú
__module__Ú__qualname__Ústaticmethodr<   rL   r
   r
   r
   r   r      ó
    )r   c                   @   r   )ÚMatMulFP8GlobalNr   c                 C   sF  d| _ t|jƒdkrId| _ || _|| _|j}|jd |d kr4tj|jd d… |dd …  |j|jdS tj|jd d… |d d…  |j|jdS t	j
| ¡ |d\}	}
t	 |	|
¡ |j¡}t	j
| ¡ |d\}}
t	 ||
¡ |j¡}t ||¡}|| _|| _|| _|| _|j|j| _| _t| jd d… ƒrž||f| _|S d	| _|S )
NFr   Tr   r   r   r   r   r   )r   r   r   r   r   r   r    r   r   r!   r%   r&   r'   r$   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r
   r
   r   r<   i   s0   **
þzMatMulFP8Global.forwardc                 C   s,  | j rt | j¡t | j¡d d d d d fS | j\}}}}}}}| j\}}d\}}tj| 	¡ | j
d\}	}
t |	|
¡ |j¡}|rOt || ¡  |j¡¡ |j¡}|rt|jƒdkra| dd¡ ¡ }n| dd¡ ¡ }tj| 	¡ | jd\}}
t ||
¡ |j¡}t | |j¡|¡ |j¡}||d d d d d fS )Nr   r   r=   r   r   r   )r   r   r>   r   r   r0   r1   r!   r%   r&   r*   r'   r$   r   r(   r?   r@   r   rA   rB   r)   )r3   rC   rD   rE   rF   r   r   rG   rH   rI   r7   rJ   rK   r6   Úfp8Atr
   r
   r   rL   “   s"   "
 zMatMulFP8Global.backwardrM   rN   r
   r
   r
   r   rT   e   rS   rT   c                   @   s0   e Zd Zeddee fdd„ƒZedd„ ƒZdS )ÚSwitchBackBnbNr7   c                 C   sd  |pt ƒ }d| _t|jƒdkrQd| _|| _|| _|| _|jd |jd kr;tj|jd d… |jdd …  |j	|j
dS tj|jd d… |jd d…  |j	|j
dS |j}|jd u r^t ¡ |_|j	tjkrnt d|j	› d¡ t|jƒd	kr€| d|jd ¡ ¡ }tj| tj¡|jd
\}}}	}
}|jdkrÍ|d urÍ|jrÀ|}d|d d …|f< |d d …|f }|d d …|f  ¡  ¡ |_||_n|jd u rÌ|jjdf|_n|jsÜ|jd u rÜ|jjdf|_d }|jr/t|dd ƒd urìdnd}| ¡  oü|jd |  d¡k}|r| ¡ }|j!r|r|jd u r.| "¡  t | tj¡¡\|_|_#|_$|_%}|jjdf|_nd}|d uru|jsu||_|jd d …|j &¡ f  '¡ }||j$ dd¡ d  ¡  ¡  |j	¡|_d|d d …|j &¡ f< |d d …|j &¡ f }|jd }t|ƒd	kr|d |d |d f}n|d |d f}t (||j¡}|d u s¨|j	tjkr·tj)||	|j$|d |j	¡}ntj)||	|j$d d |j	¡}| *|¡ |d urÝ|d urÝ|t +||j¡7 }|| _,|| _-|j	|j	|d u rîd n|j	| _.| _/| _0t1| j2d d… ƒr|||f| _3|
|jf| _4ng d¢| _3d| _4|  5d d ¡ t|ƒd	kr'tj'ndd„ }|| |¡ƒS )NFr   Tr   r   r   z'MatMul8bitLt: inputs will be cast from z to float16 during quantizationr=   )Ú	thresholdç        ÚrowÚgradg     À_@)Úbiasr   ©NNNr   c                 S   s   | S )Nr
   )Úxr
   r
   r   Ú<lambda>,  s    z'SwitchBackBnb.forward.<locals>.<lambda>)6r   r   r   r   r   r   r[   r   r    r   r   Úoutlier_poolr   Úget_instanceÚfloat16ÚwarningsÚwarnr@   ÚviewrB   r!   Úint8_double_quantr$   rW   Úhas_fp16_weightsr?   ÚsubBÚidxÚSBÚCBÚgetattrÚis_contiguousÚstrideÚis_trainingÚreset_gradsÚCBtÚSCBÚSCBtÚlongÚcloneÚint8_linear_matmulÚint8_mm_dequantÚadd_r(   r7   Ú
grad_shaper-   r.   Ú
dtype_biasr/   r0   r1   Útensor_statesÚsave_for_backward)r3   r   r   r4   r[   r7   Úinput_shapeÚCAÚCAtÚSCAÚSCAtÚoutlier_colsrh   ÚsubAÚhas_gradÚis_transposedrF   ÚoutliersÚshapeBÚoutput_shapeÚout32r;   Ú
clone_funcr
   r
   r   r<   º   s˜   
,,

"
€ú€(

*
zSwitchBackBnb.forwardc                 C   sF  | j r| jd u r
d nt | j¡}t | j¡t | j¡d |d fS | j\}}}}}| j\}}}	| j\}
}| j	}d  } }}|rE|j
d| jd}t|jƒdkrW| d|jd ¡ ¡ }t | tj¡¡\}}}}}|rot | ¡ |	¡}|rœ|jd ur˜|jj| jdd |j d¡ d¡¡}t ||¡ | j¡ | j¡}ntd	ƒ‚||d |d fS )
Nr   )r   r=   r   T)Úcopyr   g@ €?z7State must contain either CBt or CB matrix for backward)r   r[   r   r>   r   r   r0   r1   rz   r7   Úsumry   r@   r   ÚreshaperB   r!   re   r$   ra   r(   r?   rj   r-   Úmul_rq   Ú	unsqueezer   rd   rx   Ú	Exception)r3   rC   Ú	bias_gradrD   rE   rF   Úreq_gradBiasr~   r‚   r   r€   rh   r7   rG   rH   Ú	grad_biasÚCgradÚCgradtÚSCgradÚSCgradtr   rj   r
   r
   r   rL   /  s*   

&zSwitchBackBnb.backwardr\   )rO   rP   rQ   rR   r   r   r<   rL   r
   r
   r
   r   rV   ¹   s
    trV   c           	      C   sž   | j d }|j d |kr|j d n|j d }g d¢}d\}}t|ƒD ]\}}|||d  kr2|} nq"t|ƒD ]\}}|||d  krJ|} ||fS q7||fS )Nr   r   r   )i   i   r   i   é   é€   é@   r   )r   r   )r   Ú	enumerate)	Úinput_matrixÚweight_matrixÚinput_featuresÚoutput_featuresÚarrayr+   r,   ÚiÚkr
   r
   r   Úget_block_sizesT  s    
"þür¢   r   r   r   r)   r*   r4   r+   r,   c              	   C   ó4   |dks|dkrt | |ƒ\}}t | ||||||¡S ©Nr   )r¢   rT   Úapply©r   r   r)   r*   r4   r+   r,   r
   r
   r   Úmatmul_fp8_globale  ó   	r§   c              	   C   r£   r¤   )r¢   r   r¥   r¦   r
   r
   r   Úmatmul_fp8_mixeds  r¨   r©   rX   r7   c                 C   s*   |pt ƒ }|dkr||_t | ||||¡S )NrX   )r   rW   rV   r¥   )r   r   r4   r7   rW   r[   r
   r
   r   Úswitchback_bnb  s   
rª   )Nr   r   )NNrX   N)Ú	functoolsr   r   Útypingr   rb   r   Ú bitsandbytes.autograd._functionsr   r   Úbitsandbytes.functionalÚ
functionalr!   r   ÚautogradÚFunctionr   rT   rV   r¢   ÚTensorÚintr§   r©   rª   r
   r
   r
   r   Ú<module>   s|    TT ùÿþýüûú
ùùÿþýüûú
ùúÿþýü