o
    Ti                     @   s   d dl Z d dlmZ d dlm  mZ d dlmZ d dl	m
Z d dlmZ daG dd de jjZG dd de jjZG d	d
 d
e jjZdS )    N)	Parameter)get_accelerator)Callablec                   @   s$   e Zd Zedd Zedd ZdS )#DominoAsyncColumnParallelLinearImplc                 C   s>   |  ||| || _|| _t|| }|d ur|| }|S N)save_for_backward
handle_dich_idtorchmatmult)ctxinpweightbiasr   r	   output r   Y/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/domino/async_linear.pyforward   s   z+DominoAsyncColumnParallelLinearImpl.forwardc           	      C   s   | j \}}}d  } }}t||}tj|tdd}|| j| j< ||j	d |j	d  |j	d }||j	d |j	d  |j	d }t|
 |}|d urT|jdd}|||d d fS )NT)groupasync_opr         )dim)saved_tensorsr
   r   dist
all_reduceTP_groupr   r	   viewshaper   sum)	r   grad_outputr   r   r   
grad_inputgrad_weight	grad_biashandler   r   r   backward   s   ""z,DominoAsyncColumnParallelLinearImpl.backwardN)__name__
__module____qualname__staticmethodr   r&   r   r   r   r   r      s
    
	r   c                       s8   e Zd Z		d	def fddZdejfddZ  ZS )
DominoAsyncColumnParallelLinearTFinit_methodc                    s   t t|   || _td kr|attj||t 	 |j
d| _|jr(|| j |rZttj|t 	 |j
d| _|jrXt  | j  W d    d S 1 sQw   Y  d S d S | dd  d S N)devicedtyper   )superr+   __init__skip_bias_addr   r   r
   emptyr   current_device_nameparams_dtyper   perform_initializationr   no_gradzero_register_parameter)self
input_sizeoutput_size	_tp_groupconfigr,   r   r2   	__class__r   r   r1   1   s0   

"z(DominoAsyncColumnParallelLinear.__init__input_c                 C   s<   | j s| jnd }t|| j|||}| j r| jnd }||fS r   )r2   r   r   applyr   )r:   rA   r   r	   r   r   output_biasr   r   r   r   U   s   z'DominoAsyncColumnParallelLinear.forward)TF)	r'   r(   r)   r   r1   r
   Tensorr   __classcell__r   r   r?   r   r+   /   s    $r+   c                       sF   e Zd Z			ddededededed	ef fd
dZdd Z  ZS )RowParallelLinearNoCommTr   Fr;   r<   r,   r   strider2   c                    s   t t|   || _ttj||t  |j	d| _
|jr"|| j
 |rTttj|t  |j	d| _|jrRt  | j  W d    d S 1 sKw   Y  d S d S | dd  d S r-   )r0   rF   r1   r2   r   r
   r3   r   r4   r5   r   r6   r   r7   r8   r9   )r:   r;   r<   r>   r,   r   rG   r2   r?   r   r   r1   a   s4   


"z RowParallelLinearNoComm.__init__c                 C   s8   | j s| jnd }t|| j|}| j r| jnd }||fS r   )r2   r   Flinearr   )r:   rA   r   r   rC   r   r   r   r      s   zRowParallelLinearNoComm.forward)Tr   F)	r'   r(   r)   intr   boolr1   r   rE   r   r   r?   r   rF   _   s$    %rF   )r
   torch.nn.parameterr   torch.nn.functionalnn
functionalrH   deepspeed.acceleratorr   deepspeed.commcommr   typingr   r   autogradFunctionr   Moduler+   rF   r   r   r   r   <module>   s   0