o
    ip                     @  s  d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlm	  mZ d dlmZmZ d d	lmZmZmZmZ d d
lmZmZ d dlmZmZ 	 dd Zdd Zdd ZdddZ dddZ!G dd deZG dd deZ"e#e e"_ e#e!e"_!dS )    )annotations)Callable)partial)	randrangeN)nn)Module)tree_flattentree_unflatten)	rearrangerepeatreduceeinsum)Reduce	Rearrange)ResidualRMSNormc                 C  s   | d uS N )vr   r   e/home/ubuntu/.local/lib/python3.10/site-packages/hyper_connections/hyper_connections_channel_first.pyexists   s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default"   s   r   c                 C  s   | S r   r   )tr   r   r   identity%   s   r   Fc                 C  s8   |r
t  t  fS tdd| d}tdd| d}||fS )Nzb ... -> (b s) ...r   )pattern	reductionsz(b s) ... -> b ...sum)r   Identityr   )num_streamsdisable	expand_fn	reduce_fnr   r   r   "get_expand_reduce_stream_functions*   s
   r$   c                 C  s<   t || dk}|stnt}t|| }t| |d}|g|R S )N   )r!   )r   HyperConnectionsr   r   r$   )r    r!   hyper_conn_klassinit_hyper_conn_fnexpand_reduce_fnsr   r   r   +get_init_and_expand_reduce_stream_functions4   s
   
r*   c                      s$   e Zd Z fddZdd Z  ZS )r   c                   s.   t    |d | _tt|dd| _d S )Ng      ?r%   )super__init__scaler   	Parametertorchzerosgamma)selfdim	__class__r   r   r,   B   s   

zRMSNorm.__init__c                 C  s   t j|dd| j | jd  S )Nr%   r3   )F	normalizer-   r1   )r2   xr   r   r   forwardG   s   zRMSNorm.forward)__name__
__module____qualname__r,   r:   __classcell__r   r   r4   r   r   A   s    r   c                      sN   e Zd Zddddddd fddZd	d
 Zdd ZdddZdd Z  ZS )r&   NTg        )branchlayer_indextanhchannel_firstdropoutr?   Module | Nonec          
        s<  t    || _|rt nt | _t|| _|dks J d|| _	t
|t|| }tt|| _t|df}	d|	|df< ttj|	t|gdd| _tj||d ddd| _tj| jj ttj|ddddtd| _tj| jd j ttd	d
 | _ttd	d
 | _t|| _dS )zN
        Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
        r   z-`num_residual_streams` must be greater than 0r%   g      ?r6   F)biaszb 1 ... -> b ...r   g{Gz?N) r+   r,   r?   r   Tanhr   actr   normnum_residual_streamsr   r   r.   r/   onesstatic_betar0   cateyestatic_alphaConv2ddynamic_alpha_fninitzeros_weight
Sequentialr   dynamic_beta_fndynamic_alpha_scaledynamic_beta_scaleDropoutrC   )
r2   rI   r3   r?   r@   rA   rB   rC   init_residual_indexinit_alpha0r4   r   r   r,   M   s*   

 zHyperConnections.__init__c                 C  s   |  |}| | |}|| j }t|d| jd}|t| jd }| | |}|| j }t|d| jd}|t| j	d }t|d| jd}t
||d}	|	d d ddf |	d d dd df }
}t|d	}|
|t|d
fS )Nz(b s) ... -> b s ...)r   zs t -> s t 1 1z
s -> s 1 1z!b s t ..., b s d ... -> b t d ...r   .r%   b s d ... -> (b s) d ...)beta)rH   rG   rP   rV   r
   rI   rN   rU   rW   rK   r   dict)r2   	residualsnormed	wc_weightdynamic_alphaalpha	dc_weightdynamic_betar\   mix_hbranch_inputr   r   r   width_connection   s   


*
z!HyperConnections.width_connectionc                C  s(   t ||d}t|d}|| }| |S )Nzb d ..., b s ... -> b s d ...r[   )r   r
   rC   )r2   branch_outputr^   r\   outputr   r   r   depth_connection   s   

z!HyperConnections.depth_connectionr   c                   s$   t jr	J d fdd}|S )Nz"branch was already wrapped on initc                   s0    | \}} |g|R i |}||} | S r   )r:   )residualargskwargsrf   add_residualrh   r?   r2   r   r   forward_and_add_residual   s   zBHyperConnections.decorate_branch.<locals>.forward_and_add_residual)r   r?   )r2   r?   rp   r   ro   r   decorate_branch   s   	z HyperConnections.decorate_branchc                   sR    \}  fdd}tjs||fS j|g|R i |}||S )Nc                   s6   t | \^} }}j| fi  } t| g|R |S r   )r   rj   r	   )
branch_outrest	tree_specresidual_kwargsr^   r2   r   r   add_residual_fn   s   z1HyperConnections.forward.<locals>.add_residual_fn)rg   r   r?   )r2   r^   branch_argsbranch_kwargsrf   rw   rh   r   ru   r   r:      s   
zHyperConnections.forward)r?   rD   )r?   r   )	r;   r<   r=   r,   rg   rj   rq   r:   r>   r   r   r4   r   r&   L   s    5

r&   )Fr   )$
__future__r   typingr   	functoolsr   randomr   r/   r   torch.nnr   torch.nn.functional
functionalr7   torch.utils._pytreer   r	   einopsr
   r   r   r   einops.layers.torchr   r   #hyper_connections.hyper_connectionsr   r   r   r   r   r$   r*   r&   staticmethodr   r   r   r   <module>   s,    




~