o
    i$:                     @  sV  d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlm	  mZ d dlmZmZ d dlmZmZ d d	lmZmZmZmZ d d
lmZmZ 	 dd Zdd Zdd Zdd Zdd Z 			d%ddZ!				d&ddZ"G dd deZ#G dd deZ$G dd  d eZ%e&e!e%_!e&e"e%_"G d!d" d"eZ'G d#d$ d$eZ(dS )'    )annotations)Callable)partial)	randrangeN)nncat)Module
Sequential)tree_flattentree_unflatten)	rearrangerepeatreduceeinsum)	RearrangeReducec                 C  s   | d uS N )vr   r   W/home/ubuntu/.local/lib/python3.10/site-packages/hyper_connections/hyper_connections.pyexists      r   c                 C  s   | | dkS )Nr   r   )numdenr   r   r   divisible_by   s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default"   s   r   c                 C  s   | S r   r   )tr   r   r   identity%   s   r   c                 C  s   | | S r   r   )xyr   r   r   add(   r   r!   Fc                 C  s\   |r
t  t  fS |rt|sJ dt| |dd}ntdd| d}tdd| d}||fS )	Nz`dim` must be passed into get_init_and_expand_reduce_stream_functions for returning an expansion function with stream embeddings addedT)expand_to_streamsb ... -> (b s) ...r   )pattern	reductionsz(b s) ... -> b ...sum)r   Identityr   StreamEmbedr   )num_streamsadd_stream_embeddimdisable	expand_fn	reduce_fnr   r   r   "get_expand_reduce_stream_functions-   s   r0      c                 C  s`   t || dko	|dk}|stnt}t|| |d}t| |||d}t|r*t||d}|g|R S )Nr1   )	num_fracs)r+   r,   r-   r,   )r   HyperConnectionsResidualr   r0   r   )r*   r2   r,   r+   r-   hyper_conn_klassinit_hyper_conn_fnexpand_reduce_fnsr   r   r   +get_init_and_expand_reduce_stream_functionsA   s   r9   c                      s$   e Zd Z fddZdd Z  ZS )RMSNormc                   s*   t    |d | _tt|| _d S )Ng      ?)super__init__scaler   	Parametertorchzerosgamma)selfr,   	__class__r   r   r<   W   s   

zRMSNorm.__init__c                 C  s   t j|dd| j | jd  S )Nr3   r1   )F	normalizer=   rA   )rB   r   r   r   r   forward\   s   zRMSNorm.forward__name__
__module____qualname__r<   rH   __classcell__r   r   rC   r   r:   V   s    r:   c                      sH   e Zd Zdddd fddZdd	 Zd
d ZdddZdd Z  ZS )r5   N)branchresidual_transformrN   Module | NonerO   c                  s$   t    || _t|t | _d S r   )r;   r<   rN   r   r   r(   rO   )rB   rN   rO   argskwargsrC   r   r   r<   d   s   
zResidual.__init__c                 C  s   ||t  fS r   )dictrB   	residualsr   r   r   width_connectiono   s   zResidual.width_connectionc                 C  s   ||  | S r   )rO   )rB   branch_outputrU   r   r   r   depth_connectionu   s   zResidual.depth_connectionr   c                   $   t jr	J d fdd}|S )N"branch was already wrapped on initc                   0    | \}} |g|R i |}||} | S r   rH   residualrQ   rR   branch_inputadd_residualrW   rN   rB   r   r   forward_and_add_residual      z:Residual.decorate_branch.<locals>.forward_and_add_residualr   rN   rB   rN   rb   r   ra   r   decorate_branch}      	zResidual.decorate_branchc                   R    \}  fdd}tjs||fS j|g|R i |}||S )Nc                   s6   t | \^} }}j| fi  } t| g|R |S r   )r
   rX   r   
branch_outrest	tree_specresidual_kwargsrU   rB   r   r   add_residual_fn   s   z)Residual.forward.<locals>.add_residual_fnrV   r   rN   rB   rU   branch_argsbranch_kwargsr_   ro   rW   r   rm   r   rH      s   
zResidual.forward)rN   rP   rO   rP   rN   r   )	rJ   rK   rL   r<   rV   rX   rf   rH   rM   r   r   rC   r   r5   c   s    
r5   c                
      sV   e Zd Zdddddddedd	d fd	d
Zdd Zdd ZdddZdd Z  Z	S )r4   NTFg        r1   )	rN   layer_indextanhchannel_firstdropoutadd_branch_out_to_residualnum_input_viewsdepth_residual_fnr2   rN   rP   c       
           s  t    || _|rt nt | _|dksJ || _|dk| _t	d|d| _
t	d| _t||s=J d| d| d|| }t|| _|dksNJ d	|| _t|t|| }|| }|	| }|	dkshJ |	| _t||f}d
||ddf< tt|t|fdd| _tt||| | _ttdd | _|| _|rtt|| _|dkr|fn||f}tt|| _ttdd | _t || _!|| _"|
| _#dS )zN
        Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
        r1   zb ... (f d) -> b ... f d)fzb ... f d -> b ... (f d)zfeature dimension (z() must be divisible by the `num_fracs` ()r   z-`num_residual_streams` must be greater than 0g      ?Nr3   r   g{Gz?)$r;   r<   rN   r   Tanhr(   actr2   	has_fracsr   split_fracsmerge_fracsr   r:   normnum_residual_streamsr   r   rz   r?   r@   r>   r   eyestatic_alphadynamic_alpha_fnonesdynamic_alpha_scalery   static_betadynamic_beta_fndynamic_beta_scaleDropoutrx   rw   r{   )rB   r   r,   rN   ru   rv   rw   rx   ry   rz   r{   r2   init_residual_indexnum_residual_streams_fracsnum_input_views_fracsinit_alpha0dynamic_beta_shaperC   r   r   r<      s@   


 

zHyperConnections.__init__c                 C  s  | j }| jrt|d}| |}t|d|d}| |}| || j }|| j }t| jd|d}|| }| |}d }| j	r`| || j
 }	| jsOt|	d}	|	| j }
t| jd|d}|
| }t||d}| jdkr|d	d
d d f |d	dd d d f }}n|d	d | jd d f |d	| jd d d f }}t|d}| jrt|d}| |}| jrt|d}nt|d}||t|dfS )Nb d ... -> b ... d(b s) ... d -> b ... s dr&   z(f s) d -> f s dz... -> ... 1z... (s f) -> ... s fz'... f1 s f2 t, ... f1 s d -> ... f2 t dr1   .r   zb ... v d -> v b ... db ... d -> b d ...zb ... f s d -> (b s) (f d) ...zb ... f s d -> (b s) ... (f d))beta)r   rw   r   r   r   r   r   r   r   ry   r   r   r   r   r   rz   r   rS   )rB   rU   streamsnormed	wc_weightdynamic_alphar   alphar   	dc_weightdynamic_betar   mix_hr_   r   r   r   rV     s>   







,2



z!HyperConnections.width_connectionc                C  sj   | j sJ | |}| jrt|d}t||d}t|d}| |}| jr*t|d}| ||}| |S )Nr   z)b ... f1 d, b ... f1 s f2 -> b ... f2 s db ... s d -> (b s) ... dr   )ry   r   rw   r   r   r   r{   rx   )rB   rW   rU   r   outputr   r   r   rX   S  s   






z!HyperConnections.depth_connectionr   c                   rY   )NrZ   c                   r[   r   r\   r]   ra   r   r   rb   |  rc   zBHyperConnections.decorate_branch.<locals>.forward_and_add_residualrd   re   r   ra   r   rf   v  rg   z HyperConnections.decorate_branchc                   rh   )Nc                   s@   j s| S t| \^} }}j| fi  } t| g|R |S r   )ry   r
   rX   r   ri   rm   r   r   ro     s
   z1HyperConnections.forward.<locals>.add_residual_fnrp   rq   r   rm   r   rH     s   
zHyperConnections.forward)rN   rP   rt   )
rJ   rK   rL   r!   r<   rV   rX   rf   rH   rM   r   r   rC   r   r4      s    _L
#r4   c                      s*   e Zd Z		d fdd	Zdd Z  ZS )r)   Fc                   s4   t    || _|| _|| _tt||| _	d S r   )
r;   r<   rw   r*   r"   r   r>   r?   r@   stream_embed)rB   r*   r,   rw   r"   rC   r   r   r<     s
   
zStreamEmbed.__init__c                 C  sv   | j rt|d| jd}| jrt|d| jd}nt|d| jd}|| j }| jr1t|d| jd}|S t|d| jd}|S )Nr#   r   (b s) d ... -> b ... s dr   zb ... s d -> (b s) d ...r   )r"   r   r*   rw   r   r   rT   r   r   r   rH     s   
zStreamEmbed.forward)FFrI   r   r   rC   r   r)     s
    r)   c                      s(   e Zd Z	d fdd	Zdd Z  ZS )AttentionPoolReduceStreamFc                   sB   t    || _|| _tj||dd| _| jjj	t
| d S )NF)bias)r;   r<   r*   rw   r   Linearto_attn_logitsweightdatacopy_r?   r   )rB   r*   r,   rw   rC   r   r   r<     s
   
z"AttentionPoolReduceStream.__init__c                 C  sb   | j rt|d| jd}nt|d| jd}| |}|jdd}t|| dd}| j r/t|d}|S )	Nr   r   r   r3   zb ... s d -> b ... dr'   r   )rw   r   r*   r   softmaxr   )rB   rU   attn_logitsattnr   r   r   rH     s   

z!AttentionPoolReduceStream.forward)FrI   r   r   rC   r   r     s    r   )FNF)r1   NFN))
__future__r   typingr   	functoolsr   randomr   r?   r   r   torch.nn.functional
functionalrF   torch.nnr   r	   torch.utils._pytreer
   r   einopsr   r   r   r   einops.layers.torchr   r   r   r   r   r   r!   r0   r9   r:   r5   r4   staticmethodr)   r   r   r   r   r   <module>   sD    

D 
|
$