o
    i]G                     @  sx  d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlm	  mZ d dlmZmZ d dlmZmZ d d	lmZmZmZmZ d d
lmZmZ 	 dd Zdd Zdd Zdd Zdd Z dd Z!d,ddZ"d,ddZ#			d-ddZ$					d.d d!Z%G d"d# d#eZ&G d$d% d%eZ'G d&d' d'eZ(e(Z)e*e$e(_$e*e%e(_%G d(d) d)eZ+G d*d+ d+eZ,dS )/    )annotations)Callable)partial)	randrangeN)nncat)Module
Sequential)tree_flattentree_unflatten)	rearrangerepeatreduceeinsum)	RearrangeReducec                 C  s   | d uS N vr   r   l/home/ubuntu/.local/lib/python3.10/site-packages/hyper_connections/manifold_constrained_hyper_connections.pyexists      r   c                 C  s   | | dkS )Nr   r   )numdenr   r   r   divisible_by    s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default#      r   c                 C  s   | S r   r   )tr   r   r   identity&   s   r    c                 C  s   | | S r   r   )xyr   r   r   add)   r   r#   c                 C  s   t j| d|dS )N   )pdim)F	normalize)r   r&   r   r   r   l1norm.   r   r)      c                 C  s\   | j }|  } | | jddd  } |  }t|D ]}t|dd}t|dd}q||S )NT)r&   keepdimr&   )dtypefloatamaxdetachexpranger)   to)	log_alphaitersr/   alpha_r   r   r   sinkhorn_knopps1   s   
r:   c                 C  sF   | j }|  } t|D ]}tj| dd} tj| dd} q|  |S )Nr+   r-   r.   )r/   r0   r4   r'   log_softmaxr3   r5   )r6   r7   r/   r9   r   r   r   log_domain_sinkhorn_knopps?   s   r<   Fc                 C  s\   |r
t  t  fS |rt|sJ dt| |dd}ntdd| d}tdd| d}||fS )	Nz`dim` must be passed into get_init_and_expand_reduce_stream_functions for returning an expansion function with stream embeddings addedT)expand_to_streamsb ... -> (b s) ...r   )pattern	reductionsz(b s) ... -> b ...sum)r   Identityr   StreamEmbedr   )num_streamsadd_stream_embedr&   disable	expand_fn	reduce_fnr   r   r   "get_expand_reduce_stream_functionsK   s   rJ   r$   c           
      K  sj   t || dko	|dk}|stnt}t|| f||d|}t| |||d}	t|r/t||d}|g|	R S )Nr$   )	num_fracssinkhorn_iters)rF   r&   rG   r-   )r   #ManifoldConstrainedHyperConnectionsResidualr   rJ   r   )
rE   rK   r&   rF   rG   rL   kwargshyper_conn_klassinit_hyper_conn_fnexpand_reduce_fnsr   r   r   +get_init_and_expand_reduce_stream_functions_   s   	rS   c                      s$   e Zd Z fddZdd Z  ZS )RMSNormc                   s*   t    |d | _tt|| _d S )Ng      ?)super__init__scaler   	Parametertorchzerosgamma)selfr&   	__class__r   r   rV   w   s   

zRMSNorm.__init__c                 C  s   t j|dd| j | jd  S )Nr.   r-   r$   )r'   r(   rW   r[   )r\   r!   r   r   r   forward|   s   zRMSNorm.forward__name__
__module____qualname__rV   r_   __classcell__r   r   r]   r   rT   v   s    rT   c                      sH   e Zd Zdddd fddZdd	 Zd
d ZdddZdd Z  ZS )rN   N)branchresidual_transformre   Module | Nonerf   c                  s$   t    || _t|t | _d S r   )rU   rV   re   r   r   rC   rf   )r\   re   rf   argsrO   r]   r   r   rV      s   
zResidual.__init__c                 C  s   ||t  fS r   )dictr\   	residualsr   r   r   width_connection   s   zResidual.width_connectionc                 C  s   ||  | S r   )rf   )r\   branch_outputrk   r   r   r   depth_connection   s   zResidual.depth_connectionr   c                   $   t jr	J d fdd}|S )N"branch was already wrapped on initc                   0    | \}} |g|R i |}||} | S r   r_   residualrh   rO   branch_inputadd_residualrm   re   r\   r   r   forward_and_add_residual      z:Residual.decorate_branch.<locals>.forward_and_add_residualr   re   r\   re   rx   r   rw   r   decorate_branch      	zResidual.decorate_branchc                   R    \}  fdd}tjs||fS j|g|R i |}||S )Nc                   s6   t | \^} }}j| fi  } t| g|R |S r   )r
   rn   r   
branch_outrest	tree_specresidual_kwargsrk   r\   r   r   add_residual_fn   s   z)Residual.forward.<locals>.add_residual_fnrl   r   re   r\   rk   branch_argsbranch_kwargsru   r   rm   r   r   r   r_      s   
zResidual.forward)re   rg   rf   rg   re   r   )	ra   rb   rc   rV   rl   rn   r|   r_   rd   r   r   r]   r   rN      s    
rN   c                      s`   e Zd Zdddddddedddddddd fddZdd Zdd ZdddZdd Z  Z	S )rM   NFg        Tr$   r*   r   )re   layer_indexchannel_firstdropoutrf   add_branch_out_to_residualnum_input_viewsdepth_residual_fnrK   rL   log_domain_sinkhornresidual_mix_constraint_fnforward_method_namesnum_dynamic_alpha_proposalsre   rg   rf   r   Callable | Noner   tuple[str, ...]c                  s6  t    || _|dksJ || _|dk| _td|d| _td| _t||s2J d| d| d|| }t	|| _
|dksCJ d	|| _t|t|| }|| }|	| }|| _|	dks`J |	| _|dk| _|| _t||f}d
||ddf< tt|t|fdd| _tt|||| | _ttdd | _ttdd | _|| _|rtt|| _|dkr|fn||f}tt|| _ttdd | _t|t |st!nt"|d| _#t$|| _%|| _&t|t' | _(|
| _)|| _*| j*D ]}t+| |rJ t,| j|}t-| || qdS )zN
        Appendix J, Algorithm2 in - https://arxiv.org/abs/2409.19606
        r$   zb ... (f d) -> b ... f d)fzb ... f d -> b ... (f d)zfeature dimension (z() must be divisible by the `num_fracs` ()r   z-`num_residual_streams` must be greater than 0g      ?Nr-   g{Gz?r   )r7   ).rU   rV   re   rK   	has_fracsr   split_fracsmerge_fracsr   rT   normnum_residual_streamsr   r   r   has_dynamic_alpha_proposalsr   rY   rZ   r   rX   r   eyestatic_alphadynamic_alpha_fnonespre_branch_scaleresidual_scaler   static_betadynamic_beta_fnh_post_scaler   r:   r<   r   Dropoutr   r   rC   rf   r   r   hasattrgetattrsetattr)r\   r   r&   re   r   r   r   rf   r   r   r   rK   rL   r   r   r   r   init_residual_indexnum_residual_streams_fracsnum_input_views_fracsinit_alpha0dynamic_beta_shapeforward_method_namefnr]   r   r   rV      s\   


 


z,ManifoldConstrainedHyperConnections.__init__c                   s  | j }| |}| jrt|d}| |}t|d|d}| |}|j | }t|| j	 d}t
| j d| jd}t
| j d| j| d}t||f}t
|d| jd}|| }t| j d|d}	||	 }
|
d	d | jf |
d	| jd f }}| }| |}t||fd
d}
| jrt|
dd}
nt|
d}
| |
}
d }| jr|| j  }| jst|d}|| j  }t| j d|d}|| }| d }t|
| d}| jdkr|d	dd d f |d	dd d d f }}n|d	d | jd d f |d	| jd d d f }}t|d}| jrt|d}| |}t|d}t fdd||fD \}}t|r=| }||t|dfS )Nb d ... -> b ... d(b s) ... d -> b ... s drA   z... d, p d e -> p ... ez1 -> sz
n -> (v n)r   z(f s) d -> f s d.r.   r-   zp ... -> ...meanz1 ... -> ...z... -> ... 1z... (s f) -> ... s f   z'... f1 s f2 t, ... f1 s d -> ... f2 t dr$   r   zb ... v d -> v b ... db ... d -> b d ...zb ... f s d -> (b s) ... (f d)c                 3  s    | ]}|  V  qd S r   )r5   ).0r   r/   r   r   	<genexpr>  s    zGManifoldConstrainedHyperConnections.width_connection.<locals>.<genexpr>)beta)r   rf   r   r   r   r   r/   r0   r   r   r   r   rK   r   r   r   r   sigmoidr   r   r   r   r   r   r   r   r   tupler   r5   ri   )r\   rk   streamsnormed	wc_weightr   r   alpha_scaledynamic_alphar   r8   	alpha_prealpha_residualr   	dc_weightdynamic_betar   mix_hru   r   r   r   rl   H  s^   



&




,2





z4ManifoldConstrainedHyperConnections.width_connectionc                C  s~   | j sJ | |}| jrt|d}|j}t| | d}t|d}| |}| jr1t|d}| |	||}| 
|S )Nr   z)b ... f1 d, b ... f1 s f2 -> b ... f2 s db ... s d -> (b s) ... dr   )r   r   r   r   r/   r   r0   r   r   r5   r   )r\   rm   rk   r   r/   outputr   r   r   rn     s   






z4ManifoldConstrainedHyperConnections.depth_connectionr   c                   ro   )Nrp   c                   rq   r   rr   rs   rw   r   r   rx     ry   zUManifoldConstrainedHyperConnections.decorate_branch.<locals>.forward_and_add_residualrz   r{   r   rw   r   r|     r}   z3ManifoldConstrainedHyperConnections.decorate_branchc                   r~   )Nc                   s@   j s| S t| \^} }}j| fi  } t| g|R |S r   )r   r
   rn   r   r   r   r   r   r     s
   zDManifoldConstrainedHyperConnections.forward.<locals>.add_residual_fnr   r   r   r   r   r_     s   
z+ManifoldConstrainedHyperConnections.forward)re   rg   rf   rg   r   r   r   r   r   )
ra   rb   rc   r#   rV   rl   rn   r|   r_   rd   r   r   r]   r   rM      s*     j
%rM   c                      s*   e Zd Z		d fdd	Zdd Z  ZS )rD   Fc                   s4   t    || _|| _|| _tt||| _	d S r   )
rU   rV   r   rE   r=   r   rX   rY   rZ   stream_embed)r\   rE   r&   r   r=   r]   r   r   rV     s
   
zStreamEmbed.__init__c                 C  sv   | j rt|d| jd}| jrt|d| jd}nt|d| jd}|| j }| jr1t|d| jd}|S t|d| jd}|S )Nr>   r   (b s) d ... -> b ... s dr   zb ... s d -> (b s) d ...r   )r=   r   rE   r   r   r   rj   r   r   r   r_     s   
zStreamEmbed.forward)FFr`   r   r   r]   r   rD   
  s
    rD   c                      s(   e Zd Z	d fdd	Zdd Z  ZS )AttentionPoolReduceStreamFc                   sB   t    || _|| _tj||dd| _| jjj	t
| d S )NF)bias)rU   rV   rE   r   r   Linearto_attn_logitsweightdatacopy_rY   r   )r\   rE   r&   r   r]   r   r   rV   /  s
   
z"AttentionPoolReduceStream.__init__c                 C  sb   | j rt|d| jd}nt|d| jd}| |}|jdd}t|| dd}| j r/t|d}|S )	Nr   r   r   r+   r-   zb ... s d -> b ... drB   r   )r   r   rE   r   softmaxr   )r\   rk   attn_logitsattnr   r   r   r_   <  s   

z!AttentionPoolReduceStream.forward)Fr`   r   r   r]   r   r   .  s    r   )r*   )FNF)r$   NFNr*   )-
__future__r   typingr   	functoolsr   randomr   rY   r   r   torch.nn.functional
functionalr'   torch.nnr   r	   torch.utils._pytreer
   r   einopsr   r   r   r   einops.layers.torchr   r   r   r   r   r    r#   r)   r:   r<   rJ   rS   rT   rN   rM   mHCstaticmethodrD   r   r   r   r   r   <module>   sP    



D  >

$