o
    }oi/8                  	   @   s  d dl mZ d dlmZ d dlZd dlmZ d dlmZ z"d dl	m
Z
mZ d dlmZmZ d dlmZmZ d dlmZ d	ZW n eefyY   d d
lmZ eZeZeZeZdZY nw d dlmZ d dlmZmZ d dlmZ d dl m!Z! z
d dl"m#Z$ d	Z%W n ey   dZ%Y nw zd dl&m'Z( d	Z)G dd de!dZ'W n ey   dZ)Y nw z
d dl*m+Z+ d	Z,W n ey   dZ,Y nw eG dd dZ-dd Z.G dd dej/Z0G dd dej/Z1G dd de1Z2G d d! d!e1Z3G d"d# d#ej/Z4dS )$    )	dataclass)UnionN)	rearrange)TELayerNormColumnParallelLinearTERowParallelLinear)IdentityFuncOp
IdentityOp)
ModuleSpecbuild_module)TransformerConfigT)ApexGuardDefaultsF)activation_registry)HyenaFilterHyenaFilterSubmodules)torch_dtype_from_precision)	Singleton)fftconv_func)FlashFFTConvc                   @   s   e Zd Zdd ZdS )r   c                 C   s   t ||| _d S N)FlashFFTConvImplflashfftconv)selfseqlendtype r   c/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/nlp/modules/common/hyena/hyena.py__init__N   s   zFlashFFTConv.__init__N)__name__
__module____qualname__r   r   r   r   r   r   K   s    r   )	metaclass)causal_conv1d_fnc                   @   s^   e Zd ZU eZeeef ed< e	Z
eeef ed< eZeeef ed< eZeeef ed< dS )HyenaOperatorSubmodulesin_projshort_filterimplicit_filterout_projN)r   r   r   r   r#   r   r	   type__annotations__r   r$   r%   r&   r   r   r   r   r"   \   s
   
 r"   c                 K   s"   |  D ]
\}}t| || qd S r   )itemssetattr)clskwargskvr   r   r   auto_assign_attrsd   s   r/   c                       s&   e Zd Zd fdd	Zdd Z  ZS )CausalDepthWiseConv1dTc                    sB   t stdt   || _|| _tj||||d ||d| _d S )NzEMissing causal-conv1d library, please run 'pip install causal-conv1d'   )in_channelsout_channelskernel_sizepaddinggroupsbias)	HAVE_CAUSAL_CONV1DImportErrorsuperr   channelswidthnnConv1d_conv_1d)r   r;   r<   r7   	__class__r   r   r   j   s   
zCausalDepthWiseConv1d.__init__c                 C   s   t || jjd| jjS )Nr1   )r!   r?   weightsqueezer7   )r   xr   r   r   forwardz   s   zCausalDepthWiseConv1d.forward)T)r   r   r   r   rE   __classcell__r   r   r@   r   r0   i   s    r0   c                       sF   e Zd Zdedfdededededeeef de	f fd	d
Z
  ZS )	HyenaConvTNd_modelmax_seq_lengthorderr7   
filter_clsfilter_submodulesc           	         s   t    || _|| _|| _|| _| j| jd  }| jr&tt	|| _
nt|| _
t|| j| jd  f||d|| _d S )Nr1   )
submodulesseq_len)r:   r   rH   rJ   rI   use_biasr=   	Parametertorchrandnr7   zerosr
   filter)	r   rH   rI   rJ   r7   rK   rL   filter_kwargs
bias_shaper@   r   r   r      s$   

zHyenaConv.__init__)r   r   r   r   intboolr   r	   r'   r   r   rF   r   r   r@   r   rG   ~   s"    
rG   c                       sj   e Zd Zdedddfdededededeeef d	e	d
e
de
f fddZdd Zdd Zdd Z  ZS )SingleHeadHyenaConvTNbf16rH   rI   rJ   r7   rK   rL   fftconv_type	precisionc	           
         s   t  j|||f|||d|	 |d u r|dkrtrd}nd}|dvr'td|dkr3|dkr3td|dkr=ts=td|dkrGtsGtd	|dkrQ| j| _d S td
| j	 t
|j| _| j| _d S )Nr7   rK   rL   i    safariflash)r^   r_   z/fftconv_type must be one of ['safari', 'flash']z7Safari-fftconv only supports sequence length up to 8192NSafari-fftconv library not found. Please see README at <tbd> for instructions.zLflashfftconv library not found. Please see README at <tbd> for instructions.   )r:   r   HAVE_SAFARI_FFTCONV
ValueErrorr9   HAVE_FLASHFFTCONV_safari_fft
fftconv_fnr   rI   r   r   
_flash_fft)
r   rH   rI   rJ   r7   rK   rL   r[   r\   rU   r@   r   r   r      s>   
zSingleHeadHyenaConv.__init__c                 C   s   |j tjd}t|||ddS )Nr   F)gelu)torQ   float32safari_fftconv_fn)r   rD   r-   r7   r   r   r   re      s   zSingleHeadHyenaConv._safari_fftc                 C   s(   |  }| ||||jdd  }|S Nr1   dim)
contiguousr   	unsqueeze)r   rD   r-   r7   yr   r   r   rg      s   zSingleHeadHyenaConv._flash_fftc                 C   s0   t | jd| j| jd d| }| |||}|S )Nz(v o) -> o vr1   r.   o)r   r7   rH   rJ   rf   )r   rD   r-   recurrence_idxr7   rr   r   r   r   rE      s   zSingleHeadHyenaConv.forward)r   r   r   r   rW   rX   r   r	   r'   r   strr   re   rg   rE   rF   r   r   r@   r   rY      s4    
	.rY   c                       s^   e Zd Zdedddfdededededed	eeef d
e	de
de
f fddZdd Z  ZS )MultiHeadHyenaConvTNrZ   rH   rI   rJ   	num_headsr7   rK   rL   r[   r\   c
                    s`   |dkrt d|dkrt d| j dtstdt j|||f|||d|
 || _d S )Nr1   zExpecting num_heads > 1ra   z5Multi-head supported only with order == 2 (got order )r`   r]   )rc   rJ   rb   r9   r:   r   rx   )r   rH   rI   rJ   rx   r7   rK   rL   r[   r\   rU   r@   r   r   r      s$   
	zMultiHeadHyenaConv.__init__c              
   C   s.   | j jtjd}t|||dd|| j|d}|S )Nrh   FT)ri   output_hbl_layoutr.   head_dimq)r7   rj   rQ   rk   rl   rx   )r   r.   r-   x1x2r7   rr   r   r   r   rE      s   zMultiHeadHyenaConv.forward)r   r   r   r   rW   rX   r   r	   r'   r   rv   r   rE   rF   r   r   r@   r   rw      s4    
	
rw   c                       sf   e Zd Z							ddeded	ed
ededededef fddZdd Z	dd Z
dd Z  ZS )HyenaOperatorra   r1              identityNconfigrI   rJ   rx   dropoutshort_filter_order
activationrM   c
                    s  t    |du rtttttd}|dk rtd| j d|j	}|| dkr1td| d| || }t
| ||||||||d		 t|  | _t|| _t|j| j| jd
 | j | j| jjdddddd
| _t|j| j| j| j| jjdddddd
| _| j| jd
  }t|j|| j| _| j| j| jg}|j|
d< |jj|
d< | jd
krt|i |
| _| j | _!dS |"| j t#|i |
| _| j$| _!dS )aF  
        Hyena operator described in the paper https://arxiv.org/pdf/2302.10866.pdf

        Args:
            max_seq_length: (int): Maximum input sequence length.
            order: (int): Depth of the Hyena recurrence. Defaults to 2
            num_heads: (int): Number of heads. Defaults to 1
            dropout: (float): Dropout probability. Defaults to 0.0
            short_filter_order: (int): Length of the explicit input convolutional filter. Defaults to 3
            activation: (str): type of act between kernel output and output projection (default identity)
        N)r#   r$   r%   r&   ra   zOrder must be at least 2, (got ry   r   zModel dimension z  must be divisible by num heads )rH   rJ   rI   rx   r{   r   r   mcore_configr1   FTr#   )r   init_methodgather_outputr7   skip_bias_add	is_experttp_comm_buffer_namer&   )r   r   r7   input_is_parallelr   r   r   rK   rL   )%r:   r   r"   r   r0   r   r   rc   rJ   hidden_sizer/   r   r   r=   Dropoutr   r
   r#   rH   r   r   r&   output_layer_init_methodr$   r   r{   rI   r%   rM   rx   rY   	long_convconv_single_headconv_fwd_fnappendrw   conv_multi_head)r   r   rI   rJ   rx   r   r   r   rM   layer_numberlong_conv_kwargsrH   r{   total_widthlong_conv_argsr@   r   r   r     s|   


zHyenaOperator.__init__c           
      O   s   | d}t|| j}| |}t|tr|d n|}t|d}| j|}t|d| j	dd }| 
|dd |f }|jtjd}| ||}t|d}| |}| |}t|tra|\}}	nd }	t|d}||	fS )	Nr   zl b d -> b d lzc l v -> c v l)r.   .rh   zb d l -> b l dzb l d -> l b d)sizeminrI   r#   
isinstancetupler   r   rT   r{   r$   rj   rQ   rk   r   r   r&   )
r   uargsr,   ll_filterr-   ucrr   r7   r   r   r   rE   e  s$   








zHyenaOperator.forwardc                 C   s~   t |d| j| jd d}|j| jdd^ }}tt|dd  D ]\}}| || }| j||| |d}q!||d  }|S )Nz(o v) l -> o v lr1   rs   rn   )r-   ru   r   )	r   r{   rJ   splitrH   	enumeratereversedr   r   )r   r   r-   rD   r.   rt   x_irr   r   r   r   r     s   zHyenaOperator.conv_single_headc                 C   sB   |j | jdd\}}}| }| }| }| ||||}|S rm   )r   rH   rp   r   )r   r   r-   r}   r~   r.   rr   r   r   r   r     s   zHyenaOperator.conv_multi_head)ra   r1   r   r   r   NN)r   r   r   r   rW   floatrv   r"   r   rE   r   r   rF   r   r   r@   r   r     s8    	cr   )5dataclassesr   typingr   rQ   torch.nnr=   einopsr   +megatron.core.extensions.transformer_enginer   r   %megatron.core.transformer.identity_opr   r   $megatron.core.transformer.spec_utilsr	   r
   ,megatron.core.transformer.transformer_configr   HAVE_MEGATRON_COREr9   ModuleNotFoundError2nemo.collections.nlp.modules.common.megatron.utilsr   #nemo.collections.common.parts.utilsr   6nemo.collections.nlp.modules.common.hyena.hyena_filterr   r   &nemo.collections.nlp.parts.utils_funcsr   nemo.utils.metaclassesr   9nemo.collections.nlp.modules.common.hyena.fftconv_wrapperr   rl   rb   r   r   r   rd   causal_conv1dr!   r8   r"   r/   Moduler0   rG   rY   rw   r   r   r   r   r   <module>   sd   !	>&