o
    }oî                     @   sj  U d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZ d dlmZ d dl m!Z!m"Z"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) G dd deZde
j*fddZ+eG dd dee"j,Z-eG dd de-Z.eG dd de.Z/eG dd de-Z0eG dd  d e0Z1eG d!d" d"e-Z2eG d#d$ d$e2Z3eG d%d& d&e-Z4eG d'd( d(e4Z5eG d)d* d*e2Z6eG d+d, d,e4Z7e"8ed-G d.d/ d/e"j9def Z:e"8ed0G d1d2 d2e:Z;e0e1e2e6e3e4e7e5e.e/d3
Z<e=e>e	e- f e?d4< g d5Z@dS )6    N)	dataclass)Path)CallableLiteralOptionalType)parallel_state)GPTInferenceWrapper)InferenceWrapperConfig)AttnBackend)TransformerConfig)GPTModelgpt_data_step)hyena_stack_spechyena_stack_spec_no_te)
HyenaModel)hyena_no_weight_decay_cond)get_vocab_sizeioteardown)NEMO_MODELS_CACHE)TransformFns)loggingc                   @   sv   e Zd ZdZ	ddejfddZ						ddejdejdeej d	eej d
eej deej dejfddZdS )r   z
    This is a wrapper around the MCoreHyenaModel to allow for inference. Our model follows the same API
      as the GPTModel, but the megatron model class is different so we need to handle the inference wrapper
      slightly differently.
    Nreturnc                 C   s   | j }|rt|tu rnt|dd}|s|du st|tur"tdd}| jdur.| jj}nt| jdr9| jj}ntdt	|jj
||||d}t||}|S )a  
        Gets the inference wrapper for the Hyena model.

        Args:
            params_dtype: The data type for model parameters
            inference_batch_times_seqlen_threshold: Threshold for batch size * sequence length during inference
            inference_max_seq_length: Maximum sequence length for inference

        Returns:
            GPTInferenceWrapper: The inference wrapper for the model

        Raises:
            ValueError: If MCoreHyenaModel instance not found or vocab size cannot be determined
        moduleNz@Exact MCoreHyenaModel instance not found in the model structure.
vocab_sizezlUnable to find vocab size. Either pass in a tokenizer with vocab size, or set vocab size in the model config)hidden_sizeparams_dtype&inference_batch_times_seqlen_thresholdpadded_vocab_sizeinference_max_seq_length)r   typeMCoreHyenaModelgetattr
ValueError	tokenizerr   hasattrconfigr
   r   r	   )selfr   r   r    mcore_modelr   inference_wrapper_configmodel_inference_wrapper r,   X/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/gpt/model/hyena.pyget_inference_wrapper0   s2   



z HyenaModel.get_inference_wrapper	input_idsposition_idsattention_masklabelsdecoder_input	loss_maskc	                 C   s8   |durd|ini }	| j |||f||||d|	}
|
S )a7  
        Forward pass of the Hyena model.

        Args:
            input_ids: Input token IDs
            position_ids: Position IDs for input tokens
            attention_mask: Optional attention mask
            labels: Optional labels for loss computation
            decoder_input: Optional decoder input
            loss_mask: Optional loss mask
            inference_context: Optional inference parameters
            packed_seq_params: Optional parameters for packed sequences

        Returns:
            torch.Tensor: Output tensor from the model
        Npacked_seq_params)r3   r2   inference_contextr4   )r   )r(   r/   r0   r1   r2   r3   r4   r6   r5   extra_kwargsoutput_tensorr,   r,   r-   forward`   s   
zHyenaModel.forwardN)NNNNNN)	__name__
__module____qualname____doc__torchTensorr.   r   r9   r,   r,   r,   r-   r   )   s6    
4
r   r   c                 C   s4   |d |d |d |d d}d|d< | di |S )	a  
    Performs a forward step for the Hyena model.

    Args:
        model: The Hyena model
        batch: Dictionary containing input batch data with keys:
            - tokens: Input token IDs
            - position_ids: Position IDs
            - labels: Labels for loss computation
            - loss_mask: Mask for loss computation

    Returns:
        torch.Tensor: Output from the model forward pass
    tokensr0   r2   r4   )r/   r0   r2   r4   Nr1   r,   r,   )modelbatchforward_argsr,   r,   r-   hyena_forward_step   s   rE   c                       s  e Zd ZU dZdZeed< dZeed< ej	Z
ejed< dZeed< dZeed< d	Zeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZed ed< dZeed< d Z eed!< dZ!e"e ed"< dZ#eed#< d$Z$eed%< dZ%eed&< dZ&eed'< d(Z'eed)< dZ(eed*< dZ)eed+< dZ*eed,< d-Z+eed.< e,j-Z.e,ed/< dZ/eed0< d1Z0eed2< d3Z1eed4< d5Z2eed6< e3Z4e5ed7< e6Z7e5ed8< dZ8eed9< dZ9eed:< dZ:eed;< dZ;eed<< dZ<eed=< dZ=eed>< dZ>eed?< dZ?eed@< dZ@eedA< dZAeedB< dZBeedC< dZCeedD< dEZDeedF< dZEeedG< dZFeedH< dZGeedI<  fdJdKZHdQdLe"e dMdNfdOdPZI  ZJS )RHyenaConfigz
    Configuration dataclass for Hyena.

    For adjusting ROPE when doing context extension, set seq_len_interpolation_factor relative to 8192.
    For example, if your context length is 512k, then set the factor to 512k / 8k = 64.
    Ffp16_lm_cross_entropyTparallel_outputr   fp16bf16   
num_layersi   r      num_attention_headsNnum_groups_hyenanum_groups_hyena_mediumnum_groups_hyena_short        hybrid_attention_ratiohybrid_mlp_ratiohybrid_override_patternpost_processpre_processi   
seq_lengthrope)learned_absoluterY   noneposition_embedding_typeg      ?rotary_percenti'  rotary_baseseq_len_interpolation_factorapply_rope_fusion   make_vocab_size_divisible_bygated_linear_unitfp32_residual_connectionRMSNormnormalizationadd_bias_linearhidden_dropoutattention_dropoutư>layernorm_epsilonattention_backendget_attention_mask_from_fusionfullrecompute_granularityuniformrecompute_method   recompute_num_layersforward_step_fndata_step_fntokenizer_model_pathhyena_init_methodhyena_output_layer_init_methodhyena_filter_no_wd"remove_activation_post_first_layeradd_attn_proj_biascross_entropy_loss_fusiontp_comm_overlapbias_activation_fusionbias_dropout_add_fusionadd_bias_outputuse_tenormalized_weightedto_upperuse_short_conv_biasvortex_style_fp8use_b2b_causal_conv1dc                    s$   t    | jrt| _dS d| _dS )zP
        Post-initialization hook that sets up weight decay conditions.
        N)super__post_init__ry   r   hyena_no_weight_decay_cond_fnr(   	__class__r,   r-   r      s   
zHyenaConfig.__post_init__vp_stager   r"   c                 C   s  | j rdn| j| _t| dddu r|du sJ dt| fi d| jr#tnPtdt| |j| j	d| j
d| jd	| jd
| jd| jd| jd| jd| jd| jdt dt ddd| jd| jd| j d| j}|S dt| |j| j	d| j
d| jd	| jd
| jd| jd| jd| jd| jd| jdt dt ddd| jd| jd| j d| j}|S )a  
        Configures and returns a Hyena model instance based on the config settings.

        Args:
            tokenizer: Tokenizer to use for the model
            vp_stage: Virtual pipeline stage

        Returns:
            MCoreHyenaModel: Configured Hyena model instance
        F$virtual_pipeline_model_parallel_sizeNzGVirtual pipeline model parallelism is temporarily unsupported in Hyena.r   r   max_sequence_lengthrO   rP   rQ   rU   r\   r]   r^   r_   rW   rV   #share_embeddings_and_output_weightsTrw   rx   rz   r{   )rz   r~   r#   r"   r   r   r   r   r   rb   rX   rO   rP   rQ   rU   r\   r]   r^   r_   r   is_pipeline_first_stageis_pipeline_last_stagerw   rx   r{   )r(   r%   r   rB   r,   r,   r-   configure_model   s   	
	
zHyenaConfig.configure_modelr:   )Kr;   r<   r=   r>   rG   bool__annotations__rH   r?   bfloat16r   dtyperI   rJ   rL   intr   rN   rO   rP   rQ   rS   floatrT   rU   strrV   rW   rX   r\   r   r]   r^   r_   r   r`   rb   rc   rd   rf   rg   rh   ri   rk   r   flashrl   rm   ro   rq   rs   rE   rt   r   r   ru   rv   rw   rx   ry   rz   r{   r|   r}   r~   r   r   r   r   r   r   r   r   r   __classcell__r,   r,   r   r-   rF      sr   
  rF   c                   @   sr  e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< dZeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< ejZejed< d Zeed!< dZeed"< dZeed#< d$Z eed%< d&Z!eed'< d(Z"eed)< d*Z#eed+< d,Z$eed-< d.Z%eed/< dZ&eed0< dZ'eed1< dZ(eed2< d3S )4HyenaTestConfigz'Configuration for testing Hyena models.zSDH*rU   rr   rL       rX      r   rO      rP   rQ   rM   rb   
byte-leveltokenizer_librarybasemapping_type +  ffn_hidden_sizeTrc       rN   Fuse_cpu_initializationrR   rh   ri   r   re   rf   add_qkv_biasrg   rj   rk   rn   ro   rp   rq   rK   rs   
small_initrw   	wang_initrx   ry   r   r   N))r;   r<   r=   r>   rU   r   r   rL   r   rX   r   rO   rP   rQ   rb   r   r   r   rc   r   rN   r   rh   r   ri   r?   r   r   r   rf   r   rg   rk   ro   rq   rs   rw   rx   ry   r   r   r,   r,   r,   r-   r     s>   
 r   c                   @   6   e Zd ZU dZdZeed< dZeed< dZeed< dS )HyenaNVTestConfig  
    Several unintentional design choices were made to the original Arc implementation that are required to use the
    original Arc checkpoints, but may result in less stable model training. If you are training from scratch,
    these are the recommended configs.
    Frz   r{   Tr   N	r;   r<   r=   r>   rz   r   r   r{   r   r,   r,   r,   r-   r   8  
   
 r   c                   @   sf  e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed < ejZejed!< d"Zeed#< dZeed$< dZ eed%< d&Z!eed'< d(Z"eed)< d*Z#eed+< dZ	eed< d,Z$eed-< d.Z%eed/< dZ&eed0< d1S )2Hyena1bConfigz,Config matching the 1b 8k context Evo2 modelzSDH*SDHSDH*SDHSDH*SDHSDH*rU      rL      rs   r   rX   i  r   rO   ra   rP   rQ   rM   rb   r   r   r   r   i   r   Trc      rN   Fr   rR   rh   ri   r   re   rf   r   rg   rj   rk   rn   ro   rp   rq   r   rw   r   rx   ry   N)'r;   r<   r=   r>   rU   r   r   rL   r   rs   rX   r   rO   rP   rQ   rb   r   r   r   rc   r   rN   r   rh   r   ri   r?   r   r   r   rf   r   rg   rk   ro   rq   rw   rx   ry   r,   r,   r,   r-   r   E  s<   
 r   c                   @   r   )HyenaNV1bConfigr   Frz   r{   Tr   Nr   r,   r,   r,   r-   r   g  r   r   c                   @   sZ  e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< dZeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< ejZejed< dZeed < dZeed!< dZeed"< d#Z eed$< d%Z!eed&< d'Z"eed(< d)Z#eed*< d+Z$eed,< d-Z%eed.< dZ&eed/< d0S )1Hyena7bConfigz,Config matching the 7b 8k context Evo2 modelz SDH*SDHSDH*SDHSDH*SDHSDH*SDHSDH*rU   r   rL   r   rX   r   r   rO   r   rP   rQ   rM   rb   r   r   r   r   r   r   Trc   rN   Fr   rR   rh   ri   r   re   rf   r   rg   rj   rk   rn   ro   rp   rq   rr   rs   r   rw   r   rx   ry   N'r;   r<   r=   r>   rU   r   r   rL   r   rX   r   rO   rP   rQ   rb   r   r   r   rc   r   rN   r   rh   r   ri   r?   r   r   r   rf   r   rg   rk   ro   rq   rs   rw   rx   ry   r,   r,   r,   r-   r   t  :   
 r   c                   @   r   )HyenaNV7bConfigr   Frz   r{   Tr   Nr   r,   r,   r,   r-   r     r   r   c                   @   sZ  e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed< dZeed	< d
Zeed< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< ejZejed< dZeed < dZeed!< dZeed"< d#Z eed$< d%Z!eed&< d'Z"eed(< d)Z#eed*< d+Z$eed,< d-Z%eed.< dZ&eed/< d0S )1Hyena40bConfigz-Config matching the 40b 8k context Evo2 modelz2SDH*SDHSDH*SDHSDH*SDHSDH*SDHSDH*SDH*SDHSDH*SDHSDH*rU   2   rL   r   rX   r   rO   i   rP   rQ   rM   rb   r   r   r   r   iU  r   Trc   @   rN   Fr   rR   rh   ri   r   re   rf   r   rg   rj   rk   rn   ro   rp   rq   rK   rs   r   rw   r   rx   ry   Nr   r,   r,   r,   r-   r     r   r   c                   @   r   )HyenaNV40bConfigr   Frz   r{   Tr   Nr   r,   r,   r,   r-   r     r   r   c                   @      e Zd ZU dZdZeed< dS )Hyena7bARCLongContextConfigpThe checkpoint from ARC requires padding to the FFN dim
    due to constraintes from large TP size for training.i ,  r   Nr;   r<   r=   r>   r   r   r   r,   r,   r,   r-   r        
 r   c                   @   r   )Hyena40bARCLongContextConfigr   i X  r   Nr   r,   r,   r,   r-   r     r   r   pytorchc                       s|   e Zd ZdZddef fddZdefddZd	d
 Zdde	dede	fddZ
dd Zedd ZedefddZ  ZS )PyTorchHyenaImporterzL
    Importer class for converting PyTorch Hyena models to NeMo format.
    Npathc                    s   t  | |}||_|S )z
        Creates a new importer instance.

        Args:
            path: Path to the PyTorch model
            model_config: Optional model configuration

        Returns:
            PyTorchHyenaImporter instance
        )r   __new__model_config)clsr   r   instancer   r,   r-   r     s   zPyTorchHyenaImporter.__new__r   c                 C   s   t | j| jdS )zt
        Initializes a new HyenaModel instance.

        Returns:
            HyenaModel: Initialized model
        )r%   )r   r'   r%   r   r,   r,   r-   init  s   zPyTorchHyenaImporter.initc                 C   s   t jt| ddS )+
        Returns the source model.
        cpu)map_location)r?   loadr   r   r,   r,   r-   get_source_model  s   z%PyTorchHyenaImporter.get_source_model
torch_distoutput_pathcheckpoint_formatc                 C   s   |   }d|v r|d }G dd d}|  }| j|d|d}|| jj dd | D }||| jj|}|| jj | || | 	|| t
d|  t|| ~~|S )	a  
        Applies the model conversion from PyTorch to NeMo format.

        Args:
            output_path: Path to save the converted model
            checkpoint_format: Format for saving checkpoints

        Returns:
            Path: Path to the saved NeMo model
        rB   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )z.PyTorchHyenaImporter.apply.<locals>.ModelStatez_Wrapper around the source model state dictionary that also handles some weight transformations.c                 S   s    || _ | |}|| _|| _dS )a  Wrapper around the source model state dictionary that also handles some weight transformations.

                Args:
                    state_dict: original state dictionary from the source model
                    num_layers: number of layers in the source model
                N)rL   transform_source_dict_state_dictfp32_suffixes)r(   
state_dictrL   r   r,   r,   r-   __init__  s   

z7PyTorchHyenaImporter.apply.<locals>.ModelState.__init__c                 S      | j S )zReturn the state dictionary.)r   r   r,   r,   r-   r   #  s   z9PyTorchHyenaImporter.apply.<locals>.ModelState.state_dictc              
   S   s~   | j  D ]7\}}d|vr<|j|kr"td| d|j d| d |dd }|| jv r2tj}n|}|	|| j |< qdS )	z1Convert the state dictionary to the target dtype._extrazConverting z from z (source model) to z (target model).N)
r   itemsr   r   warningsplitr   r?   float32to)r(   r   kvk_suffix_dtyper,   r,   r-   r   '  s   
 
z1PyTorchHyenaImporter.apply.<locals>.ModelState.toc                 S   sN   ddl m} | D ]\}}d|v sd|v r$|ddd| jf ||< q
|S )zAdjust the medium filter.r   )rF   zfilter.hzfilter.decayN):nemo.collections.llm.gpt.model.megatron.hyena.hyena_configrF   r   hyena_medium_conv_len)r(   updated_datarF   r   r   r,   r,   r-   adjust_medium_filter4  s   zCPyTorchHyenaImporter.apply.<locals>.ModelState.adjust_medium_filterc           	      S   s   ddl }dd t| jD }| j|| jd < i }t|d  D ]J}d|v r.|d | q |d|}|rbt|d	}||v rY|	d
| d
t
|| |}|d | ||< q |d | ||< q |d | ||< q | |}|S )zTransform the source state dictionary, applying some challenging layer name re-mappings and
                removing extra keys, as well as truncating a filter that didn't need to extend to the full
                sequence length dim.
                r   Nc                 S   s   i | ]}|d  |qS )rK   r,   ).0ir,   r,   r-   
<dictcomp>D  s    zXPyTorchHyenaImporter.apply.<locals>.ModelState.transform_source_dict.<locals>.<dictcomp>   r   r   zsequential\.(\d+)   z\b)rerangerL   listkeyspopsearchr   groupsubr   r   )	r(   sourcer   	layer_mapr   keymatchoriginal_layer_numnew_keyr,   r,   r-   r   =  s"   
zDPyTorchHyenaImporter.apply.<locals>.ModelState.transform_source_dictN)	r;   r<   r=   r>   r   r   r   r   r   r,   r,   r,   r-   
ModelState  s    	r   F)ckpt_async_savesave_ckpt_formatc                 S   s*   h | ]\}}|j tjkr|d d qS )r   r   )r   r?   r   r   )r   npr,   r,   r-   	<setcomp>^  s   * z-PyTorchHyenaImporter.apply.<locals>.<setcomp>z.Converted Hyena model to Nemo, model saved to )r   r   
nemo_setupr   r'   r   named_parametersrL   convert_state	nemo_saver   infor   )r(   r   r   r   r   targettrainerr   r,   r,   r-   apply  s    G
zPyTorchHyenaImporter.applyc              	   C   s  i }d|d< d|dt | jj d< | jj}t| jjD ]7\}}|r0d| d|d| d< nd| d|d| d< d| d	|d| d
< |dkr|r\d| d|d| d< nd| d|d| d< d| d|d| d< d| d|d| d< d| d|d| d< d| d|d| d< |dkrd| d|d| d< q|dkrd| d|d| d< d| d|d| d< d| d|d| d< q|dkrd| d|d| d< d| d|d| d< d| d|d| d< d| d|d| d< q|dkrN|rd| d|d| d< nd| d|d| d< d| d|d| d< d| d|d| d< d| d|d| d< qtd | tj|||tjd!d"t	j
d#gd$S )%z
        Converts the state dictionary from source format to target format.

        Args:
            source: Source model state
            target: Target model

        Returns:
            Result of applying state transforms
        z embedding.word_embeddings.weightz#sequential.0.word_embeddings.weightzdecoder.final_norm.weightzsequential.z.norm.weightzdecoder.layers.z!.mlp.linear_fc1.layer_norm_weightz.pre_mlp_layernorm.weightz.mlp.linear_fc2.weightz.mlp.w3.weight*z).mixer.dense_projection.layer_norm_weightz.input_layernorm.weightz.mixer.dense_projection.weightz(.mixer.hyena_proj_conv.short_conv_weightz.mixer.dense.weightz.mixer.dense.biasSz).mixer.mixer.short_conv.short_conv_weightDz.mixer.mixer.conv_biasz.mixer.mixer.filter.hz.mixer.mixer.filter.decayHz.mixer.mixer.filter.gammaz.mixer.mixer.filter.Rz.mixer.mixer.filter.pz,.self_attention.linear_qkv.layer_norm_weightz!.self_attention.linear_qkv.weightz".self_attention.linear_proj.weightz .self_attention.linear_proj.biaszUnknown symbol: )zsequential.*.mlp.w1.weightzsequential.*.mlp.w2.weightz&decoder.layers.*.mlp.linear_fc1.weight)
source_key
target_keyfn)mapping
transforms)lenr'   rU   r   	enumerater$   r   apply_transformsstate_transformr   	merge_fc1)r(   r   r  r  
te_enabledr   symbolr,   r,   r-   r  k  sr   











z"PyTorchHyenaImporter.convert_statec                 C   s   ddl m} || jjd}|S )zd
        Gets the tokenizer for the model.

        Returns:
            Tokenizer instance
        r   )get_nmt_tokenizer)library)3nemo.collections.nlp.modules.common.tokenizer_utilsr  r   r   )r(   r  r%   r,   r,   r-   r%     s
   zPyTorchHyenaImporter.tokenizerc                 C   r   )zn
        Gets the model configuration.

        Returns:
            HyenaConfig: Model configuration
        )r   r   r,   r,   r-   r'     s   zPyTorchHyenaImporter.configr:   )r   )r;   r<   r=   r>   r   r   r   r   r   r   r
  r  propertyr%   rF   r'   r   r,   r,   r   r-   r     s    	g[
r   hfc                   @   s   e Zd ZdZdd ZdS )HuggingFaceSavannaHyenaImportera2  
    Importer class for converting HuggingFace Savanna Hyena models to NeMo format.
        See: https://huggingface.co/arcinstitute/savanna_evo2_7b for an example of a savanna model that this can
        import and convert to NeMo format. Any of the Arc models that start with "savanna_" should work.
    c                 C   s@  ddl }ddlm} tjt| r&tdt|   t	j
t| dddS dt| v r6t| d\}}nt| }d}|d	d
 }tt| }| d}z
|||||d}W n ty   td|  tj||}tj|r~td|  ng }	d}
	 z||||| d|
 d}|	| |
d7 }
W n |jjy   Y nw qt|d1}|	D ]&}t|d}	 |d}|sn|| qW d   n1 sw   Y  qW d   n1 sw   Y  |	D ],}zt| W n ty } ztd| d|  W Y d}~nd}~ww td| qY nw t	j
|dddS )r   r   N)hf_hub_downloadzLoading model from local path r   F)r   weights_only:/r   z.pt)repo_id	local_dirrevisionfilenamez?Single path download failed, try loading checkpoint shards for zFound Tz.partr   wbrbi   zError removing z: z,Cleaned up shards, final checkpoint saved to)huggingface_hub.errorshuggingface_hubr!  osr   existsr   r   r  r?   r   r   r   	Exceptionr   joinappenderrorsEntryNotFoundErroropenreadwriteremoveOSErrorprint)r(   r,  r!  r%  r'  	modelnamedownload_dirweights_filenameweights_pathpartspart_num	part_pathoutfilepartinfilechunker,   r,   r-   r     s|   





 +z0HuggingFaceSavannaHyenaImporter.get_source_modelN)r;   r<   r=   r>   r   r,   r,   r,   r-   r     s    r   )
1b1b_nv7b7b_arc_longcontext7b_nv40b40b_arc_longcontext40b_nvtesttest_nvHYENA_MODEL_OPTIONS)rF   r   r   r   r   r   r   r   r   r   r   rP  )Ar-  dataclassesr   pathlibr   typingr   r   r   r   r?   megatron.corer   Jmegatron.core.inference.model_inference_wrappers.gpt.gpt_inference_wrapperr	   Imegatron.core.inference.model_inference_wrappers.inference_wrapper_configr
   megatron.core.transformer.enumsr   ,megatron.core.transformer.transformer_configr   #nemo.collections.llm.gpt.model.baser   r   ?nemo.collections.llm.gpt.model.megatron.hyena.hyena_layer_specsr   r   9nemo.collections.llm.gpt.model.megatron.hyena.hyena_modelr   r"   9nemo.collections.llm.gpt.model.megatron.hyena.hyena_utilsr   nemo.lightningr   r   r   nemo.lightning.baser   nemo.lightning.io.stater   
nemo.utilsr   r@   rE   IOMixinrF   r   r   r   r   r   r   r   r   r   r   model_importerModelConnectorr   r   rP  dictr   r   __all__r,   r,   r,   r-   <module>   sx   
`r"!  
  
O