o
    }oi>                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d d	lmZmZmZ d d
lmZ d dlmZ erhd dlmZ d dlmZ d dlm Z  d dl!m"Z" eG dd deZeG dd deZ#eG dd deZ$eG dd deZ%eG dd deZ&eG dd deZ'G dd deZ(e)e(dG dd  d ej*d!e(f Z+e,e(dG d"d# d#ej*e(d!f Z-g d$Z.dS )%    )	dataclass)Path)TYPE_CHECKING	AnnotatedCallableOptionalN)nn)squared_relu)	GPTConfigGPTModeltorch_dtype_from_mcore_config)Config)OptimizerModuleioteardown)TransformFns)dtype_from_hfNemotronConfigNemotronForCausalLMAutoTokenizer)TokenizerSpecc                   @   s"  e Zd ZU dZdZeed< eZe	ed< dZ
eed< dZeed< dZeed	< d
Zeed< d
Zeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zee ed!< d"Z ee ed#< d$Z!eed%< d&S )'r   zQ
    Configuration class for the Nemotron Config, inheriting from GPTConfig.
    	LayerNormnormalizationactivation_funcropeposition_embedding_typeF#share_embeddings_and_output_weightsadd_bias_linearg        hidden_dropoutattention_dropoutg      ?rotary_percentTmasked_softmax_fusionpersist_layer_normbias_dropout_add_fusionlayernorm_zero_centered_gammacross_entropy_loss_fusionapply_rope_fusion    
num_layers   
seq_length   hidden_size $  ffn_hidden_size   num_attention_heads   num_query_groups   kv_channelsS!uq?init_method_stdN)"__name__
__module____qualname____doc__r   str__annotations__r	   r   r   r   r   boolr    r!   floatr"   r#   r$   r%   r&   r'   r(   r)   r+   intr-   r/   r1   r3   r5   r   r7   r9    rC   rC   [/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/gpt/model/nemotron.pyr   %   s0   
 r   c                   @   sv   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZeed< dZee ed< dZeed< dS )Nemotron3Config4BzZ
    Configuration class for the Nemotron3 4B Config, inheriting from NemotronConfig.
    r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   N)r:   r;   r<   r=   r+   rB   r?   r-   r/   r1   r3   r5   r7   r   r9   rA   rC   rC   rC   rD   rE   G   s   
 rE   c                   @   sz   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< dZ
eed	< d
Zee ed< d
Zee ed< dZeed< d
S )Nemotron3Config8BzZ
    Configuration class for the Nemotron3 8B Config, inheriting from NemotronConfig.
    r*   r+   r,   r-   r/   i @  r1   r3   Nr5   r7   g{Gz?r9   r:   r;   r<   r=   r+   rB   r?   r-   r/   r1   r3   r5   r   r7   r9   rA   rC   rC   rC   rD   rF   W      
 rF   c                   @   sz   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZee ed< dZee ed< dZeed< dS )Nemotron3Config22Bz[
    Configuration class for the Nemotron3 22B Config, inheriting from NemotronConfig.
    (   r+   r,   r-      r/    `  r1   0   r3   Nr5   r7   gMb?r9   rG   rC   rC   rC   rD   rI   g   rH   rI   c                   @   sz   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZee ed< dZee ed< dZeed< dS )Nemotron4Config15Bz[
    Configuration class for the Nemotron4 15B Config, inheriting from NemotronConfig.
    r*   r+   r,   r-   rK   r/   rL   r1   rM   r3   r4   r5   Nr7   r8   r9   rG   rC   rC   rC   rD   rN   w   rH   rN   c                   @   sz   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< dZ
eed
< dZee ed< dZee ed< dZeed< dS )Nemotron4Config340Bz\
    Configuration class for the Nemotron4 340B Config, inheriting from NemotronConfig.
    `   r+   r,   r-   i H  r/   i   r1   r3   r4   r5   Nr7   g o_y?r9   rG   rC   rC   rC   rD   rO      rH   rO   c                       sf   e Zd ZdZ				d
deee ee f dee ded dee	e
jge
jf  f fdd	Z  ZS )NemotronModelz
    Nemotron model implementation based on the GPT model architecture.

    This class provides a high-level interface for Nemotron models,
    implementing the specific architecture and settings needed for Nemotron models.
    Nconfigoptim	tokenizerr   model_transformc                    s   t  j|pt |||d d S )N)rS   rT   rU   )super__init__r   )selfrR   rS   rT   rU   	__class__rC   rD   rW      s   zNemotronModel.__init__)NNNN)r:   r;   r<   r=   r   r   r   r   r   r   r   ModulerW   __classcell__rC   rC   rY   rD   rQ      s    	rQ   hfc                   @   sX   e Zd ZdZdefddZdedefddZdd	 Ze	dddZ
e	defddZdS )HFNemotronImporterz
    Importer for converting Hugging Face Nemotron models to NeMo format.

    This class handles the conversion of Hugging Face's NemotronForCausalLM models
    to NeMo's Nemotron format, including weight mapping and configuration translation.
    returnc                 C   s   t | j| jdS )z
        Initialize a NeMo NemotronModel instance.

        Returns:
            NemotronModel: Initialized NeMo Nemotron model with the appropriate configuration
                        and tokenizer.
        )rT   )rQ   rR   rT   rX   rC   rC   rD   init   s   zHFNemotronImporter.initoutput_pathc                 C   sp   ddl m} td |jt| dd}|  }| |}| || | || td|  t	|| ~~|S )z
        Apply the conversion from HF to NeMo format.

        Args:
            output_path: Path where the converted model will be saved

        Returns:
            Path: Path to the saved NeMo model
        r   r   z!Start converting Nemotron model..autotorch_dtypez1Converted Nemotron model to Nemo, model saved to )
transformersr   printfrom_pretrainedr>   ra   
nemo_setupconvert_state	nemo_saver   )rX   rb   r   sourcetargettrainerrC   rC   rD   apply   s   


zHFNemotronImporter.applyc                 C   sB   ddddddddd	d
dd}t jddtjdg}t j||||dS )aS  
        Convert state dict from HF format to NeMo format.

        Maps the weights from the HF model to the NeMo model according to
        the appropriate mapping scheme.

        Args:
            source: Source HF model
            target: Target NeMo model

        Returns:
            The result of applying the transforms
         embedding.word_embeddings.weight2decoder.layers.*.self_attention.linear_proj.weight&decoder.layers.*.mlp.linear_fc1.weight&decoder.layers.*.mlp.linear_fc2.weight<decoder.layers.*.self_attention.linear_qkv.layer_norm_weight:decoder.layers.*.self_attention.linear_qkv.layer_norm_bias1decoder.layers.*.mlp.linear_fc1.layer_norm_weight/decoder.layers.*.mlp.linear_fc1.layer_norm_biasdecoder.final_layernorm.weightdecoder.final_layernorm.biasoutput_layer.weight)model.embed_tokens.weight&model.layers.*.self_attn.o_proj.weight!model.layers.*.mlp.up_proj.weight#model.layers.*.mlp.down_proj.weight%model.layers.*.input_layernorm.weight#model.layers.*.input_layernorm.bias.model.layers.*.post_attention_layernorm.weight,model.layers.*.post_attention_layernorm.biasmodel.norm.weightmodel.norm.biaslm_head.weightz&model.layers.*.self_attn.q_proj.weightz&model.layers.*.self_attn.k_proj.weightz&model.layers.*.self_attn.v_proj.weight1decoder.layers.*.self_attention.linear_qkv.weight
source_key
target_keyfnmapping
transforms)r   state_transformr   	merge_qkvapply_transformsrX   rl   rm   r   r   rC   rC   rD   rj      s&   z HFNemotronImporter.convert_stater   c                 C   s   ddl m} || t| S )z
        Get the tokenizer for the HF model.

        Returns:
            AutoTokenizer: Tokenizer instance initialized from the HF model's tokenizer
        r   r   )=nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   save_hf_tokenizer_assetsr>   )rX   r   rC   rC   rD   rT      s   zHFNemotronImporter.tokenizerc                 C   s   ddl m} |t| }dd }tdi d|jd|jd|jd|jd	|jd
|j	d|j
d|jd|jd|jd||jdddt|tjkdt|tjkdt|dt|dd}|S )z
        Create a NeMo NemotronConfig from the HF model config.

        Translates the HF configuration parameters to the equivalent NeMo
        configuration.

        Returns:
            NemotronConfig: NeMo configuration for Nemotron models
        r   r   c                 S   s(   d}| | dkr|d }| | dks|S )Nr6   r      rC   )
vocab_sizebaserC   rC   rD   make_vocab_size_divisible_by  s
   z?HFNemotronImporter.config.<locals>.make_vocab_size_divisible_byr+   r/   r1   r3   r9   r-   layernorm_epsilonr5   rotary_baser#   r   r   Ffp16bf16params_dtyper7   head_dimNrC   )rf   r   rh   r>   num_hidden_layersr/   intermediate_sizer3   initializer_rangemax_position_embeddingsnorm_epsnum_key_value_heads
rope_thetapartial_rotary_factorr   r   torchfloat16bfloat16getattr)rX   HFNemotronConfigrl   r   outputrC   rC   rD   rR     sJ   	

zHFNemotronImporter.configN)r_   r   )r:   r;   r<   r=   rQ   ra   r   ro   rj   propertyrT   r   rR   rC   rC   rC   rD   r^      s    
)r^   r   c                   @   sT   e Zd ZdZejfdddZdedefddZd	d
 Z	e
dd Ze
dddZdS )HFNemotronExporterz
    Exporter for converting NeMo NemotronModel to Hugging Face format.

    This class handles the conversion of NeMo's NemotronModel to Hugging Face's
    NemotronForCausalLM format, including weight mapping and configuration translation.
    r_   r   c                 C   sR   ddl m} ddlm} |  |j| j|dW  d   S 1 s"w   Y  dS )z
        Initialize a HF NemotronForCausalLM instance.

        Args:
            dtype: Data type for model parameters

        Returns:
            NemotronForCausalLM: Initialized HF Nemotron model
        r   )AutoModelForCausalLM)no_init_weightsrd   N)rf   r   transformers.modeling_utilsr   from_configrR   )rX   dtyper   r   rC   rC   rD   ra   ?  s
   
$zHFNemotronExporter.initrb   c                 C   sP   |  t| \}}| t|j}| ||}| }|| | j| |S )N)		nemo_loadr>   ra   r   rR   rj   cpusave_pretrainedrT   )rX   rb   rl   _rm   rC   rC   rD   ro   O  s   
zHFNemotronExporter.applyc              
   C   s^   ddddddddd	d
	}t jddtjdt jddtjdt jddtjdg}t j||||dS )ac  
        Convert state dict from NeMo format to HF format.

        Maps the weights from the NeMo model to the HF model according to
        the appropriate mapping scheme.

        Args:
            source: Source NeMo model
            target: Target HF model

        Returns:
            The target model with weights transferred from source
        r|   r}   r~   r   r   r   r   r   r   )	rq   rr   rs   rt   ru   rv   rw   rx   ry   r   r   r   rz   r   rp   r{   r   )r   r   r   	split_qkvprune_paddingr   r   rC   rC   rD   rj   Z  s6   	z HFNemotronExporter.convert_statec                 C   s   t t| jjjS )z
        Get the tokenizer from the NeMo model.

        Returns:
            TokenizerSpec: Tokenizer from the NeMo model
        )r   load_contextr>   modelrT   r`   rC   rC   rD   rT     s   zHFNemotronExporter.tokenizerr   c                 C   sr   ddl m} tjt| dd}||j|j|j|j|j	dur |j	n|j|j |j
|j|j|j|j|j|j| jjdS )zCreate a HF NemotronConfig from the NeMo model config.

        Translates the NeMo configuration parameters to the equivalent HF
        configuration.

        Returns:
            HFNemotronConfig: HF configuration for Nemotron models
        r   r   zmodel.config)subpathN)r   r/   r   r3   r   tie_word_embeddingsr   r   r   r   r   r   r   )rf   r   r   r   r>   r+   r/   r1   r3   r7   r   r-   r9   r   r5   r   r#   rT   r   )rX   r   rl   rC   rC   rD   rR     s&   


zHFNemotronExporter.configN)r_   r   )r_   r   )r:   r;   r<   r=   r   r   ra   r   ro   rj   r   rT   rR   rC   rC   rC   rD   r   6  s    1
	r   )r   rE   rF   rI   rN   rO   rQ   )/dataclassesr   pathlibr   typingr   r   r   r   r   r   "nemo.collections.llm.fn.activationr	   #nemo.collections.llm.gpt.model.baser
   r   r   nemo.collections.llm.utilsr   nemo.lightningr   r   r   nemo.lightning.io.stater   nemo.lightning.pytorch.utilsr   rf   r   r   r   r   r   1nemo.collections.common.tokenizers.tokenizer_specr   rE   rF   rI   rN   rO   rQ   model_importerModelConnectorr^   model_exporterr   __all__rC   rC   rC   rD   <module>   sH   !
 
 