o
    }oi 8                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
m  mZ d dl	mZ d dlmZmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ ered dlmZ d dlmZ d dl m!Z! eG dd deZ"eG dd de"Z#G dd deZ$e%e$dG dd dej&de$f Z'e(e$dG dd dej&e$df Z)ej*ddddej+fddZ,ej*ddddej+fd d!Z-g d"Z.dS )#    )	dataclass)Path)TYPE_CHECKING	AnnotatedCallableOptionalN)nn)	GPTConfigGPTModeltorch_dtype_from_mcore_config)Config)OptimizerModuleioteardown)TransformFns)dtype_from_hfAutoModelForCausalLMAutoTokenizer)TokenizerSpecc                   @   s   e Zd ZU dZdZeed< ejZ	e
ed< dZeed< dZeed< d	Zeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dS )Baichuan2ConfigzR
    Configuration class for the Baichuan2 Config, inheriting from GPTConfig.
    RMSNormnormalizationactivation_funcTgated_linear_unitFadd_bias_linear   
seq_lengthg{Gz?init_method_stdgư>layernorm_epsilong        hidden_dropoutattention_dropout#share_embeddings_and_output_weightsN)__name__
__module____qualname____doc__r   str__annotations__Fsilur   r   r   boolr   r   intr   r    floatr!   r"   r#    r/   r/   [/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/gpt/model/baichuan.pyr   $   s   
 r   c                   @   sZ   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< dZ
eed	< d
Zeed< dS )Baichuan2Config7Bz[
    Configuration class for the Baichuan2 7B Config, inheriting from Baichuan2Config.
        
num_layersr   hidden_sizenum_attention_headsnum_query_groupsi +  ffn_hidden_sizeropeposition_embedding_typeN)r$   r%   r&   r'   r3   r-   r)   r4   r5   r6   r7   r9   r(   r/   r/   r/   r0   r1   6   s   
 r1   c                       sf   e Zd ZdZ				d
deee ee f dee ded dee	e
jge
jf  f fdd	Z  ZS )Baichuan2Modelz
    Baichuan2 model implementation based on the GPT model architecture.

    This class provides a high-level interface for Baichuan2 models,
    implementing the specific architecture and settings needed for Baichuan2 models.
    Nconfigoptim	tokenizerr   model_transformc                    s   t  j|pt |||d d S )N)r<   r=   r>   )super__init__r   )selfr;   r<   r=   r>   	__class__r/   r0   r@   L   s   
zBaichuan2Model.__init__)NNNN)r$   r%   r&   r'   r   r   r   r   r   r   r   Moduler@   __classcell__r/   r/   rB   r0   r:   D   s    	r:   hfc                   @   sX   e Zd ZdZdefddZdedefddZdd	 Ze	dddZ
e	defddZdS )HFBaichuan2Importerz
    Importer for converting Hugging Face Baichuan2 models to NeMo format.

    This class handles the conversion of Hugging Face's BaichuanForCausalLM models
    to NeMo's Baichuan2 format, including weight mapping and configuration translation.
    returnc                 C   s   t | j| jdS )z
        Initialize a NeMo Baichuan2Model instance.

        Returns:
            Baichuan2Model: Initialized NeMo Llama model with the appropriate configuration
                        and tokenizer.
        )r=   )r:   r;   r=   rA   r/   r/   r0   inita   s   zHFBaichuan2Importer.initoutput_pathc                 C   sj   ddl m} |jt| ddd}|  }| |}| || | || td|  t	|| ~~|S )z
        Apply the conversion from HF to NeMo format.

        Args:
            output_path: Path where the converted model will be saved

        Returns:
            Path: Path to the saved NeMo model
        r   r   Tautotrust_remote_codetorch_dtypez1Converted Baichuan model to Nemo, model saved to )
transformersr   from_pretrainedr(   rJ   
nemo_setupconvert_state	nemo_saveprintr   )rA   rK   r   sourcetargettrainerr/   r/   r0   applyk   s   


zHFBaichuan2Importer.applyc                 C   <   dddddddd}t tjd	d
tjdg}tj||||dS )aS  
        Convert state dict from HF format to NeMo format.

        Maps the weights from the HF model to the NeMo model according to
        the appropriate mapping scheme.

        Args:
            source: Source HF model
            target: Target NeMo model

        Returns:
            The result of applying the transforms
         embedding.word_embeddings.weight2decoder.layers.*.self_attention.linear_proj.weight&decoder.layers.*.mlp.linear_fc2.weight<decoder.layers.*.self_attention.linear_qkv.layer_norm_weight1decoder.layers.*.mlp.linear_fc1.layer_norm_weightdecoder.final_layernorm.weightoutput_layer.weight)model.embed_tokens.weight&model.layers.*.self_attn.o_proj.weight#model.layers.*.mlp.down_proj.weight%model.layers.*.input_layernorm.weight.model.layers.*.post_attention_layernorm.weightmodel.norm.weightlm_head.weightz#model.layers.*.mlp.gate_proj.weightz!model.layers.*.mlp.up_proj.weight&decoder.layers.*.mlp.linear_fc1.weight
source_key
target_keyfnmapping
transforms)_import_qkvr   state_transformr   	merge_fc1apply_transformsrA   rV   rW   rp   rq   r/   r/   r0   rS          z!HFBaichuan2Importer.convert_stater   c                 C   s"   ddl m} || t| ddS )z
        Get the tokenizer for the HF model.

        Returns:
            AutoTokenizer: Tokenizer instance initialized from the HF model's tokenizer
        r   r   TrN   )=nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   save_hf_tokenizer_assetsr(   )rA   r   r/   r/   r0   r=      s   zHFBaichuan2Importer.tokenizerc                 C   s   ddl m} |jt| dd}dd }t|j|j|j|j|j	|j
d||jd|jdkr-d	nd
t|tjkt|tjkt|d}|S )a  
        Create a NeMo Baichuan2Config from the HF model config.

        Translates the HF configuration parameters to the equivalent NeMo
        configuration.

        Returns:
            Baichuan2Config: NeMo configuration for Baichuan2 models
        r   )
AutoConfigTrx   c                 S   s(   d}| | dkr|d }| | dks|S )N   r      r/   )
vocab_sizebaser/   r/   r0   make_vocab_size_divisible_by   s
   z@HFBaichuan2Importer.config.<locals>.make_vocab_size_divisible_byFr2   r8   alibi)r3   r4   r7   r5   r   r    r   r   r#   r9   fp16bf16params_dtype)rP   r{   rQ   r(   r   num_hidden_layersr4   intermediate_sizer5   initializer_rangerms_norm_epsr~   r   torchfloat16bfloat16)rA   HFAutoConfigrV   r   outputr/   r/   r0   r;      s&   zHFBaichuan2Importer.configN)rH   r   )r$   r%   r&   r'   r:   rJ   r   rY   rS   propertyr=   r   r;   r/   r/   r/   r0   rG   X   s    
"rG   r   c                   @   sJ   e Zd ZdZejdfdddZddedefdd	Zd
d Z	e
dd ZdS )HFBaichuan2Exporterz
    Exporter for converting NeMo Baichuan2Model to Hugging Face format.

    This class handles the conversion of NeMo's Baichuan2Model to Hugging Face's
    BaichuanForCausalLM format, including weight mapping and configuration translation.
    NrH   r   c                 C   s   ddl m}m} ddlm} |du rd}|   |j|dd}|j|d|d}t|d	 |W  d   S 1 s:w   Y  dS )
z
        Initialize a HF BaichuanForCausalLM instance.

        Args:
            dtype: Data type for model parameters

        Returns:
            AutoModelForCausalLM: Initialized HF Baichuan model
        r   )r{   r   )no_init_weightsNzbaichuan-inc/Baichuan2-7B-BaseTrx   rM   r   )	rP   r{   r   transformers.modeling_utilsr   rQ   from_configtyperegister_for_auto_class)rA   dtype
model_namer{   r   r   r;   hf_modelr/   r/   r0   rJ      s   
$zHFBaichuan2Exporter.initrK   c                 C   sT   |  t| \}}| jt|j|d}| ||}| }|| | j| |S )N)r   )		nemo_loadr(   rJ   r   r;   rS   cpusave_pretrainedr=   )rA   rK   target_model_namerV   _rW   r/   r/   r0   rY      s   
zHFBaichuan2Exporter.applyc                 C   rZ   )ac  
        Convert state dict from NeMo format to HF format.

        Maps the weights from the NeMo model to the HF model according to
        the appropriate mapping scheme.

        Args:
            source: Source NeMo model
            target: Target HF model

        Returns:
            The target model with weights transferred from source
        rb   rc   rd   re   rf   rg   rh   )r[   r\   r]   r^   r_   r`   ra   rj   ri   rk   ro   )_export_qkvr   rs   r   	split_fc1ru   rv   r/   r/   r0   rS     rw   z!HFBaichuan2Exporter.convert_statec                 C   s   t t| jjjS )z
        Get the tokenizer from the NeMo model.

        Returns:
            TokenizerSpec: Tokenizer from the NeMo model
        )r   load_contextr(   modelr=   rI   r/   r/   r0   r=   -  s   zHFBaichuan2Exporter.tokenizer)rH   r   )N)r$   r%   r&   r'   r   r   rJ   r   rY   rS   r   r=   r/   r/   r/   r0   r      s    "r   z&model.layers.*.self_attn.W_pack.weightz1decoder.layers.*.self_attention.linear_qkv.weight)rl   rm   ctxc              	   C   sn  | j j}|j}|j}|| }|j}|j}|dd|f}|d  }||f|dd   }	||f|dd   }
|d  j	|	 }|d  j	|
 }|d  j	|
 }t
d|f|dd   |}t|D ]B}t
|||| |d | d d d d f f}t
||||d d d d d f f}t
||||d d d d d f f}qe|||d|   |g}|S )Nr         r}   )rW   r;   r5   r6   r4   kv_channels	unflattensizesqueezeviewr   emptytype_asrangecatreshape)r   qkv_weightsmegatron_confighead_numr6   heads_per_groupr4   	head_sizeold_tensor_shapenew_q_tensor_shapenew_kv_tensor_shapeqkvir/   r/   r0   rr   8  s(    0(*rr   c                    s   | j j}|j}|j}||  |j}|j}|d|  }||||g}t fddt	|D }t
 | d }	t
 d | d }
t|| d|||	 d|||
 d|gS )Nr}   c                    s,   g | ]}t  d  |  d  |   qS )r}   )r   arange).0r   r   r/   r0   
<listcomp>g  s    z_export_qkv.<locals>.<listcomp>r   )rV   r;   r5   r6   r4   r   r   r   r   r   r   )r   r   r   r   r6   r4   r   qkv_total_dimq_slicek_slicev_slicer/   r   r0   r   V  s*   
r   )r   r1   r:   )/dataclassesr   pathlibr   typingr   r   r   r   r   torch.nn.functionalr   
functionalr*   #nemo.collections.llm.gpt.model.baser	   r
   r   nemo.collections.llm.utilsr   nemo.lightningr   r   r   nemo.lightning.io.stater   nemo.lightning.pytorch.utilsr   rP   r   ry   r   1nemo.collections.common.tokenizers.tokenizer_specr   r   r1   r:   model_importerModelConnectorrG   model_exporterr   rs   TransformCTXrr   r   __all__r/   r/   r/   r0   <module>   sH   
 
]