o
    }oi5                     @   sf  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
m  mZ d dl	mZ d dlmZmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ erkd dlmZ d dlmZ d dl m!Z! d dl"m#Z# eG dd deZ$eG dd de$Z%G dd deZ&e'e&dG dd dej(de&f Z)e*e&dG dd dej(e&df Z+g dZ,dS )    )	dataclass)Path)TYPE_CHECKING	AnnotatedCallableOptionalN)nn)	GPTConfigGPTModeltorch_dtype_from_mcore_config)Config)OptimizerModuleioteardown)TransformFns)dtype_from_hfGPTBigCodeConfigGPTBigCodeForCausalLMAutoTokenizer)TokenizerSpecc                   @   s   e Zd ZU dZdZeed< ejZ	e
ed< dZeed< dZeed< d	Zeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dS )StarcoderConfigzR
    Configuration class for the Starcoder Config, inheriting from GPTConfig.
    	LayerNormnormalizationactivation_funcTadd_bias_lineari    
seq_lengthlearned_absoluteposition_embedding_typeg?hidden_dropoutattention_dropoutg{Gz?init_method_stdgh㈵>layernorm_epsilonF#share_embeddings_and_output_weightsNkv_channels   num_query_groupsattention_softmax_in_fp32bias_activation_fusionbias_dropout_fusion)__name__
__module____qualname____doc__r   str__annotations__Fgelur   r   r   boolr   intr    r!   floatr"   r#   r$   r%   r&   r(   r)   r*   r+    r7   r7   \/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/gpt/model/starcoder.pyr   %   s"   
 r   c                   @   sN   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dS )StarcoderConfig15Bz\
    Configuration class for the Starcoder 15B Config, inheriting from StarcoderConfig.
    (   
num_layersi   hidden_sizei `  ffn_hidden_size0   num_attention_headsg{Gz?r#   N)r,   r-   r.   r/   r;   r5   r1   r<   r=   r?   r#   r6   r7   r7   r7   r8   r9   =   s   
 r9   c                       sf   e Zd ZdZ				d
deee ee f dee ded dee	e
jge
jf  f fdd	Z  ZS )StarcoderModelz
    Starcoder model implementation based on the GPT model architecture.

    This class provides a high-level interface for Starcoder models,
    implementing the specific architecture and settings needed for Starcoder models.
    Nconfigoptim	tokenizerr   model_transformc                    s   t  j|pt |||d d S )N)rB   rC   rD   )super__init__r   )selfrA   rB   rC   rD   	__class__r7   r8   rF   R   s   
zStarcoderModel.__init__)NNNN)r,   r-   r.   r/   r   r   r   r   r   r   r   ModulerF   __classcell__r7   r7   rH   r8   r@   J   s    	r@   hfc                   @   sX   e Zd ZdZdefddZdedefddZdd	 Ze	dddZ
e	defddZdS )HFStarcoderImporterz
    Importer for converting Hugging Face Starcoder models to NeMo format.

    This class handles the conversion of Hugging Face's GPTBigCodeForCausalLM models
    to NeMo's Starcoder format, including weight mapping and configuration translation.
    returnc                 C   s   t | j| jdS )z
        Initialize a NeMo StarcoderModel instance.

        Returns:
            StarcoderModel: Initialized NeMo Starcoder model with the appropriate configuration
                        and tokenizer.
        )rC   )r@   rA   rC   rG   r7   r7   r8   initg   s   zHFStarcoderImporter.initoutput_pathc                 C   sh   ddl m} |jt| dd}|  }| |}| || | || td|  t	|| ~~|S )z
        Apply the conversion from HF to NeMo format.

        Args:
            output_path: Path where the converted model will be saved

        Returns:
            Path: Path to the saved NeMo model
        r   r   autotorch_dtypez2Converted Starcoder model to Nemo, model saved to )
transformersr   from_pretrainedr0   rP   
nemo_setupconvert_state	nemo_saveprintr   )rG   rQ   r   sourcetargettrainerr7   r7   r8   applyq   s   


zHFStarcoderImporter.applyc                 C   sz   i ddddddddd	d
dddddddddddddddddddddd d!d"}t j|||d#S )$aS  
        Convert state dict from HF format to NeMo format.

        Maps the weights from the HF model to the NeMo model according to
        the appropriate mapping scheme.

        Args:
            source: Source HF model
            target: Target NeMo model

        Returns:
            The result of applying the transforms
        transformer.wte.weight embedding.word_embeddings.weighttransformer.wpe.weight$embedding.position_embeddings.weight"transformer.h.*.attn.c_proj.weight2decoder.layers.*.self_attention.linear_proj.weight transformer.h.*.attn.c_proj.bias0decoder.layers.*.self_attention.linear_proj.bias"transformer.h.*.attn.c_attn.weight1decoder.layers.*.self_attention.linear_qkv.weight transformer.h.*.attn.c_attn.bias/decoder.layers.*.self_attention.linear_qkv.biastransformer.h.*.mlp.c_fc.weight&decoder.layers.*.mlp.linear_fc1.weighttransformer.h.*.mlp.c_fc.bias$decoder.layers.*.mlp.linear_fc1.bias!transformer.h.*.mlp.c_proj.weight&decoder.layers.*.mlp.linear_fc2.weighttransformer.h.*.mlp.c_proj.bias$decoder.layers.*.mlp.linear_fc2.biastransformer.h.*.ln_1.weight<decoder.layers.*.self_attention.linear_qkv.layer_norm_weighttransformer.h.*.ln_1.bias:decoder.layers.*.self_attention.linear_qkv.layer_norm_biastransformer.h.*.ln_2.weight1decoder.layers.*.mlp.linear_fc1.layer_norm_weighttransformer.h.*.ln_2.bias/decoder.layers.*.mlp.linear_fc1.layer_norm_biastransformer.ln_f.weightdecoder.final_layernorm.weighttransformer.ln_f.biasdecoder.final_layernorm.biaslm_head.weightoutput_layer.weight)mapping)r   apply_transforms)rG   r[   r\   r   r7   r7   r8   rX      sH   	
z!HFStarcoderImporter.convert_stater   c                 C   s   ddl m} || t| S )z
        Get the tokenizer for the HF model.

        Returns:
            AutoTokenizer: Tokenizer instance initialized from the HF model's tokenizer
        r   r   )=nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   save_hf_tokenizer_assetsr0   )rG   r   r7   r7   r8   rC      s   zHFStarcoderImporter.tokenizerc                 C   st   ddl m} |t| }dd }t|j|j|j|j|j	|j
|jd||jdt|tjkt|tjkt|d}|S )a  
        Create a NeMo StarcoderConfig from the HF model config.

        Translates the HF configuration parameters to the equivalent NeMo
        configuration.

        Returns:
            StarcoderConfig: NeMo configuration for Starcoder models
        r   r   c                 S   s(   d}| | dkr|d }| | dks|S )N   r      r7   )
vocab_sizebaser7   r7   r8   make_vocab_size_divisible_by   s
   z@HFStarcoderImporter.config.<locals>.make_vocab_size_divisible_byr'   F)r;   r<   r=   r?   r#   r   r$   r(   r   r%   fp16bf16params_dtype)rU   r   rV   r0   r   n_layern_embdn_innern_headinitializer_rangen_positionslayer_norm_epsilonr   r   torchfloat16bfloat16)rG   HFStarcoderConfigr[   r   outputr7   r7   r8   rA      s&   zHFStarcoderImporter.configN)rN   r   )r,   r-   r.   r/   r@   rP   r   r^   rX   propertyrC   r   rA   r7   r7   r7   r8   rM   ^   s    
$rM   r   c                   @   sT   e Zd ZdZejfdddZdedefddZd	d
 Z	e
dd Ze
dddZdS )HFStarcoderExporterz
    Exporter for converting NeMo StarcoderModel to Hugging Face format.

    This class handles the conversion of NeMo's StarcoderModel to Hugging Face's
    GPTBigCodeForCausalLM format, including weight mapping and configuration translation.
    rN   r   c                 C   sR   ddl m} ddlm} |  |j| j|dW  d   S 1 s"w   Y  dS )z
        Initialize a HF GPTBigCodeForCausalLM instance.

        Args:
            dtype: Data type for model parameters

        Returns:
            GPTBigCodeForCausalLM: Initialized HF Starcoder model
        r   r   )no_init_weightsrS   N)rU   r   transformers.modeling_utilsr   _from_configrA   )rG   dtyper   r   r7   r7   r8   rP      s
   
$zHFStarcoderExporter.initrQ   c                 C   sP   |  t| \}}| t|j}| ||}| }|| | j| |S )N)		nemo_loadr0   rP   r   rA   rX   cpusave_pretrainedrC   )rG   rQ   r[   _r\   r7   r7   r8   r^      s   
zHFStarcoderExporter.applyc                 C   sZ   ddddddddd	d
dddddd}t jddtjdt jddtjdg}t j||||dS )ac  
        Convert state dict from NeMo format to HF format.

        Maps the weights from the NeMo model to the HF model according to
        the appropriate mapping scheme.

        Args:
            source: Source NeMo model
            target: Target HF model

        Returns:
            The target model with weights transferred from source
        ra   rc   re   rg   ri   rk   rm   ro   rq   rs   ru   rw   ry   r{   r}   )rb   rd   rf   rh   rj   rl   rn   rp   rr   rt   rv   rx   rz   r|   r~   r`   r_   )
source_key
target_keyfnr   r   )r   
transforms)r   state_transformr   prune_paddingr   )rG   r[   r\   r   r   r7   r7   r8   rX     s8   z!HFStarcoderExporter.convert_statec                 C   s   t t| jjjS )z
        Get the tokenizer from the NeMo model.

        Returns:
            TokenizerSpec: Tokenizer from the NeMo model
        )r   load_contextr0   modelrC   rO   r7   r7   r8   rC   4  s   zHFStarcoderExporter.tokenizerr   c                 C   sn   ddl m} tjt| dd}|dg|j|j|j|j|j	dur"|j	n|j|j |j
|j|j|j|j| jjdS )zCreate a HF GPTBigCodeConfig from the NeMo model config.

        Translates the NeMo configuration parameters to the equivalent HF
        configuration.

        Returns:
            HFStarcoderConfig: HF configuration for Starcoder models
        r   r   zmodel.config)subpathr   N)architecturesnum_hidden_layersr<   intermediate_sizer?   head_dimtie_word_embeddingsmax_position_embeddingsr   norm_epsnum_key_value_headsr   )rU   r   r   r   r0   r;   r<   r=   r?   r&   r%   r   r#   r$   r(   rC   r   )rG   r   r[   r7   r7   r8   rA   >  s$   


zHFStarcoderExporter.configN)rN   r   )rN   r   )r,   r-   r.   r/   r   r   rP   r   r^   rX   r   rC   rA   r7   r7   r7   r8   r      s    .
	r   )r   r9   r@   )-dataclassesr   pathlibr   typingr   r   r   r   r   torch.nn.functionalr   
functionalr2   #nemo.collections.llm.gpt.model.baser	   r
   r   nemo.collections.llm.utilsr   nemo.lightningr   r   r   nemo.lightning.io.stater   nemo.lightning.pytorch.utilsr   rU   r   r   r   r   r   1nemo.collections.common.tokenizers.tokenizer_specr   r   r9   r@   model_importerModelConnectorrM   model_exporterr   __all__r7   r7   r7   r8   <module>   s6   
 
}