o
    }oi\-                     @   s  d dl mZ d dlmZ d dlmZmZmZmZ d dl	Z	d dl
mZ d dlmZ d dl	mZ d dlmZ d d	lmZmZ d d
lmZ d dlmZmZmZ d dlmZ d dlmZ erld dlmZ d dl m!Z! d dl"m#Z# eG dd deZ$eG dd de$Z%eG dd de$Z&G dd de%Z'G dd de&Z(G dd deZ)e*e)dG dd dej+d e)f Z,e-e)dG d!d" d"ej+e)d f Z.g d#Z/dS )$    )	dataclass)Path)TYPE_CHECKING	AnnotatedCallableOptionalN)parallel_state)AttnBackend)nn)openai_gelu)	GPTConfigGPTModel)Config)OptimizerModuleioteardown)TransformFns)dtype_from_hfGemmaForCausalLMAutoTokenizer)TokenizerSpecc                   @   s   e Zd ZU dZdZeed< eZe	ed< dZ
eed< dZeed< d	Zeed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< ejZeed< dS )GemmaConfigzGemma basic configRMSNormnormalizationactivation_funcTgated_linear_unitropeposition_embedding_typeFadd_bias_lineari    
seq_length   kv_channelsg        attention_dropouthidden_dropout#share_embeddings_and_output_weightslayernorm_zero_centered_gammaattention_backendN)__name__
__module____qualname____doc__r   str__annotations__r   r   r   r   boolr   r    r!   intr#   r$   floatr%   r&   r'   r	   flashr(    r3   r3   X/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/gpt/model/gemma.pyr   )   s   
 r   c                   @   sN   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dS )GemmaConfig2BzGemma 2B config   
num_layersi   hidden_size   num_attention_heads   num_query_groupsi @  ffn_hidden_sizeNr)   r*   r+   r,   r7   r0   r.   r8   r:   r<   r=   r3   r3   r3   r4   r5   @      
 r5   c                   @   sN   e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< d	Z
eed
< dS )GemmaConfig7BzGemma 7B config   r7   i   r8      r:   r<   i `  r=   Nr>   r3   r3   r3   r4   r@   K   r?   r@   c                   @      e Zd ZdZdS )CodeGemmaConfig2BzCode Gemma 2B configNr)   r*   r+   r,   r3   r3   r3   r4   rD   V       rD   c                   @   rC   )CodeGemmaConfig7BzCode Gemma 7B configNrE   r3   r3   r3   r4   rG   \   rF   rG   c                       s~   e Zd ZdZ				ddeee ee f dee ded dee	e
jge
jf  f fdd	Zdd
ee f fddZ  ZS )
GemmaModel Nconfigoptim	tokenizerr   model_transformc                    s   t  j|pt |||d dS )rI   )rK   rL   rM   N)super__init__r   )selfrJ   rK   rL   rM   	__class__r3   r4   rO   e   s   zGemmaModel.__init__vp_stagec                    sR   ddl m} ddlm} t j|d |pd}tjd|dr'|| jj	| dS dS )rI   r   )extend_instance)EmbeddingScalingMixin)rS   F)ignore_virtualrS   N)
#nemo.collections.common.parts.utilsrT   %nemo.collections.llm.gpt.model.gemma2rU   rN   configure_modelr   is_pipeline_first_stagemodule	embedding)rP   rS   rT   rU   rQ   r3   r4   rY   o   s   zGemmaModel.configure_model)NNNN)N)r)   r*   r+   r,   r   r   r   r   r   r   r
   ModulerO   r0   rY   __classcell__r3   r3   rQ   r4   rH   b   s      
rH   hfc                   @   sX   e Zd ZdZdefddZdedefddZdd	 Ze	dddZ
e	defddZdS )HFGemmaImporterrI   returnc                 C   s   t | j| jdS )rI   )rL   )rH   rJ   rL   rP   r3   r3   r4   init   s   zHFGemmaImporter.initoutput_pathc                 C   sh   ddl m} |jt| dd}|  }| |}| || | || td|  t	|| ~~|S )rI   r   r   auto)torch_dtypez.Converted Gemma model to Nemo, model saved to )
transformersr   from_pretrainedr-   rc   
nemo_setupconvert_state	nemo_saveprintr   )rP   rd   r   sourcetargettrainerr3   r3   r4   apply   s   

zHFGemmaImporter.applyc                 C   H   ddddddd}t jdd	tjd
t jddtjd
g}t j||||dS )rI    embedding.word_embeddings.weight2decoder.layers.*.self_attention.linear_proj.weight&decoder.layers.*.mlp.linear_fc2.weight<decoder.layers.*.self_attention.linear_qkv.layer_norm_weight1decoder.layers.*.mlp.linear_fc1.layer_norm_weightdecoder.final_layernorm.weight)model.embed_tokens.weight&model.layers.*.self_attn.o_proj.weight#model.layers.*.mlp.down_proj.weight%model.layers.*.input_layernorm.weight.model.layers.*.post_attention_layernorm.weightmodel.norm.weightz&model.layers.*.self_attn.q_proj.weightz&model.layers.*.self_attn.k_proj.weightz&model.layers.*.self_attn.v_proj.weight1decoder.layers.*.self_attention.linear_qkv.weight
source_key
target_keyfnz#model.layers.*.mlp.gate_proj.weightz!model.layers.*.mlp.up_proj.weight&decoder.layers.*.mlp.linear_fc1.weightmapping
transforms)r   state_transformr   	merge_qkv	merge_fc1apply_transformsrP   rm   rn   r   r   r3   r3   r4   rj      s&   
	zHFGemmaImporter.convert_stater   c                 C   s   ddl m} || t| S )rI   r   r   )=nemo.collections.common.tokenizers.huggingface.auto_tokenizerr   save_hf_tokenizer_assetsr-   )rP   r   r3   r3   r4   rL      s   zHFGemmaImporter.tokenizerc                 C   s   ddl m} ddl m} |t| }|t| }dd }t|j|j|j|j|j	|j
|j|jd||jdt|tjkt|tjkt||d}|S )rI   r   r   )GenerationConfigc                 S   s(   d}| | dkr|d }| | dks|S )N   r      r3   )
vocab_sizebaser3   r3   r4   make_vocab_size_divisible_by   s
   z<HFGemmaImporter.config.<locals>.make_vocab_size_divisible_byT)r7   r8   r=   r:   init_method_stdlayernorm_epsilonr<   rotary_baser   r   r&   fp16bf16params_dtypegeneration_config)rg   r   r   rh   r-   num_hidden_layersr8   intermediate_sizer:   initializer_rangerms_norm_epsnum_key_value_heads
rope_thetar   r   torchfloat16bfloat16)rP   HFGemmaConfigr   rm   r   r   outputr3   r3   r4   rJ      s.   zHFGemmaImporter.configN)ra   r   )r)   r*   r+   r,   rH   rc   r   rp   rj   propertyrL   r   rJ   r3   r3   r3   r4   r`      s    r`   r   c                   @   sN   e Zd ZdZdddZdedefddZd	d
 Zedd Z	edddZ
dS )HFGemmaExporterrI   ra   r   c                 C   sN   ddl m} ddlm} |  || jW  d    S 1 s w   Y  d S )Nr   )AutoModelForCausalLM)no_init_weights)rg   r   transformers.modeling_utilsr   from_configrJ   )rP   r   r   r3   r3   r4   rc      s
   
$zHFGemmaExporter.initrd   c                 C   sH   |   }| t| \}}| ||}| }|| | j| |S rI   )rc   	nemo_loadr-   rj   cpusave_pretrainedrL   )rP   rd   rn   rm   _r3   r3   r4   rp      s   
zHFGemmaExporter.applyc                 C   rq   )rI   rx   ry   rz   r{   r|   r}   )rr   rs   rt   ru   rv   rw   r   r~   r   r   r   r   )r   r   r   	split_qkv	split_fc1r   r   r3   r3   r4   rj      s&   
	zHFGemmaExporter.convert_statec                 C   s   t t| jjjS r   )r   load_contextr-   modelrL   rb   r3   r3   r4   rL     s   zHFGemmaExporter.tokenizerr   c                 C   sj   t jt| dd}ddlm} |dg|j|j|j|j|j	dur"|j	n|j|j |j
|j|j|j| jjdS )rI   zmodel.config)subpathr   r   r   N)architecturesr   r8   r   r:   head_dimmax_position_embeddingsr   r   r   r   )r   r   r-   rg   r   r7   r8   r=   r:   r#   r!   r   r   r<   rL   r   )rP   rm   r   r3   r3   r4   rJ     s"   

zHFGemmaExporter.configN)ra   r   )ra   r   )r)   r*   r+   r,   rc   r   rp   rj   r   rL   rJ   r3   r3   r3   r4   r      s    

r   )r   r5   r@   rD   rG   rH   )0dataclassesr   pathlibr   typingr   r   r   r   r   megatron.corer   megatron.core.transformer.enumsr	   r
   "nemo.collections.llm.fn.activationr   #nemo.collections.llm.gpt.model.baser   r   nemo.collections.llm.utilsr   nemo.lightningr   r   r   nemo.lightning.io.stater   nemo.lightning.pytorch.utilsr   rg   r   r   r   1nemo.collections.common.tokenizers.tokenizer_specr   r   r5   r@   rD   rG   rH   model_importerModelConnectorr`   model_exporterr   __all__r3   r3   r3   r4   <module>   s>   



`R