o
    TiF                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ	zd dl
Z
d dlZd dlZW n	 ey7   Y nw d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ ejjejjejjejjejjejjejjejjejjd	Z ej!j"ej!j#ej!j$ej!j#dZ%ej&j'ej&j(dZ)i Z*d	d
 Z+G dd deZ,G dd de j-Z.e+dG dd de.Z/e+dG dd de/Z0e+dG dd de/Z1e+dG dd de/Z2e+dG dd de/Z3e+dG dd  d e/Z4e+d!G d"d# d#e.Z5e+d$G d%d& d&e.Z6e+d'G d(d) d)e.Z7e+d*G d+d, d,e.Z8e+d-G d.d/ d/e.Z9e+d0G d1d2 d2e/Z:e+d3G d4d5 d5e/Z;e+d6G d7d8 d8e/Z<e+d9G d:d; d;e.Z=e+d<G d=d> d>e=Z>e+d?G d@dA dAe.Z?e+dBG dCdD dDe.Z@e+dEG dFdG dGe.ZAe+dHG dIdJ dJe.ZBe+dKG dLdM dMe.ZCe+dNe+dOG dPdQ dQe.ZDe+dRG dSdT dTe.ZEe+dUG dVdW dWe.ZFe+dXG dYdZ dZe.ZGe+d[G d\d] d]e.ZHe+d^G d_d` d`e.ZIe+daG dbdc dce.ZJe+ddG dedf dfe.ZKe+dgG dhdi dieKZLe+djG dkdl dle.ZMe+dmG dndo doe.ZNe+dpG dqdr dre.ZOe+dsG dtdu due.ZPe+dvG dwdx dxe.ZQdydz ZReSd{krWeR  g d|g d}g d~g dg dg dg dg dg dg dg ddZTe+dG dd de.ZUdS )    N)ListOptional)utils)	Converter)attention_speccommon_spec
model_spectransformer_specwav2vec2_specwav2vec2bert_specwhisper_spec)	gelu	gelu_fastgelu_newgelu_pythongelu_pytorch_tanh
quick_gelurelusiluswish)linearsullama3longrope)gemmgemvc                    s    fdd}|S )z5Registers a model loader for this configuration name.c                    s   |  t  < | S N)_MODEL_LOADERS)clsconfig_name W/home/ubuntu/.local/lib/python3.10/site-packages/ctranslate2/converters/transformers.py	decorator<   s   
z"register_loader.<locals>.decoratorr!   )r    r#   r!   r   r"   register_loader9   s   r$   c                   @   st   e Zd ZdZ						ddedee deee  dedee d	ed
efddZdd Z	dd Z
dd Zdd ZdS )TransformersConverterz/Converts models from Hugging Face Transformers.NFmodel_name_or_pathactivation_scales
copy_filesload_as_float16revisionlow_cpu_mem_usagetrust_remote_codec                 C   s.   || _ || _|| _|| _|| _|| _|| _dS )a  Initializes the converter.

        Arguments:
          model_name_or_path: Name of the pretrained model to download, or path to the
            directory containing the pretrained model.
          activation_scales: Path to the pre-computed activation scales. Models may
            use them to rescale some weights to smooth the intermediate activations
            and improve the quantization accuracy. See
            https://github.com/mit-han-lab/smoothquant.
          copy_files: List of filenames to copy from the Hugging Face model to the
            converted model directory.
          load_as_float16: Load the model weights as float16. More precisely, the model
            will be loaded with ``from_pretrained(..., dtype=torch.float16)``.
          revision: Revision of the model to download from the Hugging Face Hub.
          low_cpu_mem_usage: Enable the flag ``low_cpu_mem_usage`` when loading the model
            with ``from_pretrained``.
          trust_remote_code: Allow converting models using custom code.
        N)_model_name_or_path_activation_scales_copy_files_load_as_float16	_revision_low_cpu_mem_usage_trust_remote_code)selfr&   r'   r(   r)   r*   r+   r,   r!   r!   r"   __init__F   s   
zTransformersConverter.__init__c              	   C   st  t   tjj| j| jd}|jj}t	
|}|d u r+td|dtt	 f tt|j}tj}d| jr;t jnt|dd pFt|dd i}| jrP| j|d< | jrX| j|d< | jr`| j|d< | j|| jfi |}i }| jru| j|d< | j|| jfi |}	|||	}
| jrt j| jd	d
}||
| | jr| jD ]
}|
| | q|
W  d    S 1 sw   Y  d S )N)r,   z]No conversion is registered for the model configuration %s (supported configurations are: %s), dtypetorch_dtyper*   r+   r,   cpu)map_location)torchno_gradtransformers
AutoConfigfrom_pretrainedr-   r3   	__class____name__r   get
ValueErrorjoinsortedkeysgetattrarchitecture_nameAutoTokenizerr0   float16r1   r2   
load_modelload_tokenizerr.   loadsmooth_activationr/   register_fileget_model_file)r4   configr    loadermodel_classtokenizer_classkwargsmodeltokenizer_kwargs	tokenizerspecr'   filenamer!   r!   r"   _loadj   s\   


	





$zTransformersConverter._loadc                 K      |j |fi |S r   r?   )r4   rS   r&   rU   r!   r!   r"   rK         z TransformersConverter.load_modelc                 K   r\   r   r]   )r4   rT   r&   rU   r!   r!   r"   rL      r^   z$TransformersConverter.load_tokenizerc                 C   sz   t j| jrt j| j|}nz
tj| j|d}W n tjjy'   d }Y nw |d u s2t j	|s;t
d|| jf |S )N)repo_idrZ   z"File %s does not exist in model %s)ospathisdirr-   rD   huggingface_hubhf_hub_downloadr   EntryNotFoundErrorisfilerC   )r4   rZ   ra   r!   r!   r"   rP      s    
z$TransformersConverter.get_model_file)NNFNFF)rA   
__module____qualname____doc__strr   r   boolr5   r[   rK   rL   rP   r!   r!   r!   r"   r%   C   s6    

$;r%   c                   @   s   e Zd ZdZedd Zejdd Zdd Z	dd	 Z
d
d Zdd Zdd ZejjfddZdd Zdd Zdd Zdd ZdS )ModelLoaderzRBase class for loading Transformers models into a CTranslate2 model specification.c                 C      d S r   r!   r4   r!   r!   r"   rH         zModelLoader.architecture_namec                 C   s   t  r   NotImplementedErrorr4   rV   r!   r!   r"   get_model_spec   s   zModelLoader.get_model_specc                 C   s6   |  |}| |j|| | ||}| || |S r   )rs   
set_configrQ   get_vocabularyset_vocabulary)r4   rV   rX   rY   tokensr!   r!   r"   __call__   s
   
zModelLoader.__call__c                 C   s"   dd t |  dd dD S )Nc                 S   s   g | ]\}}|qS r!   r!   ).0token_r!   r!   r"   
<listcomp>   s    z.ModelLoader.get_vocabulary.<locals>.<listcomp>c                 S   s   | d S N   r!   )itemr!   r!   r"   <lambda>   s    z,ModelLoader.get_vocabulary.<locals>.<lambda>)key)rE   	get_vocabitemsr4   rV   rX   r!   r!   r"   ru      s
   zModelLoader.get_vocabularyc                 C   rm   r   r!   r4   rY   rw   r!   r!   r"   rv         zModelLoader.set_vocabularyc                 C   rm   r   r!   r4   rQ   rV   rX   r!   r!   r"   rt      r   zModelLoader.set_configc                 C   s   |j |_|j|_d S r   weightgammabiasbetar4   rY   moduler!   r!   r"   set_layer_norm      zModelLoader.set_layer_normc                 C   sr   |t jjkr|j|_n|j|_|j|_|j|_t	|t
jr%|jdd|_t|dr5|jd ur7|j|_d S d S d S )Nr   r~   r   )r   QuantizationCT2r   qweightscalesweight_scaleqzerosweight_zero
isinstancer=   Conv1D	transposehasattrr   )r4   rY   r   
quant_typer!   r!   r"   
set_linear   s   
zModelLoader.set_linearc                 C   s   |j |_ d S r   )r   r   r!   r!   r"   set_embeddings      zModelLoader.set_embeddingsc                 C   s4   |j |_t|dd}|dkr|j|d  |_d S d S )Noffsetr   r   	encodingsrG   r4   rY   r   r   r!   r!   r"   set_position_encodings   s
   z"ModelLoader.set_position_encodingsc                 C   s   t d)Nz7No activation smoothing logic is defined for this modelrp   )r4   rY   r'   r!   r!   r"   rN      s   zModelLoader.smooth_activationc                 C   s   t |dd }|r<|dp|d}|dkrd }nt|}|d u r/td|dt f |dd}|d	|}n
d }d}t |d	|}|||fS )
Nrope_scalingtype	rope_typedefaultkRoPE scaling type '%s' is not yet implemented. The following RoPE scaling types are currently supported: %sr6   factorr~   
rope_theta)rG   rB   _SUPPORTED_ROPE_SCALINGrq   rD   rF   )r4   rQ   default_rope_thetar   r   rotary_scaling_typerotary_scaling_factorr   r!   r!   r"   get_rotary_params   s$   

zModelLoader.get_rotary_paramsN)rA   rg   rh   ri   propertyrH   abcabstractmethodrs   rx   ru   rv   rt   r   r   r   r   r   r   r   rN   r   r!   r!   r!   r"   rl      s     

	rl   
BartConfigc                       sb   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	dd Z
dddZdd Z  ZS )
BartLoaderc                 C      dS )NBartForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zBartLoader.architecture_namec              	   C   s   t jj|jj|jjf|jj|jjt|jj	 t
|jddd}| |j|jj | |j|jj | |jj|j t
|dd }|d urR|  dkrR| |jj_|S )Nnormalize_embeddingTpre_norm
activationlayernorm_embeddingfinal_logits_biasr   )r	   TransformerSpecfrom_configrQ   encoder_layersdecoder_layersencoder_attention_headsnormalize_before_SUPPORTED_ACTIVATIONSactivation_functionrG   set_encoderencoderrV   set_decoderdecoderr   
projectionlm_headnonzeronumelsqueezer   )r4   rV   rY   r   r!   r!   r"   rs     s   
zBartLoader.get_model_specc                    s2   t  ||}|jjt|k r|d |jj }|S r   )superru   rQ   
vocab_sizelenr4   rV   rX   rw   r@   r!   r"   ru   2  s   zBartLoader.get_vocabularyc                 C      | | || d S r   register_source_vocabularyregister_target_vocabularyr   r!   r!   r"   rv   8     
zBartLoader.set_vocabularyc                 C   s,   |j |_ |j|_|j|_||jj|_d S r   )	bos_token	eos_token	unk_tokenconvert_ids_to_tokensrQ   decoder_start_token_iddecoder_start_tokenr   r!   r!   r"   rt   <  s   
zBartLoader.set_configc                 C   s   |  || t|j|jD ]2\}}| j|j|jdd | |jj|j	 | 
|jj|j | 
|jj|j | |jj|j qd S NTself_attention)set_common_layersziplayerlayersset_attentionr   	self_attnr   
layer_normself_attn_layer_normr   ffnlinear_0fc1linear_1fc2final_layer_norm)r4   rY   r   
layer_specr   r!   r!   r"   r   D  s   zBartLoader.set_encoderc                 C   s   |  || t|j|jD ]J\}}| j|j|jdd | |jj|j	 t
|dr<| j|j|jdd | |jj|j | |jj|j | |jj|j | |jj|j qd S )NTr   encoder_attnF)r   r   r   r   r   r   r   r   r   r   r   	attentionr   encoder_attn_layer_normr   r   r   r   r   r   r   )r4   rY   r   r   r   r!   r!   r"   r   V  s2   
zBartLoader.set_decoderFc                 C   s   dd t dD }| |d |j | |d |j | |d |j |r0t|jd | nt|jd |d d  t|jd |dd   | |jd |j d S )Nc                 S      g | ]}t  qS r!   r   
LinearSpecry   r{   r!   r!   r"   r|   t      z,BartLoader.set_attention.<locals>.<listcomp>   r   r~      )	ranger   q_projk_projv_projr   fuse_linearr   out_projr4   rY   r   r   split_layersr!   r!   r"   r   s  s   zBartLoader.set_attentionc                 C   s   dd l }t|ds|jjr||jjnd}n|j}||_| |j	|j
 | t|jtr2|jd n|j|j t|drE| |j|j t|drT| |j|j d S d S )Nr   embed_scale      ?r   r   )mathr   rQ   scale_embeddingsqrtd_modelr  scale_embeddingsr   position_encodingsembed_positionsr   r   
embeddingslistembed_tokensr   r   r   )r4   rY   r   r  r  r!   r!   r"   r     s(   


	
zBartLoader.set_common_layersF)rA   rg   rh   r   rH   rs   ru   rv   rt   r   r   r   r   __classcell__r!   r!   r   r"   r     s    

r   MarianConfigc                       sP   e Zd Zedd Z fddZdd Z fddZ fd	d
Zdd Z	  Z
S )MarianMTLoaderc                 C   r   )NMarianMTModelr!   rn   r!   r!   r"   rH     ro   z MarianMTLoader.architecture_namec                    s*   d|j _d|j _t |}| | |S NF)rQ   r   r   r   rs   _remove_pad_weightsr4   rV   rY   r   r!   r"   rs     s
   
zMarianMTLoader.get_model_specc                 C   s   |j |_ |j|_|j |_d S r   )r   r   r   r   r!   r!   r"   rt     s   zMarianMTLoader.set_configc                    s   d|_ t || d S NT)start_from_zero_embeddingr   r   r4   rY   r   r   r!   r"   r     s   zMarianMTLoader.set_decoderc                    s&   t  ||}|d dkr|  |S )Nr   z<pad>)r   ru   popr   r   r!   r"   ru     s   zMarianMTLoader.get_vocabularyc                 C   s   |j jd |jj|jjg}|d jjd d }|D ]0}|jjd |d kr-|jd d |_t|tjrI|	 rI|j
jd |d krI|j
d d |_
qd S )Nr   r~   r   )r   r  r   r   r   shaper   r   r   has_biasr   )r4   rY   vocab_specsnew_vocab_size
vocab_specr!   r!   r"   r    s    

z"MarianMTLoader._remove_pad_weights)rA   rg   rh   r   rH   rs   rt   r   ru   r  r  r!   r!   r   r"   r    s    
	r  M2M100Configc                       s<   e Zd Zedd Z fddZdd Z fddZ  ZS )	M2M100Loaderc                 C   r   )NM2M100ForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zM2M100Loader.architecture_namec                    s   d|j _d|j _t |S )NTF)rQ   r   r   r   rs   rr   r   r!   r"   rs        zM2M100Loader.get_model_specc                 C   s   |j |jd  |_d S r   )weightsr   r   r   r!   r!   r"   r     s   z#M2M100Loader.set_position_encodingsc                    s   t  ||}|d |jkr||j|  |jdg D ]}||vr)|| qt	|d|j
jt| }|dkrE|dd t|D 7 }|S )Nr   additional_special_tokensnum_madeup_wordsr   c                 S   s   g | ]}d | qS )zmadeupword%dr!   ry   ir!   r!   r"   r|     r   z/M2M100Loader.get_vocabulary.<locals>.<listcomp>)r   ru   r   insertunk_token_idr  special_tokens_maprB   appendrG   rQ   r   r   r   )r4   rV   rX   rw   rz   r'  r   r!   r"   ru     s   
zM2M100Loader.get_vocabulary)	rA   rg   rh   r   rH   rs   r   ru   r  r!   r!   r   r"   r"    s    
r"  MBartConfigc                   @       e Zd Zedd Zdd ZdS )MBartLoaderc                 C   r   )NMBartForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zMBartLoader.architecture_namec                 C   s@   |j |_ |j|_|j|_t|jdd dv rd |_d S |j|_d S )NrT   )MBartTokenizerN)r   r   r   rG   rQ   r   r   r!   r!   r"   rt     s   
zMBartLoader.set_configNrA   rg   rh   r   rH   rt   r!   r!   r!   r"   r0        
r0  PegasusConfigc                   @   r/  )PegasusLoaderc                 C   r   )NPegasusForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zPegasusLoader.architecture_namec                 C   s$   |j |_|j|_|j|_|j |_d S r   )	pad_tokenr   r   r   r   r   r!   r!   r"   rt     s   zPegasusLoader.set_configNr3  r!   r!   r!   r"   r6  	  r4  r6  	OPTConfigc                       s\   e Zd Zedd Zdd Zdd Zdd Zd	d
 Z fddZ	dd Z
 fddZ  ZS )	OPTLoaderc                 C   r   )NOPTForCausalLMr!   rn   r!   r!   r"   rH     ro   zOPTLoader.architecture_namec                 C   s^   t jj|jj|jj|jjt|jj |jj	|jj
kd}| |j|jj | |jj|j |S )N)r   r   project_in_out)r	   TransformerDecoderModelSpecr   rQ   num_hidden_layersnum_attention_headsdo_layer_norm_beforer   r   word_embed_proj_dimhidden_sizer   r   rV   r   r   r   r  r!   r!   r"   rs     s   
zOPTLoader.get_model_specc                 C   sb   t |jjD ](\}}d| }t|jj|jjd |d|   t|jj|jj	|d|   qd S )Nzmodel.decoder.layers.%dr   z%s.self_attn.q_projz%s.fc1)
	enumerater   r   r   rN   r   r   r   r   r   )r4   rY   r'   r)  r   layer_scoper!   r!   r"   rN   )  s   


zOPTLoader.smooth_activationc                 C      | | d S r   register_vocabularyr   r!   r!   r"   rv   9     zOPTLoader.set_vocabularyc                 C      |j |_ |j|_|j|_d S r   r   r   r   r   r!   r!   r"   rt   <  r$  zOPTLoader.set_configc                    sd   t  || |jd ur| |j|j |jd ur!| |j|j |jd ur0| |j|j d S d S r   )r   r   
project_inr   project_outr   r   r   r  r   r!   r"   r   A  s   


zOPTLoader.set_decoderc                 C   s*   d|_ | |j|j | |j|j d S r  )r
  r   r  r  r   r  r  r   r!   r!   r"   r   K  s   zOPTLoader.set_common_layersc                    sZ   t  ||}d}t|d dkr+d|}||vr|| |d7 }t|d dks|S )Nr      zmadeupword{:04d}r~   )r   ru   r   formatr-  )r4   rV   rX   rw   r)  symbolr   r!   r"   ru   P  s   

zOPTLoader.get_vocabulary)rA   rg   rh   r   rH   rs   rN   rv   rt   r   r   ru   r  r!   r!   r   r"   r:    s    

r:  GPTBigCodeConfigc                       sH   e Zd Zedd Zdd Zdd Z fddZd	d
 Zdd Z	  Z
S )GPTBigCodeMHALoaderc                 C   r   )NGPTBigCodeForCausalLMr!   rn   r!   r!   r"   rH   _  ro   z%GPTBigCodeMHALoader.architecture_namec                 C   sL   t jj|jj|jjdt|jj dd}| |j	|j
 | |j	j|j |S )NT)r   r   multi_query_attentionr	   r=  r   rQ   n_layern_headr   r   r   r   transformerr   r   r   r  r!   r!   r"   rs   c  s   
z"GPTBigCodeMHALoader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv   p  rH  z"GPTBigCodeMHALoader.set_vocabularyc                    >   t  ||}|jjt| }t|D ]	}|d|  q|S Nz<extra_id_%d>r   ru   rQ   r   r   r   r-  r4   rV   rX   rw   	extra_idsr)  r   r!   r"   ru   s  
   z"GPTBigCodeMHALoader.get_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   |  r$  zGPTBigCodeMHALoader.set_configc                 C      d|_ | |j|j | |j|j | |j|j	 t
|j|jD ]B\}}| |jj|j | |jjd |jj | |jjd |jj | |jj|j | |jj|jj | |jj|jj q"d S NFr   r~   r
  r   r  wter   r  wper   r   ln_fr   r   hr   ln_1r   r   attnc_attnc_projr   ln_2r   mlpc_fcr   r4   rY   r   r   r   r!   r!   r"   r        zGPTBigCodeMHALoader.set_decoder)rA   rg   rh   r   rH   rs   rv   ru   rt   r   r  r!   r!   r   r"   rQ  ]  s    
	rQ  
GPT2Configc                   @   8   e Zd Zedd Zdd Zdd Zdd Zd	d
 ZdS )
GPT2Loaderc                 C   r   )NGPT2LMHeadModelr!   rn   r!   r!   r"   rH     ro   zGPT2Loader.architecture_namec                 C   sJ   t jj|jj|jjdt|jj d}| |j	|j
 | |j	j|j |S )NT)r   r   rT  r  r!   r!   r"   rs     s   
zGPT2Loader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zGPT2Loader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt     r$  zGPT2Loader.set_configc                 C   r^  r_  r`  rl  r!   r!   r"   r     rm  zGPT2Loader.set_decoderN	rA   rg   rh   r   rH   rs   rv   rt   r   r!   r!   r!   r"   rp    s    
rp  
GPTJConfigc                   @   ro  )
GPTJLoaderc                 C   r   )NGPTJForCausalLMr!   rn   r!   r!   r"   rH     ro   zGPTJLoader.architecture_namec              
   C   sb   t jj|jj|jjdt|jj |jjdddd}| 	|j
|j|jj|jj | |j
j|j |S NTFr   r   
rotary_dimrotary_interleaveparallel_residualshared_layer_norm)r	   r=  r   rQ   rU  rV  r   r   rx  r   r   rW  r   r   r   r  r!   r!   r"   rs     s$   
zGPTJLoader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zGPTJLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt     r$  zGPTJLoader.set_configc           
      C   s   d|_ | |j|j | |j|j t|j|j	D ]V\}}| |j
|j |jjj}|jjj}|jjj}	t|||}t|||}t|||	f|jjd _| |jjd |jj | |jj|jj | |jj|jj qd S r_  )r
  r   r  ra  r   r   rc  r   r   rd  r{  re  rf  r   r   r   r   r   permute_for_sliced_rotaryr;   catr   r   r   r  r   r   rj  fc_inr   fc_out)
r4   rY   r   rx  	num_headsr   r   qwkwvwr!   r!   r"   r     s   


zGPTJLoader.set_decoderNrr  r!   r!   r!   r"   rt    s    
rt  CodeGenConfigc                       H   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	  Z
S )CodeGenLoaderc                 C   r   )NCodeGenForCausalLMr!   rn   r!   r!   r"   rH     ro   zCodeGenLoader.architecture_namec              
   C   s   t jj|jj|jjdt|jj |jjdddd}d}t	|jdr(|jj
dv r(d}| j|j|j|jj|jj|jj|d | |jj|j |S )	NTFrw     head_dim)      rM  )mp_num)r	   r=  r   rQ   rU  rV  r   r   rx  r   r  r   r   rW  n_embdr   r   r   )r4   rV   rY   r  r!   r!   r"   rs     s.   
zCodeGenLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru     s
   zCodeGenLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv      rH  zCodeGenLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   #  r$  zCodeGenLoader.set_configc                    s8  d|_ | |j|j | |j|j td|d 	ddj
  }||  t fdd|D }t|j|jD ]^\}	}
| |	j|
j |
jjj}||d d f }|jddd\}}}t|||}t|||}t|||f|	jjd _| |	jjd |
jj | |	jj|
j j! | |	jj"|
j j# q;d S )	NFr   r   r   c                    s$   g | ]}t |  |d    qS )r~   )r;   aranger(  	local_dimr!   r"   r|   0  s   $ z-CodeGenLoader.set_decoder.<locals>.<listcomp>dimr~   )$r
  r   r  ra  r   r   rc  npr  reshapeTflattentolistr;   r}  r   r   rd  r{  re  rf  qkv_projr   chunkr   r|  r   r   r   r  r   r   rj  r~  r   r  )r4   rY   r   rx  r  	embed_dimr  base_permutationpermutationr   r   r  new_qkv_projr  r  r  r!   r  r"   r   (  s(   "
zCodeGenLoader.set_decoderrA   rg   rh   r   rH   rs   ru   rv   rt   r   r  r!   r!   r   r"   r    s    

r  GPTNeoXConfigc                       r  )GPTNeoXLoaderc                 C   r   )NGPTNeoXForCausalLMr!   rn   r!   r!   r"   rH   N  ro   zGPTNeoXLoader.architecture_namec              
   C   st   t jj|jj|jjdt|jj t|jj	|jj
|jj  d|jjdd}| |j|j|jj | |jj|j |S rv  )r	   r=  r   rQ   r>  r?  r   
hidden_actint
rotary_pctrB  use_parallel_residualr   r   gpt_neoxr   r   	embed_outr  r!   r!   r"   rs   R  s"   
zGPTNeoXLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   e  r]  zGPTNeoXLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   n  rH  zGPTNeoXLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   q  r$  zGPTNeoXLoader.set_configc                 C   sJ  d|_ | |j|j | |j|j t|j|j	D ]\}}t
|dr4| |j|j | |j|j n| |jj|j | |jj|j |jjj}|jjj}||dd|jd ddd|jd }||ddddd}||jjd _||jjd _| |jjd |jj | |jj|jj | |jj|jj qd S )NFinput_layer_normr   r   r   r~   ) r
  r   r  embed_inr   r   r   r   r   r   r   r  input_layernormpost_attention_layer_normpost_attention_layernormr   r   r   query_key_valuer   r   r  r  swapaxesr   r   denser   rj  dense_h_to_4hr   dense_4h_to_h)r4   rY   r   r  r   r   qkv_wqkv_br!   r!   r"   r   v  s8   




zGPTNeoXLoader.set_decoderr  r!   r!   r   r"   r  L  s    
	r  WhisperConfigc                       sp   e Zd Zedd Zdd Zdd Zdd Z fd	d
Zdd Z	 fddZ
 fddZdd Zdd Z  ZS )WhisperLoaderc                 C   r   )NWhisperForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zWhisperLoader.architecture_namec                 C   sZ   t |jj|jj|jj|jj}| |j|j	j | 
|j|j	j | |jj|j |S r   )r   WhisperSpecrQ   r   r   r   decoder_attention_headsr   r   rV   r   r   r   r   proj_outr  r!   r!   r"   rs     s   zWhisperLoader.get_model_specc                    s0   g d t dg }|sg S  fdd|D S )N)z<|endoftext|>z<|startoftranscript|>z<|translate|>z<|transcribe|>z<|startoflm|>z<|startofprev|>z<|nocaptions|>z<|notimestamps|>r&  c                    s   g | ]}| vr |qS r!   )convert_tokens_to_ids)ry   rz   non_lang_special_tokensrX   r!   r"   r|     s
    z>WhisperLoader._get_lang_ids_from_tokenizer.<locals>.<listcomp>)rG   )r4   rX   additional_tokensr!   r  r"   _get_lang_ids_from_tokenizer  s   z*WhisperLoader._get_lang_ids_from_tokenizerc                 C   s   t |dd }|d ur)|j|_|j|_t|dr|j|_t|dr(t|j	 |_
n|jj|_|jj|_t|j|_t |dd d u rH| ||_
|jd u rg|jj}|jj}ttt|d |t||_d S d S )Ngeneration_configalignment_heads
lang_to_idlang_idsr   )rG   suppress_tokenssuppress_idsbegin_suppress_tokenssuppress_ids_beginr   r  rE   r  valuesr  rQ   _WHISPER_ALIGNMENT_HEADSrB   name_or_pathr  r   r  r  	itertoolsproductr   )r4   rQ   rV   rX   
gen_config
num_layersr  r!   r!   r"   rt     s0   





zWhisperLoader.set_configc                    s6   t  ||}|dd t|jjt| D  |S )Nc                 s   s    | ]	}d |d  V  qdS )z<|%.2f|>g{Gz?Nr!   r(  r!   r!   r"   	<genexpr>  s
    

z/WhisperLoader.get_vocabulary.<locals>.<genexpr>)r   ru   extendr   rQ   r   r   r   r   r!   r"   ru     s
   
zWhisperLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zWhisperLoader.set_vocabularyc                    s2   |  |j|j |  |j|j t || d S r   )
set_conv1dconv1conv2r   r   )r4   rY   r   r   r!   r"   r     s   zWhisperLoader.set_encoderc                    s"   |  |j|j t || d S r   )r   r  r  r   r   r  r   r!   r"   r     s   zWhisperLoader.set_decoderc                 C   s$   |  |j|j | |j|j d S r   )r   r  r  r   r   r   r!   r!   r"   r        zWhisperLoader.set_common_layersc                 C   s   |j |_ |j|_d S r   r   r   r   r!   r!   r"   r    r   zWhisperLoader.set_conv1d)rA   rg   rh   r   rH   rs   r  rt   ru   rv   r   r   r   r  r  r!   r!   r   r"   r    s    
r  Wav2Vec2Configc                       sh   e Zd Zedd Zdd Zdd Zdd Zd	d
 Zdd Z	dd Z
dd Z fddZdd Z  ZS )Wav2Vec2Loaderc                 C   r   )NWav2Vec2ForCTCr!   rn   r!   r!   r"   rH     ro   z Wav2Vec2Loader.architecture_namec                 C   s   t |jjdd}t|jjj|jjjj|jjjj|j	j
jd |}|jjjD ]}|j|_|j|_|jj|_|jj|_|jj|_q&| |j||jj |S Nreturn_hiddenFr   )rG   wav2vec2rQ   r
   Wav2Vec2Specnum_feat_extract_layersr   r>  r?  r   r   r  r   r   r   r   r   feed_forwardintermediate_act_fnactivation_fnintermediate_denser   output_denser   r   )r4   rV   r  rY   r   r!   r!   r"   rs   	  s    

	

zWav2Vec2Loader.get_model_specc                 C   rm   r   r!   r   r!   r!   r"   rt     r   zWav2Vec2Loader.set_configc                 C      |  S r   r   r   r!   r!   r"   ru   !     zWav2Vec2Loader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   $  rH  zWav2Vec2Loader.set_vocabularyc                 C   s   |j d jj|jj_|j d jj|jj_| |jj|j d j t|j|j dd  D ]\}}|jj|j_|jj|j_| |j|j q+d S )Nr   r~   )	conv_layersconvr   feat_layer0r   r   r   r   
feat_layer)r4   rY   feature_extractor
spec_layermodule_layerr!   r!   r"   set_feature_extractor'  s   z$Wav2Vec2Loader.set_feature_extractorc                 C   $   |  |j|j | |j|j d S r   r   fp_layer_normr   r   fp_projectionr   r4   rY   feature_projectionr!   r!   r"   set_feature_projection4  r  z%Wav2Vec2Loader.set_feature_projectionc                 C   s   |j jjj |j jj_|j jj |j jj_|j  D ]}|j |_q| tdd|j	f |j jj|j j_|j jj|j j_d S r}   )
pos_conv_embedr  r   datafloatr   
parametersr;   randnrB  )r4   rY   r   rQ   paramr!   r!   r"   set_pos_conv_embed8  s   
z!Wav2Vec2Loader.set_pos_conv_embedc                    sp   |  ||jj | ||jj | ||jj| t ||jj t	|jj
dd}|s6| |j|j d S d S Nr  F)r  r  r  r  r  r  r   r   r   rG   rQ   r   r   )r4   rY   rV   rQ   r  r   r!   r"   r   E  s   zWav2Vec2Loader.set_encoderc                 C   s   |  |j|j d S r   )r   r   r   r!   r!   r"   r   N  s   z Wav2Vec2Loader.set_common_layers)rA   rg   rh   r   rH   rs   rt   ru   rv   r  r  r  r   r   r  r!   r!   r   r"   r    s    
	r  Wav2Vec2BertConfigc                   @   st   e Zd Zedd Zdd Zdd Zdd Zd	d
 Zdd Z		dddZ
dd Zdd Zdd Zdd Zdd ZdS )Wav2Vec2BertLoaderc                 C   r   )NWav2Vec2BertForCTCr!   rn   r!   r!   r"   rH   T  ro   z$Wav2Vec2BertLoader.architecture_namec                 C   sH   t |jjdd}t|jjj|jjj|jjj	d |}| 
|j| |S r  )rG   wav2vec2_bertrQ   r   Wav2Vec2BertSpecnum_adapter_layersr>  r   r   r  r   r   )r4   rV   r  rY   r!   r!   r"   rs   X  s   z!Wav2Vec2BertLoader.get_model_specc                 C   rm   r   r!   r   r!   r!   r"   rt   c  r   zWav2Vec2BertLoader.set_configc                 C   r  r   r  r   r!   r!   r"   ru   f  r  z!Wav2Vec2BertLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   i  rH  z!Wav2Vec2BertLoader.set_vocabularyc                 C   r  r   r  r  r!   r!   r"   r  l  r  z)Wav2Vec2BertLoader.set_feature_projectionNc                 C   s   dd t dD }| |d |j | |d |j | |d |j t|jd | | |jd |j |s;|rT|j	j
|_td||_td||_d S d S )	Nc                 S   r   r!   r   r   r!   r!   r"   r|   s  r   z4Wav2Vec2BertLoader.set_attention.<locals>.<listcomp>r   r   r~   r   r   int32)r   r   linear_qlinear_klinear_vr   r   r   
linear_outdistance_embeddingr   !relative_asymmetric_position_keysr  r7   r   relative_left_max_positionrelative_right_max_position)r4   rY   r   left_max_positionright_max_positionr  r!   r!   r"   r   p  s   


z Wav2Vec2BertLoader.set_attentionc                 C   s  t ||D ]\}}| |j|j | |jj|jj | |jj	|jj
 | |j|j|| | |j|j | |j|jj | |j|jj | |j|jj | |j|jj | |j|jj | |j|j | |jj|jj | |jj	|jj
 | |j |j! qd S r   )"r   r   enc_ffn1_layer_normffn1_layer_normr   enc_ffn1r   ffn1r  r   r  r   enc_attnr   enc_attn_layer_normr   enc_conv_layer_normconv_moduler   r  enc_conv_pointwise_conv1pointwise_conv1enc_conv_depthwise_convdepthwise_convenc_conv_depthwise_layer_normdepthwise_layer_normenc_conv_pointwise_conv2pointwise_conv2enc_ffn2_layer_normffn2_layer_normenc_ffn2ffn2enc_final_layer_normr   )r4   spec_layersr   r  r  slayerr   r!   r!   r"   set_wav2vec2bert_encoder  s:   



z+Wav2Vec2BertLoader.set_wav2vec2bert_encoderc                 C   s   t ||D ]H\}}| |j|j | |j|j | |j|j | |j	|j
 | |j|j | |j|j | |jj|jj | |jj|jj qd S r   )r   r   adpt_residual_layer_normresidual_layer_normr  adpt_residual_convresidual_convadpt_attn_layer_normr   adpt_attn_convself_attn_convr   adpt_attn_layerr   adpt_ffn_layer_normffn_layer_normr   adpt_ffnr   r   r  r   r  )r4   r"  r   r#  r   r!   r!   r"   set_wav2vec2bert_adapter  s   z+Wav2Vec2BertLoader.set_wav2vec2bert_adapterc                 C   st   |  ||jj | |j|jjj|jjj|jjj	 | 
|j|jjj t|jjdd}|s8| |j|j d S d S r  )r  r  r  r$  r   r   r   rQ   left_max_position_embeddingsright_max_position_embeddingsr0  adapter_layersadapterrG   r   r   )r4   rY   rV   r  r!   r!   r"   r     s   zWav2Vec2BertLoader.set_encoderc                 C   s"   |j |_ |jd ur|j|_d S d S r   r  r   r!   r!   r"   r       
zWav2Vec2BertLoader.set_conv1dc                 C   s"   |j |_|jd ur|j|_d S d S r   r   r   r!   r!   r"   r     r5  z!Wav2Vec2BertLoader.set_layer_norm)NN)rA   rg   rh   r   rH   rs   rt   ru   rv   r  r   r$  r0  r   r  r   r!   r!   r!   r"   r  R  s    

 r  T5Configc                       st   e Zd Zedd Zdd Z fddZdd Zd	d
 ZdddZ	dd Z
dd Zdd ZdddZdd Z  ZS )T5Loaderc                 C   r   )NT5ForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zT5Loader.architecture_namec              	   C   s   t jj|jj|jjf|jjdt|jj |jj	ddd}| 
|j|j | j
|j|jdd | |jj|j |jjrB|jjd |j_|S )NT)r   r   ffn_glurelative_attention_biasrms_norm)
is_decoderg      )r	   r   r   rQ   r  num_decoder_layersr  r   dense_act_fnis_gated_act	set_stackr   r   r   r   r   tie_word_embeddingsr	  scale_outputsr  r!   r!   r"   rs     s   

zT5Loader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru     r]  zT5Loader.get_vocabularyc                 C   r   r   r   r   r!   r!   r"   rv     r   zT5Loader.set_vocabularyc                 C   sD   |j |_|j|_|j|_t|jdr||jj|_d S |j |_d S )Nr   )	r8  r   r   r   r   rQ   r   r   r   r   r!   r!   r"   rt     s   
zT5Loader.set_configFc                 C   s   |  |j|j | t|jtr|jd n|j|j d|_t	t
|j|jD ]:\}\}}| |j|jd  |dkrK|jd j}|j|j_|j|j_|rW| |j|jd  | |j|jd  q'd S )Nr   Fr~   r   )r   r   r   r   r   r  r  r  r
  rC  r   r   blockset_self_attentionr   r:  relative_attention_max_distanceset_cross_attentionr   set_ffnr   )r4   rY   r   r<  r)  r   rC  first_self_attentionr!   r!   r"   r@    s(   
	zT5Loader.set_stackc                 C   sh   t |dr| |j|jj | |j|jj n	| |j|jj | |j|jj	 | 
|j|j d S )Nlinear_0_noact)r   r   r   DenseReluDensewi_0rI  wi_1wir   wor   r   r   r!   r!   r"   rG    s   
zT5Loader.set_ffnc                 C   s&   | j ||jdd | |j|j d S r   )r   SelfAttentionr   r   r   r!   r!   r"   rD  "  s   zT5Loader.set_self_attentionc                 C   s"   |  ||j | |j|j d S r   )r   EncDecAttentionr   r   r   r!   r!   r"   rF  &  s   zT5Loader.set_cross_attentionc                 C   s   d|_ dd tdD }| |d |j | |d |j | |d |j |r3t|jd | nt|jd |d d  t|jd |dd   | |jd |j	 |j
rk|jj|_td	|j|_d S d S )
Nr  c                 S   r   r!   r   r   r!   r!   r"   r|   -  r   z*T5Loader.set_attention.<locals>.<listcomp>r   r   r~   r   r   r  )queries_scaler   r   qkvr   r   r   ohas_relative_attention_biasr:  r   r  r7   r   rE  r  r!   r!   r"   r   *  s    


zT5Loader.set_attentionc                 C      |j |_d S r   r   r   r4   rY   r   r!   r!   r"   r   @  r   zT5Loader.set_layer_normr  )rA   rg   rh   r   rH   rs   ru   rv   rt   r@  rG  rD  rF  r   r   r  r!   r!   r   r"   r7    s    
	


r7  	MT5Configc                   @   s   e Zd Zedd ZdS )	MT5Loaderc                 C   r   )NMT5ForConditionalGenerationr!   rn   r!   r!   r"   rH   F  ro   zMT5Loader.architecture_nameN)rA   rg   rh   r   rH   r!   r!   r!   r"   r[  D  s    r[  BloomConfigc                       P   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	dd Z
  ZS )BloomLoaderc                 C   r   )NBloomForCausalLMr!   rn   r!   r!   r"   rH   M  ro   zBloomLoader.architecture_namec              	   C   sL   t jj|jj|jjdtjjdddd}| 	|j
|j | |j
j|j |S )NT)r   r   r   alibialibi_use_positive_positions)r	   r=  r   rQ   rU  rV  r   
ActivationGELUTanhr   r   rW  r   r   r   r  r!   r!   r"   rs   Q  s   
zBloomLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   `  r]  zBloomLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   i  rH  zBloomLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   l  r$  zBloomLoader.set_configc                 C   s   d|_ | |j|j | |j|j | |j|j t	|j
|jD ]E\}}| |jj|j | |jjd |jj|jj | |jjd |jj | |jj|j | |jj|jj | |jj|jj q"d S r_  )r
  r   r  word_embeddingsr   r   word_embeddings_layernormr   rc  r   r   rd  r   r  set_qkv_linearr   r  r  r   r  r   r  r   rj  r  r   r  rl  r!   r!   r"   r   q  s,   


zBloomLoader.set_decoderc                 C   st   |j }||dd|jd }|dd}|d|jd }|j}||dd}|dd}|d}||_ ||_d S )Nr   r   r   r~   )r   r  r  r   r   )r4   rY   r   r  r   r   r!   r!   r"   rg    s   

zBloomLoader.set_qkv_linear)rA   rg   rh   r   rH   rs   ru   rv   rt   r   rg  r  r!   r!   r   r"   r_  K  s    
	r_  	MPTConfigc                       r^  )	MPTLoaderc                 C   r   NAutoModelForCausalLMr!   rn   r!   r!   r"   rH     ro   zMPTLoader.architecture_namec                 C   s6   t jj|jj|jjdtjjdd}| 	|j
|j |S )NT)r   r   ra  )r	   r=  r   rQ   n_layersn_headsr   rc  GELUr   r   rW  r  r!   r!   r"   rs     s   zMPTLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru     r]  zMPTLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zMPTLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt     r$  zMPTLoader.set_configc                 C   s   |  |j|j | |j|j d|_|jj|j_t	|j
|jD ]B\}}| |jj|j | |jjd |jj | |jjd |jj | |jj|j | |jj|jj | |jj|jj q d S r_  )r   r  ra  r   r   norm_fr
  r   r   r   r   blocksr   norm_1r   r   rf  Wqkvr  r   norm_2r   up_projr   	down_projrl  r!   r!   r"   r     s   zMPTLoader.set_decoderc                 C   s   |j |_t|j|_d S r   )r   r   r;   
zeros_liker   r   r!   r!   r"   r     s   zMPTLoader.set_layer_norm)rA   rg   rh   r   rH   rs   ru   rv   rt   r   r   r  r!   r!   r   r"   ri    s    
	ri  GemmaConfigc                       r^  )GemmaLoaderc                 C   r   )NGemmaForCausalLMr!   rn   r!   r!   r"   rH     ro   zGemmaLoader.architecture_namec                 C   s   |j j}|j j}t|j d|}||krd }t|j dd}tjj|||dkr)tjj	ntjj
dddddt|j dd	||j jd
}| |j|j | |jj|j |j jd |jj_|S )Nnum_key_value_headshidden_activationr   r   Tr   Fr   '  )	r   r   r9  r;  rx  ry  rotary_basenum_heads_kvr        ?rQ   r>  r?  rG   r	   r=  r   r   rc  rn  rd  r  r   r   rV   r   r   r   rB  r  multiply_by_sqrt_depthr4   rV   r  r  r~  activation_configrY   r!   r!   r"   rs     s6   zGemmaLoader.get_model_specc                    ^   t  ||}|jjt| }t|D ]	}|d|  q|jjt|k r-|d |jj }|S rY  rZ  r[  r   r!   r"   ru        zGemmaLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zGemmaLoader.set_vocabularyc                 C   &   |j |_ |j|_|j|_|jj|_d S r   r   r   r   rQ   rms_norm_epslayer_norm_epsilonr   r!   r!   r"   rt        zGemmaLoader.set_configc                 C      |j |_d|_d S r  r   r   layer_norm_use_residualrY  r!   r!   r"   r        
zGemmaLoader.set_layer_normc           	      C   s  d|_ d|_| |j|j | |j|j t|j	|j
D ]j\}}| |jj|j | |jj|j |jjj}|jjj}|jjj}|jjj}t|||g|jjd _||jjd _| |jj|jj | |jj|jj | |jj|jj t |d t |d t!"  qd S NTFr   r~   r   rj  )#r
  r  r   r  r  r   r   normr   r   r   r   r  r   r  r   r   r   r   r   o_projr;   r}  r   r   r   rj  	gate_projrI  rt  r   ru  delattrgccollect	r4   rY   r   r   r   wqwkwvrN  r!   r!   r"   r     s0   








zGemmaLoader.set_decoderrA   rg   rh   r   rH   rs   ru   rv   rt   r   r   r  r!   r!   r   r"   rx    s    
#rx  Gemma2Configc                       r^  )Gemma2Loaderc                 C   r   )NGemma2ForCausalLMr!   rn   r!   r!   r"   rH   3  ro   zGemma2Loader.architecture_namec                 C   s   |j j}|j j}t|j d|}||krd }t|j dd}tjj|||dkr)tjj	ntjj
dddddt|j dd	||j jdd
}| |j|j | |jj|j |j jd |jj_|S )Nrz  r{  r   r   Tr   Fr   r|  )
r   r   r9  r;  rx  ry  r}  r~  r  pre_post_layer_normr  r  r  r!   r!   r"   rs   7  s8   zGemma2Loader.get_model_specc                    r  rY  rZ  r[  r   r!   r"   ru   [  r  zGemma2Loader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   f  rH  zGemma2Loader.set_vocabularyc                 C   r  r   r  r   r!   r!   r"   rt   i  r  zGemma2Loader.set_configc                 C   r  r  r  rY  r!   r!   r"   r   o  r  zGemma2Loader.set_layer_normc           	      C   s0  d|_ d|_| |j|j | |j|j t|j	|j
D ]x\}}| |j|j | |j|j | |j|j | |j|j |jjj}|jjj}|jjj}|jjj}t|||g|jjd _||jjd _| |jj|j j! | |jj"|j j# | |jj$|j j% t&|d t&|d t'(  qd S r  ))r
  r  r   r  r  r   r   r  r   r   r   r  r  r  r  pre_feedforward_layer_normpre_feedforward_layernormpost_feedforward_layer_normpost_feedforward_layernormr   r   r   r   r   r  r;   r}  r   r   r   r   r   rj  r  rI  rt  r   ru  r  r  r  r  r!   r!   r"   r   s  s8   






zGemma2Loader.set_decoderr  r!   r!   r   r"   r  1  s    
$r  LlamaConfigc                       X   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	e
jjfddZ  ZS )LlamaLoaderc                 C   r   )NLlamaForCausalLMr!   rn   r!   r!   r"   rH     ro   zLlamaLoader.architecture_namec                 C   sB  |j j}|j j}t|j d|}||krd }| |j d\}}}t|j dd }|rNd }	|jdkr5t|j}	|	d u rGt	d|jd
t f |j}
|j}ntjj}	d }
d }tjj||tjjddddd	|||||	|
|d
}| |j|j|	 | |jj|j t|j dd }|tjjkr|jjD ]}|d |j _!|d |j _"q|S )Nrz  r|  quantization_configawqkQuantization type '%s' is not yet implemented. The following Quantization types are currently supported: %sr6   Tr   Fr   r   r9  r;  rx  ry  r   r   r}  r~  r   quant_group_size
quant_bitsr   low_freq_factorhigh_freq_factor)#rQ   r>  r?  rG   r   quant_method_SUPPORTED_QUANTIZATIONrB   versionrq   rD   rF   
group_sizebitsr   r   r   r	   r=  r   rc  SWISHr   r   rV   r   r   r   r   RotaryScalingTypeLlama3r   r   rotary_low_freq_factorrotary_high_freq_factor)r4   rV   r  r  r~  r   r   r   r  r   r  r  rY   r   r   r!   r!   r"   rs     sn   


zLlamaLoader.get_model_specc                    r  rY  rZ  r[  r   r!   r"   ru     r  zLlamaLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zLlamaLoader.set_vocabularyc                 C   s4   |j |_ |j|_|jd ur|jnd|_|jj|_d S N r  r   r!   r!   r"   rt     s
   zLlamaLoader.set_configc                 C   rW  r   rX  rY  r!   r!   r"   r     r   zLlamaLoader.set_layer_normc                 C     d|_ | |j|j | |j|j t|j|j	D ]\}}| |j
j|j | |jj|j dd tdD }| j|d |jj|d | j|d |jj|d | j|d |jj|d |tjjkrnt|j
jd | n|tjjkrvdnd}t|j
jd || | j|j
jd |jj|d | j|jj|jj|d | j|jj |jj!|d | j|jj"|jj#|d t$|d	 t$|d
 t%&  qd S )NFc                 S   r   r!   r   r   r!   r!   r"   r|     r   z+LlamaLoader.set_decoder.<locals>.<listcomp>r   r   r   r~   r   r   rj  'r
  r   r  r  r   r   r  r   r   r   r   r  r   r  r   r   r   r   r   r   r   r   r   r   r   r   AWQ_GEMMfuse_linear_prequantr  r   rj  r  rI  rt  r   ru  r  r  r  r4   rY   r   r   r   r   r  cc_dimr!   r!   r"   r     sX   





zLlamaLoader.set_decoderrA   rg   rh   r   rH   rs   ru   rv   rt   r   r   r   r   r   r  r!   r!   r   r"   r    s    
Br  Gemma3TextConfigGemma3Configc                       r  )Gemma3Loaderc                 C   r   )NGemma3ForCausalLMr!   rn   r!   r!   r"   rH   1  ro   zGemma3Loader.architecture_namec                 C   s  |j j}|j j}t|j d|}||krd }|j j}t|j dd}t|j dd}t|j dd}t|j dd	}	t|j d
d }
t|j dd }|rb|jdkrPt|j}|d u r[t	d|j |j
}|j}ntjj}d }d }tjj||f|dkrxtjjntjjddd|d||||	d|||dd}|
| _t|
D ];\}}|jj| }|dkrtd||j_tdd|j_q|dkrtd||j_td|	|j_q| |j|j | | !|jj"|j# |S )Nrz  r{  r   r   @B rope_local_base_freqr|  sliding_windowi   layer_typesr  r  z.Quantization type '%s' is not yet implemented.r   TF)r   r   r9  r;  rx  ry  r}  r~  r  r  r  r   r  r  qk_normfull_attentionfloat32r  r   sliding_attention)$rQ   r>  r?  rG   r  r  r  rB   r  rq   r  r  r   r   r   r	   r=  r   rc  rn  rd  _layer_typesrC  r   r   r  r7   r   r   r}  r  r   rV   r   r   r   )r4   rV   r  r  r~  r  r  r   r  r  r  r  r   r  r  rY   r)  
layer_typer   r!   r!   r"   rs   5  s   


zGemma3Loader.get_model_specc                    r  rY  rZ  r[  r   r!   r"   ru     r  zGemma3Loader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zGemma3Loader.set_vocabularyc                 C   sF   |j |_ |j|_t|drt|jtr|j rd|_d S |j|_d S )Nchat_templatez<end_of_turn>)r   r   r   r   r  rj   stripr   r   r!   r!   r"   rt     s   

zGemma3Loader.set_configc                 C   r  r  r  rY  r!   r!   r"   r     r  zGemma3Loader.set_layer_normc                 C   s  d|_ d|_| |j|j | |j|j t|j	|j
D ]\}}| |j|j | |j|j | |j|j | |j|j | |jj|jj | |jj|jj dd tdD }| j|d |jj|d | j|d |jj|d | j|d	 |jj|d |tjjkrt |jj!d | n|tjj"krdnd}t#|jj!d || | j|jj!d |jj$|d | j|j%j&|j'j(|d | j|j%j)|j'j*|d | j|j%j+|j'j,|d t-|d
 t-|d t./  qd S )NTFc                 S   r   r!   r   r   r!   r!   r"   r|     r   z,Gemma3Loader.set_decoder.<locals>.<listcomp>r   r   r  r~   r   r   rj  )0r
  r  r   r  r  r   r   r  r   r   r   r  r  r  r  r  r  r  r  r   q_normr   k_normr   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r   rj  r  rI  rt  r   ru  r  r  r  r  r!   r!   r"   r     sn   



zGemma3Loader.set_decoderr  r!   r!   r   r"   r  .  s    
Ur  MistralConfigc                       r  )MistralLoaderc                 C   r   )NMistralForCausalLMr!   rn   r!   r!   r"   rH     ro   zMistralLoader.architecture_namec                 C   s  |j j}|j j}t|j d|}||krd }t|j dd}| |j d\}}}t|j dd }	|	rS|	jdkr:t|	j}
|
d u rLt	d|	jd
t f |	j}|	j}ntjj}
d }d }tjj||ftjjd	d	d	dd
||||||
|||j jd}| j|j|j|
d | |jj|j |S )Nrz  r  r   r|  r  r  r  r6   TF)r   r   r9  r;  rx  ry  r   r   r}  r~  r  r   r  r  r  r  )rQ   r>  r?  rG   r   r  r  rB   r  rq   rD   rF   r  r  r   r   r   r	   r=  r   rc  r  r  r   r   rV   r   r   r   )r4   rV   r  r  r~  r  r   r   r   r  r   r  r  rY   r!   r!   r"   rs     sb   

zMistralLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   0  r]  zMistralLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   9  rH  zMistralLoader.set_vocabularyc                 C   r  r   r  r   r!   r!   r"   rt   <  r  zMistralLoader.set_configc                 C   rW  r   rX  rY  r!   r!   r"   r   B  r   zMistralLoader.set_layer_normc                 C   r  )NFc                 S   r   r!   r   r   r!   r!   r"   r|   Q  r   z-MistralLoader.set_decoder.<locals>.<listcomp>r   r   r  r~   r   r   rj  r  r  r!   r!   r"   r   E  sX   





zMistralLoader.set_decoderr  r!   r!   r   r"   r    s    
:	r  Qwen2Configc                       r  )Qwen2Loaderc                 C   r   )NQwen2ForCausalLMr!   rn   r!   r!   r"   rH   z  ro   zQwen2Loader.architecture_namec                 C   s  |j j}|j j}t|j d|}||krd }| |j d\}}}t|j dd }|rNd }	|jdkr5t|j}	|	d u rGt	d|jd
t f |j}
|j}ntjj}	d }
d }tjj||tjjddddd	|||||	|
|d
}| |j|j|	 | |jj|j |S )Nrz  r|  r  r  r  r6   Tr   Fr  )rQ   r>  r?  rG   r   r  r  rB   r  rq   rD   rF   r  r  r   r   r   r	   r=  r   rc  r  r   r   rV   r   r   r   )r4   rV   r  r  r~  r   r   r   r  r   r  r  rY   r!   r!   r"   rs   ~  s\   

zQwen2Loader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru     s
   zQwen2Loader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zQwen2Loader.set_vocabularyc                 C   D   |j d ur|j n|j|_ |j|_|jd ur|jnd|_|jj|_d S r  r   r8  r   r   rQ   r  r  r   r!   r!   r"   rt        
zQwen2Loader.set_configc                 C   rW  r   rX  rY  r!   r!   r"   r     r   zQwen2Loader.set_layer_normc                 C   r  )NFc                 S   r   r!   r   r   r!   r!   r"   r|     r   z+Qwen2Loader.set_decoder.<locals>.<listcomp>r   r   r  r~   r   r   rj  r  r  r!   r!   r"   r     sX   





zQwen2Loader.set_decoderr  r!   r!   r   r"   r  x  s    
8r  Qwen3Configc                       r  )Qwen3Loaderc                 C   r   )NQwen3ForCausalLMr!   rn   r!   r!   r"   rH   	  ro   zQwen3Loader.architecture_namec                 C   s$  |j j}|j j}t|j d|}t|j d|j j| }||kr d }| |j d\}}}t|j dd }	|	rYd }
|	jdkr@t|	j	}
|
d u rRt
d|	jdt f |	j}|	j}ntjj}
d }d }tjj||ftjjddd|j jd	|||||d|
||d
}| |j|j|
 | |jj|j |S )Nrz  r  r  r  r  r  r6   TF)r   r   r9  r;  rx  ry  r   r   r}  r~  r  r  r   r  r  )rQ   r>  r?  rG   rB  r   r  r  rB   r  rq   rD   rF   r  r  r   r   r   r	   r=  r   rc  r  r  r   r   rV   r   r   r   )r4   rV   r  r  r~  r  r   r   r   r  r   r  r  rY   r!   r!   r"   rs   	  sh   

zQwen3Loader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   G	  s
   zQwen3Loader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   N	  rH  zQwen3Loader.set_vocabularyc                 C   r  r  r  r   r!   r!   r"   rt   Q	  r  zQwen3Loader.set_configc                 C   rW  r   rX  rY  r!   r!   r"   r   ]	  r   zQwen3Loader.set_layer_normc           	      C   s  d|_ | |j|j | |j|j tt|j	|j
D ]\}\}}| |jj|j | |jj|j | |jj|jj | |jj|jj dd tdD }| j|d |jj|d | j|d |jj|d | j|d |jj|d |tjjkrt|jjd | n|tjjkrdnd}t|jjd || | j|jjd |jj|d | j|jj |j!j"|d | j|jj#|j!j$|d | j|jj%|j!j&|d t'|d	 t'|d
 t()  qd S )NFc                 S   r   r!   r   r   r!   r!   r"   r|   t	  r   z+Qwen3Loader.set_decoder.<locals>.<listcomp>r   r   r  r~   r   r   rj  )*r
  r   r  r  r   r   r  rC  r   r   r   r   r  r   r  r  r   r  r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   rj  r  rI  rt  r   ru  r  r  r  )	r4   rY   r   r   	layer_idxr   r   r  r  r!   r!   r"   r   `	  sd   





zQwen3Loader.set_decoderr  r!   r!   r   r"   r  	  s    
<r  MixFormerSequentialConfigc                       r  )MixFormerSequentialLoaderc                 C   r   rj  r!   rn   r!   r!   r"   rH   	  ro   z+MixFormerSequentialLoader.architecture_namec              
   C   s\   t jj|jj|jjdt|jj |jjdddd}| 	|j
|j | |j
j|jd j |S )NTFr  r  r   r   rx  ry  rz  r{  r   )r	   r=  r   rQ   rU  rV  r   r   rx  r   r   r   r   r   r   r  r!   r!   r"   rs   	  s   
z(MixFormerSequentialLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   	  r]  z(MixFormerSequentialLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   	  rH  z(MixFormerSequentialLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   	  r$  z$MixFormerSequentialLoader.set_configc                 C   s   d|_ | |j|d j | |j|d j t|j|dd D ]8\}}| |j	|j | 
|jjd |jj | 
|jjd |jj | 
|jj|jj | 
|jj|jj q!d S )NFr   r   r~   )r
  r   r  ra  r   r   lnr   r   r{  r   r   r   mixerrr  r  r   r   rj  r   r   r   rl  r!   r!   r"   r   	  s   z%MixFormerSequentialLoader.set_decoderr  r!   r!   r   r"   r  	  s    
	r  	PhiConfigc                       r  )	PhiLoaderc                 C   r   rj  r!   rn   r!   r!   r"   rH   	  ro   zPhiLoader.architecture_namec              
   C   sl   t jj|jj|jjdt|jj |jjdddd}| 	|j
|j | |j
j|jj | |j
j|jj |S )NTFr  )r	   r=  r   rQ   rU  rV  r   r   rx  r   r   rW  r   r   r   r   r   r   r  r  r!   r!   r"   rs   	  s   
zPhiLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   	  r]  zPhiLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   	  rH  zPhiLoader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   	  r$  zPhiLoader.set_configc                 C   s   d|_ | |j|jj t|j|jD ]8\}}| |j	|j
 | |jjd |jj | |jjd |jj | |jj|jj | |jj|jj qd S r_  )r
  r   r  embdra  r   r   rd  r   r{  r  r   r   r   r  rr  r  r   r   rj  r   r   r   rl  r!   r!   r"   r   	  s   zPhiLoader.set_decoderr  r!   r!   r   r"   r  	  s    
	r  
Phi3Configc                       s`   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	dd Z
ejjfddZ  ZS )
Phi3Loaderc                 C   r   rj  r!   rn   r!   r!   r"   rH   
  ro   zPhi3Loader.architecture_namec                 C   sx  |j j}|j j}t|j d|}||krd }t|j dd}t|j dd}t|j dd }|rMt|d }|dd}	|d u rLtd	|d d
t f nd }d}	t|j dd }
|
rd }|
j	dkrgt
|
j}|d u rytd|
j	d
t
 f |
j}|
j}ntjj}d }d }tjj||ftjjddddd||	t|j dd||||||d}| |j|j| | |jj|j |S )Nrz   original_max_position_embeddingsr   max_position_embeddingsr   r   r   r~   r   r6   r  r  r  TFr   r|  )r   r   r9  r;  rx  ry  r   r   r}  r  r  r~  r   r  r  )rQ   r>  r?  rG   r   rB   rq   rD   rF   r  r  r  r  r  r   r   r   r	   r=  r   rc  r  r   r   rV   r   r   r   )r4   rV   r  r  r~  r  r  r   r   r   r  r   r  r  rY   r!   r!   r"   rs   

  s~   
zPhi3Loader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   S
  r]  zPhi3Loader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   \
  rH  zPhi3Loader.set_vocabularyc                 C   rI  r   rJ  r   r!   r!   r"   rt   _
  r$  zPhi3Loader.set_configc                 C   rW  r   rX  rY  r!   r!   r"   r   d
  r   zPhi3Loader.set_layer_normc                 C   s(   t j|t jd|_t j|t jd|_d S )N)r7   )r;   tensorr  rotary_scaling_long_factorrotary_scaling_short_factor)r4   rY   r  r  r!   r!   r"   set_rotary_embeddingsg
  s   z Phi3Loader.set_rotary_embeddingsc                 C   s  d|_ | |j|j | |j|j t|j|j	D ]\}}| |j
j|j | |jj|j | j|j
jd |jj|d | j|j
jd |jj|d |jjjd urh|jjjd urh| |j
|jjj|jjj |tjjkr|jjjjddd\}}||jj_||jj_nB|jjj jddd\}}	|jjj!jddd\}
}|jjj"jddd\}}||jj_|
|jj_#||jj_$|	|jj_||jj_#||jj_$| j|jj%|jj&|d t'|d t'|d t()  qd S )	NFr   r  r~   r   r  r   rj  )*r
  r   r  r  r   r   r  r   r   r   r   r  r   r  r   r   r   r  r  
rotary_emblong_factorshort_factorr  r   r   r   rj  gate_up_projr   r  r   rI  r   r   r   r   r   r   ru  r  r  r  )r4   rY   r   r   r   r   r  rt  gate_qweight
up_qweightgate_scales	up_scalesgate_qzeros	up_qzerosr!   r!   r"   r   q
  sb   















zPhi3Loader.set_decoder)rA   rg   rh   r   rH   rs   ru   rv   rt   r   r  r   r   r   r   r  r!   r!   r   r"   r  
  s    
I	
r  RWConfigc                       sZ   e Zd Zedd Zdd Zdd Z fddZd	d
 Zdd Z	dd Z
dddZ  ZS )RWLoaderc                 C   r   rj  r!   rn   r!   r!   r"   rH   
  ro   zRWLoader.architecture_namec                 C   s.   |j j| _|j j| _t|j dd | _d| _d S )N	n_head_kvnum_kv)rQ   rU  _num_layersrV  
_num_headsrG   _num_heads_kv_num_kv_attrrr   r!   r!   r"   get_falcon_spec
     


zRWLoader.get_falcon_specc                 C   s   |  | t|jddrd}n| j}tjj| j| jdt	j
j|jjdd|jjr(dnd d|jj|dk|d}| |j|j | |jj|j |S )Nmulti_queryFr~   Tr   )
r   r   ra  rb  scale_alibirx  ry  rz  r{  r~  )r  rG   rQ   r  r	   r=  r   r   r  r   rc  rn  ra  rotaryparallel_attnr   r   rW  r   r   r   )r4   rV   r~  rY   r!   r!   r"   rs   
  s*   
zRWLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru   
  r]  zRWLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv   
  rH  zRWLoader.set_vocabularyc                 C   s   |j |_|j |_ |j |_d S r   )r   r   r   r   r!   r!   r"   rt   
  r$  zRWLoader.set_configc                 C   sN  d|_ | |j|j | |j|j t|j|j	D ]\}}t
|dr4| |j|j | |j|j n t
|drB| |j|j n| |jj|j | |jj|j t|j| j}|dkrl| |jjd |jj n| |jjd |jj|jj||jjk r|nd  | |jjd |jj | |jj|jj | |jj|jj  qd S )NFln_attnr{  r~   r   )!r
  r   r  re  r   r   rc  r   r   rd  r   r  r
  r  ln_mlpr{  r  r   r   r  rG   r  r   r   r  rg  r  r  r   rj  r  r   r  )r4   rY   r   r   r   r  r!   r!   r"   r   
  sB   





zRWLoader.set_decoderNc                 C   s~  |j }|d u r"||dd|jd }|dd}|d|jd }nC|jd ||d   }|d|| d ||jd }|j|| ddgdd\}}}	t||| d||| d|	|| dg}||_ |jd ur|j}
|d u r|
|dd}
|
dd}
|
d}
n1|
d|| d |}
|
j|| ddgdd\}}}	t||| ||| |	|| g}
|
|_d S d S )Nr   r   r   r~   r   r  )r   r  r  r   splitr;   r}  r   )r4   rY   r   r  r  r   r  rR  rS  rT  r   r!   r!   r"   rg    sB   

zRWLoader.set_qkv_linearr   )rA   rg   rh   r   rH   r  rs   ru   rv   rt   r   rg  r  r!   r!   r   r"   r  
  s    
	(r  FalconConfigc                   @   s   e Zd Zdd ZdS )FalconLoaderc                 C   s.   |j j| _|j j| _t|j dd | _d| _d S )Nnum_kv_heads)rQ   r>  r   r?  r  rG   r  r  rr   r!   r!   r"   r  >  r  zFalconLoader.get_falcon_specN)rA   rg   rh   r  r!   r!   r!   r"   r  <  s    r  DistilBertConfigc                   @   s0   e Zd Zedd Zdd Zdd Zdd Zd	S )
DistilBertLoaderc                 C   r   )NDistilBertModelr!   rn   r!   r!   r"   rH   G  ro   z"DistilBertLoader.architecture_namec                 C   s\  t j|jj|jjdt|jj dd}t |}d|j_	| 
|jjd |jj | |jj|jj | |jj|jj t|jj|jjD ]g\}}dd tdD }| |d |jj | |d |jj | |d	 |jj t|jjd | | |jjd |jj  | |jj!|j" | |j#j$|j#j% | |j#j&|j#j' | |j#j!|j( qD|S )
NFTr   r   c                 S   r   r!   r   r   r!   r!   r"   r|   d  r   z3DistilBertLoader.get_model_spec.<locals>.<listcomp>r   r~   r   ))r	   TransformerEncoderSpecrQ   rl  rm  r   r   TransformerEncoderModelSpecr   r
  r   r  re  r   r  position_embeddingsr   r   	LayerNormr   r   rW  r   r   r   q_link_linv_linr   r   r   r   out_linr   sa_layer_normr   r   lin1r   lin2output_layer_normr4   rV   encoder_specrY   r   r   r  r!   r!   r"   rs   K  sH   

zDistilBertLoader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv   w  rH  zDistilBertLoader.set_vocabularyc                 C   s   |j |_ d|_d S )Ng-q=)r   r  r   r!   r!   r"   rt   z  r  zDistilBertLoader.set_configN)rA   rg   rh   r   rH   rs   rv   rt   r!   r!   r!   r"   r  E  s    
,r  
BertConfigc                       s@   e Zd Zedd Zdd Z fddZdd Zd	d
 Z  Z	S )
BertLoaderc                 C   r   )N	BertModelr!   rn   r!   r!   r"   rH     ro   zBertLoader.architecture_namec              	   C   s  |j jdksJ tj|j j|j jdt|j j ddtj	j
d}tj|dtjjd}d|j_| |jjd |jj | |jjd |jj | |jj|jj | |jj|jj | |j|jj t|jj|jjD ]n\}}d	d
 t dD }| |d |j!j"j# | |d |j!j"j$ | |d |j!j"j% t&'|j(j)d | | |j(j)d |j!j*j | |j(j+|j!j*j | |j,j-|j.j | |j,j/|j*j | |j,j+|j*j qj|S )NabsoluteFTr   r   r   r   num_source_embeddingsembeddings_mergepooling_layerpooling_activationr   r~   c                 S   r   r!   r   r   r!   r!   r"   r|     r   z-BertLoader.get_model_spec.<locals>.<listcomp>r   )0rQ   position_embedding_typer	   r  r>  r?  r   r  r   EmbeddingsMergeADDr  rc  Tanhr   r
  r   r  re  token_type_embeddingsr   r  r  r   r   r  r   pooler_densepoolerr  r   r   r   r   r4   queryr   valuer   r   r   r   outputr   r   r   intermediater   r  r!   r!   r"   rs     sZ   

zBertLoader.get_model_specc                    rX  rY  rZ  r[  r   r!   r"   ru     r]  zBertLoader.get_vocabularyc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zBertLoader.set_vocabularyc                 C      |j |_ |jj|_d S r   r   rQ   layer_norm_epsr  r   r!   r!   r"   rt        zBertLoader.set_config)
rA   rg   rh   r   rH   rs   ru   rv   rt   r  r!   r!   r   r"   r"    s    
8	r"  XLMRobertaConfigc                   @   ro  )XLMRobertaLoaderc                 C   r   )N#XLMRobertaForSequenceClassificationr!   rn   r!   r!   r"   rH     ro   z"XLMRobertaLoader.architecture_namec              	   C   s  |j jdksJ tj|j j|j jdt|j j ddtj	j
d}|jjd u r'd}nd}tj||tjjd}d|j_| |jjd |jjj | |jjd |jjj | |jj|jjj | |jj|jjj |rs| |j|jjj t|jj |jjj D ]n\}}d	d
 t!dD }| |d |j"j#j$ | |d |j"j#j% | |d |j"j#j& t'(|j)j*d | | |j)j*d |j"j+j | |j)j,|j"j+j | |j-j.|j/j | |j-j0|j+j | |j-j,|j+j q}|S )Nr$  FTr   r%  r(  r   r~   c                 S   r   r!   r   r   r!   r!   r"   r|     r   z3XLMRobertaLoader.get_model_spec.<locals>.<listcomp>r   )1rQ   r+  r	   r  r>  r?  r   r  r   r,  r-  robertar1  r  rc  r.  r   r
  r   r  re  r/  r   r  r  r   r   r  r   r0  r  r   r   r   r   r4   r2  r   r3  r   r   r   r   r4  r   r   r   r5  r   r4   rV   r   r)  rY   r   r   r  r!   r!   r"   rs     sd   

zXLMRobertaLoader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zXLMRobertaLoader.set_vocabularyc                 C   r6  r   r7  r   r!   r!   r"   rt     r9  zXLMRobertaLoader.set_configc                 C   8   |j |_t|dd}|dkr|j|d d  |_d S d S Npadding_idxr   r~   r   r   r!   r!   r"   r     
   z'XLMRobertaLoader.set_position_encodingsN	rA   rg   rh   r   rH   rs   rv   rt   r   r!   r!   r!   r"   r;        
>r;  RobertaConfigc                   @   ro  )RobertaLoaderc                 C   r   )NRobertaModelr!   rn   r!   r!   r"   rH   "  ro   zRobertaLoader.architecture_namec              	   C     |j jdksJ tj|j j|j jdt|j j ddtj	j
d}|jd u r&d}nd}tj||tjjd}d|j_| |jjd |jj | |jjd |jj | |jj|jj | |jj|jj |rm| |j|jj t|jj|jjD ]n\}}d	d
 t dD }| |d |j!j"j# | |d |j!j"j$ | |d |j!j"j% t&'|j(j)d | | |j(j)d |j!j*j | |j(j+|j!j*j | |j,j-|j.j | |j,j/|j*j | |j,j+|j*j qv|S )Nr$  FTr   r%  r(  r   r~   c                 S   r   r!   r   r   r!   r!   r"   r|   Q  r   z0RobertaLoader.get_model_spec.<locals>.<listcomp>r   0rQ   r+  r	   r  r>  r?  r   r  r   r,  r-  r1  r  rc  r.  r   r
  r   r  re  r/  r   r  r  r   r   r  r   r0  r  r   r   r   r   r4   r2  r   r3  r   r   r   r   r4  r   r   r   r5  r   r>  r!   r!   r"   rs   &  d   


zRobertaLoader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv   d  rH  zRobertaLoader.set_vocabularyc                 C   r6  r   r7  r   r!   r!   r"   rt   g  r9  zRobertaLoader.set_configc                 C   r?  r@  r   r   r!   r!   r"   r   k  rB  z$RobertaLoader.set_position_encodingsNrC  r!   r!   r!   r"   rF     rD  rF  CamembertConfigc                   @   ro  )CamembertLoaderc                 C   r   )NCamembertModelr!   rn   r!   r!   r"   rH   t  ro   z!CamembertLoader.architecture_namec              	   C   rH  )Nr$  FTr   r%  r(  r   r~   c                 S   r   r!   r   r   r!   r!   r"   r|     r   z2CamembertLoader.get_model_spec.<locals>.<listcomp>r   rI  r>  r!   r!   r"   rs   x  rJ  zCamembertLoader.get_model_specc                 C   rE  r   rF  r   r!   r!   r"   rv     rH  zCamembertLoader.set_vocabularyc                 C   r6  r   r7  r   r!   r!   r"   rt     r9  zCamembertLoader.set_configc                 C   r?  r@  r   r   r!   r!   r"   r     rB  z&CamembertLoader.set_position_encodingsNrC  r!   r!   r!   r"   rL  r  rD  rL  c               	   C   s   t jt jd} | jdddd | jddd | jd	d
dd | jddd | jdddd | jdddd t|  |  }t|j|j	|j
|jdv |j|j|jd}|| d S )N)formatter_classz--modelTzaName of the pretrained model to download, or path to a directory containing the pretrained model.)requiredhelpz--activation_scaleszPath to the pre-computed activation scales. Models may use them to rescale some weights to smooth the intermediate activations and improve the quantization accuracy. See https://github.com/mit-han-lab/smoothquant.)rP  z--copy_files+zWList of filenames to copy from the Hugging Face model to the converted model directory.)nargsrP  z
--revisionz<Revision of the model to download from the Hugging Face Hub.z--low_cpu_mem_usage
store_truezNEnable the flag low_cpu_mem_usage when loading the model with from_pretrained.)actionrP  z--trust_remote_codez*Allow converting models using custom code.)rJ   int8_float16)r'   r(   r)   r*   r+   r,   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr%   rV   r'   r(   quantizationr*   r+   r,   convert_from_args)parserargs	converterr!   r!   r"   main  sV   	
	ra  __main__))r~   r   )r   r   )r      r   r   r   r~   r   r   r   r   r   r  ))r   r   rd  rf  rg  rh  )r   rc  )rg  r     rc  r~   )rc  rc  )rc  rj  )re  )r  r   )r  r   ri  rk  )rc  r   )rc  r  )rc     ))rl  rl  rj  r   )rj  r   )rj  rM  )rM  r   )rM  rc  rM  rj  	   r   )rp  r  )rp  rM  )rp  
   )rq  r   )rq  r~   )rq  r   )rq  r   )rq  rl  )rq     rr  r   rr  r  )
)rc  r   )rc  rp  )rM  r   )rM  r  rn  )rM  rM  ro  )rp  rj  )rp  rp  )rq  rc  )rt  )   r~   )ru     )ru  ru     r  )   r   )ry  r  )ry  rp  )   rv  rz  ru  )   rj  )r|  rq  )r|  rx     r   )r~  r   )r~  rp  )r~  ru  )   rv  ))   rx  rw  )rx  rx  ry  r~   r}  )   r  )	)rp     rs  rt  )rr  rz  )   rj  )r  rr  )r  rz  )r  r   )r  rx  ))rq  rv  )r  rz  )ry  rr  )ry  rv  )ry  r  )rz  rx  )rz  ry  )r|  r  )r|  rr  )r|  r  r  rr  )r  r   )r  r   )r  r   )r  rp  )r  rv  )r  rc  )r  rj  )r  r  )   rc  )   r~   )r  rv  )   rx  )
rm  )rq  rz  )rv  r|  )r  rv  r  r{  r  )r  r  )   r~   )r  rl  )zopenai/whisper-tiny.enzopenai/whisper-tinyzopenai/whisper-base.enzopenai/whisper-basezopenai/whisper-small.enzopenai/whisper-smallzopenai/whisper-medium.enzopenai/whisper-mediumzopenai/whisper-largezopenai/whisper-large-v2zopenai/whisper-large-v3T5GemmaConfigc                   @   sX   e Zd Zedd Zdd Zdd Zdd Zd	d
 Ze	j
jfddZe	j
jfddZdS )T5GemmaLoaderc                 C   r   )NT5GemmaForConditionalGenerationr!   rn   r!   r!   r"   rH     ro   zT5GemmaLoader.architecture_namec                 C   s   |j jd |_d S )Nr  )r   r  r   rY  r!   r!   r"   r     r^   zT5GemmaLoader.set_layer_normc                 C   s&  |j j}|j j}t|j dd}|j}t|d|}||krd }tj|j|jdt|j	 dd|j
dt|dd|d||j
d}|j}t|d|}	|	|krJd }	tj|j|jdt|j	 ddd|j
dt|dd|dd|	|j
d	}
t||
}| |j|jj| | |j|jj|tjj | |jj|jjj |S )
Nr  i   rz  TFr   r|  )r   r   r9  r;  rx  ry  r}  r  r  r~  r  )r   r   r9  r;  with_encoder_attentionrx  ry  r}  r  r   external_pre_post_encoder_layersr~  r  )rQ   r   r   rG   r?  r	   r  r>  r   r{  r  TransformerDecoderSpecr   r   rV   r   r   r   r   r   r   r  )r4   rV   encoder_configdecoder_configr  encoder_num_headsencoder_num_heads_kvr   decoder_num_headsdecoder_num_heads_kvr   rY   r!   r!   r"   rs     sr   

zT5GemmaLoader.get_model_specc                 C   r   r   r   r   r!   r!   r"   rv     r   zT5GemmaLoader.set_vocabularyc                 C   s\   |j |_ |j|_|j|_t|jdr|jjj|_nt|jdr%|jj|_nd|_|j |_d S )Nr   r  gư>)	r   r   r   r   rQ   r   r  r  r   r   r!   r!   r"   rt     s   zT5GemmaLoader.set_configc                 C   s  d|_ t|jtr|jd n|j}| ||j |jd |_| |j	|j
 |}tt|j|jD ]\}\}}	| |j|	j | |j|	j dd tdD }
| j|
d |	jj|d | j|
d |	jj|d | j|
d	 |	jj|d t|jjd |
 | j|jjd |	jj|d | |j|	j | |j |	j! | j|j"j#|	j$j%|d | j|j"j&|	j$j'|d | j|j"j(|	j$j)|d t*|	d
 t*|	d t+,  q1d S )NTr   r  c                 S   r   r!   r   r   r!   r!   r"   r|     r   z-T5GemmaLoader.set_encoder.<locals>.<listcomp>r   r  r~   r   r   rj  )-r
  r   r  r  r   r  rB  r  r   r   r  rC  r   r   r   r  pre_self_attn_layernormr  post_self_attn_layernormr   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r   r   rj  r  rI  rt  r   ru  r  r  r  )r4   rY   r   r  r   encoder_emb_specr   r)  r   r   qkv_split_layersr!   r!   r"   r     sb   



zT5GemmaLoader.set_encoderc           
      C   sD  d|_ d|_| |j|j |jd |j_| |j|j	 t
t|j|jD ]\}\}}| |j|j | |j|j dd tdD }| j|d |jj|d | j|d	 |jj|d | j|d
 |jj|d t|jjd | | j|jjd	 |jj|d | |j|j | |j|j  | j|j!jd |j"j|d dd td
D }	| j|	d |j"j|d | j|	d	 |j"j|d t|j!jd	 |	 | j|j!jd
 |j"j|d | |j#|j$ | |j%|j& | j|j'j(|j)j*|d | j|j'j+|j)j,|d | j|j'j-|j)j.|d t/|d t/|d t/|d t01  q&d S )NTFr  c                 S   r   r!   r   r   r!   r!   r"   r|   :  r   z-T5GemmaLoader.set_decoder.<locals>.<listcomp>r   r   r  r~   r   c                 S   r   r!   r   r   r!   r!   r"   r|   ^  r   r   
cross_attnrj  )2r
  r  r   r  r  rB  r  r   r   r  rC  r   r   r   r  r  r  r  r   r   r   r   r   r   r   r   r   r   r  )external_pre_encoder_attention_layer_normpre_cross_attn_layernorm*external_post_encoder_attention_layer_normpost_cross_attn_layernormr   r  r  r  r  r  r   r   rj  r  rI  rt  r   ru  r  r  r  )
r4   rY   r   r  r   r)  r   r   r  kv_split_layersr!   r!   r"   r   &  s   






zT5GemmaLoader.set_decoderN)rA   rg   rh   r   rH   r   rs   rv   rt   r   r   r   r   r   r!   r!   r!   r"   r    s    
D
Br  )Vr   rV  r  r  r`   typingr   r   numpyr  rc   r;   r=   ImportErrorctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   r   r	   r
   r   r   rc  rn  rd  GELUSigmoidRELUr  r   r  LinearSur  r   r   r  AWQ_GEMVr  r   r$   r%   ABCrl   r   r  r"  r0  r6  r:  rQ  rp  rt  r  r  r  r  r  r7  r[  r_  ri  rx  r  r  r  r  r  r  r  r  r  r  r  r  r"  r;  rF  rL  ra  rA   r  r  r!   r!   r!   r"   <module>   s   $
|Z 7!F2(8YOfNt|M7_g  A   33 , 9NQQQ
:

 