o
    is                    @   sX  d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ	zd dl
Z
d dlZd dlZW n	 ey7   Y nw d dlmZ d dlmZ d dlmZmZmZmZmZmZ ejjejjejjejjejjejjejjejjejjd	Zej j!ej j"ej j#dZ$ej%j&ej%j'dZ(i Z)d	d
 Z*G dd deZ+G dd de j,Z-e*dG dd de-Z.e*dG dd de.Z/e*dG dd de.Z0e*dG dd de.Z1e*dG dd de.Z2e*dG dd  d e.Z3e*d!G d"d# d#e-Z4e*d$G d%d& d&e-Z5e*d'G d(d) d)e-Z6e*d*G d+d, d,e-Z7e*d-G d.d/ d/e-Z8e*d0G d1d2 d2e.Z9e*d3G d4d5 d5e.Z:e*d6G d7d8 d8e-Z;e*d9G d:d; d;e;Z<e*d<G d=d> d>e-Z=e*d?G d@dA dAe-Z>e*dBG dCdD dDe-Z?e*dEG dFdG dGe-Z@e*dHG dIdJ dJe-ZAe*dKG dLdM dMe-ZBe*dNG dOdP dPe-ZCe*dQG dRdS dSe-ZDe*dTG dUdV dVe-ZEe*dWG dXdY dYe-ZFe*dZG d[d\ d\eFZGe*d]G d^d_ d_e-ZHe*d`G dadb dbe-ZIe*dcG ddde dee-ZJdfdg ZKeLdhkreK  g dig djg dkg dlg dmg dng dog dpg dqg drg dsdtZMdS )u    N)ListOptional)utils)	Converter)attention_speccommon_spec
model_spectransformer_specwav2vec2_specwhisper_spec)	gelu	gelu_fastgelu_newgelu_pythongelu_pytorch_tanh
quick_gelurelusiluswish)linearsullama3)gemmgemvc                    s    fdd}|S )z5Registers a model loader for this configuration name.c                    s   |  t  < | S N)_MODEL_LOADERS)clsconfig_name `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/ctranslate2/converters/transformers.py	decorator:   s   
z"register_loader.<locals>.decoratorr   )r   r!   r   r   r    register_loader7   s   r"   c                   @   st   e Zd ZdZ						ddedee deee  dedee d	ed
efddZdd Z	dd Z
dd Zdd ZdS )TransformersConverterz/Converts models from Hugging Face Transformers.NFmodel_name_or_pathactivation_scales
copy_filesload_as_float16revisionlow_cpu_mem_usagetrust_remote_codec                 C   s.   || _ || _|| _|| _|| _|| _|| _dS )a  Initializes the converter.

        Arguments:
          model_name_or_path: Name of the pretrained model to download, or path to the
            directory containing the pretrained model.
          activation_scales: Path to the pre-computed activation scales. Models may
            use them to rescale some weights to smooth the intermediate activations
            and improve the quantization accuracy. See
            https://github.com/mit-han-lab/smoothquant.
          copy_files: List of filenames to copy from the Hugging Face model to the
            converted model directory.
          load_as_float16: Load the model weights as float16. More precisely, the model
            will be loaded with ``from_pretrained(..., torch_dtype=torch.float16)``.
          revision: Revision of the model to download from the Hugging Face Hub.
          low_cpu_mem_usage: Enable the flag ``low_cpu_mem_usage`` when loading the model
            with ``from_pretrained``.
          trust_remote_code: Allow converting models using custom code.
        N)_model_name_or_path_activation_scales_copy_files_load_as_float16	_revision_low_cpu_mem_usage_trust_remote_code)selfr$   r%   r&   r'   r(   r)   r*   r   r   r    __init__D   s   
zTransformersConverter.__init__c              	   C   sh  t   tjj| j| jd}|jj}t	
|}|d u r+td|dtt	 f tt|j}tj}d| jr;t jnt|dd i}| jrJ| j|d< | jrR| j|d< | jrZ| j|d< | j|| jfi |}i }| jro| j|d< | j|| jfi |}	|||	}
| jrt j| jdd	}||
| | jr| jD ]
}|
| | q|
W  d    S 1 sw   Y  d S )
N)r*   z]No conversion is registered for the model configuration %s (supported configurations are: %s), torch_dtyper(   r)   r*   cpu)map_location)torchno_gradtransformers
AutoConfigfrom_pretrainedr+   r1   	__class____name__r   get
ValueErrorjoinsortedkeysgetattrarchitecture_nameAutoTokenizerr.   float16r/   r0   
load_modelload_tokenizerr,   loadsmooth_activationr-   register_fileget_model_file)r2   configr   loadermodel_classtokenizer_classkwargsmodeltokenizer_kwargs	tokenizerspecr%   filenamer   r   r    _loadh   sZ   








$zTransformersConverter._loadc                 K      |j |fi |S r   r<   )r2   rP   r$   rR   r   r   r    rH         z TransformersConverter.load_modelc                 K   rY   r   rZ   )r2   rQ   r$   rR   r   r   r    rI      r[   z$TransformersConverter.load_tokenizerc                 C   sz   t j| jrt j| j|}nz
tj| j|d}W n tjjy'   d }Y nw |d u s2t j	|s;t
d|| jf |S )N)repo_idrW   z"File %s does not exist in model %s)ospathisdirr+   rA   huggingface_hubhf_hub_downloadr   EntryNotFoundErrorisfiler@   )r2   rW   r^   r   r   r    rM      s    
z$TransformersConverter.get_model_file)NNFNFF)r>   
__module____qualname____doc__strr   r   boolr3   rX   rH   rI   rM   r   r   r   r    r#   A   s6    

$:r#   c                   @   sz   e Zd ZdZedd Zejdd Zdd Z	dd	 Z
d
d Zdd Zdd ZejjfddZdd Zdd Zdd ZdS )ModelLoaderzRBase class for loading Transformers models into a CTranslate2 model specification.c                 C      d S r   r   r2   r   r   r    rE         zModelLoader.architecture_namec                 C   s   t  r   NotImplementedErrorr2   rS   r   r   r    get_model_spec   s   zModelLoader.get_model_specc                 C   s6   |  |}| |j|| | ||}| || |S r   )rp   
set_configrN   get_vocabularyset_vocabulary)r2   rS   rU   rV   tokensr   r   r    __call__   s
   
zModelLoader.__call__c                 C   s"   dd t |  dd dD S )Nc                 S   s   g | ]\}}|qS r   r   ).0token_r   r   r    
<listcomp>   s    z.ModelLoader.get_vocabulary.<locals>.<listcomp>c                 S   s   | d S N   r   )itemr   r   r    <lambda>   s    z,ModelLoader.get_vocabulary.<locals>.<lambda>)key)rB   	get_vocabitemsr2   rS   rU   r   r   r    rr      s
   zModelLoader.get_vocabularyc                 C   rj   r   r   r2   rV   rt   r   r   r    rs         zModelLoader.set_vocabularyc                 C   rj   r   r   r2   rN   rS   rU   r   r   r    rq      r   zModelLoader.set_configc                 C   s   |j |_|j|_d S r   )weightgammabiasbetar2   rV   moduler   r   r    set_layer_norm      zModelLoader.set_layer_normc                 C   sd   |t jjkr|j|_n|j|_|j|_|j|_t	|t
jr%|jdd|_|jd ur0|j|_d S d S Nr   r{   )r   QuantizationCT2r   qweightscalesweight_scaleqzerosweight_zero
isinstancer:   Conv1D	transposer   )r2   rV   r   
quant_typer   r   r    
set_linear   s   

zModelLoader.set_linearc                 C   s   |j |_ d S r   )r   r   r   r   r    set_embeddings      zModelLoader.set_embeddingsc                 C   s4   |j |_t|dd}|dkr|j|d  |_d S d S )Noffsetr   r   	encodingsrD   r2   rV   r   r   r   r   r    set_position_encodings   s
   z"ModelLoader.set_position_encodingsc                 C   s   t d)Nz7No activation smoothing logic is defined for this modelrm   )r2   rV   r%   r   r   r    rK      s   zModelLoader.smooth_activationN)r>   rd   re   rf   propertyrE   abcabstractmethodrp   ru   rr   rs   rq   r   r   r   r   r   r   r   rK   r   r   r   r    ri      s    

	ri   
BartConfigc                       sb   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	dd Z
dddZdd Z  ZS )
BartLoaderc                 C      dS )NBartForConditionalGenerationr   rk   r   r   r    rE      rl   zBartLoader.architecture_namec              	   C   s   t jj|jj|jjf|jj|jjt|jj	 t
|jddd}| |j|jj | |j|jj | |jj|j t
|dd }|d urR|  dkrR| |jj_|S )Nnormalize_embeddingTpre_norm
activationlayernorm_embeddingfinal_logits_biasr   )r	   TransformerSpecfrom_configrN   encoder_layersdecoder_layersencoder_attention_headsnormalize_before_SUPPORTED_ACTIVATIONSactivation_functionrD   set_encoderencoderrS   set_decoderdecoderr   
projectionlm_headnonzeronumelsqueezer   )r2   rS   rV   r   r   r   r    rp     s   
zBartLoader.get_model_specc                    s2   t  ||}|jjt|k r|d |jj }|S r   )superrr   rN   
vocab_sizelenr2   rS   rU   rt   r=   r   r    rr     s   zBartLoader.get_vocabularyc                 C      | | || d S r   register_source_vocabularyregister_target_vocabularyr   r   r   r    rs        
zBartLoader.set_vocabularyc                 C   s,   |j |_ |j|_|j|_||jj|_d S r   )	bos_token	eos_token	unk_tokenconvert_ids_to_tokensrN   decoder_start_token_iddecoder_start_tokenr   r   r   r    rq   !  s   
zBartLoader.set_configc                 C   s   |  || t|j|jD ]2\}}| j|j|jdd | |jj|j	 | 
|jj|j | 
|jj|j | |jj|j qd S NTself_attention)set_common_layersziplayerlayersset_attentionr   	self_attnr   
layer_normself_attn_layer_normr   ffnlinear_0fc1linear_1fc2final_layer_norm)r2   rV   r   
layer_specr   r   r   r    r   )  s   zBartLoader.set_encoderc                 C   s   |  || t|j|jD ]J\}}| j|j|jdd | |jj|j	 t
|dr<| j|j|jdd | |jj|j | |jj|j | |jj|j | |jj|j qd S )NTr   encoder_attnF)r   r   r   r   r   r   r   r   r   r   hasattr	attentionr   encoder_attn_layer_normr   r   r   r   r   r   r   )r2   rV   r   r   r   r   r   r    r   ;  s2   
zBartLoader.set_decoderFc                 C   s   dd t dD }| |d |j | |d |j | |d |j |r0t|jd | nt|jd |d d  t|jd |dd   | |jd |j d S )Nc                 S      g | ]}t  qS r   r   
LinearSpecrv   rx   r   r   r    ry   Y      z,BartLoader.set_attention.<locals>.<listcomp>   r   r{      )	ranger   q_projk_projv_projr   fuse_linearr   out_projr2   rV   r   r   split_layersr   r   r    r   X  s   zBartLoader.set_attentionc                 C   sz   |j |_| |j|j | t|jtr|jd n|j|j	 t
|dr,| |j|j t
|dr;| |j|j d S d S )Nr   r   r   )embed_scalescale_embeddingsr   position_encodingsembed_positionsr   r   
embeddingslistembed_tokensr   r   r   r   r   r   r   r    r   f  s   

	
zBartLoader.set_common_layersF)r>   rd   re   r   rE   rp   rr   rs   rq   r   r   r   r   __classcell__r   r   r   r    r      s    

r   MarianConfigc                       sP   e Zd Zedd Z fddZdd Z fddZ fd	d
Zdd Z	  Z
S )MarianMTLoaderc                 C   r   )NMarianMTModelr   rk   r   r   r    rE   z  rl   z MarianMTLoader.architecture_namec                    s*   d|j _d|j _t |}| | |S NF)rN   r   r   r   rp   _remove_pad_weightsr2   rS   rV   r   r   r    rp   ~  s
   
zMarianMTLoader.get_model_specc                 C   s   |j |_ |j|_|j |_d S r   )r   r   r   r   r   r   r    rq     s   zMarianMTLoader.set_configc                    s   d|_ t || d S NT)start_from_zero_embeddingr   r   r2   rV   r   r   r   r    r     s   zMarianMTLoader.set_decoderc                    s&   t  ||}|d dkr|  |S )Nr   z<pad>)r   rr   popr   r   r   r    rr     s   zMarianMTLoader.get_vocabularyc                 C   s   |j jd |jj|jjg}|d jjd d }|D ]0}|jjd |d kr-|jd d |_t|tjrI|	 rI|j
jd |d krI|j
d d |_
qd S )Nr   r{   r   )r   r   r   r   r   shaper   r   r   has_biasr   )r2   rV   vocab_specsnew_vocab_size
vocab_specr   r   r    r    s    

z"MarianMTLoader._remove_pad_weights)r>   rd   re   r   rE   rp   rq   r   rr   r  r   r   r   r   r    r   x  s    
	r   M2M100Configc                       s<   e Zd Zedd Z fddZdd Z fddZ  ZS )	M2M100Loaderc                 C   r   )NM2M100ForConditionalGenerationr   rk   r   r   r    rE     rl   zM2M100Loader.architecture_namec                    s   d|j _d|j _t |S )NTF)rN   r   r   r   rp   ro   r   r   r    rp        zM2M100Loader.get_model_specc                 C   s   |j |jd  |_d S r   )weightsr   r   r   r   r   r    r     s   z#M2M100Loader.set_position_encodingsc                    s   t  ||}|d |jkr||j|  |jD ]}||vr%|| qt|d|j	j
t| }|dkrA|dd t|D 7 }|S )Nr   num_madeup_wordsr   c                 S   s   g | ]}d | qS )zmadeupword%dr   rv   ir   r   r    ry     r   z/M2M100Loader.get_vocabulary.<locals>.<listcomp>)r   rr   r   insertunk_token_idr  additional_special_tokensappendrD   rN   r   r   r   )r2   rS   rU   rt   rw   r  r   r   r    rr     s   

zM2M100Loader.get_vocabulary)	r>   rd   re   r   rE   rp   r   rr   r   r   r   r   r    r    s    
r  MBartConfigc                   @       e Zd Zedd Zdd ZdS )MBartLoaderc                 C   r   )NMBartForConditionalGenerationr   rk   r   r   r    rE     rl   zMBartLoader.architecture_namec                 C   s:   |j |_ |j|_|j|_|jjdv rd |_d S |j|_d S )N)MBartTokenizerN)r   r   r   rN   rQ   r   r   r   r   r    rq     s   
zMBartLoader.set_configNr>   rd   re   r   rE   rq   r   r   r   r    r        
r  PegasusConfigc                   @   r  )PegasusLoaderc                 C   r   )NPegasusForConditionalGenerationr   rk   r   r   r    rE     rl   zPegasusLoader.architecture_namec                 C   s$   |j |_|j|_|j|_|j |_d S r   )	pad_tokenr   r   r   r   r   r   r   r    rq     s   zPegasusLoader.set_configNr  r   r   r   r    r!    r  r!  	OPTConfigc                       s\   e Zd Zedd Zdd Zdd Zdd Zd	d
 Z fddZ	dd Z
 fddZ  ZS )	OPTLoaderc                 C   r   )NOPTForCausalLMr   rk   r   r   r    rE     rl   zOPTLoader.architecture_namec                 C   s^   t jj|jj|jj|jjt|jj |jj	|jj
kd}| |j|jj | |jj|j |S )N)r   r   project_in_out)r	   TransformerDecoderModelSpecr   rN   num_hidden_layersnum_attention_headsdo_layer_norm_beforer   r   word_embed_proj_dimhidden_sizer   r   rS   r   r   r   r  r   r   r    rp     s   
zOPTLoader.get_model_specc                 C   sb   t |jjD ](\}}d| }t|jj|jjd |d|   t|jj|jj	|d|   qd S )Nzmodel.decoder.layers.%dr   z%s.self_attn.q_projz%s.fc1)
	enumerater   r   r   rK   r   r   r   r   r   )r2   rV   r%   r  r   layer_scoper   r   r    rK     s   


zOPTLoader.smooth_activationc                 C      | | d S r   register_vocabularyr   r   r   r    rs        zOPTLoader.set_vocabularyc                 C      |j |_ |j|_|j|_d S r   r   r   r   r   r   r   r    rq     r  zOPTLoader.set_configc                    sd   t  || |jd ur| |j|j |jd ur!| |j|j |jd ur0| |j|j d S d S r   )r   r   
project_inr   project_outr   r   r   r  r   r   r    r     s   


zOPTLoader.set_decoderc                 C   s*   d|_ | |j|j | |j|j d S r  )r   r   r   r   r   r   r   r   r   r   r    r   &  s   zOPTLoader.set_common_layersc                    sZ   t  ||}d}t|d dkr+d|}||vr|| |d7 }t|d dks|S )Nr      zmadeupword{:04d}r{   )r   rr   r   formatr  )r2   rS   rU   rt   r  symbolr   r   r    rr   +  s   

zOPTLoader.get_vocabulary)r>   rd   re   r   rE   rp   rK   rs   rq   r   r   rr   r   r   r   r   r    r%    s    

r%  GPTBigCodeConfigc                       sH   e Zd Zedd Zdd Zdd Z fddZd	d
 Zdd Z	  Z
S )GPTBigCodeMHALoaderc                 C   r   )NGPTBigCodeForCausalLMr   rk   r   r   r    rE   :  rl   z%GPTBigCodeMHALoader.architecture_namec                 C   sL   t jj|jj|jjdt|jj dd}| |j	|j
 | |j	j|j |S )NT)r   r   multi_query_attentionr	   r(  r   rN   n_layern_headr   r   r   r   transformerr   r   r   r  r   r   r    rp   >  s   
z"GPTBigCodeMHALoader.get_model_specc                 C   r0  r   r1  r   r   r   r    rs   K  r3  z"GPTBigCodeMHALoader.set_vocabularyc                    >   t  ||}|jjt| }t|D ]	}|d|  q|S Nz<extra_id_%d>r   rr   rN   r   r   r   r  r2   rS   rU   rt   	extra_idsr  r   r   r    rr   N  
   z"GPTBigCodeMHALoader.get_vocabularyc                 C   r4  r   r5  r   r   r   r    rq   W  r  zGPTBigCodeMHALoader.set_configc                 C      d|_ | |j|j | |j|j | |j|j	 t
|j|jD ]B\}}| |jj|j | |jjd |jj | |jjd |jj | |jj|j | |jj|jj | |jj|jj q"d S NFr   r{   r   r   r   wter   r   wper   r   ln_fr   r   hr   ln_1r   r   attnc_attnc_projr   ln_2r   mlpc_fcr   r2   rV   r   r   r   r   r   r    r   \     zGPTBigCodeMHALoader.set_decoder)r>   rd   re   r   rE   rp   rs   rr   rq   r   r   r   r   r   r    r<  8  s    
	r<  
GPT2Configc                   @   8   e Zd Zedd Zdd Zdd Zdd Zd	d
 ZdS )
GPT2Loaderc                 C   r   )NGPT2LMHeadModelr   rk   r   r   r    rE   m  rl   zGPT2Loader.architecture_namec                 C   sJ   t jj|jj|jjdt|jj d}| |j	|j
 | |j	j|j |S )NT)r   r   r?  r  r   r   r    rp   q  s   
zGPT2Loader.get_model_specc                 C   r0  r   r1  r   r   r   r    rs   }  r3  zGPT2Loader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zGPT2Loader.set_configc                 C   rI  rJ  rK  rW  r   r   r    r     rX  zGPT2Loader.set_decoderN	r>   rd   re   r   rE   rp   rs   rq   r   r   r   r   r    r[  k  s    
r[  
GPTJConfigc                   @   rZ  )
GPTJLoaderc                 C   r   )NGPTJForCausalLMr   rk   r   r   r    rE     rl   zGPTJLoader.architecture_namec              
   C   sb   t jj|jj|jjdt|jj |jjdddd}| 	|j
|j|jj|jj | |j
j|j |S NTFr   r   
rotary_dimrotary_interleaveparallel_residualshared_layer_norm)r	   r(  r   rN   r@  rA  r   r   rc  r   r   rB  r   r   r   r  r   r   r    rp     s$   
zGPTJLoader.get_model_specc                 C   r0  r   r1  r   r   r   r    rs     r3  zGPTJLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zGPTJLoader.set_configc           
      C   s   d|_ | |j|j | |j|j t|j|j	D ]V\}}| |j
|j |jjj}|jjj}|jjj}	t|||}t|||}t|||	f|jjd _| |jjd |jj | |jj|jj | |jj|jj qd S rJ  )r   r   r   rL  r   r   rN  r   r   rO  rf  rP  rQ  r   r   r   r   r   permute_for_sliced_rotaryr8   catr   r   r   r   r   r   rU  fc_inr   fc_out)
r2   rV   r   rc  	num_headsr   r   qwkwvwr   r   r    r     s   


zGPTJLoader.set_decoderNr]  r   r   r   r    r_    s    
r_  CodeGenConfigc                       H   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	  Z
S )CodeGenLoaderc                 C   r   )NCodeGenForCausalLMr   rk   r   r   r    rE     rl   zCodeGenLoader.architecture_namec              
   C   s   t jj|jj|jjdt|jj |jjdddd}d}t	|jdr(|jj
dv r(d}| j|j|j|jj|jj|jj|d | |jj|j |S )	NTFrb     head_dim)      r8  )mp_num)r	   r(  r   rN   r@  rA  r   r   rc  r   rt  r   r   rB  n_embdr   r   r   )r2   rS   rV   rw  r   r   r    rp     s.   
zCodeGenLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr     s
   zCodeGenLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zCodeGenLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zCodeGenLoader.set_configc                    s8  d|_ | |j|j | |j|j td|d 	ddj
  }||  t fdd|D }t|j|jD ]^\}	}
| |	j|
j |
jjj}||d d f }|jddd\}}}t|||}t|||}t|||f|	jjd _| |	jjd |
jj | |	jj|
j j! | |	jj"|
j j# q;d S )	NFr   r   r   c                    s$   g | ]}t |  |d    qS )r{   )r8   aranger  	local_dimr   r    ry     s   $ z-CodeGenLoader.set_decoder.<locals>.<listcomp>dimr{   )$r   r   r   rL  r   r   rN  npry  reshapeTflattentolistr8   rh  r   r   rO  rf  rP  rQ  qkv_projr   chunkr   rg  r   r   r   r   r   r   rU  ri  r   rj  )r2   rV   r   rc  rk  	embed_dimrw  base_permutationpermutationr   r   r  new_qkv_projrl  rn  rm  r   rz  r    r     s(   "
zCodeGenLoader.set_decoderr>   rd   re   r   rE   rp   rr   rs   rq   r   r   r   r   r   r    rq    s    

rq  GPTNeoXConfigc                       rp  )GPTNeoXLoaderc                 C   r   )NGPTNeoXForCausalLMr   rk   r   r   r    rE   )  rl   zGPTNeoXLoader.architecture_namec              
   C   st   t jj|jj|jjdt|jj t|jj	|jj
|jj  d|jjdd}| |j|j|jj | |jj|j |S ra  )r	   r(  r   rN   r)  r*  r   
hidden_actint
rotary_pctr-  use_parallel_residualr   r   gpt_neoxr   r   	embed_outr  r   r   r    rp   -  s"   
zGPTNeoXLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   @  rH  zGPTNeoXLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   I  r3  zGPTNeoXLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq   L  r  zGPTNeoXLoader.set_configc                 C   sJ  d|_ | |j|j | |j|j t|j|j	D ]\}}t
|dr4| |j|j | |j|j n| |jj|j | |jj|j |jjj}|jjj}||dd|jd ddd|jd }||ddddd}||jjd _||jjd _| |jjd |jj | |jj|jj | |jj|jj qd S )NFinput_layer_normr   r   r   r{   ) r   r   r   embed_inr   r   r   r   r   r   r   r  input_layernormpost_attention_layer_normpost_attention_layernormr   r   r   query_key_valuer   r   r  r  swapaxesr   r   denser   rU  dense_h_to_4hr   dense_4h_to_h)r2   rV   r   rk  r   r   qkv_wqkv_br   r   r    r   Q  s8   




zGPTNeoXLoader.set_decoderr  r   r   r   r    r  '  s    
	r  WhisperConfigc                       sp   e Zd Zedd Zdd Zdd Zdd Z fd	d
Zdd Z	 fddZ
 fddZdd Zdd Z  ZS )WhisperLoaderc                 C   r   )NWhisperForConditionalGenerationr   rk   r   r   r    rE   y  rl   zWhisperLoader.architecture_namec                 C   sZ   t |jj|jj|jj|jj}| |j|j	j | 
|j|j	j | |jj|j |S r   )r   WhisperSpecrN   r   r   r   decoder_attention_headsr   r   rS   r   r   r   r   proj_outr  r   r   r    rp   }  s   zWhisperLoader.get_model_specc                    s$   g d  fddt |j|jD S )N)z<|endoftext|>z<|startoftranscript|>z<|translate|>z<|transcribe|>z<|startoflm|>z<|startofprev|>z<|nocaptions|>z<|notimestamps|>c                    s   g | ]
\}}| vr|qS r   r   )rv   token_idrw   non_lang_special_tokensr   r    ry     s
    z>WhisperLoader._get_lang_ids_from_tokenizer.<locals>.<listcomp>)r   additional_special_tokens_idsr  )r2   rU   r   r  r    _get_lang_ids_from_tokenizer  s   

z*WhisperLoader._get_lang_ids_from_tokenizerc                 C   s   t |dd }|d ur)|j|_|j|_t|dr|j|_t|dr(t|j	 |_
n|jj|_|jj|_t|j|_t |dd d u rH| ||_
|jd u rg|jj}|jj}ttt|d |t||_d S d S )Ngeneration_configalignment_heads
lang_to_idlang_idsr   )rD   suppress_tokenssuppress_idsbegin_suppress_tokenssuppress_ids_beginr   r  rB   r  valuesr  rN   _WHISPER_ALIGNMENT_HEADSr?   name_or_pathr  r   r  r   	itertoolsproductr   )r2   rN   rS   rU   
gen_config
num_layersrk  r   r   r    rq     s0   





zWhisperLoader.set_configc                    s6   t  ||}|dd t|jjt| D  |S )Nc                 s   s    | ]	}d |d  V  qdS )z<|%.2f|>g{Gz?Nr   r  r   r   r    	<genexpr>  s
    

z/WhisperLoader.get_vocabulary.<locals>.<genexpr>)r   rr   extendr   rN   r   r   r   r   r   r    rr     s
   
zWhisperLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zWhisperLoader.set_vocabularyc                    s2   |  |j|j |  |j|j t || d S r   )
set_conv1dconv1conv2r   r   )r2   rV   r   r   r   r    r     s   zWhisperLoader.set_encoderc                    s"   |  |j|j t || d S r   )r   r   r   r   r   r  r   r   r    r     s   zWhisperLoader.set_decoderc                 C   s$   |  |j|j | |j|j d S r   )r   r   r   r   r   r   r   r   r    r        zWhisperLoader.set_common_layersc                 C   s   |j |_ |j|_d S r   )r   r   r   r   r   r    r    r   zWhisperLoader.set_conv1d)r>   rd   re   r   rE   rp   r  rq   rr   rs   r   r   r   r  r   r   r   r   r    r  w  s    
r  Wav2Vec2Configc                       sh   e Zd Zedd Zdd Zdd Zdd Zd	d
 Zdd Z	dd Z
dd Z fddZdd Z  ZS )Wav2Vec2Loaderc                 C   r   )NWav2Vec2ForCTCr   rk   r   r   r    rE     rl   z Wav2Vec2Loader.architecture_namec                 C   sz   t |jjj|jjjj|jjjj}|jjjD ]}|j	|_
|j|_|jj|_|jj|_|jj|_q| |j||jj |S r   )r
   Wav2Vec2Specwav2vec2rN   num_feat_extract_layersr   r)  r*  r   r   r   r   r   feed_forwardintermediate_act_fnactivation_fnintermediate_denser   output_denser   r   )r2   rS   rV   r   r   r   r    rp     s   



zWav2Vec2Loader.get_model_specc                 C   rj   r   r   r   r   r   r    rq     r   zWav2Vec2Loader.set_configc                 C   s   |  S r   )r   r   r   r   r    rr     s   zWav2Vec2Loader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zWav2Vec2Loader.set_vocabularyc                 C   s   |j d jj|jj_|j d jj|jj_| |jj|j d j t|j|j dd  D ]\}}|jj|j_|jj|j_| |j|j q+d S r   )	conv_layersconvr   feat_layer0r   r   r   r   
feat_layer)r2   rV   feature_extractor
spec_layermodule_layerr   r   r    set_feature_extractor  s   z$Wav2Vec2Loader.set_feature_extractorc                 C   s$   |  |j|j | |j|j d S r   )r   fp_layer_normr   r   fp_projectionr   )r2   rV   feature_projectionr   r   r    set_feature_projection
  r  z%Wav2Vec2Loader.set_feature_projectionc                 C   s   |j jjj |j jj_|j jj |j jj_|j  D ]}|j |_q| tdd|j	f |j jj|j j_|j jj|j j_d S rz   )
pos_conv_embedr  r   datafloatr   
parametersr8   randnr-  )r2   rV   r   rN   paramr   r   r    set_pos_conv_embed  s   
z!Wav2Vec2Loader.set_pos_conv_embedc                    sX   |  ||jj | ||jj | ||jj| t ||jj | 	|j
|j
 d S r   )r  r  r  r  r  r  r   r   r   r   r   )r2   rV   rS   rN   r   r   r    r     s
   zWav2Vec2Loader.set_encoderc                 C   s   |  |j|j d S r   )r   r   r   r   r   r    r   "  s   z Wav2Vec2Loader.set_common_layers)r>   rd   re   r   rE   rp   rq   rr   rs   r  r  r  r   r   r   r   r   r   r    r    s    
r  T5Configc                       st   e Zd Zedd Zdd Z fddZdd Zd	d
 ZdddZ	dd Z
dd Zdd ZdddZdd Z  ZS )T5Loaderc                 C   r   )NT5ForConditionalGenerationr   rk   r   r   r    rE   (  rl   zT5Loader.architecture_namec              	   C   s   t jj|jj|jjf|jjdt|jj |jj	ddd}| 
|j|j | j
|j|jdd | |jj|j |jjrB|jjd |j_|S )NT)r   r   ffn_glurelative_attention_biasrms_norm)
is_decoderg      )r	   r   r   rN   r  num_decoder_layersrk  r   dense_act_fnis_gated_act	set_stackr   r   r   r   r   tie_word_embeddingsd_modelscale_outputsr  r   r   r    rp   ,  s   

zT5Loader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   @  rH  zT5Loader.get_vocabularyc                 C   r   r   r   r   r   r   r    rs   I  r   zT5Loader.set_vocabularyc                 C   sD   |j |_|j|_|j|_t|jdr||jj|_d S |j |_d S )Nr   )	r#  r   r   r   r   rN   r   r   r   r   r   r   r    rq   M  s   
zT5Loader.set_configFc                 C   s   |  |j|j | t|jtr|jd n|j|j d|_t	t
|j|jD ]:\}\}}| |j|jd  |dkrK|jd j}|j|j_|j|j_|rW| |j|jd  | |j|jd  q'd S )Nr   Fr{   r   )r   r   r   r   r   r   r   r   r   r.  r   r   blockset_self_attentionr   r  relative_attention_max_distanceset_cross_attentionr   set_ffnr   )r2   rV   r   r  r  r   r  first_self_attentionr   r   r    r  X  s(   
	zT5Loader.set_stackc                 C   sh   t |dr| |j|jj | |j|jj n	| |j|jj | |j|jj	 | 
|j|j d S )Nlinear_0_noact)r   r   r   DenseReluDensewi_0r  wi_1wir   wor   r   r   r   r   r    r  w  s   
zT5Loader.set_ffnc                 C   s&   | j ||jdd | |j|j d S r   )r   SelfAttentionr   r   r   r   r   r    r    s   zT5Loader.set_self_attentionc                 C   s"   |  ||j | |j|j d S r   )r   EncDecAttentionr   r   r   r   r   r    r    s   zT5Loader.set_cross_attentionc                 C   s   d|_ dd tdD }| |d |j | |d |j | |d |j |r3t|jd | nt|jd |d d  t|jd |dd   | |jd |j	 |j
rk|jj|_td	|j|_d S d S )
Ng      ?c                 S   r   r   r   r   r   r   r    ry     r   z*T5Loader.set_attention.<locals>.<listcomp>r   r   r{   r   r   int32)queries_scaler   r   qkvr   r   r   ohas_relative_attention_biasr  r   r~  dtypetyper  r   r   r   r    r     s    


zT5Loader.set_attentionc                 C      |j |_d S r   r   r   r2   rV   r   r   r   r    r     r   zT5Loader.set_layer_normr   )r>   rd   re   r   rE   rp   rr   rs   rq   r  r  r  r  r   r   r   r   r   r   r    r  &  s    
	


r  	MT5Configc                   @   s   e Zd Zedd ZdS )	MT5Loaderc                 C   r   )NMT5ForConditionalGenerationr   rk   r   r   r    rE     rl   zMT5Loader.architecture_nameN)r>   rd   re   r   rE   r   r   r   r    r    s    r  BloomConfigc                       P   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	dd Z
  ZS )BloomLoaderc                 C   r   )NBloomForCausalLMr   rk   r   r   r    rE     rl   zBloomLoader.architecture_namec              	   C   sL   t jj|jj|jjdtjjdddd}| 	|j
|j | |j
j|j |S )NT)r   r   r   alibialibi_use_positive_positions)r	   r(  r   rN   r@  rA  r   
ActivationGELUTanhr   r   rB  r   r   r   r  r   r   r    rp     s   
zBloomLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr     rH  zBloomLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zBloomLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zBloomLoader.set_configc                 C   s   d|_ | |j|j | |j|j | |j|j t	|j
|jD ]E\}}| |jj|j | |jjd |jj|jj | |jjd |jj | |jj|j | |jj|jj | |jj|jj q"d S rJ  )r   r   r   word_embeddingsr   r   word_embeddings_layernormr   rN  r   r   rO  r   r  set_qkv_linearr   r  rk  r   r  r   r  r   rU  r  r   r  rW  r   r   r    r     s,   


zBloomLoader.set_decoderc                 C   st   |j }||dd|jd }|dd}|d|jd }|j}||dd}|dd}|d}||_ ||_d S )Nr   r   r   r{   )r   r  r  r   r   )r2   rV   r   rk  r   r   r   r   r    r    s   

zBloomLoader.set_qkv_linear)r>   rd   re   r   rE   rp   rr   rs   rq   r   r  r   r   r   r   r    r
    s    
	r
  	MPTConfigc                       r	  )	MPTLoaderc                 C   r   NAutoModelForCausalLMr   rk   r   r   r    rE     rl   zMPTLoader.architecture_namec                 C   s6   t jj|jj|jjdtjjdd}| 	|j
|j |S )NT)r   r   r  )r	   r(  r   rN   n_layersn_headsr   r  GELUr   r   rB  r  r   r   r    rp     s   zMPTLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   
  rH  zMPTLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zMPTLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zMPTLoader.set_configc                 C   s   |  |j|j | |j|j d|_|jj|j_t	|j
|jD ]B\}}| |jj|j | |jjd |jj | |jjd |jj | |jj|j | |jj|jj | |jj|jj q d S rJ  )r   r   rL  r   r   norm_fr   r   r   r   r   blocksr   norm_1r   r   rQ  Wqkvr   r   norm_2r   up_projr   	down_projrW  r   r   r    r     s   zMPTLoader.set_decoderc                 C   s   |j |_t|j|_d S r   )r   r   r8   
zeros_liker   r   r   r   r    r   +  s   zMPTLoader.set_layer_norm)r>   rd   re   r   rE   rp   rr   rs   rq   r   r   r   r   r   r   r    r    s    
	r  GemmaConfigc                       r	  )GemmaLoaderc                 C   r   )NGemmaForCausalLMr   rk   r   r   r    rE   2  rl   zGemmaLoader.architecture_namec                 C   s   |j j}|j j}t|j d|}||krd }t|j dd}tjj|||dkr)tjj	ntjj
dddddt|j dd	||j jd
}| |j|j | |jj|j |j jd |jj_|S )Nnum_key_value_headshidden_activationr   r   Tr   F
rope_theta'  )	r   r   r  r  rc  rd  rotary_basenum_heads_kvrt        ?rN   r)  r*  rD   r	   r(  r   r   r  r  r  rt  r   r   rS   r   r   r   r-  r   multiply_by_sqrt_depthr2   rS   r  rk  r*  activation_configrV   r   r   r    rp   6  s6   zGemmaLoader.get_model_specc                    ^   t  ||}|jjt| }t|D ]	}|d|  q|jjt|k r-|d |jj }|S rD  rE  rF  r   r   r    rr   Y     zGemmaLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   d  r3  zGemmaLoader.set_vocabularyc                 C   &   |j |_ |j|_|j|_|jj|_d S r   r   r   r   rN   rms_norm_epslayer_norm_epsilonr   r   r   r    rq   g     zGemmaLoader.set_configc                 C      |j |_d|_d S r  r   r   layer_norm_use_residualr  r   r   r    r   m     
zGemmaLoader.set_layer_normc           	      C   s  d|_ d|_| |j|j | |j|j t|j	|j
D ]j\}}| |jj|j | |jj|j |jjj}|jjj}|jjj}|jjj}t|||g|jjd _||jjd _| |jj|jj | |jj|jj | |jj|jj t |d t |d t!"  qd S NTFr   r{   r   rU  )#r   r  r   r   r   r   r   normr   r   r   r   r  r   r  r   r   r   r   r   o_projr8   rh  r   r   r   rU  	gate_projr  r  r   r   delattrgccollect	r2   rV   r   r   r   wqwkwvr  r   r   r    r   q  s0   








zGemmaLoader.set_decoderr>   rd   re   r   rE   rp   rr   rs   rq   r   r   r   r   r   r   r    r#  0  s    
#r#  Gemma2Configc                       r	  )Gemma2Loaderc                 C   r   )NGemma2ForCausalLMr   rk   r   r   r    rE     rl   zGemma2Loader.architecture_namec                 C   s   |j j}|j j}t|j d|}||krd }t|j dd}tjj|||dkr)tjj	ntjj
dddddt|j dd	||j jdd
}| |j|j | |jj|j |j jd |jj_|S )Nr%  r&  r   r   Tr   Fr'  r(  )
r   r   r  r  rc  rd  r)  r*  rt  pre_post_layer_normr+  r,  r.  r   r   r    rp     s8   zGemma2Loader.get_model_specc                    r0  rD  rE  rF  r   r   r    rr     r1  zGemma2Loader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zGemma2Loader.set_vocabularyc                 C   r2  r   r3  r   r   r   r    rq     r6  zGemma2Loader.set_configc                 C   r7  r  r8  r  r   r   r    r     r:  zGemma2Loader.set_layer_normc           	      C   s0  d|_ d|_| |j|j | |j|j t|j	|j
D ]x\}}| |j|j | |j|j | |j|j | |j|j |jjj}|jjj}|jjj}|jjj}t|||g|jjd _||jjd _| |jj|j j! | |jj"|j j# | |jj$|j j% t&|d t&|d t'(  qd S r;  ))r   r  r   r   r   r   r   r<  r   r   r   r  r  r  r  pre_feedforward_layer_normpre_feedforward_layernormpost_feedforward_layer_normpost_feedforward_layernormr   r   r   r   r   r=  r8   rh  r   r   r   r   r   rU  r>  r  r  r   r   r?  r@  rA  rB  r   r   r    r     s8   






zGemma2Loader.set_decoderrF  r   r   r   r    rH    s    
$rH  LlamaConfigc                       X   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	e
jjfddZ  ZS )LlamaLoaderc                 C   r   )NLlamaForCausalLMr   rk   r   r   r    rE     rl   zLlamaLoader.architecture_namec                 C   s  |j j}|j j}t|j d|}||krd }t|j dd }|rD|dp&|d }t|}|d }|d u rCtd|d dt f nd }d}t|j d	d }	|	rwd }
|	j	d
kr^t
|	j}
|
d u rptd|	j	dt
 f |	j}|	j}ntjj}
d }d }tjj||tjjddddd||t|j dd||
||d}| |j|j|
 | |jj|j |tjjkr|jjD ]}|d |j _!|d |j _"q|S )Nr%  rope_scalingr  	rope_typefactorkRoPE scaling type '%s' is not yet implemented. The following RoPE scaling types are currently supported: %sr4   r{   quantization_configawqkQuantization type '%s' is not yet implemented. The following Quantization types are currently supported: %sTr   Fr'  r(  )r   r   r  r  rc  rd  rotary_scaling_typerotary_scaling_factorr)  r*  r   quant_group_size
quant_bitslow_freq_factorhigh_freq_factor)#rN   r)  r*  rD   r?   _SUPPORTED_ROPE_SCALINGrn   rA   rC   quant_method_SUPPORTED_QUANTIZATIONversion
group_sizebitsr   r   r   r	   r(  r   r  SWISHr   r   rS   r   r   r   r   RotaryScalingTypeLlama3r   r   rotary_low_freq_factorrotary_high_freq_factor)r2   rS   r  rk  r*  rS  rT  rZ  r[  rW  r   r\  r]  rV   r   r   r   r    rp     s   


zLlamaLoader.get_model_specc                    r0  rD  rE  rF  r   r   r    rr   K  r1  zLlamaLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   V  r3  zLlamaLoader.set_vocabularyc                 C   s4   |j |_ |j|_|jd ur|jnd|_|jj|_d S )N r3  r   r   r   r    rq   Y  s
   zLlamaLoader.set_configc                 C   r  r   r  r  r   r   r    r   a  r   zLlamaLoader.set_layer_normc                 C     d|_ | |j|j | |j|j t|j|j	D ]\}}| |j
j|j | |jj|j dd tdD }| j|d |jj|d | j|d |jj|d | j|d |jj|d |tjjkrnt|j
jd | n|tjjkrvdnd}t|j
jd || | j|j
jd |jj|d | j|jj|jj|d | j|jj |jj!|d | j|jj"|jj#|d t$|d	 t$|d
 t%&  qd S )NFc                 S   r   r   r   r   r   r   r    ry   q  r   z+LlamaLoader.set_decoder.<locals>.<listcomp>r   r   r   r{   r   r   rU  'r   r   r   r   r   r   r<  r   r   r   r   r  r   r  r   r   r   r   r   r   r   r   r   r   r   r   AWQ_GEMMfuse_linear_prequantr=  r   rU  r>  r  r  r   r   r?  r@  rA  r2   rV   r   r   r   r   r   cc_dimr   r   r    r   d  sX   





zLlamaLoader.set_decoderr>   rd   re   r   rE   rp   rr   rs   rq   r   r   r   r   r   r   r   r   r   r    rQ    s    
MrQ  MistralConfigc                       rP  )MistralLoaderc                 C   r   )NMistralForCausalLMr   rk   r   r   r    rE     rl   zMistralLoader.architecture_namec                 C   s^  |j j}|j j}t|j d|}||krd }t|j dd}t|j dd }|rDt|d }|d }|d u rCtd|d dt f nd }d	}t|j d
d }	|	ru|	j	dkr\t
|	j}
|
d u rntd|	j	dt
 f |	j}|	j}ntjj}
d }d }tjj||tjjddddd||t|j dd|||
||d}| j|j|j|
d | |jj|j |S )Nr%  sliding_windowr   rS  r  rU  rV  r4   r{   rW  rX  rY  TFr'  r(  )r   r   r  r  rc  rd  rZ  r[  r)  r*  rw  r   r\  r]  rm  )rN   r)  r*  rD   r`  r?   rn   rA   rC   ra  rb  rc  rd  re  r   r   r   r	   r(  r   r  rf  r   r   rS   r   r   r   )r2   rS   r  rk  r*  rw  rS  rZ  r[  rW  r   r\  r]  rV   r   r   r    rp     sr   
zMistralLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr     rH  zMistralLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zMistralLoader.set_vocabularyc                 C   r2  r   r3  r   r   r   r    rq     r6  zMistralLoader.set_configc                 C   r  r   r  r  r   r   r    r     r   zMistralLoader.set_layer_normc                 C   rl  )NFc                 S   r   r   r   r   r   r   r    ry     r   z-MistralLoader.set_decoder.<locals>.<listcomp>r   r   rm  r{   r   r   rU  rn  rq  r   r   r    r     sX   





zMistralLoader.set_decoderrs  r   r   r   r    ru    s    
D	ru  MixFormerSequentialConfigc                       rp  )MixFormerSequentialLoaderc                 C   r   r  r   rk   r   r   r    rE   ,  rl   z+MixFormerSequentialLoader.architecture_namec              
   C   s\   t jj|jj|jjdt|jj |jjdddd}| 	|j
|j | |j
j|jd j |S )NTFr  rk  r   r   rc  rd  re  rf  r   )r	   r(  r   rN   r@  rA  r   r   rc  r   r   r   r   r   r   r  r   r   r    rp   0  s   
z(MixFormerSequentialLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   @  rH  z(MixFormerSequentialLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   I  r3  z(MixFormerSequentialLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq   L  r  z$MixFormerSequentialLoader.set_configc                 C   s   d|_ | |j|d j | |j|d j t|j|dd D ]8\}}| |j	|j | 
|jjd |jj | 
|jjd |jj | 
|jj|jj | 
|jj|jj q!d S )NFr   r   r{   )r   r   r   rL  r   r   lnr   r   rf  r   r   r   mixerr  r   r   r   rU  r   r   r   rW  r   r   r    r   Q  s   z%MixFormerSequentialLoader.set_decoderr  r   r   r   r    ry  *  s    
	ry  	PhiConfigc                       rp  )	PhiLoaderc                 C   r   r  r   rk   r   r   r    rE   `  rl   zPhiLoader.architecture_namec              
   C   sl   t jj|jj|jjdt|jj |jjdddd}| 	|j
|j | |j
j|jj | |j
j|jj |S )NTFrz  )r	   r(  r   rN   r@  rA  r   r   rc  r   r   rB  r   r   r   r   r   r   r{  r  r   r   r    rp   d  s   
zPhiLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   u  rH  zPhiLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   ~  r3  zPhiLoader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zPhiLoader.set_configc                 C   s   d|_ | |j|jj t|j|jD ]8\}}| |j	|j
 | |jjd |jj | |jjd |jj | |jj|jj | |jj|jj qd S rJ  )r   r   r   embdrL  r   r   rO  r   rf  r{  r   r   r   r|  r  r   r   r   rU  r   r   r   rW  r   r   r    r     s   zPhiLoader.set_decoderr  r   r   r   r    r~  ^  s    
	r~  
Phi3Configc                       sX   e Zd Zedd Zdd Z fddZdd Zd	d
 Zdd Z	dd Z
dd Z  ZS )
Phi3Loaderc                 C   r   r  r   rk   r   r   r    rE     rl   zPhi3Loader.architecture_namec                 C   s   |j j}|j j}t|j d|}||krd }t|j dd}t|j dd}t|j dd }|rMt|d }|dd}	|d u rLtd	|d d
t f nd }d}	t	j
j||tjjddddd||	t|j dd|||d}
| |
j|j | |
jj|j |
S )Nr%   original_max_position_embeddingsr   max_position_embeddingsrS  r  rU  r{   rV  r4   TFr'  r(  )r   r   r  r  rc  rd  rZ  r[  r)  r  r  r*  )rN   r)  r*  rD   r`  r?   rn   rA   rC   r	   r(  r   r   r  rf  r   r   rS   r   r   r   )r2   rS   r  rk  r*  r  r  rS  rZ  r[  rV   r   r   r    rp     sR   zPhi3Loader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr     rH  zPhi3Loader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs     r3  zPhi3Loader.set_vocabularyc                 C   r4  r   r5  r   r   r   r    rq     r  zPhi3Loader.set_configc                 C   r  r   r  r  r   r   r    r     r   zPhi3Loader.set_layer_normc                 C   s(   t j|t jd|_t j|t jd|_d S )N)r   )r8   tensorfloat32rotary_scaling_long_factorrotary_scaling_short_factor)r2   rV   r  r  r   r   r    set_rotary_embeddings  s   z Phi3Loader.set_rotary_embeddingsc                 C   s*  d|_ | |j|j | |j|j t|j|j	D ]x\}}| |j
j|j | |jj|j | |j
jd |jj | |j
jd |jj |jjjd urd|jjjd urd| |j
|jjj|jjj |jjjjddd\}}||jj_||jj_| |jj|jj t|d t|d t !  qd S )NFr   r{   r   r|  r   rU  )"r   r   r   r   r   r   r<  r   r   r   r   r  r   r  r   r   r   r  r=  
rotary_emblong_factorshort_factorr  rU  gate_up_projr   r  r   r  r   r   r?  r@  rA  )r2   rV   r   r   r   r>  r  r   r   r    r     s:   






zPhi3Loader.set_decoder)r>   rd   re   r   rE   rp   rr   rs   rq   r   r  r   r   r   r   r   r    r    s    
0	
r  RWConfigc                       sZ   e Zd Zedd Zdd Zdd Z fddZd	d
 Zdd Z	dd Z
dddZ  ZS )RWLoaderc                 C   r   r  r   rk   r   r   r    rE     rl   zRWLoader.architecture_namec                 C   s.   |j j| _|j j| _t|j dd | _d| _d S )N	n_head_kvnum_kv)rN   r@  _num_layersrA  
_num_headsrD   _num_heads_kv_num_kv_attrro   r   r   r    get_falcon_spec     


zRWLoader.get_falcon_specc                 C   s   |  | t|jddrd}n| j}tjj| j| jdt	j
j|jjdd|jjr(dnd d|jj|dk|d}| |j|j | |jj|j |S )Nmulti_queryFr{   Tr   )
r   r   r  r  scale_alibirc  rd  re  rf  r*  )r  rD   rN   r  r	   r(  r   r  r  r   r  r  r  rotaryparallel_attnr   r   rB  r   r   r   )r2   rS   r*  rV   r   r   r    rp     s*   
zRWLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   2  rH  zRWLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   ;  r3  zRWLoader.set_vocabularyc                 C   s   |j |_|j |_ |j |_d S r   )r   r   r   r   r   r   r    rq   >  r  zRWLoader.set_configc                 C   sN  d|_ | |j|j | |j|j t|j|j	D ]\}}t
|dr4| |j|j | |j|j n t
|drB| |j|j n| |jj|j | |jj|j t|j| j}|dkrl| |jjd |jj n| |jjd |jj|jj||jjk r|nd  | |jjd |jj | |jj|jj | |jj|jj  qd S )NFln_attnrf  r{   r   )!r   r   r   r  r   r   rN  r   r   rO  r   r  r  r  ln_mlprf  r  r   r   r  rD   r  r   r   r  r  rk  r  r   rU  r  r   r  )r2   rV   r   r   r   r  r   r   r    r   C  sB   





zRWLoader.set_decoderNc                 C   s~  |j }|d u r"||dd|jd }|dd}|d|jd }nC|jd ||d   }|d|| d ||jd }|j|| ddgdd\}}}	t||| d||| d|	|| dg}||_ |jd ur|j}
|d u r|
|dd}
|
dd}
|
d}
n1|
d|| d |}
|
j|| ddgdd\}}}	t||| ||| |	|| g}
|
|_d S d S )Nr   r   r   r{   r   r|  )r   r  r  r   splitr8   rh  r   )r2   rV   r   rk  r  r   rt  r  r  r  r   r   r   r    r  k  sB   

zRWLoader.set_qkv_linearr   )r>   rd   re   r   rE   r  rp   rr   rs   rq   r   r  r   r   r   r   r    r    s    
	(r  FalconConfigc                   @   s   e Zd Zdd ZdS )FalconLoaderc                 C   s.   |j j| _|j j| _t|j dd | _d| _d S )Nnum_kv_heads)rN   r)  r  r*  r  rD   r  r  ro   r   r   r    r    r  zFalconLoader.get_falcon_specN)r>   rd   re   r  r   r   r   r    r    s    r  DistilBertConfigc                   @   s0   e Zd Zedd Zdd Zdd Zdd Zd	S )
DistilBertLoaderc                 C   r   )NDistilBertModelr   rk   r   r   r    rE     rl   z"DistilBertLoader.architecture_namec                 C   s\  t j|jj|jjdt|jj dd}t |}d|j_	| 
|jjd |jj | |jj|jj | |jj|jj t|jj|jjD ]g\}}dd tdD }| |d |jj | |d |jj | |d	 |jj t|jjd | | |jjd |jj  | |jj!|j" | |j#j$|j#j% | |j#j&|j#j' | |j#j!|j( qD|S )
NFTr   r   c                 S   r   r   r   r   r   r   r    ry     r   z3DistilBertLoader.get_model_spec.<locals>.<listcomp>r   r{   r   ))r	   TransformerEncoderSpecrN   r  r  r   r   TransformerEncoderModelSpecr   r   r   r   r  r   r   position_embeddingsr   r   	LayerNormr   r   rB  r   r   r   q_link_linv_linr   r   r   r   out_linr   sa_layer_normr   r   lin1r   lin2output_layer_normr2   rS   encoder_specrV   r   r   r   r   r   r    rp     sH   

zDistilBertLoader.get_model_specc                 C   r0  r   r1  r   r   r   r    rs     r3  zDistilBertLoader.set_vocabularyc                 C   s   |j |_ d|_d S )Ng-q=)r   r5  r   r   r   r    rq     r:  zDistilBertLoader.set_configN)r>   rd   re   r   rE   rp   rs   rq   r   r   r   r    r    s    
,r  
BertConfigc                       s@   e Zd Zedd Zdd Z fddZdd Zd	d
 Z  Z	S )
BertLoaderc                 C   r   )N	BertModelr   rk   r   r   r    rE     rl   zBertLoader.architecture_namec              	   C   s  |j jdksJ tj|j j|j jdt|j j ddtj	j
d}tj|dtjjd}d|j_| |jjd |jj | |jjd |jj | |jj|jj | |jj|jj | |j|jj t|jj|jjD ]n\}}d	d
 t dD }| |d |j!j"j# | |d |j!j"j$ | |d |j!j"j% t&'|j(j)d | | |j(j)d |j!j*j | |j(j+|j!j*j | |j,j-|j.j | |j,j/|j*j | |j,j+|j*j qj|S )NabsoluteFTr   r   r   r   num_source_embeddingsembeddings_mergepooling_layerpooling_activationr   r{   c                 S   r   r   r   r   r   r   r    ry   	  r   z-BertLoader.get_model_spec.<locals>.<listcomp>r   )0rN   position_embedding_typer	   r  r)  r*  r   r  r   EmbeddingsMergeADDr  r  Tanhr   r   r   r   r  token_type_embeddingsr   r   r  r   r   r  r   pooler_densepoolerr  r   r   r   r   r2   queryr~   valuer   r   r   r   outputr   r   r   intermediater   r  r   r   r    rp     sZ   

zBertLoader.get_model_specc                    rC  rD  rE  rF  r   r   r    rr   	  rH  zBertLoader.get_vocabularyc                 C   r0  r   r1  r   r   r   r    rs   !	  r3  zBertLoader.set_vocabularyc                 C      |j |_ |jj|_d S r   r   rN   layer_norm_epsr5  r   r   r   r    rq   $	     zBertLoader.set_config)
r>   rd   re   r   rE   rp   rr   rs   rq   r   r   r   r   r    r    s    
8	r  XLMRobertaConfigc                   @   rZ  )XLMRobertaLoaderc                 C   r   )N#XLMRobertaForSequenceClassificationr   rk   r   r   r    rE   +	  rl   z"XLMRobertaLoader.architecture_namec              	   C   s  |j jdksJ tj|j j|j jdt|j j ddtj	j
d}|jjd u r'd}nd}tj||tjjd}d|j_| |jjd |jjj | |jjd |jjj | |jj|jjj | |jj|jjj |rs| |j|jjj t|jj |jjj D ]n\}}d	d
 t!dD }| |d |j"j#j$ | |d |j"j#j% | |d |j"j#j& t'(|j)j*d | | |j)j*d |j"j+j | |j)j,|j"j+j | |j-j.|j/j | |j-j0|j+j | |j-j,|j+j q}|S )Nr  FTr   r  r  r   r{   c                 S   r   r   r   r   r   r   r    ry   Z	  r   z3XLMRobertaLoader.get_model_spec.<locals>.<listcomp>r   )1rN   r  r	   r  r)  r*  r   r  r   r  r  robertar  r  r  r  r   r   r   r   r  r  r   r   r  r   r   r  r   r  r  r   r   r   r   r2   r  r~   r  r   r   r   r   r  r   r   r   r  r   )r2   rS   r  r  rV   r   r   r   r   r   r    rp   /	  sd   

zXLMRobertaLoader.get_model_specc                 C   r0  r   r1  r   r   r   r    rs   m	  r3  zXLMRobertaLoader.set_vocabularyc                 C   r  r   r  r   r   r   r    rq   p	  r  zXLMRobertaLoader.set_configc                 C   s8   |j |_t|dd}|dkr|j|d d  |_d S d S )Npadding_idxr   r{   r   r   r   r   r    r   t	  s
   z'XLMRobertaLoader.set_position_encodingsN)	r>   rd   re   r   rE   rp   rs   rq   r   r   r   r   r    r  )	  s    
>r  c               	   C   s   t jt jd} | jdddd | jddd | jd	d
dd | jddd | jdddd | jdddd t|  |  }t|j|j	|j
|jdv |j|j|jd}|| d S )N)formatter_classz--modelTzaName of the pretrained model to download, or path to a directory containing the pretrained model.)requiredhelpz--activation_scaleszPath to the pre-computed activation scales. Models may use them to rescale some weights to smooth the intermediate activations and improve the quantization accuracy. See https://github.com/mit-han-lab/smoothquant.)r  z--copy_files+zWList of filenames to copy from the Hugging Face model to the converted model directory.)nargsr  z
--revisionz<Revision of the model to download from the Hugging Face Hub.z--low_cpu_mem_usage
store_truezNEnable the flag low_cpu_mem_usage when loading the model with from_pretrained.)actionr  z--trust_remote_codez*Allow converting models using custom code.)rG   int8_float16)r%   r&   r'   r(   r)   r*   )argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr#   rS   r%   r&   quantizationr(   r)   r*   convert_from_args)parserargs	converterr   r   r    main{	  sV   	
	r  __main__))r{   r   )r   r   )r      r   r   r   r{   r   r   r   r   r   rs  ))r   r   r  r  r  r  )r   r  )r  rs     r  r{   )r  r  )r  r  )r  )rs  r   )rs  r   r  r  )r  r   )r  rs  )r     ))r  r  r  r   )r  r   )r  r8  )r8  r   )r8  r  r8  r  	   r   )r  rs  )r  r8  )r  
   )r  r   )r  r{   )r  r   )r  r   )r  r  )r     r  r   r  rs  )
)r  r   )r  r  )r8  r   )r8  rs  r  )r8  r8  r  )r  r  )r  r  )r  r  )r  )   r{   )r     )r  r     rs  )   r   )r  rs  )r  r  )   r  r  r  )   r  )r  r  )r  r     r   )r   r   )r   r  )r   r  )   r  ))   r  r  )r  r  r  r{   r  )   rs  )	)r     r  r  )r  r  )   r  )r  r  )r  r  )r  r   )r  r  ))r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  r  )r  rs  )r  r  )r  r  r  r  )r  r   )r  r   )r  r   )r  r  )r  r  )r  r  )r  r  )r  r  )   r  )   r{   )r	  r  )   r  )
r  )r  r  )r  r  )r  r  r  r  r  )r  rs  )   r{   )r  r  )zopenai/whisper-tiny.enzopenai/whisper-tinyzopenai/whisper-base.enzopenai/whisper-basezopenai/whisper-small.enzopenai/whisper-smallzopenai/whisper-medium.enzopenai/whisper-mediumzopenai/whisper-largezopenai/whisper-large-v2zopenai/whisper-large-v3)Nr   r  r@  r  r]   typingr   r   numpyr~  r`   r8   r:   ImportErrorctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   r   r	   r
   r   r  r  r  GELUSigmoidRELUrf  r   rg  LinearSurh  r`  r   ro  AWQ_GEMVrb  r   r"   r#   ABCri   r   r   r  r  r!  r%  r<  r[  r_  rq  r  r  r  r  r  r
  r  r#  rH  rQ  ru  ry  r~  r  r  r  r  r  r  r  r>   r  r   r   r   r    <module>   s     

{By7!F2(8YOdI|M7_g   33x 9NQ
:


