o
    Ti/                     @   s2  d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ ejjejjejjejjd	Zd
d Zdd Zdd ZG dd deZdd Zdd Zd1ddZdd Zdd Zd1ddZdd  Z d2d"d#Z!d$d% Z"d&d' Z#d(d) Z$d*d+ Z%d,d- Z&d.d/ Z'e(d0kre'  dS dS )3    N)PredictConfig)PositionEncodingType)vocabs_to_dict)get_model_class)utils)	Converter)common_spectransformer_spec)gelu	fast_gelurelu
gated-siluc                 C   s(  t | jddtjk}t | jddtjk}|rtdt | jddtjk}|r*tdt | dd}t | jdddkr>d	}	d
}
n| jj}	| jj	}
t | jdd}|dk}t | dd}|dkr_tdt
jj| jj| jjf||t| || jdk|	|
|d	}t|| |D ]}|| q|D ]}|| q|S )4Creates a model specification from the model config.position_encoding_typeNzBRotary embeddings are not supported yet for encoder/decoder modelsz5Alibi is not supported yet for encoder/decoder modelsmlp_activation_fnr   lambda_alignr      heads   r   sliding_windowz=Sliding window is not suported yet for encoder/decoder modelsrms)with_relative_position
activationffn_glurms_normalignment_layeralignment_headsnum_source_embeddings)getattr
embeddingsr   RelativeRotary
ValueErrorAlibidecoderr   r   r	   TransformerSpecfrom_configencoderlayers_SUPPORTED_ACTIVATIONS
layer_normset_transformer_specregister_source_vocabularyregister_target_vocabulary)config	variables
src_vocabs
tgt_vocabsr   r   with_rotary
with_alibiactivation_fnr   r   	num_headsr   r   
model_spec	src_vocab	tgt_vocab r:   S/home/ubuntu/.local/lib/python3.10/site-packages/ctranslate2/converters/eole_ct2.py_get_model_spec_seq2seq   s`   
r<   c                 C   s   t | jddtjk}t | jddtjk}t | jddtjk}t | dd}t | jdd}	t | jdd}
|
|	ks:|
dkr<d}
|r@dnd}t | jd	d
}|dk}t | dd}tj	j
| jj|	t| |||| jdk|||
|d}t|j|dd |D ]}|| qu|S )r   r   Nr   r   r   r   heads_kvr   rotary_interleaveTr   r   r   )	r   r   r   alibir   
rotary_dimr>   num_heads_kvr   Fwith_encoder_attention)r   r    r   r!   r"   r$   r%   rope_configr	   TransformerDecoderModelSpecr'   r)   r*   r+   set_transformer_decoderregister_vocabulary)r/   r0   r1   r2   r   r   r3   r4   r5   r6   num_kvr@   r>   r   r   r7   r9   r:   r:   r;   _get_model_spec_lm\   sT   rI   c                 C   s   | d g}| d g}||fS )Nsrctgtr:   )vocabr1   r2   r:   r:   r;   
get_vocabs   s   

rM   c                   @   s&   e Zd ZdZdefddZdd ZdS )EoleConverterz(Converts models generated by OpenNMT-py.
model_pathc                 C   s
   || _ dS )zInitializes the OpenNMT-py converter.

        Arguments:
          model_path: Path to the OpenNMT-py PyTorch model (.pt file).
        N)_model_path)selfrO   r:   r:   r;   __init__   s   
zEoleConverter.__init__c                 C   s   dd l }t| jdd}t|j}||\}}}t|}||_t|\}}	|jjj	dkr;t
|j| ||	t|d}
nt|j| ||	t|d}
|d |
j_|d d |
j_|d d	 |
j_|d d
 |
j_t|dd|
j_|
S )Nr   dummy)rO   rJ   transformer_lm)r   decoder_start_tokenspecials	bos_token	eos_token	unk_tokennorm_epsgư>)torchr   rP   r   modelfor_inferencer   rM   r%   decoder_typerI   
state_dictlenr<   r/   rU   rW   rX   rY   r   layer_norm_epsilon)rQ   r[   r/   model_classr\   vocabsmodel_configvocabs_dictr1   r2   specr:   r:   r;   _load   s8   
zEoleConverter._loadN)__name__
__module____qualname____doc__strrR   rg   r:   r:   r:   r;   rN      s    rN   c                 C   s   t | j| t| j| d S N)set_transformer_encoderr(   rF   r%   )rf   r0   r:   r:   r;   r,      s   r,   c                 C   sB   t | |d t| j|d t| jD ]\}}t||d|  qd S )Nsrc_embzencoder.layer_normzencoder.transformer_layers.%d)set_input_layersset_layer_normr+   	enumeratelayerset_transformer_encoder_layer)rf   r0   irs   r:   r:   r;   rn      s   
rn   Tc                 C   sT   t | |d t| j|d t| jD ]\}}t||d| |d qt| j|d d S )Ntgt_embzdecoder.layer_normzdecoder.transformer_layers.%drB   	generator)rp   rq   r+   rr   rs   set_transformer_decoder_layer
set_linear
projection)rf   r0   rC   ru   rs   r:   r:   r;   rF      s   rF   c                 C   sP   t | drt| j|d|  nd| _| j}t|tr|d }t||d|  d S )Nposition_encodings%s.peFr   z%s.embeddings)hasattrset_position_encodingsr{   scale_embeddingsr    
isinstancelistset_embeddings)rf   r0   scopeembeddings_specsr:   r:   r;   rp      s   

rp   c                 C   sT   t | j|d| dd t| jj|d|  t| jj|d|  t| j|d|  d S )N%s.self_attnTself_attention%s.input_layernorm%s.post_attention_layernorm%s.mlp)set_multi_head_attentionr   rq   r+   ffnset_ffnrf   r0   r   r:   r:   r;   rt      s   rt   c                 C   s~   t | j|d| dd t| jj|d|  |r*t | j|d|  t| jj|d|  t| jj|d|  t| j|d|  d S )	Nr   Tr   r   z%s.context_attnz%s.precontext_layernormr   r   )r   r   rq   r+   	attentionr   r   )rf   r0   r   rC   r:   r:   r;   rx     s$   rx   c                 C   sH   t | j|d|  t | j|d|  t| dr"t | j|d|  d S d S )Nz%s.gate_up_projz%s.down_projlinear_0_noactz
%s.up_proj)ry   linear_0linear_1r}   r   r   r:   r:   r;   r     s
   
r   Fc                 C   s  |r3dd t dD }t|d |d|  t|d |d|  t|d |d	|  t| jd | n1t| jd |d|  d
d t dD }t|d |d|  t|d |d	|  t| jd | t| jd |d|  t| drt|d| | _| j| _d S d S )Nc                 S      g | ]}t  qS r:   r   
LinearSpec.0_r:   r:   r;   
<listcomp>#      z,set_multi_head_attention.<locals>.<listcomp>   r   z%s.linear_queryr   z%s.linear_keys   z%s.linear_valuesc                 S   r   r:   r   r   r:   r:   r;   r   *  r   r   z%s.final_linearrelative_position_keysz'%s.relative_positions_embeddings.weight)	rangery   r   fuse_linearlinearr}   _get_variabler   relative_position_values)rf   r0   r   r   split_layersr:   r:   r;   r   !  s$   
r   c                 C   st   z
t |d| | _W n ty#   t |d| | _t |d| | _Y nw zt |d| | _W d S  ty9   Y d S w )N	%s.weightz%s.a_2z%s.b_2%s.bias)r   gammaKeyErrorbetar   r:   r:   r;   rq   6  s   rq   c                 C   s4   t |d| | _|d| }|d ur|| _d S d S )Nr   r   )r   weightgetbias)rf   r0   r   r   r:   r:   r;   ry   C  s
   
ry   c                 C   s   t |d| | _d S )Nr   )r   r   r   r:   r:   r;   r   J  s   r   c                 C   s   t |d|  | _d S )Nr|   )r   squeeze	encodingsr   r:   r:   r;   r~   N  s   r~   c                 C   s   | | S rm   r:   )r0   namer:   r:   r;   r   R  s   r   c                  C   sD   t jt jd} | jdddd t|  |  }t|j	| d S )N)formatter_classz--model_pathTzModel path.)requiredhelp)
argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsrN   rO   convert_from_args)parserargsr:   r:   r;   mainV  s   
r   __main__)T)F))r   eole.config.runr   eole.constantsr   eole.inputters.inputterr   eole.models.modelr   ctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r	   
ActivationGELUGELUTanhRELUSWISHr*   r<   rI   rM   rN   r,   rn   rF   rp   rt   rx   r   r   rq   ry   r   r~   r   r   rh   r:   r:   r:   r;   <module>   sD    H51
	



