o
    TiB2                     @   s  d dl Z d dlmZ d dlmZ d dlmZmZ ejj	ejj
ejjejjdZejjejjdZdd Zd	d
 Zdd Zdd ZG dd deZdd Zdd Zd0ddZdd Zdd Zd0ddZdd Zd1d!d"Zd#d$ Zd%d& Z d'd( Z!d)d* Z"d+d, Z#d-d. Z$e%d/kre$  dS dS )2    N)utils)	Converter)common_spectransformer_spec)gelu	fast_gelurelusilu)concatsumc           	      C   s   t | dddk}t | dddk}t | dddk}t | dd}t | dd}t | d	d
}t }|| j| jko9| jdv d ||d
kd|  ||tv d|dt f  || j|p]|p]|kd ||dkpi|t	v d|dt	 f  |
  d S )Nmax_relative_positionsr   pos_ffn_activation_fnr   
feat_merger
   self_attn_typez
scaled-dot>   transformertransformer_lmzROptions --encoder_type and --decoder_type must be 'transformer' or 'transformer_lmzNOption --self_attn_type %s is not supported (supported values are: scaled-dot)zROption --pos_ffn_activation_fn %s is not supported (supported activations are: %s)z, z`Options --position_encoding and --max_relative_positions cannot be both enabled or both disabled   zGOption --feat_merge %s is not supported (supported merge modes are: %s) )getattrr   ConfigurationCheckerencoder_typedecoder_type_SUPPORTED_ACTIVATIONSjoinkeysposition_encoding_SUPPORTED_FEATURES_MERGEvalidate)	optnum_source_embeddingswith_relative_positionwith_rotary
with_alibiactivation_fnr   r   check r'   U/home/ubuntu/.local/lib/python3.10/site-packages/ctranslate2/converters/opennmt_py.py	check_opt   sF   r)   c                 C   s   t | dddk}t | dd}t | dd}t | dddkr!d}d	}	n| j}| j}	t | d
d}
tjj| j| jf|
|t| ||	|t	| t | ddd	}t | dd|j
_t|| |D ]}|| qW|D ]}|| qa|S )5Creates a model specification from the model options.r   r   r   r   r   r
   lambda_alignr   r   heads   
multiqueryF)r"   
activationalignment_layeralignment_headsr!   embeddings_mergemulti_query_attentiondecoder_start_tokenz<s>)r   r0   r1   r   TransformerSpecfrom_config
enc_layers
dec_layersr   r   configr4   set_transformer_specregister_source_vocabularyregister_target_vocabulary)r    	variables
src_vocabs
tgt_vocabsr!   r"   r%   r   r0   r1   	num_heads
model_spec	src_vocab	tgt_vocabr'   r'   r(   _get_model_spec_seq2seq:   s6   


rD   c                 C   s  t | dddk}t | dddk}t | dddk}t | dd}t | dd}	t | d	d}
|
|	ks2|
dkr4d
}
|r8dnd
}t | dd}|dk}t | dd}tjj| j|	t| |||| jdk||t | dd|
|d}t | dd|j_t	|j
|dd |D ]}|| qx|S )r*   r   r   r   r   r   r   r,   r-   num_kvNrotary_interleaveTr	   sliding_windowrmsr.   F)
r/   ffn_glur"   alibirms_norm
rotary_dimrF   r3   num_heads_kvrG   norm_epsgư>with_encoder_attention)r   r   TransformerDecoderModelSpecr6   r8   r   
layer_normr9   layer_norm_epsilonset_transformer_decoderdecoderregister_vocabulary)r    r=   r>   r?   r!   r"   r#   r$   r%   r@   rE   rL   rF   rI   rG   rA   rC   r'   r'   r(   _get_model_spec_lmc   sF   
rW   c                 C   s   t | trFd| v rFt | d tr.| d g}| d g}| d}|d ur*||  ||fS dd | d jD }dd | d jD }||fS | d d jg}| d d jg}||fS )	Nsrctgt	src_featsc                 S      g | ]}|d  j jqS r   vocabitos.0fieldr'   r'   r(   
<listcomp>       zget_vocabs.<locals>.<listcomp>c                 S   r[   r\   r]   r`   r'   r'   r(   rc      rd   r   r   )
isinstancedictlistgetextendvaluesfieldsr_   )r^   r>   r?   rZ   r'   r'   r(   
get_vocabs   s   


	rl   c                   @   s&   e Zd ZdZdefddZdd ZdS )OpenNMTPyConverterz(Converts models generated by OpenNMT-py.
model_pathc                 C   s
   || _ dS )zInitializes the OpenNMT-py converter.

        Arguments:
          model_path: Path to the OpenNMT-py PyTorch model (.pt file).
        N)_model_path)selfrn   r'   r'   r(   __init__   s   
zOpenNMTPyConverter.__init__c                 C   s   dd l }|j| jddd}t|d \}}t|d t|d |d }|d	d
 |d  D  |d jdkrEt	|d |||t|dS t
|d |||t|dS )Nr   cpuF)map_locationweights_onlyr^   r    )r!   modelc                 S   s   i | ]	\}}d | |qS )zgenerator.%sr'   )ra   keyvaluer'   r'   r(   
<dictcomp>   s    z,OpenNMTPyConverter._load.<locals>.<dictcomp>	generatorr   )torchloadro   rl   r)   lenupdateitemsr   rW   rD   )rp   rz   
checkpointr>   r?   r=   r'   r'   r(   _load   s6   
zOpenNMTPyConverter._loadN)__name__
__module____qualname____doc__strrq   r   r'   r'   r'   r(   rm      s    rm   c                 C   s   t | j| t| j| d S N)set_transformer_encoderencoderrT   rU   )specr=   r'   r'   r(   r:      s   r:   c                 C   sB   t | |d t| j|d t| jD ]\}}t||d|  qd S )Nr   zencoder.layer_normzencoder.transformer.%d)set_input_layersset_layer_normrR   	enumeratelayerset_transformer_encoder_layer)r   r=   ir   r'   r'   r(   r      s
   r   Tc                 C   sz   t | |d t| j|d t| jD ]\}}t||d| |d qz
t| j|d W d S  ty<   t| j|d Y d S w )NrU   zdecoder.layer_normzdecoder.transformer_layers.%drO   ry   zgenerator.0)	r   r   rR   r   r   set_transformer_decoder_layer
set_linear
projectionKeyError)r   r=   rP   r   r   r'   r'   r(   rT      s   rT   c                 C   sd   t | drt| j|d|  nd| _| j}t|ts|g}t|D ]\}}t||d||f  q!d S )Nposition_encodingsz%s.embeddings.make_embedding.peFz(%s.embeddings.make_embedding.emb_luts.%d)	hasattrset_position_encodingsr   scale_embeddings
embeddingsre   rg   r   set_embeddings)r   r=   scopeembeddings_specsr   embeddings_specr'   r'   r(   r      s"   


r   c                 C   s@   t | j|d|  t| j|d| dd t| jj|d|  d S )N%s.feed_forward%s.self_attnTself_attention%s.layer_norm)set_ffnffnset_multi_head_attentionr   r   rR   r   r=   r   r'   r'   r(   r     s   r   c                 C   sn   t | j|d|  t| j|d| dd t| jj|d|  |r5t| j|d|  t| jj|d|  d S d S )Nr   r   Tr   z%s.layer_norm_1z%s.context_attnz%s.layer_norm_2)r   r   r   r   r   rR   	attention)r   r=   r   rP   r'   r'   r(   r     s   r   c                 C   sZ   t | j|d|  t| j|d|  t| j|d|  t| dr+t| j|d|  d S d S )Nr   z%s.w_1z%s.w_2linear_0_noactz%s.w_3)r   rR   r   linear_0linear_1r   r   r   r'   r'   r(   r   !  s   
r   Fc                 C   s  |r3dd t dD }t|d |d|  t|d |d|  t|d |d	|  t| jd | n1t| jd |d|  d
d t dD }t|d |d|  t|d |d	|  t| jd | t| jd |d|  t| drt|d| | _| j| _d S d S )Nc                 S      g | ]}t  qS r'   r   
LinearSpecra   _r'   r'   r(   rc   +      z,set_multi_head_attention.<locals>.<listcomp>   r   z%s.linear_queryr   z%s.linear_keys   z%s.linear_valuesc                 S   r   r'   r   r   r'   r'   r(   rc   2  r   r   z%s.final_linearrelative_position_keysz'%s.relative_positions_embeddings.weight)	ranger   r   fuse_linearlinearr   _get_variabler   relative_position_values)r   r=   r   r   split_layersr'   r'   r(   r   )  s$   
r   c                 C   st   z
t |d| | _W n ty#   t |d| | _t |d| | _Y nw zt |d| | _W d S  ty9   Y d S w )N	%s.weightz%s.a_2z%s.b_2%s.bias)r   gammar   betar   r'   r'   r(   r   >  s   r   c                 C   s4   t |d| | _|d| }|d ur|| _d S d S )Nr   r   )r   weightrh   bias)r   r=   r   r   r'   r'   r(   r   K  s
   
r   c                 C   s   t |d| | _d S )Nr   )r   r   r   r'   r'   r(   r   R  s   r   c                 C   s   t |d|  | _d S )Nz%s.pe)r   squeeze	encodingsr   r'   r'   r(   r   V  s   r   c                 C   s   | | S r   r'   )r=   namer'   r'   r(   r   Z  s   r   c                  C   sD   t jt jd} | jdddd t|  |  }t|j	| d S )N)formatter_classz--model_pathTzModel path.)requiredhelp)
argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsrm   rn   convert_from_args)parserargsr'   r'   r(   main^  s   
r   __main__)T)F)&r   ctranslate2.convertersr    ctranslate2.converters.converterr   ctranslate2.specsr   r   
ActivationGELUGELUTanhRELUSWISHr   EmbeddingsMergeCONCATADDr   r)   rD   rW   rl   rm   r:   r   rT   r   r   r   r   r   r   r   r   r   r   r   r   r'   r'   r'   r(   <module>   sD    &),0




