o
    i                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ G dd deZ	dd Z
dd	 Zd
d Zdd Zdd ZedkrAe  dS dS )    N)	Converter)common_spec
model_spectransformer_specc                   @   s&   e Zd ZdZdefddZdd ZdS )OpenAIGPT2Converterz;Converts GPT-2 models from https://github.com/openai/gpt-2.	model_dirc                 C   s
   || _ dS )zInitializes the OpenAI GPT-2 converter.

        Arguments:
          model_dir: Path to the OpenAI GPT-2 model directory.
        N)
_model_dir)selfr    r
   _/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/ctranslate2/converters/openai_gpt2.py__init__   s   
zOpenAIGPT2Converter.__init__c                    s  dd l }|j| j  fdd   D }ttj	| jd}t
|}W d    n1 s2w   Y  ttj	| jd}t
|}dd t| dd	 d
D }W d    n1 s`w   Y  tjj|d |d dtjjd}t|j|d d|_d|_d|_|| |S )Nr   c                    s   i | ]}|  |qS r
   )
get_tensor).0namereaderr
   r   
<dictcomp>   s    
z-OpenAIGPT2Converter._load.<locals>.<dictcomp>zhparams.jsonzencoder.jsonc                 S   s   g | ]\}}|qS r
   r
   )r   tokenindexr
   r
   r   
<listcomp>!   s    z-OpenAIGPT2Converter._load.<locals>.<listcomp>c                 S   s   | d S )N   r
   )itemr
   r
   r   <lambda>#   s    z+OpenAIGPT2Converter._load.<locals>.<lambda>)keyn_layern_headT)pre_norm
activationmodelz<|endoftext|>)
tensorflowtrainload_checkpointr   get_variable_to_shape_mapkeysopenospathjoinjsonloadsorteditemsr   TransformerDecoderModelSpecfrom_configr   
ActivationGELUTanhset_decoderdecoder	unk_token	bos_token	eos_tokenregister_vocabulary)r	   tfweightshparams_filehparams
vocab_filevocabspecr
   r   r   _load   s4   



zOpenAIGPT2Converter._loadN)__name__
__module____qualname____doc__strr   r=   r
   r
   r
   r   r   	   s    r   c                 C   sp   |d|  | j _|d|  | j_d| _| j j| j_t| j|d|  t| j	D ]\}}t
||d||f  q'd S )Nz%s/wtez%s/wpeFz%s/ln_fz%s/h%d)
embeddingsweightposition_encodings	encodingsscale_embeddings
projectionset_layer_norm
layer_norm	enumeratelayer	set_layer)r<   r7   scopei
layer_specr
   r
   r   r0   4   s   r0   c                 C   s    |d|  | _ |d|  | _d S )Nz%s/g%s/b)gammabetar<   r7   rN   r
   r
   r   rI   >   s   rI   c                 C   s(   |d|     | _|d|  | _d S )Nz%s/wrQ   )squeeze	transposerD   biasrT   r
   r
   r   
set_linearC   s   rX   c                 C   s   t | jj|d|  t| jjd |d|  t| jjd |d|  t | jj|d|  t| jj|d|  t| jj|d|  d S )	Nz%s/ln_1r   z%s/attn/c_attnr   z%s/attn/c_projz%s/ln_2z%s/mlp/c_fcz%s/mlp/c_proj)rI   self_attentionrJ   rX   linearffnlinear_0linear_1rT   r
   r
   r   rM   H   s   rM   c                  C   sH   t jt jd} | jdddd t|  |  }t|j}|	| d S )N)formatter_classz--model_dirTzPath to the model directory.)requiredhelp)
argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentr   declare_arguments
parse_argsr   r   convert_from_args)parserargs	converterr
   r
   r   mainQ   s   

rk   __main__)ra   r(   r%    ctranslate2.converters.converterr   ctranslate2.specsr   r   r   r   r0   rI   rX   rM   rk   r>   r
   r
   r
   r   <module>   s    +
	
