o
    iNm                     @   s6  d Z ddlZddlZddlZddlmZmZmZmZm	Z	m
Z
mZmZmZmZ ddlmZ e	 r5ddlmZ eeZG dd deZ	d%d
dZd&defddZ					d'ddZd(ddZ					d'ddZdd Z							d)ddZ						d*ddZ	d+ddZd,dd Z d,d!d"Z!d,d#d$Z"dS )-z#PyTorch - TF 2.0 general utilities.    N   )
ExplicitEnumcheck_torch_load_is_safeexpand_dimsis_numpy_arrayis_safetensors_availableis_torch_tensorloggingreshapesqueezetensor_size)	transpose)	safe_openc                   @   s    e Zd ZdZdZdZdZdZdS )TransposeTypez
    Possible ...
    nosimpleconv1dconv2dN)__name__
__module____qualname____doc__NOSIMPLECONV1DCONV2D r   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/transformers/modeling_tf_pytorch_utils.pyr   ,   s    r    c                 C   s  |dur%|  |sd| vrtd|  d| d| t|d } | d} | dd} t| d	kr5d
| v s<| d
dkrBtd|  tdd| } | dd} tdd| } | d} t| dkrg| dd } t	|}| d dkr|durt|dkrt
j}n+| d dkr|durt|dkrt
j}nt| d dv pd| v pd| v rt
j}nt
j}| d dks| d dks| d dkrd| d< | d dkrd| d< | d dks| d dkr| d d d!| d< d"| } |r| |dd} | |fS )#aU  
    Convert a TF 2.0 model variable name in a pytorch model weight name.

    Conventions for TF2.0 scopes -> PyTorch attribute names conversions:

        - '$1___$2' is replaced by $2 (can be used to duplicate or remove layers in TF2.0 vs PyTorch)
        - '_._' is replaced by a new level separation (can be used to convert TF2.0 lists in PyTorch nn.ModulesList)

    return tuple with:

        - pytorch model weight name
        - transpose: `TransposeType` member indicating whether and how TF2.0 and PyTorch weights matrices should be
          transposed with regards to each other
    Nfinal_logits_biaszWeight name z  does not start with name_scope z. This is an internal error in Transformers, so (unless you were doing something really evil) please open an issue to report it!/z:0r   i   ___
   zBTF variable name is too long or contains too many ___ separators: z/[^/]*___([^/]*)/z/\1/z_._z//+r   kernel      )r$   pointwise_kerneldepthwise_kernel	emb_projs	out_projs
embeddingsgammaweightbetabiasr'   r(   _kernelz.weight.)
startswith
ValueErrorlenlstripreplacecountresubsplitlistr   r   r   boolr   r   join)tf_namestart_prefix_to_removetf_weight_shape
name_scoper   r   r   r   (convert_tf_weight_name_to_pt_weight_name7   sV   
"
  $
rB   Tr   c              
   C   s   | t ju r|r	dnd}t||d}n| t ju rt|dd}n	| t ju r't|}|du r-|S t|t|jk r;t|}nt|t|jkrJt|dd}t	|t	|jkrszt
||}W |S  tyr } z| j||f7  _|d}~ww |S )z
    Apply a transpose to some weight then tries to reshape the weight to the same shape as a given shape, all in a
    framework agnostic way.
    )   r&   r   r   )r&   rC   r   r   )axes)rC   r   r   Nr   )axis)r   r   transpose_funcr   r   r4   shaper   r   r;   r
   AssertionErrorargs)r   r-   match_shapept_to_tfrD   er   r   r   apply_transpose   s.   



rM   Fc              	   C   s   zddl }ddl}ddlm}	 W n ty   td  w t|tr&|g}i }
|D ]*}t	j
|}td|  |drD|	|}nt  |j|ddd	}|
| q*td
tdd |
 D dd t| |
|||||dS )*Load pytorch checkpoints in a TF 2.0 modelr   N)	load_fileLoading a PyTorch model in TensorFlow, requires both PyTorch and TensorFlow to be installed. Please see https://pytorch.org/ and https://www.tensorflow.org/install/ for installation instructions.zLoading PyTorch weights from z.safetensorscpuT)map_locationweights_onlyzPyTorch checkpoint contains c                 s   s    | ]}|  V  qd S N)numel).0tr   r   r   	<genexpr>   s    z7load_pytorch_checkpoint_in_tf2_model.<locals>.<genexpr>,z parameters	tf_inputsallow_missing_keysoutput_loading_info_prefixtf_to_pt_weight_rename)
tensorflowtorchsafetensors.torchrO   ImportErrorloggererror
isinstancestrospathabspathinfoendswithr   loadupdatesumvalues!load_pytorch_weights_in_tf2_model)tf_modelpytorch_checkpoint_pathr[   r\   r]   r^   r_   tfra   safe_load_filept_state_dictri   pt_path
state_dictr   r   r   $load_pytorch_checkpoint_in_tf2_model   s>   



&ry   c                 C   s   |  }t| |||dS )rN   )r[   r\   )rx   rq   )rr   pt_modelr[   r\   rv   r   r   r   load_pytorch_model_in_tf2_model   s   r{   c              	      s\   z
ddl }ddl W n ty   td  w  fdd| D }t| ||||||dS )z*Load pytorch state_dict in a TF 2.0 model.r   NrP   c                    s2   i | ]\}}||j  jkr| n|  qS r   )dtypebfloat16numpyfloat)rV   kvra   r   r   
<dictcomp>   s    &z5load_pytorch_weights_in_tf2_model.<locals>.<dictcomp>rZ   )r`   ra   rc   rd   re   items$load_pytorch_state_dict_in_tf2_model)rr   rv   r[   r\   r]   r^   r_   rt   r   r   r   rq      s*   

rq   c                 C   s   t |dkrtd| d| d| d| d	 n	td| d t | dkr5td	| d
|  d ntd| d| d t |dkr_ddd |D }td| d| d d S d S )Nr   zSSome weights of the PyTorch model were not used when initializing the TF 2.0 model : ,
- This IS expected if you are initializing z from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing z from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).z6All PyTorch model weights were used when initializing .
z,Some weights or buffers of the TF 2.0 model zH were not initialized from the PyTorch model and are newly initialized: o
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.All the weights of z were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use * for predictions without further training.
c              	   S   s*   g | ]\}}}d | d| d| dqS )z- z: found shape z in the checkpoint and z in the model instantiatedr   )rV   keyshape1shape2r   r   r   
<listcomp>'  s    z%_log_key_warnings.<locals>.<listcomp>Some weights of zh were not initialized from the model checkpoint are newly initialized because the shapes did not match:
)r4   rd   warningr=   )missing_keysunexpected_keysmismatched_keys
class_namemismatched_warningr   r   r   _log_key_warnings  sH   
r   c	                     s  ddl }	|du rj}|du rd}|r.|	| |dd W d   n1 s)w   Y  i }
|D ]r}d}d|v r@|dd}d|v rJ|dd	}d
|v rT|d
d}d|v r^|dd}|d}d |ddd ddgkrw|d d  n|ddd ddgkr|d d   dur|dd  g }d|}|du r|}||
|< q2d}tfdd|
D sjd }jj	 }d}t
|
 }g }g }t|d}|D ]}|j}t|||j|d\ }|dur| }|D ]
}||
v r|  nq|d   |
vr"|r|  qЈjdurt fddjD rqt  d|
  }|r/||}n|| }z	t|||j}W n0 |	jjyl } z!|sXt|}|d7 }|	j|| |j|jf W Y d}~qd}~ww |t|7 }||	||j ~|  qtd|dd  t|}jdurjD ]fd!d"|D }qj durj D ]fd#d"|D }q|st!|||j"j#d$ |r|||d%}|fS S )&zLoad a pytorch state_dict in a TF 2.0 model. pt_state_dict can be either an actual dict or a lazy-loading
    safetensors archive created with the safe_open() function.r   Nr   Ftrainingr,   r-   r.   r/   running_varmoving_variancerunning_meanmoving_meanr1   rC   parametrizations	original0_g	original1_vc                 3       | ]	}|  jV  qd S rT   r2   base_model_prefixrV   s)rr   r   r   rX   l      z7load_pytorch_state_dict_in_tf2_model.<locals>.<genexpr>
get_tensor)r?   r@   rA   c                 3   s     | ]}t | d uV  qd S rT   r8   search)rV   pat)namer   r   rX     s    z not found in PyTorch modelz_
	You may consider adding `ignore_mismatched_sizes=True` in the model `from_pretrained` method.zLoaded rY   z  parameters in the TF 2.0 model.c                        g | ]}t  |d u r|qS rT   r   rV   r   r   r   r   r          z8load_pytorch_state_dict_in_tf2_model.<locals>.<listcomp>c                    r   rT   r   r   r   r   r   r     r   r   r   r   r   )$r`   dummy_inputsrA   r6   r:   r=   anyr   trainable_weightsnon_trainable_weightssetkeyshasattrr   rB   rG   append_keys_to_ignore_on_load_missingAttributeErrorr   rM   errorsInvalidArgumentErrorrg   r   assigncastr|   discardrd   rk   r;   "_keys_to_ignore_on_load_unexpectedr   	__class__r   ) rr   rv   r[   r\   r]   r^   r_   ignore_mismatched_sizesskip_logger_warningsrt   tf_keys_to_pt_keysr   new_keykey_componentsr?   symbolic_weightstf_loaded_numelall_pytorch_weightsr   r   is_safetensor_archivesymbolic_weightsw_namer   aliasesaliasstate_dict_namearrayrL   	error_msgr   loading_infor   )r   r   rr   r   r   4  s   









r   c                 C   s   g }|D ],}	t |	dd}
t| |
||d|||dd	\} }W d    n1 s&w   Y  || qttjdd |D  }tdd |D g }tdd |D g }t|||| jj	d	 |rg|||d
}| |fS | S )Nrt   )	frameworkT)r[   r\   r]   r^   r_   r   r   c                 S   s   g | ]}t |d  qS )r   )r   rV   rk   r   r   r   r         zAload_sharded_pytorch_safetensors_in_tf2_model.<locals>.<listcomp>c                 S      g | ]}|d  qS )r   r   r   r   r   r   r         c                 S   r   )r   r   r   r   r   r   r     r   r   r   )
r   r   r   sortedr   intersectionro   r   r   r   )rr   safetensors_shardsr[   r\   r]   r^   r_   r   all_loading_infosshardsafetensors_archiver   r   r   r   r   r   r   -load_sharded_pytorch_safetensors_in_tf2_model  s6   
r   c                 C   s   z
ddl }ddl}W n ty   td  w ddl}ddlm} td|  d| j	j
 }	t||	}
|
| j}|du rA|j}|durK||dd	 ||| t| |||d
S )z
    Load TF 2.0 HDF5 checkpoint in a PyTorch model We use HDF5 to easily do transfer learning (see
    https://github.com/tensorflow/tensorflow/blob/ee16fcac960ae660e0e4496658a366e2f745e1f0/tensorflow/python/keras/engine/network.py#L1352-L1357).
    r   NLoading a TensorFlow model in PyTorch, requires both PyTorch and TensorFlow to be installed. Please see https://pytorch.org/ and https://www.tensorflow.org/install/ for installation instructions.r   )load_tf_weightsz Loading TensorFlow weights from TFFr   r\   r]   )r`   ra   rc   rd   re   transformersmodeling_tf_utilsr   rk   r   r   getattrconfigr   load_tf2_model_in_pytorch_model)rz   tf_checkpoint_pathr[   r\   r]   rt   ra   r   r   tf_model_class_nametf_model_classrr   r   r   r   $load_tf2_checkpoint_in_pytorch_model  s.   


r   c                 C   s   |j }t| |||dS )z$Load TF 2.0 model in a pytorch modelr   )weights!load_tf2_weights_in_pytorch_model)rz   rr   r\   r]   r   r   r   r   r     s   r   c                 C   sN   z
ddl }ddl}W n ty   td  w dd |D }t| |||dS )z.Load TF2.0 symbolic weights in a PyTorch modelr   Nr   c                 S   s   i | ]}|j | qS r   )r   r~   )rV   	tf_weightr   r   r   r   1  r   z5load_tf2_weights_in_pytorch_model.<locals>.<dictcomp>r   )r`   ra   rc   rd   re   $load_tf2_state_dict_in_pytorch_model)rz   
tf_weightsr\   r]   rt   ra   tf_state_dictr   r   r   r   %  s   r   c                    s  dd l }i }t }d}tfdd|D sjd }i }| D ]\}	}
t|	||
jd\}}|
|f||< q$t|	 }i }g }| D ]\}}|
 |v r`|
 dkr`||
  ||< qG|}|d}d }	|dd d d	d
gkr{|d d }	n|dd d d	dgkr|d d }	|	d ur|d d |	g }d|}||vr|r|| qGt| d|| \}}t|||jdd}t|rt|}t|st|s| }t|r||}|||< |||
 < || qGj|dd\}}||7 }jd urjD ]  fdd|D }qjd ur(jD ]  fdd|D }qt|dkrHtdjj d| djj djj d	 ntdjj d t|dkritdjj d| d ntd jj d!jj d" td#|  |r||d$}|fS S )%Nr   r   c                 3   r   rT   r   r   )rz   r   r   rX   @  r   z7load_tf2_state_dict_in_pytorch_model.<locals>.<genexpr>r1   )r?   r@   r   rC   r   r   r   r   r   r   z not found in TF 2.0 modelF)rK   )strictc                    r   rT   r   r   r   r   r   r     r   z8load_tf2_state_dict_in_pytorch_model.<locals>.<listcomp>c                    r   rT   r   r   r   r   r   r     r   zSSome weights of the TF 2.0 model were not used when initializing the PyTorch model r   r   z from a TF 2.0 model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a TFBertForPreTraining model).
- This IS NOT expected if you are initializing z from a TF 2.0 model that you expect to be exactly identical (e.g. initializing a BertForSequenceClassification model from a TFBertForSequenceClassification model).z5All TF 2.0 model weights were used when initializing r   r   zG were not initialized from the TF 2.0 model and are newly initialized: r   r   z were initialized from the TF 2.0 model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use r   z1Weights or buffers not loaded from TF 2.0 model: )r   r   ) ra   dictnamed_parametersr   r   r   rB   rG   r   r   data_ptrr:   r=   r   r   rM   r~   isscalarr   r   r   
from_numpyr   load_state_dictr   r   r4   rd   r   r   r   rk   )rz   r   r\   r]   ra   new_pt_params_dictcurrent_pt_params_dictr?   tf_weights_mapr   r   pt_namer   all_tf_weightsloaded_pt_weights_data_ptrmissing_keys_ptpt_weight_name	pt_weightpt_weight_name_to_checkr   r   r   r   r   r   )r   rz   r   r   7  s   











r   )r   NN)NT)NFFNN)NF)NFFNNFF)NFFNNF)NFF)FF)#r   rh   r8   r~   utilsr   r   r   r   r   r   r	   r
   r   r   r   rF   safetensorsr   
get_loggerr   rd   r   rB   rM   ry   r{   rq   r   r   r   r   r   r   r   r   r   r   r   <module>   s`   0

K-

3
#,
 
3

)
	