o
    i3T                     @   s  d Z ddlZddlmZ ddlZddlmZ ddlZddl	m
Z
 ddlmZmZ ddlZddlmZmZ ddlmZmZ e rCddlZe rRdd	lmZ dd
lmZ eeZ	dddZdee  dej!de"e ej!f de deee  ej!f f
ddZ#dd Z$dd Z%dd Z&dd Z'dS )z!PyTorch - Flax general utilities.    N)UnpicklingError)
from_bytes)flatten_dictunflatten_dict   )is_safetensors_availableis_torch_available)check_torch_load_is_safelogging)	safe_open)	load_fileFc           
   	   C   s   |swt j|}td|  |dr<i }t|dd}| D ]	}||||< q"W d   n1 s6w   Y  n4zddl	}W n t
tfyQ   td  w t  |j|dd	d
}tdtdd | D dd t|| }	|	S t|| }	|	S )z(Load pytorch checkpoints in a flax modelzLoading PyTorch weights from .safetensorsflax)	frameworkNr   zLoading a PyTorch model in Flax, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.cpuT)map_locationweights_onlyzPyTorch checkpoint contains c                 s   s    | ]}|  V  qd S N)numel).0t r   \/home/ubuntu/.local/lib/python3.10/site-packages/transformers/modeling_flax_pytorch_utils.py	<genexpr>K   s    z=load_pytorch_checkpoint_in_flax_state_dict.<locals>.<genexpr>,z parameters.)ospathabspathloggerinfoendswithr   keys
get_tensortorchImportErrorModuleNotFoundErrorerrorr	   loadsumvalues"convert_pytorch_state_dict_to_flax*convert_pytorch_sharded_state_dict_to_flax)

flax_modelpytorch_checkpoint_path
is_shardedallow_missing_keyspt_pathpt_state_dictfkr#   flax_state_dictr   r   r   *load_pytorch_checkpoint_in_flax_state_dict0   s4   
&

r5   pt_tuple_key	pt_tensorrandom_flax_state_dictmodel_prefixreturnc                    s  dt t dtf fdd}| dd d }| d dv r$||r$||fS | dd d	 }| d d
kr:|| s:||fS | dd d }| d dkrP|| sP||fS | dd d }| d dkrf||rf||fS | dd d }| d dkr|jdkr|| s|dddd}||fS | dd d }| d dkr|| s|j}||fS | dd d }| d dkr||fS | dd d }| d dkr||fS d}| ddd dkr| d d }n| ddd dkr| d d }|dur| dd |f }||fS | |fS )zYRename PT weight names to corresponding Flax weight names and reshape tensor if necessarykeyr:   c                    s   t t|  f|  h@ dkS )zAChecks if `key` of `(prefix,) + key` is in random_flax_state_dictr   )lenset)r;   r9   r8   r   r   is_key_or_prefix_key_in_dict\   s   zCrename_key_and_reshape_tensor.<locals>.is_key_or_prefix_key_in_dictN)scale)weightgamma)meanrunning_mean)varrunning_var)	embeddingrB   )kernel         r   r   rB   rC   )biasbeta)parametrizations	original0_g)rQ   	original1_v)tuplestrboolndim	transposeT)r6   r7   r8   r9   r?   renamed_pt_tuple_keynamer   r>   r   rename_key_and_reshape_tensorT   sH   r_   c              	   C   s^  t  ottt|  tj}|rtjnd}dd |  D }|r;|  D ]\}}|j	|kr2|
 }|  | |< q%|j}d|jv rI|jd }n|j}t|}	d|jv rat|jd }
|	|
 i }||voo|dd | D v }||v o||dd | D v}|  D ]\}}t|d	}|| |k}|d
 |k}|r|r|dd  }t|||	|\}}|f| |	v }|r|r|f| }||	v r|j|	| jkrtd| d|	| j d|j d	d|jv rd|d v sd|d v rt||d| < qd|d v r||d  q|st|ntj|tjd|d| < q|s t|ntj|tjd||< qt|S )Nbfloat16c                 S      i | ]\}}||j qS r   dtyper   r3   vr   r   r   
<dictcomp>       z6convert_pytorch_state_dict_to_flax.<locals>.<dictcomp>paramsbatch_statsc                 S      h | ]	}| d d qS .r   splitr   r3   r   r   r   	<setcomp>       z5convert_pytorch_state_dict_to_flax.<locals>.<setcomp>c                 S   rj   rk   rm   ro   r   r   r   rp      rq   rl   r   r   1PyTorch checkpoint seems to be incorrect. Weight  was expected to be of shape 	, but is rD   r@   rF   ri   num_batches_trackedrb   rh   )r   
isinstancenextiterr)   r#   Tensorr`   itemsrc   floatr   numpybase_model_prefixrh   r   updaterW   rn   r_   shape
ValueErrorjnpasarraypopr   )r1   r,   from_binr`   weight_dtypesr3   re   r9   flax_model_paramsr8   flax_batch_statsr4   $load_model_with_head_into_base_model$load_base_model_into_model_with_headpt_keyr7   r6   is_bfloat_16has_base_model_prefixflax_keyflax_tensorrequire_base_model_prefixr   r   r   r*      sp   




r*   c              
      sD  dd l  i }| D ]}t   j|dd}dd | D } fdd| D }|j}d|jv rE|jd }t|}|t|jd  n|j}t|}||voX|d	d
 |D v }	||v oe|dd
 |D v}
| D ]\}}t|	d}||  j
k}|d |k}|	r|r|dd  }t||||\}}|f| |v }|
r|r|f| }||v r|j|| jkrtd| d|| j d|j dd|jv r
d|d v rt||d| < qjd|d v rt||d| < qjd|d v r||d  qj|st|ntj|tj
d|d| < qj|st|ntj|tj
d||< qjqt|S )Nr   T)r   c                 S   ra   r   rb   rd   r   r   r   rf      rg   z>convert_pytorch_sharded_state_dict_to_flax.<locals>.<dictcomp>c                    s2   i | ]\}}||j  jkr| n|  qS r   )rc   r`   r~   r}   rd   r#   r   r   rf      s    &ri   rh   c                 S   rj   rk   rm   ro   r   r   r   rp     rq   z=convert_pytorch_sharded_state_dict_to_flax.<locals>.<setcomp>c                 S   rj   rk   rm   ro   r   r   r   rp     rq   rl   r   rr   rs   rt   rD   r@   ru   rF   rv   rb   rw   )r#   r	   r'   r|   r   rh   r   r   rW   rn   r`   r_   r   r   r   r   r   r   )shard_filenamesr,   r4   
shard_filer1   r   r9   r   r8   r   r   r   r7   r6   r   r   r   r   r   r   r   r   r+      sv   




0r+   c              	   C   s   t j|}td|  ttd| jj }|	dr't
|}t|dd}n.t|d!}z	t|| }W n tyE   td| dw W d	   n1 sPw   Y  t| |S )
(Load flax checkpoints in a PyTorch modelzLoading Flax weights from Flaxr   rl   )seprbzUnable to convert z  to Flax deserializable object. N)r   r   r   r   r   getattrtransformers	__class____name__r    safe_load_filer   openr   readr   OSError"load_flax_weights_in_pytorch_model)modelflax_checkpoint_pathflax_clsr4   state_fr   r   r   %load_flax_checkpoint_in_pytorch_modelL  s   

r   c                 C   s  zddl }W n ttfy   td  w ttjdd |	 }t
|r5td tjdd |}t|}|  }| j|v oK| jdd	 |D v}| j|voZ| jd
d	 |D v }g }t| }	| D ]9\}
}|
d | jk}d| jf|
 |v }|r|r|
dd }
n
|r|r| jf|
 }
|
d dkr|jdkrd|
|vr|
dd d }
t|d}nE|
d dkrd|
|vr|
dd d }
|j}n,|
d dv r|
dd d }
nd|
d v r|
dd d }
nd|
d v r|
dd d }
d|v rd|
dd }nd|
}i }|D ]F}|d}d}|ddd ddgkr-|d d }n|ddd ddgkr?|d d }|durV|dd |g }d|}|||< q||v ra|| }||v r|j|| jkrtd |
 d!|| j d"|j dt|tjst|n|}||||< |	| qg|| qg|  | t!|	}	t"|dkrtd#| j#j$ d$| d%| j#j$ d&| j#j$ d'	 ntd(| j#j$ d) t"|	dkrtd*| j#j$ d+|	 d, | S td-| j#j$ d.| j#j$ d/ | S )0r   r   NzLoading a Flax weights in PyTorch, requires both PyTorch and Flax to be installed. Please see https://pytorch.org/ and https://flax.readthedocs.io/en/latest/index.html#installation for installation instructions.c                 S   s   | j tjkS r   )rc   r   r`   )xr   r   r   <lambda>p  s    z4load_flax_weights_in_pytorch_model.<locals>.<lambda>zFound ``bfloat16`` weights in Flax model. Casting all ``bfloat16`` weights to ``float32`` before loading those in PyTorch model.c                 S   s   | j tjkr| tjS | S r   )rc   r   r`   astypenpfloat32rw   r   r   r   r   y  s    c                 S   rj   rk   rm   ro   r   r   r   rp     rq   z5load_flax_weights_in_pytorch_model.<locals>.<setcomp>c                 S   rj   rk   rm   ro   r   r   r   rp     rq   rl   r   r@   rI   rJ   rM   )rL   rK   r   r   )rA   rH   rD   )rE   rF   )rG   ri   rP   rK   rQ   rR   rS   rT   rU   rV   z.Flax checkpoint seems to be incorrect. Weight rs   rt   zQSome weights of the Flax model were not used when initializing the PyTorch model z: z,
- This IS expected if you are initializing z from a Flax model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a FlaxBertForPreTraining model).
- This IS NOT expected if you are initializing z from a Flax model that you expect to be exactly identical (e.g. initializing a BertForSequenceClassification model from a FlaxBertForSequenceClassification model).z3All Flax model weights were used when initializing z.
zSome weights of zE were not initialized from the Flax model and are newly initialized: zo
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.zAll the weights of z were initialized from the Flax model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use z* for predictions without further training.)%r#   r$   r%   r   r&   r   jax	tree_utiltree_mapr)   anywarning
state_dictr   r=   r!   r|   joinrZ   r   r[   r\   rn   r   r   rx   r   ndarrayr   
from_numpyremoveappendload_state_dictlistr<   r   r   )pt_model
flax_stater#   is_type_bf16r4   pt_model_dictr   r   unexpected_keysmissing_keysflax_key_tupler   r   r   r   special_pt_namesr;   key_componentsr^   key_to_checkr   r   r   r   b  s   	

$








r   )F)(__doc__r   pickler   r   	jax.numpyr~   r   r   flax.serializationr   flax.traverse_utilr   r   r    r   r   utilsr	   r
   r#   safetensorsr   safetensors.flaxr   r   
get_loggerr   r   r5   rW   rX   r   dictr_   r*   r+   r   r   r   r   r   r   <module>   sD   
	
$
C\Y