o
    i&                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	 d dl
Z
ddlmZmZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZ d
dlmZ d
dlm Z  G dd de Z!dede"fddZ#dede"dddefddZ$dS )    N)BytesIO)AnyCallableDictOptionalcast   )CupyOpscontext_poolsget_current_opsset_gpu_allocator)torch)	Optimizer)
ArgsKwargsFloatsXd)convert_recursiveget_torch_default_deviceiterate_recursivetorch2xpxp2torch   )PyTorchGradScaler)Shimc                       s   e Zd ZdZ							d$dedededee ded	 d
eeege	f  deeee	d	gef  f fddZ
dd Zedd ZdedefddZdefddZdefddZejdd ZdedefddZd d! Zd"d# Z  ZS )%PyTorchShima  Interface between a PyTorch model and a Thinc Model. This container is
    *not* a Thinc Model subclass itself.

    mixed_precision:
        Enable mixed-precision. This changes whitelisted ops to run
        in half precision for better performance and lower memory use.
    grad_scaler:
        The gradient scaler to use for mixed-precision training. If this
        argument is set to "None" and mixed precision is enabled, a gradient
        scaler with the default configuration is used.
    device:
        The PyTorch device to run the model on. When this argument is
        set to "None", the default device for the currently active Thinc
        ops is used.
    serialize_model:
        Callback that receives the wrapped PyTorch model as its argument and
        returns a "bytes" representation of the same. The representation should
        contain all the necessary information to fully deserialize the model.
    deserialize_model:
        Callback that receives the default PyTorch model (passed to the constructor), the
        serialized "bytes" representation and a PyTorch device. It should return a
        fully deserialized model on the target device as its result.
    NFmodel	optimizermixed_precisiongrad_scalerdevicetorch.deviceserialize_modeldeserialize_modelc	                    s   t  ||| |d u rt }|d ur|| |d u r t|}|| || _|| _|d ur1|nt| _	|d ur:|nt
| _tjd uratt trct }	d|	vreddlm}
 td |
   d S d S d S d S )Npytorchr   )get_default_memory_pool)super__init__r   tor   to__grad_scaler_mixed_precisiondefault_serialize_torch_model_serialize_modeldefault_deserialize_torch_model_deserialize_modelr	   xp
isinstancer   r
   getcupyr#   r   free_all_blocks)selfr   configr   r   r   r   r    r!   poolsr#   	__class__ G/home/ubuntu/.local/lib/python3.10/site-packages/thinc/shims/pytorch.pyr%   0   s4   

zPyTorchShim.__init__c                 C   s    |r|  |S | |dd fS )Nc                 S   s   dS )N.r8   )ar8   r8   r9   <lambda>a   s    z&PyTorchShim.__call__.<locals>.<lambda>)begin_updatepredict)r3   inputsis_trainr8   r8   r9   __call__]   s   
zPyTorchShim.__call__c                 C   s$   t | j d }|d u rt S |jS N)next_model
parametersr   r   )r3   pr8   r8   r9   r   c   s   zPyTorchShim.devicer>   returnc              	   C   s   | j   t * tjd| jd | j |ji |j}W d   n1 s'w   Y  W d   n1 s6w   Y  | j   |S )zPass inputs through to the underlying PyTorch model, and return the
        output. No conversions are performed. The PyTorch model is set into
        evaluation mode.
        cuda)device_typeenabledN)	rC   evalr   no_gradautocastr)   argskwargstrain)r3   r>   outputsr8   r8   r9   r=   k   s   


zPyTorchShim.predictc                    sd   j   tjdjd j  ji  j}W d   n1 s"w   Y   fdd}||fS )a'  Pass the inputs through to the underlying PyTorch model, keeping
        track of which items in the input are tensors requiring gradients.
        If the model returns a single value, it is converted into a one-element tuple.
        Return the outputs and a callback to backpropagate.
        rG   )rI   Nc                    s   j j| jd dd| jd< tjj| ji | j g }tj	
 tdd  D ]}|jd ur6||j q)j |}|rDdd }ndd }tdd | S )	Ngrad_tensorsT)inplacec                 S   
   t | dS Ngradhasattrxr8   r8   r9   r;         
 z<PyTorchShim.begin_update.<locals>.backprop.<locals>.<lambda>c                 S   s   | j d ur
| j  S | j S rA   )rU   zero_rX   r8   r8   r9   r;          c                 S   s   | j S rA   )rU   rX   r8   r8   r9   r;      s    c                 S   rS   rT   rV   rX   r8   r8   r9   r;      rZ   )r(   scalerN   r   autogradbackwardrM   	itertoolschainrC   rD   r   rU   appendunscaler   )gradsrQ   
torch_data	found_infgrad_getr>   r3   r8   r9   backprop   s"   


z*PyTorchShim.begin_update.<locals>.backprop)rC   rO   r   rL   r)   rM   rN   )r3   r>   outputri   r8   rh   r9   r<   {   s   
zPyTorchShim.begin_updatec                 C   s~   | j  D ]2\}}|jd ur7| jjs2|| j|fttt|j	ttt|j\}}t
|d|jd|_	|j  q| j  d S )NT)requires_gradr   )rC   named_parametersrU   r(   rf   idr   r   r   datar   r   r[   update)r3   r   namere   paramrU   r8   r8   r9   finish_update   s   

zPyTorchShim.finish_updatec                 c   s    d| j  d}i }| D ]\}}t|dr)||r)t|| jd|||d< q|rIdd | j  D }| j	| d V  | j	| d S d V  d S )Npytorch__
startswith)r    c                 S   s   i | ]	\}}||  qS r8   )clone).0kvr8   r8   r9   
<dictcomp>   r\   z*PyTorchShim.use_params.<locals>.<dictcomp>)
rm   itemsrW   ru   r   r   replacerC   
state_dictload_state_dict)r3   params
key_prefixr~   ry   rz   backupr8   r8   r9   
use_params   s   
zPyTorchShim.use_paramsrH   	device_idc                 C   sB   |dkr| j   d S |dkr| j | d S d| d}t|)NcpugpuzInvalid device_type: z. Try 'cpu' or 'gpu')rC   r   rG   
ValueError)r3   rH   r   msgr8   r8   r9   	to_device   s   zPyTorchShim.to_devicec                 C   s"   |  | j}| j|d}t|S )N)r4   state)r+   rC   cfgsrslymsgpack_dumps)r3   model_bytesr   r8   r8   r9   to_bytes   s   
zPyTorchShim.to_bytesc                 C   s@   t  }t|}|d | _| | j|d || _| j| | S )Nr4   r   )r   r   msgpack_loadsr   r-   rC   r(   r'   )r3   
bytes_datar   r   r8   r8   r9   
from_bytes   s   

zPyTorchShim.from_bytes)NNFNNNN)__name__
__module____qualname____doc__r   boolr   r   r   bytesr%   r@   propertyr   r   r=   r<   r   rr   
contextlibcontextmanagerr   strintr   r   r   __classcell__r8   r8   r6   r9   r      sF    	-
1
	r   r   rF   c                 C   s(   t  }t|  | |d | S )zSerializes the parameters of the wrapped PyTorch model to bytes.

    model:
        Wrapped PyTorch model.

    Returns:
        A `bytes` object that encapsulates the serialized model parameters.
    r   )r   r   saver~   seekgetvalue)r   fileliker8   r8   r9   r*      s   	
r*   state_bytesr   r   c                 C   s4   t |}|d | tj||d | | | S )aA  Deserializes the parameters of the wrapped PyTorch model and
    moves it to the specified device.

    model:
        Wrapped PyTorch model.
    state_bytes:
        Serialized parameters as a byte stream.
    device:
        PyTorch device to which the model is bound.

    Returns:
        The deserialized model.
    r   )map_location)r   r   r   r   loadr&   )r   r   r   r   r8   r8   r9   r,      s
   

r,   )%r   r`   ior   typingr   r   r   r   r   r   backendsr	   r
   r   r   compatr   
optimizersr   typesr   r   utilr   r   r   r   r   pytorch_grad_scalerr   shimr   r   r   r*   r,   r8   r8   r8   r9   <module>   s0     N