o
    i                     @   s:  d dl mZmZmZmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZmZ d d	lmZ d
dlmZ ddlmZmZmZ dde ddededede def
ddZ!	d!dedededeeeegee f f fddZ"dedeeef dedeeeee gef f fddZ#d S )"    )CallableListTuplecast)ScalarWeight)SimpleFrozenDict)PyTorchWrapper_v2)Model)PyTorchGradScaler)
ArgsKwargsRagged)torch2xpxp2torch)Tensor   )	all_equal   )ScalarWeightInTScalarWeightModelTScalarWeightOutTg?F)dropout_probmixed_precisiongrad_scaler_config
num_layersr   r   r   returnc              	   C   sN   t |tri }d|vr||d< t| d |d}t|tt|tdi |d}|S )a  Construct a model that accepts a list of transformer layer
    outputs and returns a weighted representation of the same.

    num_layers (int):
        Number of transformer hidden layers.
    dropout_prob (float):
        Dropout probability.
    mixed_precision (bool):
        Use mixed-precision training.
    grad_scaler_config (dict):
        Configuration passed to the PyTorch gradient scaler.
    enabledr   )r   r   )convert_inputsconvert_outputsr   grad_scalerN )
isinstancer   r   r   _convert_inputs_convert_outputsr
   )r   r   r   r   scalar_weighting_layermodelr   r   c/home/ubuntu/.local/lib/python3.10/site-packages/spacy_curated_transformers/models/scalar_weight.pybuild_scalar_weight_v1   s   
r&   r$   Xis_trainc              	      s
  | j t }dd  D t}dd  D }t|s&tdt| dd  D }t|s:tdt| |||d |d }t D ]\}}	| }
t|	D ]\}}|j||d |
|d d f< qVqJt	|dd	}d
t
f fdd}t
|fi d}||fS )Nc                 S   s   g | ]
}|d  j jd  qS )r   datashape.0xr   r   r%   
<listcomp>@   s    z#_convert_inputs.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )lenr,   r   r   r%   r/   B   s    zdInput passed to the `ScalarWeight` model do not have the same number layers. Distinct layer counts: c                 S   s"   g | ]}|D ]}|j jd  qqS )r   r)   )r-   r.   layerr   r   r%   r/   H   s   " zVInput passed to the `ScalarWeight` model do not have the same width. Distinct widths: r   T)requires_gradd_inputsc           	   	      s   t t| jd }g }t|jd D ]7}| } | d j}g }t|jd D ]}||d ||d d f }|tt|d|d q'|| q|S )Nr   r   ops)r*   lengths)	r   r   argsranger+   r6   appendr   r   )	r3   	dt_inputsdXdoc_idxseq_lenr6   	dX_layers	layer_idxdX_layerr'   r5   seq_lensr   r%   convert_from_torch_backwardW   s   z4_convert_inputs.<locals>.convert_from_torch_backwardr7   kwargs)r5   r0   maxr   
ValueErrorsetalloc4f	enumeratedataXdr   r   )r$   r'   r(   
batch_sizemax_seq_lenr   layer_widthsXopsr<   layersr=   r?   r*   XtrC   outputr   rA   r%   r!   :   s8   r!   inputs_outputsc           
         s   | j |\} g }tt|D ],}|| d jjd }|| d j} |d |d d f }|tt|| j d|d qdt	t dt
f fdd}	||	fS )Nr   r4   )r6   dYr   c                    s   t dd | D }| d jjd }t| ||}tt| D ]}| | jjd }| | j||d |d d f< q t|}t fd|idS )Nc                 s   s    | ]	}|j jd  V  qdS )r   Nr)   )r-   yr   r   r%   	<genexpr>{   s    zG_convert_outputs.<locals>.convert_for_torch_backward.<locals>.<genexpr>r   r   grad_tensorsrD   )	rF   r*   r+   alloc3fr0   r8   rK   r   r   )rT   rM   widthdYt_opsr<   r=   dYtYtr5   r   r%   convert_for_torch_backwardz   s   z4_convert_outputs.<locals>.convert_for_torch_backward)r5   r8   r0   r*   r+   r6   r9   r   r   r   r   )
r$   rS   r(   r'   Yr<   r=   r6   Y_layerr^   r   r\   r%   r"   l   s   r"   N)F)$typingr   r   r   r   )curated_transformers.models.scalar_weightr   
spacy.utilr   thinc.layers.pytorchwrapperr   thinc.modelr	   thinc.shims.pytorch_grad_scalerr
   thinc.typesr   r   
thinc.utilr   r   torchr   utilr   typesr   r   r   intfloatbooldictr&   r!   r"   r   r   r   r%   <module>   sX    
+
2
