o
    iY                     @   s@   d dl Z d dl mZ d dlmZ ddlmZ G dd deZdS )    N)Tensor)Module   )Errorsc                       s>   e Zd Zdddedef fddZdedefd	d
Z  ZS )ScalarWeightg?)dropout_prob
num_layersr   c                   s@   t    tjjt|| _tjjtd| _	|| _
d S )N)      ?)super__init__torchnn	parameter	Parameterzeroslayer_weightstensorscaler   )selfr   r   	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/curated_transformers/models/scalar_weight.pyr      s   

zScalarWeight.__init__layer_outputsreturnc                 C   s   |j d | jj d krttjj| jj d |j d d| jr7t| jd| j	 
 }d| d }| j| }n| j}|jddddd}|| }|jd	d
d| j S )z
        Shapes:
            layer_outputs - (batch_size, seq_len, num_layers, width)

        Returns a weighted tensor of the input with shape (batch_size, seq_len, width).
        r   r      )num_layers_scalar_weightnum_layers_transformerr	   g     )dimF)r   keepdim)shaper   
ValueErrorr   E008formattrainingr   	full_liker   	bernoullisoftmax	unsqueezesumr   )r   r   dropout_masksoftmask_maskr   weighted_layersr   r   r   forward   s(   

zScalarWeight.forward)	__name__
__module____qualname__intfloatr   r   r/   __classcell__r   r   r   r   r   
   s    r   )r   r   torch.nnr   errorsr   r   r   r   r   r   <module>   s
    