o
    iw                      @   s`   d dl Z d dlZd dlmZmZmZ d dlmZ G dd deZ	dd Z
G dd	 d	ejjZdS )
    N)OptionalTupleUnion
pad_to_lenc                       s    e Zd ZdZ fddZ  ZS )TooShortUttErrorzRaised when the utt is too short for subsampling.

    Args:
        message (str): Message for error catch
        actual_size (int): the short size that cannot pass the subsampling
        limit (int): the limit size for subsampling

    c                    s   t  | || _|| _dS )z/Construct a TooShortUttError for error handler.N)super__init__actual_sizelimit)selfmessager
   r   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/funasr/models/rwkv_bat/rwkv_subsampling.pyr	      s   
zTooShortUttError.__init__)__name__
__module____qualname____doc__r	   __classcell__r   r   r   r   r      s    	r   c                 C   s\   t | tr|dk rdS t | tr|dk rdS t | tr!|dk r!dS t | tr,|dk r,dS d	S )
z4Check if the utterance is too short for subsampling.   )Tr      )Tr      )Tr      )Tr   )F)
isinstanceConv2dSubsampling2Conv2dSubsamplingConv2dSubsampling6Conv2dSubsampling8)inssizer   r   r   check_short_utt   s   r#   c                       s   e Zd ZdZ			ddedeeef deded	ee d
df fddZde	j
dee	j
 dee	j
 d
ee	j
e	j
f fddZde	j
d
e	j
fddZded
efddZ  ZS )RWKVConvInputzStreaming ConvInput module definition.
    Args:
        input_size: Input size.
        conv_size: Convolution size.
        subsampling_factor: Subsampling factor.
        output_size: Block output dimension.
       r   N
input_size	conv_sizesubsampling_factorconv_kernel_sizeoutput_sizereturnc                    s  t    |dkr|\}}}tjtjjd||d|d d dtj tjj|||ddg|d d dtj tjj|||d|d d dtj tjj|||ddg|d d dtj tjj|||d|d d dtj tjj|||ddg|d d dtj | _||d d  }	d| _d| _	| j
| _n|\}}}t|d }
tjtjjd||d|d d dtj tjj||||
dg|d d dtj tjj|||d|d d dtj tjj|||ddg|d d dtj tjj|||d|d d dtj tjj|||d|d d dtj | _||d d  }	|| _| j
| _|
| _	d| _|durAtj|	|| _|| _dS d| _|	| _dS )zConstruct a ConvInput object.      )stridepaddingr   N)r   r	   torchnn
SequentialConv2dReLUconvr(   stride_1create_new_vgg_maskcreate_new_maskintmin_frame_lengthLinearoutputr*   )r   r&   r'   r(   r)   r*   
conv_size1
conv_size2
conv_size3output_projkernel_1r   r   r   r	   3   s   
	





/






/


zRWKVConvInput.__init__xmask
chunk_sizec                    st  |dur|  |}t|dd}| \}}}|d}|dur^t|| j t	t
||| j    t fdd|}t|}tj|dd} || j  }||| d|| j |}| |}| \}	}
}	}|dur|dd |d|
| ddd|ddf }n|dd |d|
| }| jdur| |}||ddd|f ddd|df fS )	a'  Encode input sequences.
        Args:
            x: ConvInput input sequences. (B, T, D_feats)
            mask: Mask of input sequences. (B, 1, T)
        Returns:
            x: ConvInput output sequences. (B, sub(T), D_out)
            mask: Mask of output sequences. (B, 1, sub(T))
        Nr   r,   c                    s   t |  dS )Nr,   r   )inputsmax_input_lengthr   r   <lambda>   s    z'RWKVConvInput.forward.<locals>.<lambda>)dimr-   r   )r8   maxeqsumr"   	unsqueezer9   r(   mathceilfloatmaplistr0   stackviewr5   	transpose
contiguousr<   )r   rB   rC   rD   olensbtfN_chunks_cr   rF   r   forward   s4   


6

.zRWKVConvInput.forwardc                 C   s8   | j dkr|dddddf dddd| jf S |S )zCreate a new mask for VGG output sequences.
        Args:
            mask: Mask of input sequences. (B, T)
        Returns:
            mask: Mask of output sequences. (B, sub(T))
        r,   Nr-   )r(   r6   )r   rC   r   r   r   r7      s   
*z!RWKVConvInput.create_new_vgg_maskr"   c                 C   s
   || j  S )zReturn the original size before subsampling for a given size.
        Args:
            size: Number of frames after subsampling.
        Returns:
            : Number of frames before subsampling.
        )r(   )r   r"   r   r   r   get_size_before_subsampling   s   
z)RWKVConvInput.get_size_before_subsampling)r%   r   N)r   r   r   r   r9   r   r   r   r	   r0   Tensorr^   r7   r_   r   r   r   r   r   r$   *   s<    
 

+r$   )rN   r0   typingr   r   r   *funasr.models.transformer.utils.nets_utilsr   	Exceptionr   r#   r1   Moduler$   r   r   r   r   <module>   s   