o
    %ݫiY                     @   s   d Z ddlmZ ddlZddlmZ ddlm  mZ ddl	m
Z
 ddlmZmZmZmZmZmZ ddlmZ eeZG dd dejjZG d	d
 d
ejjZdS )z}Library implementing quaternion-valued convolutional neural networks.

Authors
 * Titouan Parcollet 2020
 * Drew Wagner 2024
    )TupleN)get_padding_elem)affect_conv_initquaternion_conv_opquaternion_conv_rotation_opquaternion_init!renorm_quaternion_weights_inplaceunitary_init)
get_loggerc                       sh   e Zd ZdZ														d fd
d	Zdd Zdd ZdededefddZdd Z	  Z
S )QConv1da  This function implements quaternion-valued 1d convolution.

    Arguments
    ---------
    out_channels : int
        Number of output channels. Please note
        that these are quaternion-valued neurons. If 256
        channels are specified, the output dimension
        will be 1024.
    kernel_size : int
        Kernel size of the convolutional filters.
    input_shape : tuple
        The shape of the input.
    stride : int, optional
        Stride factor of the convolutional filters (default 1).
    dilation : int, optional
        Dilation factor of the convolutional filters (default 1).
    padding : str, optional
        (same, valid, causal). If "valid", no padding is performed.
        If "same" and stride is 1, output shape is same as input shape.
        "causal" results in causal (dilated) convolutions (default "same").
    groups : int, optional
        Default: 1
        This option specifies the convolutional groups. See torch.nn
        documentation for more information (default 1).
    bias : bool, optional
        If True, the additive bias b is adopted (default True).
    padding_mode : str, optional
        This flag specifies the type of padding. See torch.nn documentation
        for more information (default "reflect").
    init_criterion : str , optional
        (glorot, he).
        This parameter controls the initialization criterion of the weights.
        It is combined with weights_init to build the initialization method of
        the quaternion-valued weights (default "glorot").
    weight_init : str, optional
        (quaternion, unitary).
        This parameter defines the initialization procedure of the
        quaternion-valued weights. "quaternion" will generate random quaternion
        weights following the init_criterion and the quaternion polar form.
        "unitary" will normalize the weights to lie on the unit circle (default "quaternion").
        More details in: "Quaternion Recurrent Neural Networks",
        Parcollet T. et al.
    spinor : bool, optional
        When True, the layer will be turned into a spinor layer. More precisely
        W*x will be turned into W*x*W-1. The input x will be rotated by W such
        as in a spinor neural network. However, x MUST be a quaternion with
        the real part equal to zero. (0 + xi + yj + zk). Indeed, the rotation
        operation only acts on the vector part. Note that W will always be
        normalized before the rotation to ensure the quaternion algebra (default False).
        More details in: "Quaternion neural networks", Parcollet T.
    vector_scale : bool, optional
        The vector_scale is only used when spinor = True. In the context of a
        spinor neural network, multiple rotations of the input vector x are
        performed and summed. Hence, the norm of the output vector always
        increases with the number of layers, making the neural network instable
        with deep configurations. The vector_scale parameters are learnable
        parameters that acts like gates by multiplying the output vector with
        a small trainable parameter (default False).
    max_norm: float
        kernel max-norm.

    Example
    -------
    >>> inp_tensor = torch.rand([10, 16, 40])
    >>> cnn_1d = QConv1d(
    ...     input_shape=inp_tensor.shape, out_channels=12, kernel_size=3
    ... )
    >>> out_tensor = cnn_1d(inp_tensor)
    >>> out_tensor.shape
    torch.Size([10, 16, 48])
    N   sameTreflectglorot
quaternionFc                    s  t    || _|| _|| _|| _|| _|| _|| _|	| _	d| _
|
| _|| _|| _|| _|| _| |d | _|  \| _| _tjtj| j | _tjtj| j | _tjtj| j | _tjtj| j | _| jr}tjjt| jjdd| _nt| jj d| _| jr| jrtjt| jj| _!tjj"#| j!j$ nt| jj d| _!|rtjtd| j | _%ntd| j  d| _%| j%j$&d t't(d| j | _)t*| j| j| j| j| j| j)| j d S )NF   requires_gradr   r   unitary)+super__init__input_shapeout_channelskernel_sizestridedilationpaddinggroupspadding_mode	unsqueezeinit_criterionweight_initspinorvector_scalemax_norm_check_inputin_channels_get_kernel_and_weight_shapek_shapew_shapetorchnn	ParameterTensorr_weighti_weightj_weightk_weightzerosshapezero_kernelrequires_grad_scale_paraminitxavier_uniform_databiasfill_r   r	   winitr   )selfr   r   r   r   r   r   r   r;   r   r!   r"   r#   r$   r%   	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/nnet/quaternion_networks/q_CNN.pyr   f   sl   

zQConv1d.__init__c                 C   s  | dd}| jdurt| j| j| j| j| jd | jdkr*| || j	| j
| j}n#| jdkr@| j	d | j
 }t||df}n| jdkrFntd	| j | jrlt|| j| j| j| j| j| j| j| j| j
d| jd
d}nt|| j| j| j| j| j| j| j
d| jd
d}| dd}|S )  Returns the output of the convolution.

        Arguments
        ---------
        x : torch.Tensor (batch, time, channel)
            Input to convolve. 3d or 4d tensors are expected.

        Returns
        -------
        x : torch.Tensor
            The convolved outputs.
        r   Nr%   r   causalr   valid1Padding must be 'same', 'valid' or 'causal'. Got Tscaler5   r   r   r   r   conv1dr   r   r   r   rK   )	transposer%   r   r/   r0   r1   r2   r   _manage_paddingr   r   r   Fpad
ValueErrorr#   r   r;   r7   r5   r   r   )r>   xnum_padoutrA   rA   rB   forward   sl   



zQConv1d.forwardc                 C   sZ   | j | j dkrtd| j| j dkrtd| j}| j| j | j ft|f }||fS )BReturns the kernel size and weight shape for convolutional layers.r   'in_channels must be divisible by groups(out_channels must be divisible by groups)r'   r   rQ   r   r   tupler>   ksr*   rA   rA   rB   r(     s   z$QConv1d._get_kernel_and_weight_shaper   r   r   c                 C   s.   |j d }t||||}tj||| jd}|S )a  This function performs zero-padding on the time axis
        such that their lengths is unchanged after the convolution.

        Arguments
        ---------
        x : torch.Tensor
            Input tensor.
        kernel_size : int
            Kernel size.
        dilation : int
            Dilation.
        stride: int
            Stride.

        Returns
        -------
        x : torch.Tensor
            The padded input.
        rD   mode)r4   r   rO   rP   r   )r>   rR   r   r   r   L_inr   rA   rA   rB   rN     s   
zQConv1d._manage_paddingc                 C   sf   t |dkr|d }ntdt| | jd dkr#tdt| j |d dkr1tdt| |S ):Checks the input and returns the number of input channels.      z(QuaternionConv1d expects 3d inputs. Got r   1The field kernel size must be an odd number. Got r   zPQuaternion torch.Tensors must have dimensions divisible by 4. input.size()[3] = lenrQ   strr   r>   r   r'   rA   rA   rB   r&   9  s$   

zQConv1d._check_input)Nr   r   r   r   Tr   r   r   FFN)__name__
__module____qualname____doc__r   rU   r(   intrN   r&   __classcell__rA   rA   r?   rB   r      s&    MXN r   c                       s   e Zd ZdZ																		d fd
d	Zdd Zdd Zdd Zdee	e	f dee	e	f dee	e	f fddZ
  ZS )QConv2da  This function implements quaternion-valued 1d convolution.

    Arguments
    ---------
    out_channels : int
        Number of output channels. Please note
        that these are quaternion-valued neurons. If 256
        channels are specified, the output dimension
        will be 1024.
    kernel_size : int
        Kernel size of the convolutional filters.
    input_shape : tuple
        The shape of the input.
    stride : int, optional
        Stride factor of the convolutional filters (default 1).
    dilation : int, optional
        Dilation factor of the convolutional filters (default 1).
    padding : str, optional
        (same, causal). If "valid", no padding is performed.
        If "same" and stride is 1, output shape is same as input shape (default "same").
    groups : int, optional
        This option specifies the convolutional groups. See torch.nn
        documentation for more information. (default 1).
    bias : bool, optional
        If True, the additive bias b is adopted (default True).
    padding_mode : str, optional
        This flag specifies the type of padding. See torch.nn documentation
        for more information. (default "reflect")
    init_criterion : str , optional
        (glorot, he).
        This parameter controls the initialization criterion of the weights.
        It is combined with weights_init to build the initialization method of
        the quaternion-valued weights (default "glorot").
    weight_init : str, optional
        (quaternion, unitary).
        This parameter defines the initialization procedure of the
        quaternion-valued weights. "quaternion" will generate random quaternion
        weights following the init_criterion and the quaternion polar form.
        "unitary" will normalize the weights to lie on the unit circle (default "quaternion").
        More details in: "Quaternion Recurrent Neural Networks",
        Parcollet T. et al.
    spinor : bool, optional
        When True, the layer will be turned into a spinor layer. More precisely
        W*x will be turned into W*x*W-1. The input x will be rotated by W such
        as in a spinor neural network. However, x MUST be a quaternion with
        the real part equal to zero. (0 + xi + yj + zk). Indeed, the rotation
        operation only acts on the vector part. Note that W will always be
        normalized before the rotation to ensure the quaternion algebra (default False).
        More details in: "Quaternion neural networks", Parcollet T.
    vector_scale : bool, optional
        The vector_scale is only used when spinor = True. In the context of a
        spinor neural network, multiple rotations of the input vector x are
        performed and summed. Hence, the norm of the output vector always
        increases with the number of layers, making the neural network instable
        with deep configurations. The vector_scale parameters are learnable
        parameters that acts like gates by multiplying the output vector with
        a small trainable parameter (default False).
    max_norm: float
        kernel max-norm.
    swap: bool
        If True, the convolution is done with the format (B, C, W, H).
        If False, the convolution is done with (B, H, W, C).
        Active only if skip_transpose is False.
    skip_transpose : bool
        If False, uses batch x spatial.dim2 x spatial.dim1 x channel convention of speechbrain.
        If True, uses batch x channel x spatial.dim1 x spatial.dim2 convention.


    Example
    -------
    >>> inp_tensor = torch.rand([10, 4, 16, 40])
    >>> cnn_1d = QConv2d(
    ...     input_shape=inp_tensor.shape, out_channels=12, kernel_size=3
    ... )
    >>> out_tensor = cnn_1d(inp_tensor)
    >>> out_tensor.shape
    torch.Size([10, 4, 16, 48])
    Nr   r   Tr   r   r   Fc                    s$  t    || _|| _|| _|| _|| _|| _|| _|	| _	|
| _
|| _|| _|| _|| _|| _|| _t|tr<||f| _t|trF||f| _t|trP||f| _| |d | _|  \| _| _tjtj| j | _tjtj| j | _tjtj| j | _tjtj| j | _| jrtjjt | jj!dd| _"nt| jj!#d| _"| jr| jrtjt| jj!| _$tjj%&| j$j' nt| jj!#d| _$|rtjtd| j | _(n| )dtd| j #d | j(j'*d t+t,d| j | _-t.| j| j| j| j| j| j-| j
 d S )Nr   Fr   r;   r   r   )/r   r   r   r   r   r   r   r   r   r   r!   r"   r#   r$   r%   swapskip_transpose
isinstancerk   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   register_bufferr<   r   r	   r=   r   )r>   r   r   r   r   r   r   r   r;   r   r!   r"   r#   r$   r%   rn   ro   r?   rA   rB   r     s|   







zQConv2d.__init__c                 C   s.  | j s|dd}| jr|dd}| jdur%t| j| j| j| j| jd | j	dkr6| 
|| j| j| j}n| j	dkr<ntd| j	 | jrft|| j| j| j| j| j| j| j| jd	 | jd	 d	| jd
d}nt|| j| j| j| j| j| jd	 | jd	 d	| jdd}| j s|dd}| jr|dd}|S dS )rC   r   rD   NrE   r   rG   rH   r   TrI   FrL   ra   )ro   rM   rn   r%   r   r/   r0   r1   r2   r   rN   r   r   r   rQ   r#   r   r;   r7   r5   r   r   )r>   rR   rT   rA   rA   rB   rU     st   


zQConv2d.forwardc                 C   s   t |dkr|d }ntdt| | jd d dks%| jd d dkr.tdt| j |d dkrBtdtd d	 t| |S )
r_   r   rD   z(QuaternionConv1d expects 4d inputs. Got r   ra   r   rb   zKQuaternion torch.Tensors must have dimensions divisible by 4. input.size()[z] = rc   rf   rA   rA   rB   r&   Y  s,   

$zQConv2d._check_inputc                 C   sh   | j | j dkrtd| j| j dkrtd| jd | jd f}| j| j | j fg |R  }||fS )rV   r   rW   rX   r   )r'   r   rQ   r   r   rZ   rA   rA   rB   r(   s  s   z$QConv2d._get_kernel_and_weight_shaper   r   r   c           	      C   s^   |j d }t||d |d |d }t||d |d |d }|| }tjj||| jd}|S )a  This function performs zero-padding on the time and frequency axes
        such that their lengths is unchanged after the convolution.

        Arguments
        ---------
        x : torch.Tensor
            Input tensor.
        kernel_size : int
            Kernel size.
        dilation : int
            Dilation.
        stride: int
            Stride.

        Returns
        -------
        x : torch.Tensor
            The padded inputs.
        rD   rr   r\   )r4   r   r,   
functionalrP   r   )	r>   rR   r   r   r   r^   padding_timepadding_freqr   rA   rA   rB   rN   ~  s   
zQConv2d._manage_padding)Nr   r   r   r   Tr   r   r   FFNFF)rg   rh   ri   rj   r   rU   r&   r(   r   rk   rN   rl   rA   rA   r?   rB   rm   T  s6    SdQ


rm   )rj   typingr   r+   torch.nnr,   torch.nn.functionalrs   rO   speechbrain.nnet.CNNr   *speechbrain.nnet.quaternion_networks.q_opsr   r   r   r   r   r	   speechbrain.utils.loggerr
   rg   loggerModuler   rm   rA   rA   rA   rB   <module>   s       :