"""JETS related loss module for ESPnet2."""

from typing import Tuple

import numpy as np
import torch
import torch.nn.functional as F
from scipy.stats import betabinom
from typeguard import check_argument_types

from espnet.nets.pytorch_backend.fastspeech.duration_predictor import (
    DurationPredictorLoss,
)
from espnet.nets.pytorch_backend.nets_utils import make_non_pad_mask


class VarianceLoss(torch.nn.Module):
    def __init__(self, use_masking: bool = True, use_weighted_masking: bool = False):
        """Initialize JETS variance loss module.

        Args:
            use_masking (bool): Whether to apply masking for padded part in loss
                calculation.
            use_weighted_masking (bool): Whether to apply weighted masking in loss
                calculation.

        """
        assert check_argument_types()
        super().__init__()

        assert (use_masking != use_weighted_masking) or not use_masking
        self.use_masking = use_masking
        self.use_weighted_masking = use_weighted_masking

        # define criterions
        reduction = "none" if self.use_weighted_masking else "mean"
        self.mse_criterion = torch.nn.MSELoss(reduction=reduction)
        self.duration_criterion = DurationPredictorLoss(reduction=reduction)

    def forward(
        self,
        d_outs: torch.Tensor,
        ds: torch.Tensor,
        p_outs: torch.Tensor,
        ps: torch.Tensor,
        e_outs: torch.Tensor,
        es: torch.Tensor,
        ilens: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Calculate forward propagation.

        Args:
            d_outs (LongTensor): Batch of outputs of duration predictor (B, T_text).
            ds (LongTensor): Batch of durations (B, T_text).
            p_outs (Tensor): Batch of outputs of pitch predictor (B, T_text, 1).
            ps (Tensor): Batch of target token-averaged pitch (B, T_text, 1).
            e_outs (Tensor): Batch of outputs of energy predictor (B, T_text, 1).
            es (Tensor): Batch of target token-averaged energy (B, T_text, 1).
            ilens (LongTensor): Batch of the lengths of each input (B,).

        Returns:
            Tensor: Duration predictor loss value.
            Tensor: Pitch predictor loss value.
            Tensor: Energy predictor loss value.

        """
        # apply mask to remove padded part
        if self.use_masking:
            duration_masks = make_non_pad_mask(ilens).to(ds.device)
            d_outs = d_outs.masked_select(duration_masks)
            ds = ds.masked_select(duration_masks)
            pitch_masks = make_non_pad_mask(ilens).unsqueeze(-1).to(ds.device)
            p_outs = p_outs.masked_select(pitch_masks)
            e_outs = e_outs.masked_select(pitch_masks)
            ps = ps.masked_select(pitch_masks)
            es = es.masked_select(pitch_masks)

        # calculate loss
        duration_loss = self.duration_criterion(d_outs, ds)
        pitch_loss = self.mse_criterion(p_outs, ps)
        energy_loss = self.mse_criterion(e_outs, es)

        # make weighted mask and apply it
        if self.use_weighted_masking:
            duration_masks = make_non_pad_mask(ilens).to(ds.device)
            duration_weights = (
                duration_masks.float() / duration_masks.sum(dim=1, keepdim=True).float()
            )
            duration_weights /= ds.size(0)

            # apply weight
            duration_loss = (
                duration_loss.mul(duration_weights).masked_select(duration_masks).sum()
            )
            pitch_masks = duration_masks.unsqueeze(-1)
            pitch_weights = duration_weights.unsqueeze(-1)
            pitch_loss = pitch_loss.mul(pitch_weights).masked_select(pitch_masks).sum()
            energy_loss = (
                energy_loss.mul(pitch_weights).masked_select(pitch_masks).sum()
            )

        return duration_loss, pitch_loss, energy_loss
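
# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original ESPnet module): the
# commented lines below show the tensor shapes VarianceLoss.forward expects.
# All batch/length sizes and the random inputs are hypothetical placeholders.
# ---------------------------------------------------------------------------
#     criterion = VarianceLoss(use_masking=True)
#     d_outs = torch.randn(2, 10)                 # duration predictor outputs (B, T_text)
#     ds = torch.randint(1, 5, (2, 10))           # target durations (B, T_text)
#     p_outs, ps = torch.randn(2, 10, 1), torch.randn(2, 10, 1)  # pitch pred/target
#     e_outs, es = torch.randn(2, 10, 1), torch.randn(2, 10, 1)  # energy pred/target
#     ilens = torch.tensor([10, 7])               # valid token lengths per utterance
#     dur_loss, pitch_loss, energy_loss = criterion(
#         d_outs, ds, p_outs, ps, e_outs, es, ilens
#     )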


class ForwardSumLoss(torch.nn.Module):
    """Forwardsum loss described at https://openreview.net/forum?id=0NQwnnwAORi"""

    def __init__(self, cache_prior: bool = True):
        """Initialize forwardsum loss module.

        Args:
            cache_prior (bool): Whether to cache beta-binomial prior.

        """
        super().__init__()
        self.cache_prior = cache_prior
        self._cache = {}

    def forward(
        self,
        log_p_attn: torch.Tensor,
        ilens: torch.Tensor,
        olens: torch.Tensor,
        blank_prob: float = np.e**-1,
    ) -> torch.Tensor:
        """Calculate forward propagation.

        Args:
            log_p_attn (Tensor): Batch of log probability of attention matrix
                (B, T_feats, T_text).
            ilens (Tensor): Batch of the lengths of each input (B,).
            olens (Tensor): Batch of the lengths of each target (B,).
            blank_prob (float): Blank symbol probability.

        Returns:
            Tensor: Forwardsum loss value.

        """
        B = log_p_attn.size(0)

        # add beta-binomial prior
        bb_prior = self._generate_prior(ilens, olens)
        bb_prior = bb_prior.to(dtype=log_p_attn.dtype, device=log_p_attn.device)
        log_p_attn = log_p_attn + bb_prior

        # prepend a blank-symbol position along the text axis for CTC loss:
        # (B, T_feats, T_text + 1)
        log_p_attn_pd = F.pad(log_p_attn, (1, 0, 0, 0, 0, 0), value=np.log(blank_prob))

        loss = 0
        for bidx in range(B):
            # construct target sequence: each text token maps to its own index
            target_seq = torch.arange(1, ilens[bidx] + 1).unsqueeze(0)
            cur_log_p_attn_pd = log_p_attn_pd[
                bidx, : olens[bidx], : ilens[bidx] + 1
            ].unsqueeze(1)  # (T_feats, 1, T_text + 1)
            loss += F.ctc_loss(
                log_probs=cur_log_p_attn_pd,
                targets=target_seq,
                input_lengths=olens[bidx : bidx + 1],
                target_lengths=ilens[bidx : bidx + 1],
                zero_infinity=True,
            )
        loss = loss / B
        return loss

    def _generate_prior(self, text_lengths, feats_lengths, w=1) -> torch.Tensor:
        """Generate alignment prior formulated as beta-binomial distribution.

        Args:
            text_lengths (Tensor): Batch of the lengths of each input (B,).
            feats_lengths (Tensor): Batch of the lengths of each target (B,).
            w (float): Scaling factor; lower values give a wider prior.

        Returns:
            Tensor: Batched 2d static prior matrix (B, T_feats, T_text).

        """
        B = len(text_lengths)
        T_text = text_lengths.max()
        T_feats = feats_lengths.max()

        bb_prior = torch.full((B, T_feats, T_text), fill_value=-np.inf)
        for bidx in range(B):
            T = feats_lengths[bidx].item()
            N = text_lengths[bidx].item()

            key = str(T) + "," + str(N)
            if self.cache_prior and key in self._cache:
                prob = self._cache[key]
            else:
                alpha = w * np.arange(1, T + 1, dtype=float)  # (T,)
                beta = w * np.array([T - t + 1 for t in alpha])
                k = np.arange(N)
                batched_k = k[..., None]  # (N, 1)
                prob = betabinom.logpmf(batched_k, N, alpha, beta)  # (N, T)

            # store cache
            if self.cache_prior and key not in self._cache:
                self._cache[key] = prob

            prob = torch.from_numpy(prob).transpose(0, 1)  # -> (T, N)
            bb_prior[bidx, :T, :N] = prob

        return bb_prior
4r;   )ru   typingr   numpyrK   r   torch.nn.functionalr   
functionalrI   scipy.statsr   	typeguardr   9espnet.nets.pytorch_backend.fastspeech.duration_predictorr   &espnet.nets.pytorch_backend.nets_utilsr   Moduler   r;   r   r   r   r   <module>   s   X
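

# ---------------------------------------------------------------------------
# Minimal smoke test for ForwardSumLoss (illustrative only, not part of the
# original ESPnet module). It builds a random log-attention matrix with
# hypothetical sizes and checks that the loss evaluates to a finite scalar.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    torch.manual_seed(0)
    B, T_feats, T_text = 2, 20, 6
    # log-probability attention matrix (B, T_feats, T_text), normalized over text
    log_p_attn = torch.log_softmax(torch.randn(B, T_feats, T_text), dim=-1)
    ilens = torch.tensor([6, 4])  # text lengths per utterance
    olens = torch.tensor([20, 15])  # feature lengths per utterance
    criterion = ForwardSumLoss()
    loss = criterion(log_p_attn, ilens, olens)
    print("forwardsum loss:", float(loss))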