o
    mi(                     @   s   d dl Z d dlZd dlmZ dddZdd Zi ai add	d
Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd ZdddZdS )    N   h㈵>c                 C   s   t t j| |d| S )N)min)torchlogclamp)xCclip_val r   </home/ubuntu/SpeechTokenizer/speechtokenizer/trainer/loss.pydynamic_range_compression_torch   s   r   c                 C   s   t | }|S )N)r   )
magnitudesoutputr   r   r   spectral_normalize_torch	   s   r   Fc	                 C   s  |t vr8tjj|||d d ||ddd}	|	j j| jt t	|d t	| j < t
|| jtt	| j< t
jjj| dt|| d t|| d fdd} | d} t
j| |||tt	| j |dd	d
d
d
}
t
|
d }
t
t t	|d t	| j  |
}
t|
}
|
S )N   r   slaneyhtk)n_melssample_raten_stftf_minf_maxnorm	mel_scale_reflect)modeFT)
hop_length
win_lengthwindowcenterpad_mode
normalizedonesidedreturn_complexg&.>)	mel_basis
torchaudio
transformsMelScalefbfloatTtodevicestrr   hann_windownn
functionalpad	unsqueezeintsqueezestftabsmatmulr   )yn_fftnum_melsr   hop_sizewin_sizefminfmaxr!   mel_transformspecr   r   r   mel_spectrogram   s   "*6

"rC   c                 C   sF   t jdd\}}|j| dddd}t j||d |j  t   |S )N)
   r   )figsizeautolowernone)aspectorigininterpolation)ax)pltsubplotsimshowcolorbarcanvasdrawclose)spectrogramfigrL   imr   r   r   plot_spectrogram%   s   

rW   c                 C   sR   t | d|d}tjj| d d d d d |f |d d d d d |f S )N)r   sizer   r1   r2   l1_loss)r   x_hatlengthr   r   r   
recon_loss0   s   <r]   c                 K   s~   t | dfi |}t |dfi |}t|d|d}tjj|d d d d d |f |d d d d d |f S )Nr   r   )rC   r6   r   rY   r   r1   r2   rZ   )r   r[   kwargsx_mel	x_hat_melr\   r   r   r   mel_loss4   s   <ra   c              	   C   sL   d}t | |D ]\}}t ||D ]\}}|tt|| 7 }qq|d S )Nr   r   )zipr   meanr8   )fmap_rfmap_glossdrdgrlglr   r   r   feature_loss:   s   rk   c                 C   sH   d}t | |D ]\}}td| d }t|d }||| 7 }q|S Nr   r   r   )rb   r   rc   )disc_real_outputsdisc_generated_outputsrf   rg   rh   r_lossg_lossr   r   r   discriminator_lossC   s   rq   c                 C   s,   d}| D ]}t d| d }||7 }q|S rl   )r   rc   )disc_outputsrf   rh   lr   r   r   adversarial_lossM   s
   
rt   c              
   C   s`   t | d|d}tttjjj| d d d |f |d d d |f dd  }|S )Nr   axis)	r   rY   r   r   sigmoidr1   r2   cosine_similarityrc   )featuretarget_featurendistill_lossr   r   r   d_axis_distill_lossV   s   Fr}   c              
   C   s   t | d|d}tjj| d d d |f |d d d |f dd}tttjjj| d d d |f |d d d |f dd	  }|||  }|S )Nr   rc   )	reductionrX   ru   )
r   rY   r   r2   rZ   r   rw   r1   rx   rc   )ry   rz   
lambda_simr{   rZ   sim_lossr|   r   r   r   t_axis_distill_loss[   s
   2Fr   )r   r   )F)r   )r   r'   matplotlib.pylabpylabrM   r   r   r&   r0   rC   rW   r]   ra   rk   rq   rt   r}   r   r   r   r   r   <module>   s     

	
	