o
    }oi_                      @   s   d dl Z d dlmZ d dlmZ d dlZzd dlmZ d dlm	Z	 dZ
W n ey/   dZ
Y nw d dlmZ d d	lmZmZmZ d d
lmZmZmZmZ d dlmZ d dlmZ d dlmZ ddlmZ dZddede defddZ!G dd deeZ"dS )    N)Path)Optional)resample)MelSpectrogramTF)ASRModel)LossTyping	typecheck)LengthsTypeLossType
NeuralTypeVoidType)logging)maybe_download_from_cloud)resolve_cache_dir   )
sisnr_losszhttps://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_en_conformer_ctc_small/versions/1.6.0/files/stt_en_conformer_ctc_small.nemolocationrefresh_cachereturnc                 C   s~   t d|  | dd }| |d}tt |dd  }t| 	d
 }t|||||d}t d	| t|}|S )
zRestore an ASR model from the cloud.

    Args:
        location (str): The URL of the model in the cloud.
        refresh_cache (bool): Whether to force re-download of the model.

    Returns:
        nemo_asr.models.ASRModel: The restored model.
    z'Restoring model from cloud location: %s/ Nzutf-8)urlfilename	cache_dir	subfolderr   zModel file in cache: %s)r   debugsplitreplacer   joinpathr   hashlibmd5encode	hexdigestr   r   restore_from)r   r   r   r   r   cache_subfoldernemo_model_file_in_cachemodel r*   h/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/audio/losses/maxine/losses_combined.pyrestore_asr_model_from_cloud+   s   


r,   c                       s   e Zd ZdZddeedfdededededed	ed
ededef fddZde	j
de	j
de	j
fddZde	j
de	j
de	j
fddZedd Zedd Ze 	d de	j
de	j
dee	j
 de	j
fddZ  ZS )!CombinedLossz
    Combination of three losses (signal quality/spectral+cepstral features/acoustic error)
    See https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=1083798
    Tg      p>sample_rate
hop_lengthnum_mels
fft_lengthsisnr_loss_weightspectral_loss_weightasr_loss_weightuse_asr_lossuse_mel_specc                    s   t std td| jj t   || _t	
|}| d| t	|}| jd|dd d | _|| | _|rIt|
| _| j  | j  t||||dd| _t	j | _|	| _|| _|| _|| _|| _d S )Nz:Could not import torchaudio. Some features might not work.z>torchaudio is not installed but is necessary to instantiate a windowepsilonF)
persistent)r.   n_fftr/   n_melscenter)HAVE_TORCHAUDIOr   errorModuleNotFoundError	__class____name__super__init__r.   torchhann_windowregister_buffertensorsource_lengthssource_valuer,   	asr_modelevalfreezer   mel_transformnnL1Lossmae_lossr6   r5   r2   r3   r4   )selfr.   r/   r0   r1   r2   r3   r4   r5   r6   conformer_modelr8   r7   r@   r*   r+   rC   S   s<   








zCombinedLoss.__init__predicted_audioprimary_audior   c           
      C   s~   d}| j r=| |}| |}| ||}||7 }dttj|| jd }dttj|| jd }| ||}	||	7 }|S )Nr      )min)r6   rM   rP   rD   log10clampr8   )
rQ   rT   rU   lossprimary_mel_specpredicted_mel_specmelLosslog_predlog_prim
logMelLossr*   r*   r+   spectral_loss   s   

zCombinedLoss.spectral_lossc                 C   s   t j|ddt|  j}t j|ddt|  j}t |jddg|jddt|  j}| jj	j
}| j
|krMt|| j
|}t|| j
|}| j||d\}}}| j||d\}	}}d| }
d|	 }t j }|||
}|S )Nr   )dimr   r   )input_signalinput_signal_length
   )rD   squeezetonext
parametersdevicefullsizerJ   cfgr.   r   rN   CrossEntropyLoss)rQ   rT   rU   primary_audio_predicted_audio_	input_lenasr_sample_rateprimary_log_predicted_logprimary_probpredicted_probloss_fnrZ   r*   r*   r+   asr_loss   s    



zCombinedLoss.asr_lossc                 C   s0   d}t |t t |t t tdt dddS )z)Input types definitions for CombinedLoss.)BCTrz   T)optional)estimatetargetinput_length)r   r   tupler
   )rQ   signal_shaper*   r*   r+   input_types   s
   

zCombinedLoss.input_typesc                 C   s   dt t diS )z^Output types definitions for CombinedLoss.
        loss:
            NeuralType(None)
        rZ   )elements_type)r   r   )rQ   r*   r*   r+   output_types   s   zCombinedLoss.output_typesNr~   r   r   c              	   C   s  t |  j}| jd u r|jd }t|f| j|| _t	t
t|d|dg}|d u rCt|jd f|jd |}t||k||}|dd |f }|dd |f }	tdg|}
|
| jt||	| 7 }
| |	|}|
| j| 7 }
| jr|
| j| |	| 7 }
|
S )Nr   r   r   .g        )rh   ri   rj   rH   shaperD   rk   rI   rg   intrW   rG   rl   wherer2   r   ra   r3   r5   r4   ry   )rQ   r~   r   r   rj   batchmin_lensource_lengths_lrU   rT   
loss_totalrZ   r*   r*   r+   forward   s"   

$ zCombinedLoss.forward)N)rA   
__module____qualname____doc__!STT_EN_CONFORMER_CTC_SMALL_v1_6_0floatr   boolrC   rD   Tensorra   ry   propertyr   r   r	   r   r   __classcell__r*   r*   rS   r+   r-   M   sT    	
2
	
r-   )F)#r"   pathlibr   typingr   rD   torchaudio.functionalr   torchaudio.transformsr   r=   r?   nemo.collections.asr.modelsr   	nemo.corer   r   r	   nemo.core.neural_typesr
   r   r   r   
nemo.utilsr   nemo.utils.cloudr   nemo.utils.data_utilsr   r   r   strr   r,   r-   r*   r*   r*   r+   <module>   s*   "