o
    oi6                     @   sp  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZm	Z	 d dl
Z
d dlmZ d dl
mZmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z'm(Z(m)Z) d dl*m+Z+ d dl,m-Z-m.Z.m/Z/m0Z0 dZ1dZ2G dd deZ3dd Z4dee5 de5fddZ6dddddde2dfdee5 d e7d!e5d"ee5 d#e7d$e	e5e8df d%e5d&e7deej9e-e5f fd'd(Z:dAd)ed*e-d+e8deeeef fd,d-Z;e
< 	dBd.ej9d/e-d)ed0ee= fd1d2Z>e2fd3e5de5fd4d5Z?d6e5de	e8e5f fd7d8Z@G d9d: d:e jAZB	dCd;e5de jCfd<d=ZDd>d? ZEeFd@kr6eE  dS dS )D    N)ListOptionalTupleUnion)logger)Tensornn)
functional)
DataLoaderDataset)
load_model)config)
load_audioresample
save_audio)init_logger)ModelParams)
get_device)
as_complexas_realdownload_fileget_cache_dirget_norm_alpha)version)DFerberb_norm	unit_norm)DeepFilterNetDeepFilterNet2DeepFilterNet3r    c                       sN   e Zd Zdee deddf fddZdeeeef fddZ	d	d
 Z
  ZS )AudioDatasetfilessrreturnNc                    sN   t    g | _|D ]}tj|std| d | j| q
|| _	d S )NzFile not found: z. Skipping...)
super__init__r"   ospathisfiler   warningappendr#   )selfr"   r#   file	__class__ H/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/df/enhance.pyr&      s   

zAudioDataset.__init__c                 C   s(   | j | }t|| jd\}}|||jfS )Ncpu)r"   r   r#   sample_rate)r,   indexfnaudiometar0   r0   r1   __getitem__&   s   
zAudioDataset.__getitem__c                 C   s
   t | jS N)lenr"   )r,   r0   r0   r1   __len__+   s   
zAudioDataset.__len__)__name__
__module____qualname__r   strintr&   r   r   r8   r;   __classcell__r0   r0   r.   r1   r!      s    	r!   c                 C   s  t | j| j| jd| j| jd\}}}| jr|nd }| jd u r"d| _ntj	
| js/t| j t j}| jd urQt| jdkrHtd td t| jd }nt| jdks\J d| j}t||}t|d	dd
}t|}t|D ]s\}	\}
}}|
d }
|d}|	d | d }t }t|||| j| jd}t }|jd | }|| }|| }tj	|
}|dkr|ddnd}t| d| d|dd|dd t |!d||}t"|
||| j|dd qsd S )NT)post_filter	log_levelconfig_allow_defaultsepoch	mask_only.r   zGOnly one of `noisy_audio_files` or `noisy_dir` arguments are supported.   z/*zNo audio files provided   )num_workers
pin_memoryd   )padatten_lim_dbz2.0fz% |  zEnhanced noisy audio file 'z' in z.2fzs (RT factor: z.3f)r2   F)r#   
output_dirsuffixlog)#init_dfmodel_base_dirpfrC   rE   no_df_stagerS   rR   r'   r(   isdirmkdirr   r#   	noisy_dirr:   noisy_audio_filesr   errorexitglobr!   r
   	enumeratesqueezetimeenhancecompensate_delay	atten_limshapebasenameinfor   tor   )argsmodeldf_staterS   df_srinput_filesdsloader	n_samplesir-   r6   audio_srprogresst0t1t_audiotrtfr5   p_strr0   r0   r1   main/   sT   




&r{   mr$   c                 C   s$   | d u rt } | tv }|rt| S | S r9   )DEFAULT_MODELPRETRAINED_MODELSmaybe_download_model)r|   is_default_modelr0   r0   r1   get_model_basedir\   s   r   FINFOzenhance.logTbestrV   rB   rC   log_filerD   rE   default_modelrF   c                 C   s<  zddl m}m}	 |jdd |	  W n	 ty   Y nw | du p$| tv }
t| p)|} tj	| s8t
d| |durCtj| |nd}t||| d |
rYtd| d	|   tjtj| d
d|dd |rtddtt j ztdtt j}d| d}W n ty   d}Y nw td|  t }t|j|j|j|j|jd}tj| d}|duot|t o|! dk }|sd}|ptdtdddd}t"||||d\}}|du s|dkr|rt#d t$d t%d|  |&t' }tj(tj)| }|r|d7 }tdt'  td |||fS ) a  Initializes and loads config, model and deep filtering state.

    Args:
        model_base_dir (str): Path to the model directory containing checkpoint and config. If None,
            load the pretrained DeepFilterNet2 model.
        post_filter (bool): Enable post filter for some minor, extra noise reduction.
        log_level (str): Control amount of logging. Defaults to `INFO`.
        log_file (str): Optional log file name. None disables it. Defaults to `enhance.log`.
        config_allow_defaults (bool): Whether to allow initializing new config values with defaults.
        epoch (str): Checkpoint epoch to load. Options are `best`, `latest`, `<int>`, and `none`.
            `none` disables checkpoint loading. Defaults to `best`.

    Returns:
        model (nn.Modules): Intialized model, moved to GPU if available.
        df_state (DF): Deep filtering state for stft/istft/erb
        suffix (str): Suffix based on the model name. This can be used for saving the enhanced
            audio.
    r   )icinstallT)includeContextNzBase directory not found at {})r-   levelrk   zUsing z
 model at 
config.ini)config_must_existallow_defaultsallow_reloadmask_pfpf_betaz(beta: rQ   rP   zRunning with post-filter )r#   fft_sizehop_sizenb_bandsmin_nb_erb_freqscheckpointsnonerF   trainF)castsectiondefaultsave)rE   rF   zCould not find a checkpointrH   zLoaded checkpoint from epoch _pfzRunning on device {}zModel loaded)*icecreamr   r   configureOutputImportErrorr~   r   r'   r(   rY   NotADirectoryErrorformatjoinr   r   rh   r   loadsetboolr   r   getfloatKeyErrorr   r#   r   r   nb_erbmin_nb_freqs
isinstancer?   lowerload_model_cpr]   r^   debugri   r   rg   abspath)rV   rB   rC   r   rD   rE   r   rF   r   r   use_default_modelbetaprl   checkpoint_dirload_cprk   rS   r0   r0   r1   rU   e   sr   
 



rU   r6   dfnb_dfc           	      C   s   | |  }td}| }t  tdt t	t
t|||d}W d    n1 s2w   Y  tt	t|dd |f |d}tt	|d}|d urg||}||}||}|||fS )NFignorerH   .)analysisnumpyr   
erb_widthswarningscatch_warningssimplefilterUserWarningtorch	as_tensorr   r   	unsqueezer   r   ri   )	r6   r   r   devicespecaerb_fberb_feat	spec_featr0   r0   r1   df_features   s   
&



r   rk   rl   rN   c                 C   sN  |    |jd }t| dr| j|t d |jd }d\}}|r2| | }}t|d|f}t	| dt	| dt
 j}	t|||	t d\}
}}| |
 ||d  }t|d	}|d
urt|dkrdt| d  }t|
d	 | |d	|   }t|| }|r|| dksJ || }|d
d
||| f }|S )a  Enhance a single audio given a preloaded model and DF state.

    Args:
        model (nn.Module): A DeepFilterNet model.
        df_state (DF): DF state for STFT/ISTFT and feature calculation.
        audio (Tensor): Time domain audio of shape [C, T]. Sampling rate needs to match to `model` and `df_state`.
        pad (bool): Pad the audio to compensate for delay due to STFT/ISTFT.
        atten_lim_db (float): An optional noise attenuation limit in dB. E.g. an attenuation limit of
            12 dB only suppresses 12 dB and keeps the remaining noise in the resulting audio.

    Returns:
        enhanced audio (Tensor): If `pad` was `False` of shape [C, T'] where T'<T slightly delayed due to STFT.
            If `pad` was `True` it has the same shape as the input.
    r   reset_h0)
batch_sizer   rO   )r   r   r   df_bins)r   rH   N
      )evalrf   hasattrr   r   r   r   FrM   getattrr   r   r   cloner2   r   ra   absr   r   	synthesisr   )rk   rl   r6   rM   rN   bsorig_lenn_ffthopr   r   r   r   enhancedlimdr0   r0   r1   rc      s,   


"rc   namec                 C   s   t  }| dr| d} tj|| }tjtj|ds*tjtj|dr,|S tj|dd d|  }t	|d |dd |S )zDownload a DeepFilterNet model.

    Args:
        - name (str): Model name. Currently needs to one of `[DeepFilterNet, DeepFilterNet2]`.

    Returns:
        - base_dir: Return the model base directory as string.
    z.zipr   r   T)exist_okz:https://github.com/Rikorose/DeepFilterNet/raw/main/models/)extract)
r   endswithremovesuffixr'   r(   r   r)   rY   makedirsr   )r   	cache_dir	model_dirurlr0   r0   r1   r      s   	


r   valuec                 C   s.   zt | W S  ty   | dv sJ |  Y S w )N)r   latest)r@   
ValueError)r   r0   r0   r1   parse_epoch_type  s   
r   c                       s$   e Zd Z fddZdd Z  ZS )PrintVersionc                    s   t  j||dddd d S )Nr   Fz'Print DeepFilterNet version information)option_stringsdestnargsrequiredhelp)r%   r&   )r,   r   r   r.   r0   r1   r&     s   
zPrintVersion.__init__c                 G   s   t dt td d S )Nr   r   )printr   r^   )r,   rj   r0   r0   r1   __call__%  s   
zPrintVersion.__call__)r<   r=   r>   r&   r   rA   r0   r0   r.   r1   r     s    	r   default_log_levelc                 C   s   |d u rt  }|jddtd dd |jdddd |jd	d
td dd |jdt| dd |jdddddd |jdddtdd |jddtd |S )Nz--model-base-dirz-mzModel directory containing checkpoints and config. To load a pretrained model, you may just provide the model name, e.g. `DeepFilterNet`. By default, the pretrained DeepFilterNet2 model is loaded.typer   r   z--pfz>Post-filter that slightly over-attenuates very noisy sections.
store_true)r   actionz--output-dirz-oz;Directory in which the enhanced audio files will be stored.z--log-levelz:Logger verbosity. Can be one of (debug, info, error, none)z--debugz-dstore_constDEBUGrC   )r   constr   z--epochz-er   zFEpoch for checkpoint loading. Can be one of ['best', 'latest', <int>].)r   r   r   z-vz	--versionr   )argparseArgumentParseradd_argumentr?   r   r   )r   parserr0   r0   r1   setup_df_argument_parser*  sJ   	r   c                  C   s   t  } | jddddd | jddtd dd	 | jd
tddd | jddtd dd	 | jddddd | jddd |  }t| d S )Nz--no-delay-compensationrd   store_falsezeDont't add some paddig to compensate the delay introduced by the real-time STFT/ISTFT implementation.)r   r   r   z--atten-limz-azLAttenuation limit in dB by mixing the enhanced signal with the noisy signal.r   r\   *z6List of noise files to mix with the clean speech file.)r   r   r   z--noisy-dirz-izQInput directory containing noisy audio files. Use instead of `noisy_audio_files`.z--no-suffixrS   z6Don't add the model suffix to the enhanced audio files)r   r   r   z--no-df-stager   r   )r   r   r@   r?   
parse_argsr{   )r   rj   r0   r0   r1   runV  sH   r  __main__r9   )TN)r   N)Gr   r_   r'   rb   r   typingr   r   r   r   r   logurur   r   r   torch.nnr	   r   torch.utils.datar
   r   df.checkpointr   r   	df.configr   df.ior   r   r   	df.loggerr   df.modelr   
df.modulesr   df.utilsr   r   r   r   r   
df.versionr   libdfr   r   r   r   r~   r}   r!   r{   r?   r   r   r@   ModulerU   r   no_gradr   rc   r   r   Actionr   r   r   r  r<   r0   r0   r0   r1   <module>   s    -
	
&X.
,
'
