o
    }oi`                     @   s   d dl Z d dlmZ d dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ G d
d dZG dd dZG dd dZG dd dZG dd dZdS )    N)Optional)!SpectrogramToMultichannelFeatures)MaskBasedDereverbWPEMaskEstimatorFlexChannelsMaskEstimatorGSSMaskReferenceChannel)SSLPretrainWithMaskedPatch)AudioToSpectrogram)	WPEFilter)convmtx_mc_numpy)loggingc                   @   sb  e Zd Zejjejddgejdddgejdg dejdd	d
gejdg ddededee	 dee
 dee	 f
ddZejjejddgejdddgejdg dejdddgdededee	 defddZejjejdddgdefddZejjejdddgdefddZejjdd Zd	S )%TestSpectrogramToMultichannelFeatures
fft_length   num_channels      mag_reduction)Nrmsabs_meanmean_abs	mag_powerN   mag_normalization)Nmeanmean_varc              	   C   s  d}d}|d }d}	d}
t jj|
d}|d }t||d}t|j|||d	d
}t|	D ]}|j|||fd}|t	|t	|g| d\}}|||d\}}|
   }|
   }|du rit |}n=|dkr~t t jt |d ddd}n(|dkrt jt |ddd}n|dkrt t j|ddd}ntd| d|durt ||}|dkr|t j|ddd }n|dkr|t j|ddd }|t t j|d ddd }|j|jksJ d| t j|||dsJ d| q+dS )z=Test calculation of spatial features for multi-channel audio.-C6
?   2   
   *   seed   r   
hop_lengthF)num_subbandsr   r   r   use_ipdsizeinputinput_lengthNr   r   r   Taxiskeepdimsr   r   zMagnitude reduction z not implementedr   r   r   r   'Feature shape not matching for example atol"Features not matching for example )nprandomdefault_rngr	   r   r&   rangenormaltorchTensorcpudetachnumpyabssqrtr   NotImplementedErrorpowershapeallclose)selfr   r   r   r   r   r3   
batch_sizenum_samplesnum_examplesrandom_seed_rngr%   
audio2spec	spec2featnxspecspec_lenfeat_feat_npspec_npfeat_golden rV   ^/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/audio/test_audio_modules.pytest_magnitude%   sP   "" z4TestSpectrogramToMultichannelFeatures.test_magnitudeipd_normalizationuse_input_lengthTFc                 C   s  d}d}|d }d}d}	t jj|	d}
|d }t||d}t|jd	d
d|d}t|D ]}|
j|||fd}|t	|t	|g| d\}}|||rM|ndd\}}|
   }|d|jdddf }|
   }t j|dd
d}t |t | }t |t j dt j t j }|dkr|t j|dd
d }n"|dkr|t j|dd
d }|t t t j|d dd
d|j }|j|jksJ d| t j|||dsJ d| q+dS )zATest calculation of IPD spatial features for multi-channel audio.r   r   r   r   r    r!   r#   r$   r   TN)r&   r   r'   r   rY   r(   r*   .r   r-   r   r   r0   r   r1   r2   r4   )r5   r6   r7   r	   r   r&   r8   r9   r:   r;   r<   r=   r>   r   angle	remainderpir@   maximumepsrC   rD   )rE   r   r   rY   rZ   r3   rF   rG   rH   rI   rJ   r%   rK   rL   rM   rN   rO   rP   rQ   rR   rS   ipdrT   	spec_mean
ipd_goldenrV   rV   rW   test_ipdm   sF   " z.TestSpectrogramToMultichannelFeatures.test_ipdr'   c                 C   s   t d|d}tt |j W d   n1 sw   Y  dD ]}t d||d}|j|ks0J q dD ]}t d||dd}|rG|j|ksFJ q3|jdksNJ q3dS )	zTest num channels property.    r&   r'   N)r   r   r   r#   )r&   num_input_channelsr'   r   )r&   rf   r'   r   r   )r   pytestraises
ValueErrorr   )rE   r'   uutr   rV   rV   rW   test_num_channels   s   z7TestSpectrogramToMultichannelFeatures.test_num_channelsc                 C   s8   dD ]}t ||d}|r|jd| ksJ |sJ qdS )zTest num features property.)   r   re   r   N)r   num_features)rE   r'   r&   rj   rV   rV   rW   test_num_features   s
   z7TestSpectrogramToMultichannelFeatures.test_num_featuresc                 C   s   t t tddddd W d   n1 sw   Y  t t tdddd W d   n1 s4w   Y  tddd	}td
ddd}t t ||tdgd W d   dS 1 sbw   Y  dS )z3Test initialization with unsupported normalization.rd   r   Fznot-implemented)r&   r   r'   r   NT)r&   r'   rY   )r&   r   r   r   d   r*   )rg   rh   rA   r   r:   randnri   r;   )rE   rj   r+   rV   rV   rW   test_unsupported_norm   s.   "z;TestSpectrogramToMultichannelFeatures.test_unsupported_norm)__name__
__module____qualname__rg   markunitparametrizeintr   strfloatrX   boolrc   rk   rn   rq   rV   rV   rV   rW   r   $   s@    B(3r   c                
   @   s^   e Zd Zejjejddgejdddgejdddgdededefdd	Zd
S )TestMaskBasedProcessorr      r   r   r#   	num_masksr   c              
   C   s  |dkrdg}nd|d g}d}d}|d }d}d}	t jj|	d}
|d	 }t||d
}|D ]}t|d}t|D ]}|
j|||fd}|t|t|g| d\}}|
j	dd|||j
|jd fd}|||t|d\}}|   }|   }t j||jd}t|D ]}|dd|df |dd|df  |dd|df< q|j|jksJ d| t j|||dsJ d| q8q-dS )z&Test masking of the reference channel.r   r   ư>r   r   r   r    r!   r#   r$   )ref_channelr(   r*                 ?)lowhighr)   r+   r,   maskdtypeN.z&Output shape not matching for example r2   z Output not matching for example )r5   r6   r7   r	   r   r8   r9   r:   r;   uniformr&   rC   tensorr<   r=   r>   
zeros_liker   rD   )rE   r   r   r~   ref_channelsr3   rF   rG   rH   rI   rJ   r%   rK   r   mask_processorrM   rN   rO   rP   r   outrR   out_nprT   
out_goldenmrV   rV   rW   test_mask_reference_channel   s>   

4 z2TestMaskBasedProcessor.test_mask_reference_channelN)	rr   rs   rt   rg   ru   rv   rw   rx   r   rV   rV   rV   rW   r|      s    "r|   c                
   @   s   e Zd Zejjejdddgejddgejdddgdededefd	d
ZejjejdddgejddgejdddgdededefddZ	ejjejddgejddgejdddgdededefddZ
dS )TestMaskBasedDereverbr   r   r   filter_lengthr   delayr   rl   c              
   C   s"  d}d}d}d}d}d}	t jj|d}
||||	f}t|D ]q}|
j|dd	|
j|d  }t j|||	|| f|jd
}t|D ]&}t|D ]}t||dd|ddf  ||d|||ddddf< qEq?t	j
t|||d}t	|}|   }t j|||dsJ d| dqdS )zwTest construction of convolutional tensor in WPE. Compare against
        reference implementation convmtx_mc.
        r   r    r   r         r!   r(                 ?r   Nr   r   r2   Example z: comparison failed)r5   r6   r7   r8   r9   zerosr   r   	transposer
   
convtensorr:   r   permute_convtensorr<   r=   r>   rD   )rE   r   r   r   r3   rI   rH   rF   r&   
num_framesrJ   
input_sizerM   Xtilde_X_refbftilde_X_uutrV   rV   rW   test_wpe_convtensor  s,   
"z)TestMaskBasedDereverb.test_wpe_convtensorc                 C   s*  d}d}d}d}d}d}	t ||dd}
tjj|d	}||||	f}t|D ]}t|j|d
d|j|d
  }t|j|||	fd
}|
j	|||d}|
dddd}|
ddddd|||	|| }t|dd |d | }t|dd |d | }|
j|||d\}}|jdddjddd}|jddd}tj|||dsJ d| dtj|||dsJ d| dtj||}|
||}||||d
dddd}tj|||dsJ d| dt||}|
j||d}|
dddd}tj|||dsJ d| dq$dS )z>Test estimation of correlation matrices, filter and filtering.r   r    r   r#   r   r   N)r   prediction_delaydiag_regr!   r(   r   r   r   r   r   r   r   ).N)r+   weighttilde_input)	start_dimend_dimr2   r   z: comparison failed for Qz: comparison failed for Rz: comparison failed for G)filterr   z*: comparison failed for undesired output U)r
   r5   r6   r7   r8   r:   r   r9   r   r   permutereshapematmulr   conjestimate_correlationsflattenrD   linalgsolveestimate_filterapply_filter)rE   r   r   r   r3   rI   rH   rF   r&   r   
wpe_filterrJ   r   rM   r   r   tilde_XX_goldentilde_X_goldenQ_goldenR_goldenQ_uutR_uutQ_uut_flattenedR_uut_flattenedG_goldenG_uutG_uut_flattenedU_goldenU_uut	U_uut_refrV   rV   rW   test_wpe_filterC  sJ   "     

z%TestMaskBasedDereverb.test_wpe_filterr   c                 C   s   d}d}d}d}d}||||f}	t |||d}
t|D ]8}t|	dt|	  }td||f}t|	}|
|||d	\}}|j|jksIJ d
t||sSJ dqdS )z<Test that dereverb can be initialized and can process audio.r   r   r   r   r   )r   r   num_iterationsr   r   r   z&Output shape not matching, example {n}z Length not matching, example {n}N)r   r8   r:   rp   randintrandrC   equal)rE   r   r   r   rH   rF   r&   r   r   r   dereverbrM   rN   x_lengthr   yy_lengthrV   rV   rW   test_mask_based_dereverb_init  s"   
z3TestMaskBasedDereverb.test_mask_based_dereverb_initN)rr   rs   rt   rg   ru   rv   rw   rx   r   r   r   rV   rV   rV   rW   r     s     #I"r   c                   @   s   e Zd Zejjejdg dejdddgejdddgdededefd	d
Z	ejjejdddgejdddgejdddgejdddgdedededefddZ
dS )TestMaskEstimatorchannel_reduction_position)r   r   r   channel_reduction_typeaverage	attentionchannel_block_typetransform_average_concatenatetransform_attend_concatenatec                 C   s  ddg}ddg}ddg}ddg}d}d}	|D ]n}
|D ]i}|D ]d}t d|
|| t||
||||d	}|D ]M}t d
| |||
|	f}tj|tjd}td|	|f}|||d\}}|||
|	f}|j|kspJ d| d|j t||ksJ d| d| q4qqqdS )zTTest initialization of the mask estimator and make sure it can process input tensor.rd   A   r   r   rl   r#   r   z?Instantiate with num_subbands=%d, num_outputs=%d, num_blocks=%d)num_outputsr&   
num_blocksr   r   r   Process num_channels=%dr   r*    Output shape mismatch: expected , got z!Output length mismatch: expected N)	r   debugr   r:   rp   cfloatr   rC   all)rE   r   r   r   num_subbands_testsnum_outputs_testsnum_blocks_testsnum_channels_testsrF   r   r&   r   r   rj   r   r   rO   spec_lengthr   mask_lengthexpected_mask_shaperV   rV   rW   test_flex_channels  sT   	
z$TestMaskEstimator.test_flex_channelsr   r   r#   r&   rd   r   r   r   r   rF   c                 C   s   d}t  }td| ||||f}td| tj|tjd}t|||dk}	|||	d}
||||f}|
j|ksDJ d| d|
j d	S )
zTest initialization of the GSS mask estimator and make sure it can process an input tensor.
        This tests initialization and the output shape. It does not test correctness of the output.
        r   r   zInput size: %sr   r   )r+   activityr   r   N)r   r   r   r:   rp   r   rC   )rE   r   r&   r   rF   r   rj   r   mixture_specsource_activityr   r   rV   rV   rW   test_gss  s   
zTestMaskEstimator.test_gssN)rr   rs   rt   rg   ru   rv   rw   rx   ry   r   r   rV   rV   rV   rW   r     s$    9(r   c                
   @   sp   e Zd Zejjejdg dejdddgejdddgdedede	fd	d
Z
ejjdd ZdS )TestSSLPretrainMaskingWithPatch
patch_size)r   rl   r   mask_fractiong      ?r   trainingTFc              
   C   s<  d}d}d}d}d}t ||d}	|r|	  n|	  t }
|
d tj||||tj|
d}tj|d	 ||f|
d
}t	|D ]}d||dddd|| df< q@|	||d}|j
|j
ksaJ t	|D ]6}t||ddddd|| f  dk|| ||   }t|| |k sJ d| d| d| qedS )Test SSL pretrain masking.rd   i  r   r   g{Gz?)r   r   r   )r   	generatorr   )r   r   N)
input_speclengthr   z: est_mask_fraction = z, mask_fraction = )r   trainevalr:   	Generatormanual_seedrp   r   r   r8   rC   sumr?   )rE   r   r   r   r&   r   r   rF   abs_tolrj   rngr   r,   r   masked_specest_mask_fractionrV   rV   rW   test_masking  s2   

",z,TestSSLPretrainMaskingWithPatch.test_maskingc                 C   s   t t tdd W d   n1 sw   Y  t t tdd W d   n1 s/w   Y  t t tdd W d   n1 sIw   Y  t t tdd W d   dS 1 sdw   Y  dS )r   r   )r   Nr   g?)r   g)rg   rh   ri   r   )rE   rV   rV   rW   test_unsupported_initializationA  s   "z?TestSSLPretrainMaskingWithPatch.test_unsupported_initializationN)rr   rs   rt   rg   ru   rv   rw   rx   rz   r{   r  r  rV   rV   rV   rW   r     s    +r   )	importlibtypingr   r>   r5   rg   r:   'nemo.collections.audio.modules.featuresr   &nemo.collections.audio.modules.maskingr   r   r   r   3nemo.collections.audio.modules.ssl_pretrain_maskingr   )nemo.collections.audio.modules.transformsr	   4nemo.collections.audio.parts.submodules.multichannelr
   (nemo.collections.audio.parts.utils.audior   
nemo.utilsr   r   r|   r   r   r   rV   rV   rV   rW   <module>   s&    >: `