o
    ei                     @   sn   d Z ddlZddlZddlZeeZdd Zdd Zdd Z	eee	d	Z
	 e a	 e ad
d Zdd ZdS )zGlobal changes and platform/GPU-specific quirks, i.e. workarounds and saner
defaults, sometimes due to platform-specific issues.

Author:
    * Sylvain de Langen 2024
    Nc                   C   s   dt jj_dS )af  Disables CuDNN benchmarking. no-op on platforms where it is already off
    by default.

    Benchmarking, when enabled, theoretically improves convolution performance
    by automatically comparing different kernels for some operations.

    However, benchmarking has to be re-run for every unique input shape, which
    makes it unsuitable for highly dynamic shapes.
    Since SpeechBrain does tend to use very varied shapes without attempting to
    pad the differences out, leaving benchmarking on can severely degrade
    training performance.

    This function disables it as we deem no-benchmarking to be a saner default
    to avoid performance bugs at the moment.

    As of PyTorch 2.3.0, the default is `False` for CUDA GPUs, but `True`
    for HIP GPUs.

    The HIP equivalent to CuDNN is MIOpen, but it is controlled through the same
    PyTorch API.
    FN)torchbackendscudnn	benchmark r   r   V/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/utils/quirks.pydisable_cudnn_benchmarking   s   r   c                   C   s   t jd t jd dS )zPDisables JIT profiling to avoid performance issues on highly dynamic
    shapes.FN)r   _C_jit_set_profiling_executor_jit_set_profiling_moder   r   r   r   disable_jit_profiling*   s   r   c                   C   s   dt jjj_dt jj_dS )ac  On CUDA backends (potentially including ROCm), enables TensorFloat32
    support for CuDNN and the matmul operator.

    This allows performing certain operations transparently at a lower
    precision, even in fp32 math when AMP is not in use, when otherwise tensor
    cores would not be used. TF32 supports accumulation into fp32, so the
    concern for overflowing is somewhat mitigated.

    On NVIDIA GPUs, this is available since Ampere (e.g. A100).

    See `PyTorch documentation <https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-and-later-devices>`__ for more
    details.TN)r   r   cudamatmul
allow_tf32r   r   r   r   r   r   2   s   r   )r   r   r   c               	   C   s   t d t d tj rtjjrt d dtjv rGtjd 	dD ]!} | dkrF| t
 vrAtd|  dd	t
  d
t|  q%t t a t D ]}t
|   qMt  dS )zHApply quirks depending on the platform. Also populates `applied_quirks`.r   r   r   SB_DISABLE_QUIRKS, zDSB_DISABLE_QUIRKS environment variable includes unknown quirk name "z". Supported quirks: [, ]N)applied_quirksaddr   r   is_availableversionhiposenvironsplitKNOWN_QUIRKSkeys
ValueErrorjoinexcluded_quirkslog_applied_quirks)quirk_to_excludequirkr   r   r   apply_quirksR   s"   





r%   c                   C   s(   t ddt t ddt dS )z:Logs whichever quirks have been applied by `apply_quirks`.z5Applied quirks (see `speechbrain.utils.quirks`): [%s]r   z]Excluded quirks specified by the `SB_DISABLE_QUIRKS` environment (comma-separated list): [%s]N)loggerinfor    r   r!   r   r   r   r   r"   q   s   r"   )__doc__loggingr   r   	getLogger__name__r&   r   r   r   r   setr   r!   r%   r"   r   r   r   r   <module>   s$    
