o
    }oio                     @   s   d dl mZmZmZ d dlZd dlmZ d dlmZ dZ	zd dl
Z
d dlmZmZ W n ey9   eZdZdZ	Y nw ded	efd
dZG dd deZdS )    )AnyMappingSequenceN)HalfPrecision)overrideT)
DictConfig	OmegaConfFtrainer_cfgreturnc                 C   s   t j| dd} ts| S | d}|dv r!| dd t|g| d< | dd }dur8t|tr8tj	
|| d< | dd }dur[t|tr[g }|D ]}|tj	
| qK|| d< | S )	a  
    Resolves and processes a trainer configuration.

    This function handles specific trainer configuration details:
    - For half precision setups, replaces precision settings with custom plugins
    - Instantiates strategy objects from mapping configurations
    - Instantiates custom callbacks from sequences

    Args:
        trainer_cfg: A DictConfig containing trainer configuration parameters

    Returns:
        A processed DictConfig with resolved configuration values
    T)resolve	precision)z	fp16-truez	bf16-trueNpluginsstrategy	callbacks)r   to_container
_HAS_HYDRAgetpopHalfPrecisionForAudio
isinstancer   hydrautilsinstantiater   append)r	   r   r   cbsresolvedcb r   L/home/ubuntu/.local/lib/python3.10/site-packages/nemo/utils/trainer_utils.pyresolve_trainer_cfg    s   
r   c                       s.   e Zd ZdZededef fddZ  ZS )r   z
    Adjusted Pytorch Lightning plugin for training with half precision.
    It avoids downcasting audio to bfloat16 when the mini-batch is a dict
    with 'audio' string in the keys corresponding to audio tensors.
    datar
   c                    s,   t |tst |S  fdd  |S )a	  
        Converts input data to the appropriate precision format, preserving audio tensor precision.

        This method overrides the parent class implementation to avoid downcasting tensors
        with 'audio' in their dictionary keys. It processes input data recursively when
        encountering nested dictionaries.

        Args:
            data: The input data to convert (can be tensor, dict, or other types)

        Returns:
            The converted data with appropriate precision for each element
        c                    sl   t | tr#i }|  D ]\}} d|vst| s | } | ||< q|S t | tjr4t| r4| jS | S )Naudio)	r   dictitemstorch	is_tensorTensoris_floating_pointto_desired_input_dtype)vansk_convertselfr   r   r.   `   s   

z5HalfPrecisionForAudio.convert_input.<locals>._convert)r   r"   superconvert_input)r/   r    	__class__r-   r   r1   N   s   
z#HalfPrecisionForAudio.convert_input)__name__
__module____qualname____doc__r   r   r1   __classcell__r   r   r2   r   r   G   s     r   )typingr   r   r   r$   lightning.pytorch.pluginsr   typing_extensionsr   r   r   	omegaconfr   r   ModuleNotFoundErrorr   r   r   r   r   r   <module>   s   '