o
    wip                     @   sZ   d dl mZ d dlmZ d dlmZ d dlmZ e G dd dZG dd ded	Z	d
S )    )	dataclass)Lock)Optional)	Singletonc                   @   s2   e Zd ZU dZeed< eed< dZee ed< dS )ModelMetadataRegistryz0
    Dataclass for model metadata registry.
    guidgidxNrestoration_path)	__name__
__module____qualname____doc__str__annotations__intr	   r    r   r   Q/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/utils/app_state.pyr      s
   
 r   c                   @   sd  e Zd ZdZdd Zedd Zejdd Zedd Zejd	d Zed
d Z	e	jdd Z	edd Z
e
jdd Z
edd Zejdd Zedd Zejdd Zedd Zejdd Zedd Zejdd Zedd Zejdd Zedd  Zejd!d  Zed"d# Zejd$d# Zed%d& Zejd'd& Zed(d) Zejd*d) Zed+d, Zejd-d, Zed.d/ Zejd0d/ Zed1d2 Zejd3d2 Zed4d5 Zejd6d5 Zed7d8 Zejd9d8 Zed:d; Zejd<d; Zed=d> Zejd?d> Zed@dA ZejdBdA ZedCdD ZejdEdD ZedFdG ZejdHdG ZedIdJ ZejdKdJ ZedLdM ZejdNdM ZedOdP Z e jdQdP Z edRdS Z!e!jdTdS Z!edUdV Z"e"jdWdV Z"edXdY Z#e#jdZdY Z#ed[d\ Z$e$jd]d\ Z$ed^d_ Z%e%jd`d_ Z%edadb Z&e&jdcdb Z&eddde Z'e'jdfde Z'edgdh Z(e(jdidh Z(edjdk Z)e)jdldk Z)edmdn Z*e*jdodn Z*edpdq Z+e+jdrdq Z+edsdt Z,e,jdudt Z,edvdw Z-e-jdxdw Z-edydz Z.e.jd{dz Z.ed|d} Z/e/jd~d} Z/edd Z0e0jdd Z0edd Z1e1jdd Z1edd Z2e2jdd Z2edd Z3e3jdd Z3edd Z4e4jdd Z4dde5de6e5 fddZ7dd Z8de9fddZ:ede;fddZ<e<jde;fddZ<ede5fddZ=e=jde5fddZ=ede;fddZ>e>jde;fddZ>dS )AppStatez(
    App state for the application.
    c                 C   sJ  t  | _d | _d | _d | _d | _d | _d | _d | _d | _	d | _
d | _d | _d | _d | _d | _d | _d | _d | _d | _d | _d | _d | _d | _d| _d | _d | _d| _d| _d | _d| _d | _d| _ d | _!d| _"d| _#d | _$d | _%d | _&d | _'d | _(d | _)d | _*d | _+d | _,d| _-d | _.d | _/g | _0i | _1d| _2g | _3g | _4d | _5d| _6d S )NF   T)7r   _AppState__lock_app_cfg
_device_id_local_rank_global_rank_tensor_model_parallel_rank_expert_model_parallel_rank_expert_tensor_parallel_rank_pipeline_model_parallel_rank_data_parallel_rank_world_size_model_parallel_size_tensor_model_parallel_size_tensor_model_parallel_group_expert_model_parallel_size_expert_tensor_parallel_size_pipeline_model_parallel_size%_virtual_pipeline_model_parallel_size#_encoder_tensor_model_parallel_size%_encoder_pipeline_model_parallel_size_pipeline_model_parallel_group#_pipeline_model_parallel_split_rank%_pipeline_model_parallel_comm_backend_is_megatron_initialized_data_parallel_size_data_parallel_group_use_tp_pp_dp_mapping$_num_distributed_optimizer_instances_megatron_checkpoint_version_use_fp8_context_parallel_size_init_mpi_proc_gruop_nccl_communicator_config_path
_use_sharp_use_gloo_process_groups_random_seed_log_dir_exp_dir_name_checkpoint_name_version_create_checkpoint_callback_checkpoint_callback_params_tmpdir_name_is_model_being_restored_nemo_file_folder_model_restore_path_all_model_restore_paths_model_guid_map_restorefiles_to_move_files_to_copy	_cmd_args_nvtx_rangesselfr   r   r   __init__&   sl   
zAppState.__init__c                 C      | j S )zNProperty returns the device_id
        Returns:
            device_id
        r   rK   r   r   r   	device_ido      zAppState.device_idc                 C   
   || _ dS )zZProperty sets the device_id.
        Args:
            size (int): The device id.
        NrO   )rL   idr   r   r   rP   w      
c                 C   rN   )zfProperty returns the total number of GPUs.
        Returns:
            Total number of GPUs.
        r   rK   r   r   r   
world_size   rQ   zAppState.world_sizec                 C   rR   )zmProperty sets the total number of GPUs.
        Args:
            size (int):  Total number of GPUs.
        NrU   rL   sizer   r   r   rV      rT   c                 C   rN   zProperty returns the number of GPUs in each model parallel group.
        Returns:
            Number of GPUs in each model parallel group.
        r    rK   r   r   r   model_parallel_size   rQ   zAppState.model_parallel_sizec                 C   rR   zProperty sets the number of GPUs in each model parallel group.
        Args:
            size (int):  Number of GPUs in each model parallel group.
        NrZ   rW   r   r   r   r[      rT   c                 C   rN   rY   r!   rK   r   r   r   tensor_model_parallel_size   rQ   z#AppState.tensor_model_parallel_sizec                 C   rR   r\   r]   rW   r   r   r   r^      rT   c                 C   rN   )zrProperty returns the expert model parallel rank.
        Returns:
            Tensor model parallel rank.
        r   rK   r   r   r   expert_model_parallel_rank   rQ   z#AppState.expert_model_parallel_rankc                 C   rR   )zyProperty sets the expert model parallel rank.
        Args:
            rank (int):  Tensor model parallel rank.
        Nr_   rL   rankr   r   r   r`      rT   c                 C   rN   )Property returns the number of GPUs in each expert parallel group.
        Returns:
            Number of GPUs in each expert parallel group.
        r#   rK   r   r   r   expert_model_parallel_size   rQ   z#AppState.expert_model_parallel_sizec                 C   rR   )rc   Nrd   rW   r   r   r   re      rT   c                 C   rN   )zProperty returns the number of GPUs in each expert tensor parallel group.
        Returns:
            Number of GPUs in each expert tensor parallel group.
        r$   rK   r   r   r   expert_tensor_parallel_size   rQ   z$AppState.expert_tensor_parallel_sizec                 C   rR   )zProperty sets the number of GPUs in each expert tensor parallel group.
        Args:
            size (int):  Number of GPUs in each tensor expert parallel group.
        Nrf   rW   r   r   r   rg      rT   c                 C   rN   )zyProperty returns the expert tensor model parallel rank.
        Returns:
            Tensor model parallel rank.
        r   rK   r   r   r   expert_tensor_parallel_rank   rQ   z$AppState.expert_tensor_parallel_rankc                 C   rR   )zProperty sets the expert tensor model parallel rank.
        Args:
            rank (int):  Tensor model parallel rank.
        Nrh   ra   r   r   r   ri      rT   c                 C   rN   rY   r%   rK   r   r   r   pipeline_model_parallel_size   rQ   z%AppState.pipeline_model_parallel_sizec                 C   rR   r\   rj   rW   r   r   r   rk      rT   c                 C   rN   )zProperty returns the backend communication library of pipeline communication.
        Returns:
            Backend communication library of pipeline communication.
        r+   rK   r   r   r   $pipeline_model_parallel_comm_backend   rQ   z-AppState.pipeline_model_parallel_comm_backendc                 C   rR   )zProperty sets the backend communication library of pipeline communication.
        Args:
            backend (str): Backend communication library of pipeline communication.
        Nrl   )rL   backendr   r   r   rm     rT   c                 C   rN   rY   r'   rK   r   r   r   "encoder_tensor_model_parallel_size  rQ   z+AppState.encoder_tensor_model_parallel_sizec                 C   rR   r\   ro   rW   r   r   r   rp     rT   c                 C   rN   rY   r(   rK   r   r   r   $encoder_pipeline_model_parallel_size  rQ   z-AppState.encoder_pipeline_model_parallel_sizec                 C   rR   r\   rq   rW   r   r   r   rr   '  rT   c                 C   rN   )zxProperty returns whether to use TP-PP-DP mapping.
        Returns:
            Whether to use TP-PP-DP mapping.
        r/   rK   r   r   r   use_tp_pp_dp_mapping/  rQ   zAppState.use_tp_pp_dp_mappingc                 C   rR   )zProperty sets whether to use TP-PP-DP mapping.
        Args:
            use_new_mapping (bool):  Whether to use TP-PP-DP mapping.
        Nrs   )rL   use_new_mappingr   r   r   rt   7  rT   c                 C   rN   )zProperty returns the factor by which the Partial DistOpt is sharded.
        Returns:
            The partial DistOpt shard factor
        r0   rK   r   r   r   #num_distributed_optimizer_instances?  rQ   z,AppState.num_distributed_optimizer_instancesc                 C   rR   )zProperty sets the factor by which the Partial DistOpt is sharded.
        Args:
            shard_factor (int):  The partial DistOpt shard factor.
        Nrv   )rL   shard_factorr   r   r   rw   G  rT   c                 C   rN   rY   r&   rK   r   r   r   $virtual_pipeline_model_parallel_sizeO  rQ   z-AppState.virtual_pipeline_model_parallel_sizec                 C   rR   )zProperty sets the size of the virtual pipeline parallel model.
        Args:
            size (int):  Number of modules in each pipeline parallel model.
        Nry   rW   r   r   r   rz   W  rT   c                 C   rN   )zProperty returns the number of GPUs in each data parallel group.
        Returns:
            Number of GPUs in each data parallel group.
        r-   rK   r   r   r   data_parallel_size_  rQ   zAppState.data_parallel_sizec                 C   rR   )zProperty sets the number of GPUs in each data parallel group.
        Args:
            size (int):  Number of GPUs in each data parallel group.
        Nr{   rW   r   r   r   r|   g  rT   c                 C   rN   )zRProperty returns the local rank.
        Returns:
            Local rank.
        r   rK   r   r   r   
local_ranko  rQ   zAppState.local_rankc                 C   rR   )zYProperty sets the local rank.
        Args:
            rank (int):  Local rank.
        Nr}   ra   r   r   r   r~   w  rT   c                 C   rN   )zTProperty returns the global rank.
        Returns:
            Global rank.
        r   rK   r   r   r   global_rank  rQ   zAppState.global_rankc                 C   rR   )z[Property sets the global rank.
        Args:
            rank (int):  Global rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )zrProperty returns the tensor model parallel rank.
        Returns:
            Tensor model parallel rank.
        r   rK   r   r   r   tensor_model_parallel_rank  rQ   z#AppState.tensor_model_parallel_rankc                 C   rR   )zyProperty sets the tensor model parallel rank.
        Args:
            rank (int):  Tensor model parallel rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )ztProperty returns the tensor model parallel group.
        Returns:
            Tensor model parallel group.
        r"   rK   r   r   r   tensor_model_parallel_group  rQ   z$AppState.tensor_model_parallel_groupc                 C   rR   )zvProperty sets the tensor model parallel group.
        Args:
            group:  Tensor model parallel group.
        Nr   rL   groupr   r   r   r     rT   c                 C   rN   )zvProperty returns the pipeline model parallel rank.
        Returns:
            Pipeline model parallel rank.
        r   rK   r   r   r   pipeline_model_parallel_rank  rQ   z%AppState.pipeline_model_parallel_rankc                 C   rR   )z}Property sets the pipeline model parallel rank.
        Args:
            rank (int):  Pipeline model parallel rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )zoProperty returns the virtual pipeline parallel rank.
        Returns:
            Model parallel rank.
        %_virtual_pipeline_model_parallel_rankrK   r   r   r   $virtual_pipeline_model_parallel_rank  rQ   z-AppState.virtual_pipeline_model_parallel_rankc                 C   rR   )zProperty sets the virtual pipeline parallel rank.
        Args:
            rank (int):  Virtual pipeline parallel rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )zzProperty returns the encoder tensor model parallel rank.
        Returns:
            Tensor model parallel rank.
        #_encoder_tensor_model_parallel_rankrK   r   r   r   "encoder_tensor_model_parallel_rank  rQ   z+AppState.encoder_tensor_model_parallel_rankc                 C   rR   )zProperty sets the encoder tensor model parallel rank.
        Args:
            rank (int):  Tensor model parallel rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )z|Property returns the encoder pipeline model parallel rank.
        Returns:
            Tensor model parallel rank.
        %_encoder_pipeline_model_parallel_rankrK   r   r   r   $encoder_pipeline_model_parallel_rank  rQ   z-AppState.encoder_pipeline_model_parallel_rankc                 C   rR   )zProperty sets the encoder pipeline model parallel rank.
        Args:
            rank (int):  Tensor model parallel rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )zProperty returns the rank at which Encoder and Decoder are split into different pipelines for
        Megatrron Encoder-Decoder models.
        Returns:
            Pipeline model parallel split rank.
        r*   rK   r   r   r   "pipeline_model_parallel_split_rank  s   z+AppState.pipeline_model_parallel_split_rankc                 C   rR   )zProperty sets the rank at which Encoder and Decoder are split into different pipelines for
        Megatron Encoder-Decoder models.
        Args:
            rank (int): Model parallel split rank.
        Nr   ra   r   r   r   r        
c                 C   rN   )zxProperty returns the pipeline model parallel group.
        Returns:
            Pipeline model parallel group.
        r)   rK   r   r   r   pipeline_model_parallel_group  rQ   z&AppState.pipeline_model_parallel_groupc                 C   rR   )zzProperty sets the pipeline model parallel group.
        Args:
            group:  Pipeline model parallel group.
        Nr   r   r   r   r   r   	  rT   c                 C   rN   )zbProperty returns the data parallel rank.
        Returns:
            Data parallel rank.
        r   rK   r   r   r   data_parallel_rank  rQ   zAppState.data_parallel_rankc                 C   rR   )ziProperty sets the data parallel rank.
        Args:
            rank (int):  Data parallel rank.
        Nr   ra   r   r   r   r     rT   c                 C   rN   )zdProperty returns the data parallel group.
        Returns:
            Data parallel group.
        r.   rK   r   r   r   data_parallel_group!  rQ   zAppState.data_parallel_groupc                 C   rR   )zfProperty sets the data parallel group.
        Args:
            group:  Data parallel group.
        Nr   r   r   r   r   r   )  rT   c                 C   rN   )z\Property returns the use of fp8 precision.
        Returns:
            Use of FP8.
        r2   rK   r   r   r   use_fp81  rQ   zAppState.use_fp8c                 C   rR   )z`Property sets the use of fp8 precision.
        Args:
            use_fp8:  Use of FP8.
        Nr   )rL   r   r   r   r   r   9  rT   c                 C   rN   )z|Property returns whether to use SHARP for all-reduce operations.
        Returns:
            Whether to use SHARP.
        r6   rK   r   r   r   	use_sharpA  rQ   zAppState.use_sharpc                 C   rR   )zProperty sets whether to use SHARP for all-reduce operations.
        Args:
            use_sharp (bool): Whether to use SHARP.
        Nr   )rL   r   r   r   r   r   I  rT   c                 C   rN   )z~Property returns whether to use Gloo process groups.
        Returns:
            Whether to use Gloo process groups.
        r7   rK   r   r   r   use_gloo_process_groupsQ  rQ   z AppState.use_gloo_process_groupsc                 C   rR   )zProperty sets whether to use Gloo process groups.
        Args:
            use_gloo_process_groups (bool): Whether to use Gloo process groups.
        Nr   )rL   r   r   r   r   r   Y  rT   c                 C   rN   )zProperty returns the number of GPUs in each context parallel group.
        Returns:
            Number of GPUs in each context parallel group.
        r3   rK   r   r   r   context_parallel_sizea  rQ   zAppState.context_parallel_sizec                 C   rR   )zProperty sets the number of GPUs in each context parallel group.
        Args:
            size (int):  Number of GPUs in each context parallel group.
        Nr   rW   r   r   r   r   i  rT   c                 C   rN   )zzProperty sets the initialization of mpi process group.
        Returns:
            Initialize mpi process group.
        _init_mpi_proc_grouprK   r   r   r   init_mpi_proc_groupq  rQ   zAppState.init_mpi_proc_groupc                 C   rR   )zProperty sets the initialization of mpi process group.
        Args:
            init_mpi_proc_group:  Initialize mpi process group.
        Nr   )rL   r   r   r   r   r   y  rT   c                 C   rN   )zProperty returns the path to the nccl communicator config.
        Returns:
            Path to the nccl communicator config.
        r5   rK   r   r   r   nccl_communicator_config_path  rQ   z&AppState.nccl_communicator_config_pathc                 C   rR   )zProperty sets the path to the nccl communicator config.
        Args:
            path (str):  Path to the nccl communicator config.
        Nr   rL   pathr   r   r   r     rT   c                 C   rN   )zTProperty returns the random seed.
        Returns:
            Random seed.
        r8   rK   r   r   r   random_seed  rQ   zAppState.random_seedc                 C   rR   )z[Property sets the random seed.
        Args:
            seed (int):  Random seed.
        Nr   )rL   seedr   r   r   r     rT   c                 C   rN   )z'Returns the log_dir set by exp_manager.r9   rK   r   r   r   log_dir     zAppState.log_dirc                 C   rR   zeSets the log_dir property.

        Args:
            dir (str): Log_dir set by exp_manager.
        Nr   rL   dirr   r   r   r     r   c                 C   rN   )z'Returns the exp_dir set by exp_manager.r:   rK   r   r   r   exp_dir  r   zAppState.exp_dirc                 C   rR   r   r   r   r   r   r   r     r   c                 C   rN   z$Returns the name set by exp_manager.r;   rK   r   r   r   name  r   zAppState.namec                 C   rR   z_Sets the name property.

        Args:
            dir (str): name set by exp_manager.
        Nr   rL   r   r   r   r   r     r   c                 C   rN   r   r<   rK   r   r   r   checkpoint_name  r   zAppState.checkpoint_namec                 C   rR   r   r   r   r   r   r   r     r   c                 C   rN   z'Returns the version set by exp_manager.r=   rK   r   r   r   version  r   zAppState.versionc                 C   rR   )zeSets the version property.

        Args:
            dir (str): version set by exp_manager.
        Nr   )rL   r   r   r   r   r     r   c                 C   rN   )z:Returns the create_checkpoint_callback set by exp_manager.r>   rK   r   r   r   create_checkpoint_callback  r   z#AppState.create_checkpoint_callbackc                 C   rR   )zSets the create_checkpoint_callback property.

        Args:
            dir (bool): create_checkpoint_callback set by exp_manager.
        Nr   )rL   r   r   r   r   r     r   c                 C   rN   r   r?   rK   r   r   r   checkpoint_callback_params  r   z#AppState.checkpoint_callback_paramsc                 C   rR   )zySets the name property.

        Args:
            params (dict): checkpoint_callback_params set by exp_manager.
        Nr   )rL   paramsr   r   r   r     r   c                 C   rN   )z<Returns the list of files to move into a separate directory._files_to_moverK   r   r   r   rG     r   zAppState.files_to_movec                 C   rR   )zrSets the files_to_move property.

        Args:
            files (list[str]): list of filenames to move.
        Nr   rL   filesr   r   r   rG     r   c                 C   rN   )z3Returns the list of files to copy into the log dir.rH   rK   r   r   r   files_to_copy  r   zAppState.files_to_copyc                 C   rR   )zrSets the files_to_copy property.

        Args:
            files (list[str]): list of filenames to copy.
        Nr   r   r   r   r   r     r   c                 C   rN   )z7Returns the command line arguments for the current run.rI   rK   r   r   r   cmd_args  r   zAppState.cmd_argsc                 C   rR   )zSets the cmd_args property.

        Args:
            args (list[str]): list of the command line arguments
                used to run the experiment.
        Nr   )rL   argsr   r   r   r   $  s   
c                 C   s$   t | jdkr| jd }|S d}|S )zbProperty returns the model restore path.
        Returns:
            Model restore path.
        r   N)lenrD   )rL   restore_pathr   r   r   model_restore_path.  s   zAppState.model_restore_pathc                 C   s>   | j  || _| j| W d   dS 1 sw   Y  dS )zhProperty sets the model restore path.
        Args:
            path (str): Model restore path.
        N)r   rC   rD   appendr   r   r   r   r   7  s   "Nr   r	   c                 C   sb   | j $ || jv r| j| j}nt| j}t|||d| j|< W d   dS 1 s*w   Y  dS )zMaps a guid to its restore path (None or last absolute path).
        Args:
            guid (str): Guid.
            restoration_path (Optional[str]): Restore path.
        )r	   N)r   rE   r   r   r   )rL   r   r	   idxr   r   r   register_model_guidA  s   

"zAppState.register_model_guidc                 C   s6   | j  | j  W d   dS 1 sw   Y  dS )zResets the guid mapping.N)r   rE   clearrK   r   r   r   reset_model_guid_registryN  s   "z"AppState.reset_model_guid_registryreturnc                 C   s   | j | }|S )zReturns the global model idx and restoration path.
        Args:
            guid (str): Guid.
        Returns:
            Model metadata registry.
        )rE   )rL   r   metadatar   r   r   get_model_metadata_from_guidS  s   
z%AppState.get_model_metadata_from_guidc                 C   rN   )zProperty returns whether the model is being restored.
        Returns:
            Whether the model is being restored.
        rA   rK   r   r   r   is_model_being_restored]  rQ   z AppState.is_model_being_restoredis_restoredc                 C   rR   )zProperty sets whether the model is being restored.
        Args:
            is_restored (bool): Whether the model is being restored.
        Nr   )rL   r   r   r   r   r   e  rT   c                 C   rN   )z^Property returns the nemo file folder.
        Returns:
            Nemo file folder.
        rB   rK   r   r   r   nemo_file_folderm  rQ   zAppState.nemo_file_folderr   c                 C   rR   )zdProperty sets the nemo file folder.
        Args:
            path (str): Nemo file folder.
        Nr   r   r   r   r   r   u  rT   c                 C   rN   )zrProperty returns whether to restore the model.
        Returns:
            Whether to restore the model.
        rF   rK   r   r   r   restore}  rQ   zAppState.restorer   c                 C   rR   )z|Property sets whether to restore the model.
        Args:
            restore (bool): Whether to restore the model.
        Nr   )rL   r   r   r   r   r     rT   )N)?r
   r   r   r   rM   propertyrP   setterrV   r[   r^   r`   re   rg   ri   rk   rm   rp   rr   rt   rw   rz   r|   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rG   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r   r   r   r   !   s   I

























































































	

	
r   )	metaclassN)
dataclassesr   	threadingr   typingr   nemo.utils.metaclassesr   r   r   r   r   r   r   <module>   s   
