def calculate_valid_accumulate_grad_batches(
    global_batch_size: int,
    micro_batch_size: int,
    devices: int,
    num_nodes: int,
    tp_size: int = 1,
    pp_size: int = 1,
    cp_size: int = 1,
) -> int:
    """Calculate valid gradient accumulation steps based on the given parameters.

    Args:
        global_batch_size (int): The desired global batch size
        micro_batch_size (int): The micro batch size per GPU
        devices (int): Number of GPUs per node
        num_nodes (int): Number of nodes
        tp_size (int, optional): Tensor parallel size. Defaults to 1.
        pp_size (int, optional): Pipeline parallel size. Defaults to 1.
        cp_size (int, optional): Context parallel size. Defaults to 1.

    Returns:
        int: The calculated gradient accumulation steps

    Raises:
        ValueError: If the parameters result in invalid configuration
    c                 s   s    | ]}|d kV  qdS )    N ).0xr   r   M/home/ubuntu/.local/lib/python3.10/site-packages/nemo/automodel/misc_utils.py	<genexpr>,   s    z:calculate_valid_accumulate_grad_batches.<locals>.<genexpr>zAll parameters must be positiver
   zWorld size (z,) must be divisible by model parallel size ()z*Invalid configuration: global_batch_size (z>) must be divisible by micro_batch_size * data_parallel_size ()any
ValueError
is_integerint)r   r   r   r   r   r   r   
world_sizemodel_parallel_sizedata_parallel_sizeaccumulate_grad_batchesr   r   r   'calculate_valid_accumulate_grad_batches   s    r   N)r   r   r   )r   r   r   r   r   r   <module>   s(   
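

# A minimal usage sketch with hypothetical values, not part of the original module:
# 2 nodes x 8 GPUs with tp_size=2 and pp_size=2 gives data_parallel_size = 16 / 4 = 4,
# so global_batch_size=64 with micro_batch_size=2 yields 64 / (2 * 4) = 8 accumulation steps.
if __name__ == "__main__":
    steps = calculate_valid_accumulate_grad_batches(
        global_batch_size=64,
        micro_batch_size=2,
        devices=8,
        num_nodes=2,
        tp_size=2,
        pp_size=2,
    )
    print(steps)  # expected: 8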