o
    zi(                     @   s   d dl Z d dlmZmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZmZ d d	lmZm Z  d d
l!m"Z"m#Z# erhd dl$m%Z%m&Z& G dd deZ'deeef dej(fddZdS )    N)TYPE_CHECKINGAnyCallableDictOptionalUnion)override)	Precision)
Throughputget_available_flops)_plugin_to_compute_dtype)Callback)	BitsandbytesPrecisionDeepSpeedPrecisionDoublePrecisionFSDPPrecisionHalfPrecisionMixedPrecisionr	   TransformerEnginePrecisionXLAPrecision)RunningStage	TrainerFn)rank_zero_onlyrank_zero_warn)LightningModuleTrainerc                       s  e Zd ZdZ	d-deegef deeegef  deddf fddZe	d	d
ddde
ddfddZd.ddZe d	d
dddededdf
ddZd-d	d
dee ddfddZe	ed	d
deddfddZe	ed	d
dddedededdfddZe	ed	d
deddfddZe	ed	d
dddededed eddfd!d"Ze	ed	d
deddfd#d$Ze	ed	d
deddfd%d&Ze	ed	d
dddededed eddfd'd(Ze	ed	d
deddfd)d*Ze	ed	d
dddededed eddfd+d,Z  ZS )/ThroughputMonitora  Computes and logs throughput with the :class:`~lightning_fabric.utilities.throughput.Throughput`

    Example::

        class MyModel(LightningModule):
            def setup(self, stage):
                with torch.device("meta"):
                    model = MyModel()

                    def sample_forward():
                        batch = torch.randn(..., device="meta")
                        return model(batch)

                    self.flops_per_batch = measure_flops(model, sample_forward, loss_fn=torch.Tensor.sum)


        logger = ...
        throughput = ThroughputMonitor(batch_size_fn=lambda batch: batch.size(0))
        trainer = Trainer(max_steps=1000, log_every_n_steps=10, callbacks=throughput, logger=logger)
        model = MyModel()
        trainer.fit(model)

    Notes:
        - It assumes that the batch size is the same during all iterations.
        - It will try to access a ``flops_per_batch`` attribute on your ``LightningModule`` on every iteration.
          We suggest using the :func:`~lightning_fabric.utilities.throughput.measure_flops` function for this.
          You might want to compute it differently each time based on your setup.

    Args:
        batch_size_fn: A function to compute the number of samples given a batch.
        length_fn: A function to compute the number of items in a sample given a batch.
        \**kwargs: See available parameters in
            :class:`~lightning_fabric.utilities.throughput.Throughput`

    Nbatch_size_fn	length_fnkwargsreturnc                    s8   t    || _|| _|| _d | _i | _i | _i | _d S N)	super__init__r   r   r   available_flops_throughputs_t0s_lengths)selfr   r   r   	__class__ b/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_lightning/callbacks/throughput_monitor.pyr#   O   s   

zThroughputMonitor.__init__trainerr   	pl_moduler   stagec                 C   s   t |j}t|jj|| _|tjkr(|jr(t	d| j|j
d| j}|| jtj< t	d| j|j
d| j}|jj}|d us?J || j|< d S )N)r$   
world_sizer+   )r   precision_pluginr   strategyroot_devicer$   r   FITTINGenable_validationr
   r0   r   r%   r   
VALIDATINGstater/   )r(   r-   r.   r/   dtype
throughputr+   r+   r,   setup[   s   
zThroughputMonitor.setupc                 C   s>   |j j}|d us
J | j|   d| j|< t | j|< d S )Nr   )r7   r/   r%   resetr'   timeperf_counterr&   )r(   r-   r/   r+   r+   r,   _startj   s
   
zThroughputMonitor._startbatchiter_numc           
      C   s   |j j}|d us
J | j| }|jjjdkrtj  t	
 | j|  }| jd ur5| j|  | |7  < t|dr>|j}ntdt|j d d }| |}	|j||||	 | jd u r^d n| j| |d d S )Ncudaflops_per_batchzdWhen using the `ThroughputMonitor`, you need to define a `flops_per_batch` attribute or property in z to compute the FLOPs.)r<   batchessampleslengthsflops)r7   r/   r%   r2   r3   typetorchrA   synchronizer<   r=   r&   r   r'   hasattrrB   r   __name__r   update)
r(   r-   r.   r?   r@   r/   r9   elapsedrB   
batch_sizer+   r+   r,   _updateq   s0   





zThroughputMonitor._updatec                    s^   |j jsd S |jj  d usJ | j   } fdd| D }|j j||d d S )Nc                    s&   i | ]\}} j  j | |qS r+   )value	separator).0kvr/   r9   r+   r,   
<dictcomp>   s   & z.ThroughputMonitor._compute.<locals>.<dictcomp>)step)_logger_connectorshould_update_logsr7   r/   r%   computeitemslog_metrics)r(   r-   r@   metricsr+   rU   r,   _compute   s   
zThroughputMonitor._compute_c                 G      |  | d S r!   r>   r(   r-   r_   r+   r+   r,   on_train_start      z ThroughputMonitor.on_train_startoutputsc                 G   s4   |  ||||jjd  |j s| | d S d S )N   )rO   fit_looptotal_batch_idx_should_accumulater^   )r(   r-   r.   re   r?   r_   r+   r+   r,   on_train_batch_end   s   
z$ThroughputMonitor.on_train_batch_endc                 G   s   |j rd S | | d S r!   )sanity_checkingr>   rb   r+   r+   r,   on_validation_start   s   z%ThroughputMonitor.on_validation_start__c                 O   s6   |j rd S |jjjj}| |||| | || d S r!   )rk   _evaluation_loopbatch_progresstotalreadyrO   r^   r(   r-   r.   re   r?   r_   rm   r@   r+   r+   r,   on_validation_batch_end   s
   z)ThroughputMonitor.on_validation_batch_endc                 G   st   |j s
|jjtjkrd S | jtj t| j	tj j
 }| jtj | }t| j	tj j
}| jtj  || 7  < d S r!   )rk   r7   fnr   r4   r&   r   TRAININGsumr%   _timer6   )r(   r-   r_   training_finishedtime_between_train_and_valval_timer+   r+   r,   on_validation_end   s   z#ThroughputMonitor.on_validation_endc                 G   r`   r!   ra   rb   r+   r+   r,   on_test_start   rd   zThroughputMonitor.on_test_startc                 O   ,   |j jjj}| |||| | || d S r!   )rn   ro   rp   rq   rO   r^   rr   r+   r+   r,   on_test_batch_end      z#ThroughputMonitor.on_test_batch_endc                 G   r`   r!   ra   rb   r+   r+   r,   on_predict_start   rd   z"ThroughputMonitor.on_predict_startc                 O   r}   r!   )predict_loopro   rp   rq   rO   r^   rr   r+   r+   r,   on_predict_batch_end   r   z&ThroughputMonitor.on_predict_batch_endr!   )r-   r   r    N)rK   
__module____qualname____doc__r   r   intr   r#   r   strr:   r>   rH   inference_moderO   r^   r   rc   rj   rl   rs   r{   r|   r~   r   r   __classcell__r+   r+   r)   r,   r   *   s    %
  			r   pluginr    c                 C   s   t | ts	t| S t | tr| jS t | tr| jS t | tr)| jdkr&t	j
S t	jS t | tr1t	jS t | ttfr;| jS t | trCt	jS t | trO| jjpNt	jS t | trWt	jS t| )Nz
bf16-mixed)
isinstancer	   fabric_plugin_to_compute_dtyper   r8   r   _desired_input_dtyper   	precisionrH   bfloat16halfr   doubler   r   _desired_dtyper   int8r   mixed_precision_configreduce_dtypefloat32NotImplementedError)r   r+   r+   r,   r      s&   







r   ))r<   typingr   r   r   r   r   r   rH   typing_extensionsr   lightning_fabric.pluginsr	   FabricPrecision%lightning_fabric.utilities.throughputr
   r   r   r   pytorch_lightning.callbacksr   pytorch_lightning.pluginsr   r   r   r   r   r   r   r    pytorch_lightning.trainer.statesr   r   %pytorch_lightning.utilities.rank_zeror   r   pytorch_lightningr   r   r   r8   r+   r+   r+   r,   <module>   s     ,  >