o
    ॵiR+                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlmZ d dl	m	Z	 d dl
mZ d dlmZmZmZmZmZ 			dddZ						dd
dZdd Zdd ZdddZdddZdd ZdS )    N)Mapping)distributed)tqdm)	to_device)	broadcastget_dist_infois_dist	is_mastermake_tmp_dirc              
   C   s^  |j }d}|du r(zt|}W n ty$ }	 z	t|	 tdd}	~	ww d}
nd}|}d}
t||
do}t|D ]U\}}t||}t	| ||| |rNd}n+t
|trud	|v r\|d	 }nzttt| }W n tyt   |j}Y nw t|}t|D ]}|  q}|r|d |kr nq9W d   t|S W d   t|S 1 sw   Y  t|S )
aB  Test model in EpochBasedTrainer with a single gpu.

    Args:
        trainer (modelscope.trainers.EpochBasedTrainer): Trainer to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        device (str | torch.device): The target device for the data.
        metric_classes (List): List of Metric class that uses to collect metrics.
        vis_closure (Callable): Collect data for TensorboardHook.
        data_loader_iters (int): Used when dataset has no attribute __len__ or only load part of dataset.

    Returns:
        list: The prediction results.
    FNzVPlease implement ``__len__`` method for your dataset, or provide ``data_loader_iters``zTotal test samplesTzTest iterationstotaldesc   
nsentences)datasetlen	Exceptionloggingerror
ValueErrorr   	enumerater   evaluate_batch
isinstancer   nextitervalues
batch_sizerangeupdateget_metric_values)trainerdata_loaderdevicemetric_classesvis_closuredata_loader_itersr   progress_with_itersdata_lener   pbaridatar   _ r-   W/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/utils/inference.pysingle_gpu_test   sX   







r/   Fc              
   C   s  |j }t| j\}	}
d}|du r1zt|}|}W n ty- } z	t| tdd}~ww d}n
d}d}||
 }d}d}t||d}t	|D ]\}}t
||}t| ||| t|trqd	|v rf|d	 }nttt| }nt|}|||
 d
 krt|g| jj}tj|tjjd | }n||
 }|r|
}n|}||7 }|	dkr||kr|||  }t|D ]}|  q|r|d
 |kr nqHW d   n1 sw   Y  |rt|| j}n|du rt }t|| t j!"|d}t#|}t$|S )a  Test model in EpochBasedTrainer with multiple gpus.

    This method tests model with multiple gpus and collects the results
    under two different modes: gpu and cpu modes. By setting
    ``gpu_collect=True``, it encodes results to gpu tensors and use gpu
    communication for results collection. On cpu mode it saves the results on
    different gpus to ``tmpdir`` and collects them by the rank 0 worker.

    Args:
        trainer (modelscope.trainers.EpochBasedTrainer): Trainer to be tested.
        data_loader (nn.Dataloader): Pytorch data loader.
        device: (str | torch.device): The target device for the data.
        tmpdir (str): Path of directory to save the temporary results from
            different gpus under cpu mode.
        gpu_collect (bool): Option to use either gpu or cpu to collect results.
        data_loader_iters_per_gpu (int): Used when dataset has no attribute __len__ or only load part of dataset.
    Returns:
        list: The prediction results.
    FNz^Please implement ``__len__`` method for your dataset, or provide ``data_loader_iters_per_gpu``z"Total test samples with multi gpusr   Tz%Total test iterations with multi gpusr   r   r   )opmetrics)%r   r   dp_groupr   r   r   r   r   r   r   r   r   r   r   r   r   r   torch
LongTensortomodelr"   dist
all_reduce	reduce_opSUMitemr   r   collect_results_gpur
   collect_results_cpuospathjoinmerge_metricsr   )r    r!   r"   r#   r$   tmpdirgpu_collectdata_loader_iters_per_gpur   rank
world_sizer&   r'   total_samplesr(   r   countr)   r*   r+   r   iter_cnt_allr,   metric_classes_listr-   r-   r.   multi_gpu_testP   sx   







$rK   c                 C   s@   |  |}|d ur|D ]}||| q|d ur|| d S d S N)evaluation_stepadd)r    r+   r#   r$   batch_result
metric_clsr-   r-   r.   r      s   
r   c                 C   s6   i }t  r| D ]	}||  qt rt|d}|S )Nr   )r	   r   evaluater   r   )r#   metric_valuesrP   r-   r-   r.   r      s   
r   c           
   	   C   s4  t |j\}}|du rt }tj|stj|dd t  |	 r(t
|jrU| r1t
|jrUttj|d| dd}t| | W d   n1 sPw   Y  t  t
 s^dS g }t|D ].}tj|d| d}t|d}t|}	W d   n1 sw   Y  |	r||	 qdt| |S )aB  Collect results under cpu mode.

    On cpu mode, this function will save the results on different gpus to
    ``tmpdir`` and collect them by the rank 0 worker.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        trainer(`EpochBasedTrainer`): The trainer instance to get the parallel groups.
        tmpdir (str | None): temporal directory for collected results to
            store. If set to None, it will create a random temporal directory
            for it.

    Returns:
        list: The collected results.
    NT)exist_okpart_z.pklwbrb)r   r2   r
   r>   r?   existsmakedirsr7   barrieris_tp_group_availabler	   tp_groupis_pp_group_availablepp_groupopenr@   pickledumpr   loadappendshutilrmtree)
result_partr    rB   rE   rF   f	part_listr*   	part_filepart_resultr-   r-   r.   r=      s8   

r=   c                    s  t |\}}tjtt| tjdd tj jddfddt|D }t	
|| t| tjtjdd} |dd <  fddt|D }t	
||| t rg }t||D ]\}}	t|d|	d     }
|
r||
 qc|S dS )	a  Collect results under gpu mode.

    On gpu mode, this function will encode results to gpu tensors and use gpu
    communication for results collection.

    Args:
        result_part (list): Result list containing result parts
            to be collected.
        dp_group(`ProcessGroup` or None): The data parallel group, default None for global group.

    Returns:
        list: The collected results.
    cuda)dtyper"   )r"   c                    s   g | ]}   qS r-   )clone.0r,   )shape_tensorr-   r.   
<listcomp>  s    z'collect_results_gpu.<locals>.<listcomp>Nr   c                    s   g | ]}  qS r-   )	new_zerosrm   )part_tensor	shape_maxr-   r.   rp     s    
)r   r3   tensor	bytearrayr_   dumpsuint8shaper   r7   
all_gathermaxzerosr	   ziploadscpunumpytobytesrb   )re   r2   r,   rF   
shape_list	part_sendpart_recv_listrg   recvrx   ri   r-   )rr   rs   ro   r.   r<      s.   "
r<   c                 C   sH   | d u rd S | d }| dd  D ]}t ||D ]	\}}|| qq|S )Nr   r   )r|   merge)rJ   metric_classes_0metric_classes_icls_0cls_ir-   r-   r.   rA   '  s   rA   )NNN)NNNFNrL   )r   r>   r_   rc   collections.abcr   r3   r   r7   r   modelscope.utils.data_utilsr   modelscope.utils.torch_utilsr   r   r   r	   r
   r/   rK   r   r   r=   r<   rA   r-   r-   r-   r.   <module>   s2   
A
b


2-