o
    ॵiZ                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	m
Z
 d dlZd dlZd dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% e% Z&eG dd dZ'ej(ej)dG dd de!Z*dS )    N)	dataclass)AnyCallableDictListOptionalTupleUnion)nn)
DataLoaderDataset)tqdm)Trainers)Model
TorchModel)BertForTextRanking)	MsDataset)Preprocessor)TRAINERS)NlpEpochBasedTrainer)DEFAULT_MODEL_REVISION)
get_loggerc                   @   s6   e Zd ZdZdeeeef  deeef fddZdS )GroupCollatorz
    Wrapper that does conversion from List[Tuple[encode_qry, encode_psg]] to List[qry], List[psg]
    and pass batch separately to the actual collator.
    Abstract out data detail for the model.
    featuresreturnc                 C   sr   t |d trt|g }|d  }dd |D }|D ]}| D ]\}}|| | q!qdd | D }|S )Nr   c                 S   s   i | ]}|t  qS  )list).0kr   r   `/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/nlp/text_ranking_trainer.py
<dictcomp>&       z*GroupCollator.__call__.<locals>.<dictcomp>c                 S   s    i | ]\}}|t j|d dqS )r   )dim)torchcat)r   r   vr   r   r   r    *   s     )
isinstancer   sumkeysitemsappend)selfr   r(   batcheler   r%   r   r   r   __call__"   s   
zGroupCollator.__call__N)	__name__
__module____qualname____doc__r   r   strr   r.   r   r   r   r   r      s    *r   )module_namec                       s   e Zd Zdddddddddef
deeeeje	f  dee	 dee
 dee
 dee
 deeeef  d	eeeef  d
ee deejjejjjf dee	 f fddZdddZdddZ	ddee	 dee	ef fddZ  ZS )TextRankingTrainerN)NNmodelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetpreprocessor
optimizersmodel_revisionc                    s:   |d u rt  }t jd|||||||	|||
d
| d S )N)
r6   r7   r8   r9   r:   r=   r>   r;   r<   r?   r   )r   super__init__)r+   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   kwargs	__class__r   r   rA   1   s    
zTextRankingTrainer.__init__
   c           	      C   sv   d}|  D ].}t|dd dd}d}t|d | D ]\}}t|d dkr/d|d  } nq||7 }q|t| S )	Nr   c                 S      | d S Nr   r   xr   r   r   <lambda>T       z0TextRankingTrainer.compute_mrr.<locals>.<lambda>Tkeyreverse   1g      ?)valuessorted	enumerater3   len)	r+   resultr   mrrres
sorted_resarindexr-   r   r   r   compute_mrrQ   s   
zTextRankingTrainer.compute_mrrc           	      C   s   d}ddl m} | D ],}t|dd dd}tdd |D g}td	d |D g}|t||||d
7 }q|t| }|S )Nr   )
ndcg_scorec                 S   s   dgS rG   r   rH   r   r   r   rJ   a   s    z1TextRankingTrainer.compute_ndcg.<locals>.<lambda>TrL   c                 S      g | ]}|d  qS )rO   r   r   r-   r   r   r   
<listcomp>b   r!   z3TextRankingTrainer.compute_ndcg.<locals>.<listcomp>c                 S   r]   )r   r   r^   r   r   r   r_   c   r!   r   )sklearnr\   rQ   rR   nparrayfloatrT   )	r+   rU   r   ndcgr\   rW   rX   labelsscoresr   r   r   compute_ndcg]   s   zTextRankingTrainer.compute_ndcgcheckpoint_pathr   c              	      s  | j | jfi | jjdi d| ji| _|dur t|}n| j	}|
  d}t }t }t }d}	tj r;dnd |  tt| jD ]\}
}z fdd	| D }W n tyl   d
d	 | D }Y nw t }t % |d   }|d   }|di |}W d   n1 sw   Y  t }|	|| 7 }	|| jj7 }dd }|d d   }|| }|| || || qItd |	|	d |  i }t!|||D ]\}}}||vrg ||< || "||f q|D ]}t#|| dd d||< qt }| j$D ][}|%drI|&dd }t'|}| j(||d}td || |"||f q|%drs|&dd }t'|}| j)||d}td || |"d|f qt*d| t+|S )a  evaluate a dataset

        evaluate a dataset via a specific model from the `checkpoint_path` path, if the `checkpoint_path`
        does not exist, read from the config file.

        Args:
            checkpoint_path (Optional[str], optional): the model path. Defaults to None.

        Returns:
            Dict[str, float]: the results about the evaluation
            Example:
            {"accuracy": 0.5091743119266054, "f1": 0.673780487804878}
        
dataloader
collate_fnNr   g        zcuda:0cpuc                    s,   i | ]\}}|t |tjr| n|qS r   )r&   r#   Tensortor   rM   valdevicer   r   r       s
    z/TextRankingTrainer.evaluate.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r   r   ro   r   r   r   r       s    rf   qidc                 S   s   t | dt |   S )NrO   )rb   exp)logitsr   r   r   sigmoid   s   z,TextRankingTrainer.evaluate.<locals>.sigmoidru   z/Inference time = {:.2f}s, [{:.4f} ms / sample] i  c                 S   rF   rG   r   rH   r   r   r   rJ      rK   z-TextRankingTrainer.evaluate.<locals>.<lambda>)rM   rV   @r`   z{}: {}re   zMetric %s not implementedr   ),_build_dataloader_with_datasetr<   cfg
evaluationgeteval_data_collatoreval_dataloaderr   from_pretrainedr6   evalr   r#   cudais_availablern   rS   r   r)   RuntimeErrortimeno_gradpopdetachrl   numpy
batch_sizesqueezetolistextendloggerinfoformatzipr*   rR   metrics
startswithsplitintr[   rh   NotImplementedErrordict)r+   ri   argsrB   r6   total_sampleslogits_list
label_listqid_listtotal_spent_time_stepr,   infer_start_time	label_idsqidsoutputsinfer_end_timerv   ru   rank_resultrs   scorelabeleval_outputsmetricr   rV   re   r   rq   r   evaluateh   s   







zTextRankingTrainer.evaluate)rE   )N)r/   r0   r1   r   r   r	   r   r
   Moduler3   r   r   r   r   r   r#   optim	Optimizerlr_scheduler_LRSchedulerrA   r[   rh   r   rd   r   __classcell__r   r   rC   r   r5   .   sT    	

 

r5   )+r   dataclassesr   typingr   r   r   r   r   r   r	   r   rb   r#   r
   torch.utils.datar   r   r   modelscope.metainfor   modelscope.models.baser   r   modelscope.models.nlpr    modelscope.msdatasets.ms_datasetr   modelscope.preprocessors.baser   modelscope.trainers.builderr   modelscope.trainers.nlp_trainerr   modelscope.utils.constantr   modelscope.utils.loggerr   r   r   register_modulenlp_text_ranking_trainerr5   r   r   r   r   <module>   s,   $