o
    ॵiO                     @   s   d dl Z d dlmZ d dlZd dlmZmZ d dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ e Zejej dG dd deZ!dS )    N)DictOptional)LambdaLR)
DataLoader)Trainers)Model)TableQuestionAnswering)BaseTrainer)TRAINERS)	ModelFile)
get_logger)module_namec                   @   s   e Zd Zd*dedefddZ	d+ddZd	d
 Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zd,ddZd*d d!Z	"	#	$	%	&	&	'd-d(d)ZdS ).TableQuestionAnsweringTrainerNmodelcfg_filec                 O   s$   t || _|d | _|d | _d S )Ntrain_dataseteval_dataset)r   from_pretrainedr   r   r   )selfr   r   argskwargs r   l/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/nlp/table_question_answering_trainer.py__init__   s   
z&TableQuestionAnsweringTrainer.__init__c                    s    dt f fdd}t|||S )z 
        set scheduler.
        current_stepc                    sB   | k rt | t td S tdt  |  t td   S )N           )floatmax)r   num_training_stepsnum_warmup_stepsr   r   	lr_lambda,   s   
zPTableQuestionAnsweringTrainer.get_linear_schedule_with_warmup.<locals>.lr_lambda)intr   )r   	optimizerr"   r!   
last_epochr#   r   r    r   get_linear_schedule_with_warmup#   s   	z=TableQuestionAnsweringTrainer.get_linear_schedule_with_warmupc                 C   $   g }|D ]}| t|d  q|S )E
        [ [wc, wo, wv],
        [wc, wo, wv], ...
        ]
        r   appendr$   )r   condswc1condr   r   r   get_wc16      z%TableQuestionAnsweringTrainer.get_wc1c                 C   r(   )r)   r   r*   )r   r,   wo1r.   r   r   r   get_wo1A   r0   z%TableQuestionAnsweringTrainer.get_wo1c                 C   r(   )r)      )r+   str)r   r,   wv1r.   r   r   r   get_wv1L   r0   z%TableQuestionAnsweringTrainer.get_wv1c                 C   s    t ||d D ]}|||< q|S )Nr   )range)r   datastartendvalueir   r   r   set_from_toW   s   
z)TableQuestionAnsweringTrainer.set_from_toc              
   C   s  g }g }g }g }g }g }	g }
g }g }g }t |D ]a\}}t|d |d< t|d }t|d }t|d | d }tt|d | }t |D ]\}}||| krdd||< || dkrd|d8 }qL|dd t| jjt| D 7 }|dd t| jjt| D 7 }|	| |	| |
	|dkrdn| t
|d |d< t
|d |d< t|d t|d ksJ |	|| d  |	|d  t|d	 }d
d |dddf D }t|}|	| || |d	< t|d dk syt|}tt| t|| d }tt| t|| }t |D ]\}}||| kr2d||< |d8 }q|dd t| jjt| D 7 }|dd t| jjt| D 7 }|	| |	|dkrddn| |	| |		| t||  qt||||||	||
||f
S )zB
        for backward compatibility, separated with get_g
        selr   aggr   c                 S      g | ]}d qS r   r   .0_r   r   r   
<listcomp>x       z7TableQuestionAnsweringTrainer.get_g.<locals>.<listcomp>c                 S   r@   rA   r   rB   r   r   r   rE   |   rF   cond_conn_opr,   c                 S   s   g | ]}t |qS r   )r$   )rC   xr   r   r   rE      s    Nc                 S   r@   rA   r   rB   r   r   r   rE      rF   c                 S   r@   rA   r   rB   r   r   r   rE      rF   )	enumeratenumpyasarrayargsortlenlistr7   r   max_select_numr+   sortarrayr/   r2   max_where_numr6   EnvironmentError)r   sql_il_hsactiong_scg_sag_wng_wcg_wog_wvg_sleng_actiong_cond_conn_opidxsbpsql_i1idxslensid_list	said_listr<   sidr,   	conds_numwlenwcd_listwod_listwcdr   r   r   get_g\   s   





z#TableQuestionAnsweringTrainer.get_gc              	   C   s  d}|D ]}||kr|}q|d7 }g }g }g }t |D ]a\}	}
dg| }dg| jj }dg| jj }t tt|
d ||	  D ]+\}}|\}}|dkrR|dkrRqA| ||d |d |d  |d ||< |d ||< qA|| || || q|||ffS )z
        Generate SQuAD style start and end index of wv in nlu. Index is for of after WordPiece tokenization.

        Assumption: where_str always presents in the nlu.
        r   r3   wvi_corenlpir   )rI   r   rR   rN   rJ   rK   r=   r+   )r   g_wvi_corenlpl_nr`   max_lelemg_wvig_wv_psg_wv_pera   t_objg_wvi1g_wvss1g_wvse1i_wng_wvi_corenlp11st_idxed_idxr   r   r   !get_g_wvi_bert_from_g_wvi_corenlp   s4   


z?TableQuestionAnsweringTrainer.get_g_wvi_bert_from_g_wvi_corenlpc                 C   s$   t jj|t || jj}|S N)torchnn
functionalcross_entropytensortor   device)r   s_ccor_   lossr   r   r   	loss_scco   s
   z'TableQuestionAnsweringTrainer.loss_sccoc                 C   s  d}|t jj|d|t |d| jj7 }|t jj|d| jj	t |	d| jj7 }|\}}|| 
||7 }|| 
||7 }|| 
||
7 }|| 
||7 }|t jj|d|t |d| jj7 }|t jj|d| jjt |d| jj7 }|\}}|t jj|d|jd t |d d| jj7 }|t jj|d|jd t |d d| jj7 }|S )Nr   r   r   )r   r   r   r   reshaper   r   r   r   	n_agg_opsr   
n_cond_opsshape)r   s_actions_scs_sar   s_wcs_wos_wvsrW   rX   rY   rZ   r[   rs   r_   r]   g_wvp	max_h_lens_lenr^   r   s_slens_wlens_wvs_ss_wvs_er   r   r   
loss_sw_se   s@   







z(TableQuestionAnsweringTrainer.loss_sw_sec           
      C   s   t |t |kr||fS i }t|D ]
\}}|| ||< qt| dd d}g }g }|D ]}	||	d  ||	d  q-||fS )Nc                 S   s   | d S )Nr   r   )dr   r   r   <lambda>   s    z<TableQuestionAnsweringTrainer.sort_agg_sel.<locals>.<lambda>)keyr   r   )rM   rI   sorteditemsr+   )
r   aggsselsseldicr<   r>   apsnew_aggsnew_selsapr   r   r   sort_agg_sel   s   z*TableQuestionAnsweringTrainer.sort_agg_selc                 C   s   g }|D ]c}t |dkr|| qt |}t|D ]:\}}|d |d k r*|} n+|d |d krT|d }|d }	||}
||	}|
dkrT|dkrT|
|k rT|} nq|t |kra|| q||| q|S )Nr   r3   r   )rM   r+   rI   findinsert)r   nlur,   newcondsr.   rc   r<   newcondvalnewvalvalidx	newvalidxr   r   r   
sort_conds  s.   


z(TableQuestionAnsweringTrainer.sort_condsr   c           6      C   s  t |t |kst |dkrd S d\}}}}}d\}	}
}}d\}}}}ttj|ddD ]\}}|d }|d }|| }|d }|d }|d }i i }}||krSq,|d	7 }d
}t |d t |d krzt |d t |d krz|d	7 }d |d< nd}d |d< |d d |d kr|	d	7 }	d |d< nd}d |d< |d |d kr|d	7 }d |d< nd}d |d< t |d t |d kr|d	7 }d |d< nd}d |d< |tt |d t |d 7 }| |d |d \}}| |d |d \}}t|D ]G\} }!| t |k r||  |!kr|
d	7 }
d |d< nd}d |d< | t |k r7| t |k r7||  ||  kr7|d	7 }d |d< qd}d |d< q|tt |d t |d 7 }| ||d }"| ||d }#t|"D ]\} }$| t |#krn n|#|  }%|$d |%d kr|d	7 }d |d< nd}d |d< |$d	 |%d	 kr|d	7 }d |d< nd}d |d< d}&zt|%d |%d d	 d	D ]}'|&||'  7 }&qW n ty   d}&Y nw |& 	 }(|$d  	 })|%d  	 }*|)|*v s|)|(v s|(|)v s|*|)v r|d	7 }d |d< qad}d |d< qa|r|d	7 }q,||d  }+|	|d  },||d  }-||d  }.||d  }/|
|d  }0||d  }1||d  }2||d  }3||d  }4t
d||+|,|-|.|/|0|1|2|3|4f  |+|,|-|.|/|0|1|2|3|4d
}5|5S ) Nr   )r   r   r   r   r   )r   r   r   r   evaluatedescquestionsqlquestion_tokr   Tr>   r?   zselect numberFrV   rG   zcondition operatorr,   zwhere numberzselect aggregationzselect columnzwhere columnzwhere operator startIdendIdr3   zwhere value{Gz?z{STATIS} [epoch=%d] all_ratio: %.3f, act_ratio: %.3f, sc_len_ratio: %.3f, cco_ratio: %.3f, wc_len_ratio: %.3f, s_agg_ratio: %.3f, s_col_ratio: %.3f, w_col_ratio: %.3f, w_op_ratio: %.3f, w_val_ratio: %.3f)
accuracyaction_accuracyselect_length_accuracyconnector_accuracywhere_length_accuracyselect_aggregation_accuracyselect_column_accuracywhere_column_accuracywhere_operator_accuracywhere_value_accuracy)rM   rI   tqdmr   r   r   r7   strip	Exceptionlowerloggerinfo)6r   answersresultsepochall_sum	all_rightsc_lenccowc_lenacts_aggall_cols_colall_ww_colw_opw_valrc   itemr   qaSQLresultr   r   questionTokenrightserrorsrightaaggsaselsraggsrselsjr?   acondsrcondsr.   pcondr;   kvaluelownormalvaluenormal	all_ratio	act_ratiosc_len_ratio	cco_ratiowc_len_ratios_agg_ratios_col_ratiow_col_ratio
w_op_ratiow_val_ratiometricsr   r   r   calculate_scores  s   
$




.





	z.TableQuestionAnsweringTrainer.calculate_scoresc                 C   s   ddi}|durCt |}| jj|d  | jjj|d dd g }tj| jdd	D ]}| j|gd
 }|	| q+| 
| j|}|S )z#
        Evaluate testsets
        r   r   Nbackbone_model
head_modelF)strictpredictr   r   )r   loadr   r   load_state_dictr   r   r   r   r+   r   )r   checkpoint_pathr   
state_dictr   r8   r   r   r   r   r     s   
z&TableQuestionAnsweringTrainer.evaluate      h㈵>Mb@?r   皙?c           E      C   s  t || jdddd d}t|| }	t||	 }
tjjtdd | jj	
 ||d}tjjtdd | jj
 ||d}| ||
|	}| ||
|	}d	}td
|d
 D ]}| jj	  | jj  t|D ]\}}| jj|ddd\}}}}}}}}}}}| j| jj| jj||||||||
\}}} }!}"}#}$}%}&}'}(| ||%|\
})}*}+},}-}.}/}0}1}2| ||#|2\}3}4| j	||#|%|&|'| |(\}5}6}7}8}9}:};}<t|%}=| |5|6|7|8|9|:|;|)|*|+|,|-|3|/|0|4|=|<|1}>td|||t||> f  |  |  |>  |  |  |  |  qfg }?tj| jddD ]}@| j|@gd }A|? |A q| j!| j|?|d}B|Bd |krj|Bd }t"j#$| jj%d}C| jj	& | jj& d}Dt'|D|C td||Bd |Cf  qSdS )z'
        Fine-tuning trainsets
        T   c                 S   s   | S r   r   )rH   r   r   r   r     s    z5TableQuestionAnsweringTrainer.train.<locals>.<lambda>)
batch_sizedatasetshufflenum_workers
collate_fnc                 S      | j S r   requires_gradpr   r   r   r         )lrweight_decayc                 S   r  r   r  r  r   r   r   r     r  r   r   N)trainz.{train} [epoch=%d/%d] [batch=%d/%d] loss: %.4fr   r   r   )r   r   zfinetuned_model.bin)r   r   z3epoch %d obtain max score: %.4f, saving model to %s)(r   r   rM   r$   r   optimAdamWfilterr   r   
parametersr   r'   r7   r  rI   get_fields_infoget_bert_output	tokenizerrm   r~   r   r   r   r   r   	zero_gradbackwardstepr   r   r   r+   r   ospathjoin	model_dirr  save)Er   r
  total_epochesbackbone_learning_ratehead_learning_ratebackbone_weight_decayhead_weight_decaywarmup_ratiotrain_loadertotal_train_stepswarmup_stepsoptopt_bertlr_schedulerlr_scheduler_bertmax_accuracyr   iBr   r   nlu_trT   q_knowt_knowrV   hs_ttypesunitshis_sqlschema_linkall_encoder_layerrD   tokensi_nlui_hdsrp   l_hpurU   start_indexcolumn_indexidsrW   rX   rY   rZ   r[   r\   r_   r]   r^   r`   rs   r   r   r   r   r   r   r   r   r   r   loss_allr   r8   r   r   
model_pathr  r   r   r   r    s   





z#TableQuestionAnsweringTrainer.trainr   )r   rA   )r  r  r  r  r   r   r  )__name__
__module____qualname__r4   r   r'   r/   r2   r6   r=   rm   r~   r   r   r   r   r   r   r  r   r   r   r   r      s2    	
L&$
 
r   )"r"  os.pathr#  osptimetypingr   r   jsonrJ   r   r   torch.optim.lr_schedulerr   torch.utils.datar   modelscope.metainfor   modelscope.modelsr   9modelscope.models.nlp.space_T_cn.table_question_answeringr   modelscope.trainers.baser	   modelscope.trainers.builderr
   modelscope.utils.constantr   modelscope.utils.loggerr   r   register_module table_question_answering_trainerr   r   r   r   r   <module>   s(   