o
    7ti$                     @   sR  d dl Z d dlZd dlZd dlmZmZ d dlZd dlZd dl	m	Z	 d dl
mZmZ er4d dlmZmZ dddd	d
iZed d ed d  Z	d(dedededeeee  eee  f fddZd(ddZdedefddZ							d)dededededed edefd!d"Z	d*d#ed dee fd$d%Zdeeejf fd&d'ZdS )+    N)TYPE_CHECKINGUnion)tqdm)DEFAULT_SEQ_LENGTHSget_tokenizer)PreTrainedTokenizerPreTrainedTokenizerFastvariable_tracking   zMemorize and track the chain(s) of variable assignment hidden in the following text.

{context}
Question: Find all variables that are assigned the value {query} in the text above.z Answer: According to the chain(s) of variable assignment in the text above, {num_v} variables are assgined the value {query}, they are: )tokens_to_generatetemplateanswer_prefixr   r   F
num_chainsnum_hopsis_iclreturnc           
   	      s4  g }|sdnd |s|nt d|} fddt|d |  D }tt|| |d  k rH|dtjtj	 d
  tt|| |d  k s,g }g }td	t||d D ]?}|||| d  }|| d
|d	  dtjdd g}t|D ]}	|d
||	d   d||	  d q{|| qV||fS )N      
   c                    s&   g | ]}d  tjtj d qS ) k)joinrandomchoicesstringascii_uppercaseupper).0_r    P/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/ruler/vt_utils.py
<listcomp>3   s    z#generate_chains.<locals>.<listcomp>   r   r   r   zVAR z = i'  i z = VAR  )minrangelensetappendr   r   r   r   r   r   nprandint)
r   r   r   vars_allvars_ret
chains_reti	this_vars
this_chainjr    r   r!   generate_chains-   s&   
 
 &r3   c                    s  t |||d\}}d}|g|   t t|d krHdd dd  D D  zt t|d ks5J dW n   td  fd	d|D }Y |D ]*}ttttt t|}t|tt|D ]\}	}
 	|	|
 ||
  qeqJd

 }|dd}t}|r|td d td d  kr|td d d d }|td d d d }d

|||  d d ||d   }|d d dd  }|j|||d d}||d fS )Nr   zZThe grass is green. The sky is blue. The sun is yellow. Here we go. There and back again.
r   c                 S   s(   g | ]}t | d kr|d n|qS )r   .)r'   strip)r   nr    r    r!   r"   N   s    z)generate_input_output.<locals>.<listcomp>c                 S   s    g | ]}| d D ]}|q	qS )r5   split)r   noisexr    r    r!   r"   P        z)Noises too short, unable to generate dataz*reduces chain length for not enough noisesc                    s    g | ]}|d t  d  qS )Nr#   )r'   )r   chain	sentencesr    r!   r"   X   r<   r$   z. 
z.
r	   r   r      r   =r#   )contextquerynum_v)r3   r'   printlistsortedr   sampler&   zipinsertr   replaceTEMPLATECONFIGindexr9   r6   format)
num_noisesr   r   r   varschainsr:   chain_i	positions	insert_pir2   rC   r   cutoff
cutoff_ansvalue
input_textr    r>   r!   generate_input_outputF   sH   
 


(r[   icl_examplec                 C   sj   |  td d dd  }| |d d    }|D ]}dtjtjt	|d
 }| ||} q| S )Nr	   r   ir   r   r   )rO   rN   r6   r9   r   r   r   r   r   r'   r   rL   )r\   icl_tgt_cuticl_tgtitemnew_itemr    r    r!   randomize_iclx   s   ra   r   r#      Tnum_samplesmax_seq_lengthincrementaladd_fewshotc
                 C   s6  g }
|}|}d}d}|r(|d ur(d |d }|d d | d }t| |j}|| | |k rtt|||||d u @ d\}}t| |d|  j}td| d|| |  d	|  || | |krh||8 }n||7 }|| | |k s0td
| tt|D ]}|}	 z&t|||||d u @ d\}}t| |j| | }||ksJ | dW n   ||kr||8 }Y q|r|d ur|td d d d }|d | t	| d ||d   }|	rd |
dd
dd  }|d}||d   }|d ur|d | }|||||| d}|
| q|
S )Nr   r$   outputsinputz

r4   zMax length z | Current length z | Noises: zNum noises:Tz exceeds max_seq_length.r	   r   r@   
	z9 Answer: According to the chain(s) of variable assignment)rO   rh   rg   length
max_length
gen_prefix)r   r'   	input_idsr[   rF   r   r&   rO   rN   ra   rL   r6   r9   rfindr)   )	tokenizerrc   rd   re   r   r   rf   r   r\   remove_newline_tabwrite_jsonsrQ   total_tokensexample_tokensicl_example_outrZ   answerrO   used_noisesrk   rW   gen_prefix_indexrm   formatted_outputr    r    r!   sys_vartrack_w_noise_random   s   






rz   rp   c                 K   s(   t | ddddd }t | d||d}|S )Nr#   i  r   )rp   rc   rd   re   r   )rp   rc   rd   r\   )rz   )rp   seqkwargsr\   rr   r    r    r!   get_dataset   s   r}   c                     sR   |  d|  dd  fdd| dtD }dtjjttj	|tj
jdiS )	Nrp   
pretrainedr   c                 3   s     | ]}t t |d V  qdS ))rp   r{   N)r}   r   )r   r{   r~   r    r!   	<genexpr>   s
    
z!get_vt_dataset.<locals>.<genexpr>max_seq_lengthstestr8   )getpopr   datasetsDataset	from_listrG   	itertoolsr=   from_iterableSplitTEST)r|   dfr    r   r!   get_vt_dataset   s   

r   )F)r   r#   rb   Tr
   NF)N)r   r   r   typingr   r   r   numpyr*   r    lm_eval.tasks.ruler.common_utilsr   r   transformersr   r   rN   rM   intbooltuplerG   strr3   r[   ra   dictrz   r}   r   r   r    r    r    r!   <module>   sz   
	


2	
b
