o
    6tiL"                     @   s   d dl Z d dlZd dlZd dlZd dlmZmZ G dd deZG dd deZG dd deZ	G d	d
 d
eZ
G dd deZdS )    N)FilterRegexFilterc                       sf   e Zd Zedd eejD Z						dde	d	e	d
df fddZ
dd Zi fddZ  ZS )ExtendedRegexFilterc                 c   s(    | ]}t t|d r|V  qdS )PN)unicodedatacategorychr
startswith).0i r   X/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/tasks/bbh/cot_zeroshot/utils.py	<genexpr>
   s    
zExtendedRegexFilter.<genexpr>#### (\-?[0-9\.\,]+)r   	[invalid]FNregex_patternfallbackreturnc                    s&   t  ||| || _|| _|| _d S N)super__init__ignore_caseignore_punctuationregexes_to_ignore)selfr   group_selectr   r   r   r   	__class__r   r   r      s   	
zExtendedRegexFilter.__init__c                 C   sH   | j d ur| j D ]	}t|d|}q| jr| }| jr"|| j}|S )N )r   resubr   lowerr   	translate	punct_tbl)r   stsr   r   r   filter_ignores   s   

z"ExtendedRegexFilter.filter_ignoresc                 C   sT   | |}|r(|| j }t|trdd |D d }| }|r(||v r(|| }|S )Nc                 S   s   g | ]}|r|qS r   r   )r
   mr   r   r   
<listcomp>.   s    z2ExtendedRegexFilter.find_match.<locals>.<listcomp>r   )findallr   
isinstancetuplestrip)r   regexrespconvert_dictmatchr   r   r   
find_match)   s   


zExtendedRegexFilter.find_match)r   r   r   FFN)__name__
__module____qualname__dictfromkeysrangesys
maxunicoder#   strr   r&   r1   __classcell__r   r   r   r   r   	   s&    
r   c                       s@   e Zd Zi dddddfdededdf fdd	Zd
d Z  ZS )MapRegexFilterr   r   FNregex_pattern_to_valuer   r   c                    s<   t  dt| ||||| dd | D | _dS )a  
        regex_pattern_to_value: Match the regex pattern and change the result into the value
        group_select: Selects the (group_select)th match from the findall result. We use the whole regex_patterns, concatenated by |
        ignore_case: Lowers the case of response before matching with the given regex
        ignore_punctuation: Remove the punctuation before matching with the given regex
        regexes_to_ignore: Remove these regexes before matching with the given regex
        |c                 S   s   i | ]
\}}t ||qS r   )r   compile)r
   rvr   r   r   
<dictcomp>N   s    z+MapRegexFilter.__init__.<locals>.<dictcomp>N)r   r   joinlistkeysitemsregex_to_value)r   r=   r   r   r   r   r   r   r   r   r   6   s   zMapRegexFilter.__init__c              	   C   s   g }|D ]?}g }|D ]3}|  | j| |}|r1| j D ]\}}	|  || |}
|
r0|	}
 nq|r5|
s8| j}
||
 q
|| q|S r   )r1   r-   r&   rG   rF   r   append)r   respsdocsfiltered_respsr@   filteredr.   $whole_match_considering_group_selectr-   mapped_valuer0   r   r   r   applyR   s,   zMapRegexFilter.apply)r2   r3   r4   r5   r:   r   rO   r;   r   r   r   r   r<   5   s    r<   c                   @   s   e Zd Zdd ZdS )NumberParseRegexFilterc                 C   s   g }dd l }ddlm} |d}|D ]2}g }|D ]&}	| | j |	}
|
s5| ||	 }
|
r5t||
}
|
s:| j}
|	|
 q|	| q|S )Nr   )w2na  ((?:(?:zero|one|two|three|four|five|(?:twen|thir|for|fif|six|seven|nine)(?|teen|ty)|eight(?:|een|y)|ten|eleven|twelve|fourteen|hundred|thousand|(?:m|b|tr)illion)(?:zero|one|two|three|four|five|(?:twen|thir|for|fif|six|seven|nine)(?:|teen|ty)|eight(?|een|y)|ten|eleven|twelve|fourteen|hundred|thousand|(?:m|b|tr)illion|[^\S
]|,|and|&)+)?(?:zero|one|two|three|four|five|(?:twen|thir|for|fif|six|seven|nine)(?|teen|ty)|eight(?|een|y)|ten|eleven|twelve|fourteen|hundred|thousand|(?:m|b|tr)illion)))
r-   word2numberrQ   r?   r1   r!   r:   word_to_numr   rH   )r   rI   rJ   rK   r-   rQ   english_number_regexr@   rL   r.   r0   r   r   r   rO   n   s&   zNumberParseRegexFilter.applyN)r2   r3   r4   rO   r   r   r   r   rP   m   s    rP   c                   @   s   e Zd ZdZdd ZdS )WordSortFilter c              
   C   s   g }t ||D ]J\}}|d dd   }tddd |D }g }|D ]"}	||	}
|
  tt	
t |
d gt|
 }|d| q)|| q|S )NinputzList:   r>   c                 S   s   g | ]}d | d qS )z\br   )r
   wr   r   r   r(      s    z(WordSortFilter.apply.<locals>.<listcomp>rV   )zipsplitr,   r   r?   rC   r)   reversereversedcollectionsOrderedDictlenrH   )r   rI   rJ   rK   r@   docwordsr-   rL   r.   r0   ordered_wordsr   r   r   rO      s   
zWordSortFilter.applyN)r2   r3   r4   __doc__rO   r   r   r   r   rU      s    rU   c                       s$   e Zd Z fddZdd Z  ZS )MultiChoiceRegexFilterc                    s   t  j|i | dS )a  
        regex_pattern: The basic regex pattern to use. If fails to match, we will use the customized match procedure
                        - step 1 : We parse the choices between ([A-Z])s then try to find these choices in the response.
                        - step 2 : We parse the choice with regex :[\s]*([A-?]), where ? varies by number of choices.
        group_select: Selects the (group_select)th match from the findall result.
        ignore_case: Ignores the case during step 1 matching
        ignore_punctuation: Remove the punctuation during step 1 matching
        regexes_to_ignore: Remove these regexes during step 1 matching
        N)r   r   )r   argskwargsr   r   r   r      s   
zMultiChoiceRegexFilter.__init__c                 C   s<  g }t ||D ]\}}g }i }d}g }	i }
td}||d }|D ]/}| | }|t|  d| d||< |	| d| d|
|< tt	|d }q#td
|}d
|	}td| d}g }|D ](}| | j|}|s| || ||}|s| |||
}|s| j}|| qm|| q|S )	NAz\([A-Z]\)([^\n^(]*)rW   ()rX   r>   z:[\s]*()rZ   r   r?   r)   r&   r,   rH   escaper   ordrC   r1   r-   r   )r   rI   rJ   rK   r@   ra   fallback_regexeschoice_to_alpha
next_alphawithout_paren_fallback_regexeswithout_paren_to_targetmultiple_choices_regexr0   r'   fallback_regexwithout_paren_fallback_regexrL   r.   r   r   r   rO      sJ   



zMultiChoiceRegexFilter.apply)r2   r3   r4   r   rO   r;   r   r   r   r   re      s    re   )r^   r   r8   r   lm_eval.filters.extractionr   r   r   r<   rP   rU   re   r   r   r   r   <module>   s    ,8