o
    㹏ir&                     @   sx  d Z ddlmZ ddlmZmZmZmZ ddlm	Z	 e
dZe
dZdZdZdZd	Zd
ededeeeee
f f deeeee
f f fddZeeddZeeddZeeddZdZdZdZdZeeeeedZi eeeegZeeeef edededi Zdedee
 fddZd edede	fd!d"Zd edede fd#d$Z!d edede	fd%d&Z"d edede	fd'd(Z#d)S )*z2Module contains the score calculation algorithems.    )partial)DictListUnioncast)SCORE_INDICESz-infinfg{Gztg{Gzg      ?
char_start	char_stop
hash_tablereturnc                    s6   |  }| fddtt| t|d D  |S )a  Generate index mapping for `bonus` calculation.

    Args:
        char_start: Starting char of the range.
        char_stop: Ending char of the range.
        value: Value to give to the range of char.
        hash_table: Base dictionary to add the mapping.

    Returns:
        A dictionary containing the given range with provided index.

    Examples:
        >>> _char_range_with("a", "d", 1, {})
        {'a': 1, 'b': 1, 'c': 1, 'd': 1}
    c                 3   s    | ]	}t | fV  qd S )N)chr).0uni_charvalue G/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/pfzy/score.py	<genexpr>"   s
    

z#_char_range_with.<locals>.<genexpr>   )copyupdaterangeord)r	   r
   r   r   r   r   r   _char_range_with   s
   r   azAZ09g?g?gffffff?g333333?)/-_ .r      haystackc                 C   s8   d}g }| D ]}| tt|d |d |}q|S )a  Calculate bonus score for the given haystack.

    The bonus are applied to each char based on the previous char.

    When previous char is within the `BONUS_MAP` then additional bonus
    are applied to the current char due to it might be the start of a new
    word.

    When encountered a mix case character, if the current char is capitalised then
    if the previous char is normal case or within `BONUS_MAP`, additional bounus are applied.

    Args:
        haystack: String to calculate bonus.

    Returns:
        A list of float matching the length of the given haystack
        with each index representing the bonus score to apply.

    Examples:
        >>> _bonus("asdf")
        [0.9, 0, 0, 0]
        >>> _bonus("asdf asdf")
        [0.9, 0, 0, 0, 0, 0.8, 0, 0, 0]
        >>> _bonus("asdf aSdf")
        [0.9, 0, 0, 0, 0, 0.8, 0.7, 0, 0]
        >>> _bonus("asdf/aSdf")
        [0.9, 0, 0, 0, 0, 0.9, 0.7, 0, 0]
    r!   r   )appendBONUS_STATESBONUS_INDEXget)r'   	prev_charbonuscharr   r   r   _bonus<   s   r/   needlec                    sH  t | t |} t|}|  r| }|dks| kr%ttt|fS  fddt|D } fddt|D }t|D ]o}t}||d krKtnt	}t D ]\}	| | ||	 krt}
|dkrj|	t
 ||	  }
n|	dkrt||d  |	d  ||	  ||d  |	d  t }
|
|| |	< t|
||  || |	< }qQt|| |	< ||  || |	< }qQq?|d  d }}	d}dd t|D }|dkr|	dkr|s|| |	 || |	 kr|| |	 tkr|dko|	dko|| |	 ||d  |	d  t k}|	||< |	d8 }	n|	d8 }	|	dks|d8 }|dks||d   d  |fS )af  Use fzy logic to calculate score for `needle` within the given `haystack`.

    2 2D array to track the score.
    1. The running score (`running_score`) which represents the best score for the current position.
    2. The result score (`result_score`) which tracks to overall best score that could be for the current positon.

    With every consequtive match, additional bonuse score are given and for every non matching char, a negative
    gap score is applied.

    After the score is calculated, the final matching score will be stored at the last position of the `result_score`.

    Backtrack the result by comparing the 2 2D array to find the corresponding indices.

    Args:
        needle: Substring to find in haystack.
        haystack: String to be searched and scored.

    Returns:
        A tuple of matching score with a list of matching indices.
    r   c                       g | ]}d d t  D qS )c                 S      g | ]}d qS r   r   r   r#   r   r   r   
<listcomp>       %_score.<locals>.<listcomp>.<listcomp>r   r4   haystack_lenr   r   r5          z_score.<locals>.<listcomp>c                    r1   )c                 S   r2   r3   r   r4   r   r   r   r5      r6   r7   r8   r4   r9   r   r   r5      r;   r   Fc                 S   r2   r3   r   r4   r   r   r   r5      r6   )lenr/   islowerlower	SCORE_MAXlistr   	SCORE_MINSCORE_GAP_TRAILINGSCORE_GAP_INNERSCORE_GAP_LEADINGmaxSCORE_MATCH_CONSECUTIVE)r0   r'   
needle_lenbonus_scorerunning_scoreresult_scorei
prev_score	gap_scorejscorematch_requiredindicesr   r9   r   _scorea   sl   




rR   c                 C   sJ   |   |  } }| sdS d}| D ]}|||d }|dkr" dS qdS )a  Check if needle is subsequence of haystack.

    Args:
        needle: Substring to find in haystack.
        haystack: String to be searched and scored.

    Returns:
        Boolean indicating if `needle` is subsequence of `haystack`.

    Examples:
        >>> _subsequence("as", "bbwi")
        False
        >>> _subsequence("as", "bbaiws")
        True
        >>> _subsequence("sa", "bbaiws")
        False
    Tr   r   F)r>   find)r0   r'   offsetr.   r   r   r   _subsequence   s   rU   c                 C   s   t | |r
t| |S tdfS )a  Use fzy matching algorithem to match needle against haystack.

    Note:
        The `fzf` unordered search is not supported for performance concern.
        When the provided `needle` is not a subsequence of `haystack` at all,
        then `(-inf, None)` is returned.

    See Also:
        https://github.com/jhawthorn/fzy/blob/master/src/match.c

    Args:
        needle: Substring to find in haystack.
        haystack: String to be searched and scored against.

    Returns:
        A tuple of matching score with a list of matching indices.

    Examples:
        >>> fzy_scorer("ab", "acb")
        (0.89, [0, 2])
        >>> fzy_scorer("ab", "acbabc")
        (0.98, [3, 4])
        >>> fzy_scorer("ab", "wc")
        (-inf, None)
    N)rU   rR   rA   )r0   r'   r   r   r   
fzy_scorer   s   

rV   c                 C   s   g }d}|   |  } }| dD ]'} | sq|| |}|dk r'tdf  S t| }|t|||  ||7 }q|s@d|fS |d d |d   d|d d   d|d d   |fS )aU  Match needle against haystack using :meth:`str.find`.

    Note:
        Scores may be negative but the higher the score, the higher
        the match rank. `-inf` score means no match found.

    See Also:
        https://github.com/aslpavel/sweep.py/blob/3f4a179b708059c12b9e5d76d1eb3c70bf2caadc/sweep.py#L837

    Args:
        needle: Substring to find in haystack.
        haystack: String to be searched and scored against.

    Returns:
        A tuple of matching score with a list of matching indices.

    Example:
        >>> substr_scorer("ab", "awsab")
        (-1.3, [3, 4])
        >>> substr_scorer("ab", "abc")
        (0.5, [0, 1])
        >>> substr_scorer("ab", "iop")
        (-inf, None)
        >>> substr_scorer("ab", "asdafswabc")
        (-1.6388888888888888, [7, 8])
        >>> substr_scorer(" ", "asdf")
        (0, [])
    r   r$   Nr   r&   )r>   splitrS   rA   r<   extendr   )r0   r'   rQ   rT   rG   r   r   r   substr_scorer  s"   
4rZ   N)$__doc__	functoolsr   typingr   r   r   r   
pfzy.typesr   floatrA   r?   rD   rB   rC   rF   strintr   
lower_with
upper_with
digit_withSCORE_MATCH_SLASHSCORE_MATCH_WORDSCORE_MATCH_CAPITALSCORE_MATCH_DOT	BONUS_MAPr)   r*   r/   rR   boolrU   rV   rZ   r   r   r   r   <module>   sN    
$%d 