o
    qi                     @   s   d Z ddlmZmZ ddlmZmZ ddlmZmZ dedefddZ	dedefd	d
Z
dedefddZdedefddZdS )z)
Urduhack Character preprocess functions
   ) _SPACE_AFTER_ALL_PUNCTUATIONS_RE!_SPACE_BEFORE_ALL_PUNCTUATIONS_RE)_SPACE_AFTER_DIGITS_RE_SPACE_BEFORE_DIGITS_RE)_SPACE_BEFORE_ENG_CHAR_RE_SPACE_AFTER_ENG_CHAR_REtextreturnc                 C      t d| } td| } | S )u  
    Add spaces before|after numeric and urdu digits

    Args:
        text (str): ``Urdu`` text
    Returns:
        str: Returns a ``str`` object containing normalized text.
    Examples:
        >>> from urduhack.preprocessing import digits_space
        >>> text = "20فیصد"
        >>> normalized_text = digits_space(text)
        >>> normalized_text
        20 فیصد
     )r   subr   r    r   T/home/ubuntu/.local/lib/python3.10/site-packages/urduhack/preprocessing/character.pydigits_space   s   r   c                 C   r
   )u/  
    Functionality to add spaces before and after English words in the given Urdu text. It is an important step in
    normalization of the Urdu data.

    this function returns a :py:class:`String` object which contains the original text with spaces before & after
    English words.

    Args:
        text (str): ``Urdu`` text
    Returns:
        str: Returns a ``str`` object containing normalized text.
    Examples:
        >>> from urduhack.preprocessing import english_characters_space
        >>> text = "خاتون Aliyaنے بچوںUzma and Aliyaکے قتل کا اعترافConfession کیا ہے۔"
        >>> normalized_text = english_characters_space(text)
        >>> normalized_text
        خاتون Aliya نے بچوں Uzma and Aliya کے قتل کا اعتراف Confession کیا ہے۔
    r   )r   r   r   r   r   r   r   english_characters_space    s   r   c                 C   r
   )z
    Add spaces after punctuations used in ``urdu`` writing

    Args:
        text (str): ``Urdu`` text
    Returns:
        str: Returns a ``str`` object containing normalized text.
    r   )r   r   r   r   r   r   r   all_punctuations_space9   s   	r   c                 C   s.   t | ts	tdt| } t| } t| } | S )uC  
    To preprocess some text, all you need to do pass ``unicode`` text. It will return a ``str``
    with proper spaces after digits and punctuations.

    Args:
        text (str): ``Urdu`` text
    Returns:
        str: urdu text
    Raises:
        TypeError: If text param is not not str Type.
    Examples:
        >>> from urduhack.preprocessing import preprocess
        >>> text = "اَباُوگل پاکستان ﻤﯿﮟ 20 سال ﺳﮯ ، وسائل کی کوئی کمی نہیں ﮨﮯ۔"
        >>> normalized_text = preprocess(text)
        >>> # The text now contains proper spaces after digits and punctuations,
        >>> # normalized characters and no diacritics!
        >>> normalized_text
        اباوگل پاکستان ﻤﯿﮟ 20 سال ﺳﮯ ، وسائل کی کوئی کمی نہیں ﮨﮯ ۔
    ztext must be str type.)
isinstancestr	TypeErrorr   r   r   r   r   r   r   
preprocessG   s   
r   N)__doc__regexesr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s   