o
    qi                     @   s2   d Z g dZg dZdd ZdedefddZd	S )
z'Rule based Sentence tokenization module)u
   جنہیںu   جسu   جنu   جوu   اورu   اگرu
   اگرچہu   لیکنu   مگرu   پرu   یاu   تاہمu   کہu   کرu   تو   گے   گی)u
   کیجیےu
   کیجئےu   گئیںu   تھیںu   ہوںu
   خریداr   u
   ہونگےu   گاu
   چاہیےu
   ہوئیںr   u   تھاu   تھیu   تھےu   ہیںu   ہےc                 C   s2   | sg S t tt| d }| ||| |S )z&Replace end of sentence with separator   )chrordmaxreplacesplit)_str	separatormax_p r   M/home/ubuntu/.local/lib/python3.10/site-packages/urduhack/tokenization/eos.py_split_and_keep   s   r   textreturnc                 C   s,  g }t | d}|D ]	}|rt| dkrd|v rt |d}|D ]w}| }d}d}t|D ]O\}}	|r:d}q1|	tv rz|d t|k rz||d  tvrz|d t|k rq||d  dv rq|d|	 d ||d   d	 7 }d
}q1|d|	 d	 7 }q1|d|	 7 }q1|d	D ]}
|
rt|
 dkr||
  qq#q	| }d}d}t|D ]O\}}	|rd}q|	tv r|d t|k r||d  tvr|d t|k r||d  dv r|d|	 d ||d   d	 7 }d
}q|d|	 d	 7 }q|d|	 7 }q|d	D ]}
|
rt|
 dkr||
  qq	|S )a  Generate a list of urdu sentences from a given string.
    This function automatically fixes multiple whitespaces
    or new lines so you just need to pass the data and
    get sentences in return.

    Args:
        text (str): base string
    Returns:
        list
       ۔   u   ؟ Fr   )r   u   ، 
T)r   lenr   	enumerate_URDU_NEWLINE_WORDS_URDU_CONJUNCTIONSappendstrip)r   all_sentences	sentencessentenceq_sentences_sennew_sentis_contindexwordsenr   r   r   _generate_sentences   sd   


    r&   N)__doc__r   r   r   strlistr&   r   r   r   r   <module>   s
   