o
    Ni"                     @   s   d dl Z d dlZd dlZddlmZ ddlmZmZm	Z	m
Z
mZ d dlmZmZmZ dgZdgZdd	gZd
gZdgZdgZdgZG dd dZdS )    N   )StringTranslator)devanagari_preprocessordevanagari_short_vowels_remover"devanagari_initial_vowels_abjadify&devanagari_nuqta_consonants_simplifier(devanagari_non_initial_vowels_abjadifier)remove_diacriticsnormalize_charactersnormalize_combine_characterszinitial_vowels.csvz
vowels.csvznumerals.csvzpunctuations.csvzfinal_vowels.csvz
arabic.csvz	hamza.csvzhamza_combo.csvc                   @   sJ   e Zd ZdZejed fddZdd Z	dd	d
Z
dd Zdd ZdS )BaseIndoArabicTransliteratorzX
    Common processing for all supported Indo-Pakistani languages (except Kashmiri)
    z/data/c                 C   s|  || _ i | _i | _i | _i | _i | _i | _i | _i | _t	D ]6}t
j|| d d}|jD ]'}t|| d  t|| d  t|| d  }}}|| j|< q+qtD ];}t
j|| d d}|jD ],}t|| d  t|| d  t|| d  }}}|| j|< || j|< qdqVtD ];}t
j|| d d}|jD ],}t|| d  t|| d  t|| d  }}}|| j|< || j|< qqtD ]6}t
j|| d d}|jD ]'}t|| d  t|| d  t|| d  }}}|| j|< qqtD ]8}t
j|| d d}|jD ](}t|| d  t|| d  t|| d  }}}|| j|< qqtD ]8}t
j|| d d}|jD ](}t|| d  t|| d  t|| d  }}}|| j|< qTqFtD ]8}t
j|| d d}|jD ](}t|| d  t|| d  t|| d  }}}|| j|< qqg }	|D ]i}t
j|| d d}|jD ]Y}t|| d  t|| d  t|| d  }}}|| j|< |d | j|d < |dvr	|	|||f |d  |d	 | }
}|| j|
< |d | j|
d < q̐qtt|	D ]O}|	| \}}}tt|	D ]>}|	| \}}}|d
 | | j|d | < |d | | j|d | < |d | | j|d | < |d | | j|d | < q=q.t| jdd| _t| jdd| _t| j| _t| j| _t| j| _t| j| _t| j| _t| j| _ ddl!m"} | | _#d S )N)headerr   r      u   ाu   ا>      و   ھ   یu    ّ   ्u   ीu   यu   ोu   वu   ींu   यंu   ोंu   वंT)match_initial_only)match_final_only)DevanagariNormalizer)$data_dir initial_arabic_to_devanagari_mapfinal_arabic_to_devanagari_maparabic_to_devanagari_map_pass1arabic_to_devanagari_map_pass2!arabic_to_devanagari_cleanup_passhamza_to_devanagari_maphamza_combo_to_devanagari_mapdevanagari_postprocess_mapMISC_MAP_FILESpdread_csvcolumnsstrstripINITIAL_MAP_FILESFINAL_MAP_FILESARABIC_MAP_FILESHAMZA_FILESHAMZA_COMBO_FILESMAIN_MAP_FILESappendrangelenr   &initial_arabic_to_devanagari_converter$final_arabic_to_devanagari_converter$arabic_to_devanagari_converter_pass1$arabic_to_devanagari_converter_pass2"arabic_to_devanagari_final_cleanuphamza_to_devanagari_converter#hamza_combo_to_devanagari_converterdevanagari_postprocessor"indicnlp.normalize.indic_normalizer   devanagari_normalizer)selfconsonants_map_filesr   map_filedfiarabic_letterroman_letterdevanagari_letter
consonantsarabic_shaddadevanagari_shaddaarabic_letter_iroman_letter_idevanagari_letter_ijarabic_letter_jroman_letter_jdevanagari_letter_jr    rJ   T/home/ubuntu/.local/lib/python3.10/site-packages/indo_arabic_transliteration/base.py__init__   s   
@
@

@

@
@
@
@
@


z%BaseIndoArabicTransliterator.__init__c                 C   sl   t |}tt|}|ddddddddd	d
}|dd}tdd|}tdd|}|S )N,u   ،?u   ؟u   ؛;u   ؍/u   ٪%u   اےu   ائےu   (\B)یےu   \1ئےu   \s([ۓؤئ])z\1)r	   r   r
   replaceresubr8   textrJ   rJ   rK   arabic_normalizeu   s   ,z-BaseIndoArabicTransliterator.arabic_normalizeTFc                 C   sX   | j |}|rt|}|r|dd}|t}| j|}| j|}t|}|S )Nr    )	r7   	normalizer   rR   	translater   r5   reverse_translater   )r8   rV   abjadify_initial_vowelsdrop_viramarJ   rJ   rK   devanagari_normalize   s   

z1BaseIndoArabicTransliterator.devanagari_normalizec                 C   s   | t}tdd|}|S )Nu   े([ऀ-ॣॲ-ॿ])u   ी\1)rZ   r   rS   rT   rU   rJ   rJ   rK   devanagari_remove_short_vowels   s   
z;BaseIndoArabicTransliterator.devanagari_remove_short_vowelsc                 C   s
   t |S )N)r   rZ   rU   rJ   rJ   rK   devanagari_nativize   s   
z0BaseIndoArabicTransliterator.devanagari_nativizeN)TF)__name__
__module____qualname____doc__ospathdirname__file__rL   rW   r^   r_   r`   rJ   rJ   rJ   rK   r      s    _
r   )re   rS   pandasr    
str_mapperr   commonr   r   r   r   r    urduhack.normalization.characterr	   r
   r   r%   r*   r   r&   r'   r(   r)   r   rJ   rJ   rJ   rK   <module>   s    