o
    'Ni'                  	   @   s  d dl Z d dlmZ d dlmZ d dlmZ G dd dZG dd dZG d	d
 d
Z	e
dkre  ee jdkrDed e d e jd dkre jd ZeddZee jd dddGZee jd ddd$Zee D ]\ZZe ZeeeZeed  qqW d   n1 sw   Y  W d   dS W d   dS 1 sw   Y  dS e jd dkre jd ZeddZee jd dddGZee jd ddd$Zee D ]\ZZe ZeeeZeed  qW d   n1 sw   Y  W d   dS W d   dS 1 sw   Y  dS e jd dkre jd Ze Zee jd dddIZee jd ddd%Zee D ]\ZZe ZeeeZeed  qJW d   n1 slw   Y  W d   dS W d   dS 1 sw   Y  dS e jd dkre jd Ze	 Zee jd dddIZee jd ddd%Zee D ]\ZZe ZeeeZeed  qW d   n1 sw   Y  W d   dS W d   dS 1 sw   Y  dS dS dS )    N)indic_normalize)unicode_transliterate)loaderc                   @   &   e Zd Zd
ddZdd Zdd Zd	S )AggressiveScriptUnifierhito_nasal_consonantsc                 C   s0   || _ || _d| _d| _d| _i | _|   d S )NT)common_langnasals_modedo_normalize_chandrasdo_normalize_vowel_endingremove_nuktasnormalizer_map_init_normalizersselfr	   r
    r   Y/home/ubuntu/.local/lib/python3.10/site-packages/indicnlp/transliterate/script_unifier.py__init__   s   z AggressiveScriptUnifier.__init__c              
   C   s   t  }dD ]}|j|| j| j| j| jd| j|< q|jd| j| j| j| jdddd| jd< |jd| j| j| j| jdd| jd< |jd| j| j| j| jdd	| jd< |jd
| j| j| j| jddd| jd
< d S )N)r   mrsakKnesdbngutatekn)r
   r   r   r   paT)r
   r   r   r   do_canonicalize_addakdo_canonicalize_tippido_replace_vowel_basesor)r
   r   r   r   do_remap_waas)r
   r   r   r   do_remap_assamese_charsml)r
   r   r   r   do_canonicalize_chillusdo_correct_geminated_T)r   IndicNormalizerFactoryget_normalizerr
   r   r   r   r   r   normalizer_factorylangr   r   r   r      s6   




z)AggressiveScriptUnifier._init_normalizersc                 C   s&   | j | |}tj||| j}|S Nr   	normalizer   UnicodeIndicTransliteratortransliterater	   r   textr.   r   r   r   	transform:   s   z!AggressiveScriptUnifier.transformN)r   r   __name__
__module____qualname__r   r   r6   r   r   r   r   r      s    
	r   c                   @   r   )BasicScriptUnifierr   
do_nothingc                 C   s   || _ || _i | _|   d S r/   )r	   r
   r   r   r   r   r   r   r   A   s   zBasicScriptUnifier.__init__c                 C   s,   t  }dD ]}|j|| jd| j|< qd S )N)r   r   r   r   r   r   r   r   r   r   r   r   r#   r%   r'   r
   )r   r*   r+   r
   r   r,   r   r   r   r   G   s   z$BasicScriptUnifier._init_normalizersc                 C   s0   || j v r| j | |}tj||| j}|S r/   r0   r4   r   r   r   r6   M   s   
zBasicScriptUnifier.transformN)r   r<   r7   r   r   r   r   r;   ?   s    
r;   c                   @   s   e Zd ZdddZdd ZdS )NaiveScriptUnifierr   c                 C   s
   || _ d S r/   )r	   )r   r	   r   r   r   r   W   s   
zNaiveScriptUnifier.__init__c                 C   s   t j||| j}|S r/   )r   r2   r3   r	   r4   r   r   r   r6   Z   s   zNaiveScriptUnifier.transformN)r   )r8   r9   r:   r   r6   r   r   r   r   r>   U   s    
r>   __main__   zDUsage: python script_unifier <command> <infile> <outfile> <language>   
aggressiver   r=      rzutf-8)encoding   w
moderater<   basicnaive)sysindicnlp.normalizer   indicnlp.transliterater   indicnlpr   r   r;   r>   r8   loadlenargvprintexitlanguageunifieropenifileofile	enumerate	readlinesilinestripr6   transliterated_linewriter   r   r   r   <module>   s   ,




"

$
 $
 $2