o
    wi                     @   sN  d dl Z d dlZd dlmZ d dlmZ d dlmZ dZdZdZ	dZ
d	Zd
ZdZdZ	 dZdZdZdZdZdZdZg dZG dd deZG dd deZG dd deZG dd deZi Ze je jeddZ e !e D ]0Z"eZ#e"$dd Z%e%&dreZ#ne%&d
reZ#ne%ev reZ#ee je e"e#d!Z'e'ee'j%< qpe( Z)dS )"    N)Scheme)load_schemedev_vowel_to_mark_maphkhk_dravidianiastiso	iso_vedicitransitrans_dravidiantitus	optitransoptitrans_dravidian
kolkata_v2slp1slp1_accentedvelthuiswx)r   
iast_iso_mr	   r
   r   r   c                       s6   e Zd Zd
 fdd	Zdd Zdd Zdd	 Z  ZS )RomanSchemeNc                    s8   t t| j||dd tdd | d  D | d< d S )NT)datanameis_romanc                 S   s$   g | ]\}}|d krt | |fqS )u   अr   ).0kv r   p/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/indic_transliteration/sanscript/schemes/roman.py
<listcomp>"   s   $ z(RomanScheme.__init__.<locals>.<listcomp>vowelsvowel_marks)superr   __init__dictitems)selfr   r   kwargs	__class__r   r   r#       s   zRomanScheme.__init__c                 C   s@   | d du r|S ddl m} |j|j| j|j|d|j| jdS )zRoman schemes define multiple representations of the same devanAgarI character. This method gets a library-standard representation.
    
    data : a text in the given scheme.
    
alternatesNr   	sanscript)_from_tor   )r   r-   r.   )indic_transliterationr,   transliterater   
DEVANAGARI)r&   r   r,   r   r   r   get_standard_form%   s   zRomanScheme.get_standard_formc                 C   s4   | j |d}|dd}|dd}|dd}|S )Nr   AaaIiiUuu)r2   replace)r&   textr   r   r   get_double_lettered0   s
   zRomanScheme.get_double_letteredc                 C   s^   |  }ddlm} g }|D ]}|| | j kr$|d|  q|| qd|S )Nr   )detectz<%s> )splitr/   r=   lowerr   appendjoin)r&   r;   wordsr=   	out_wordswordr   r   r   mark_off_non_indic_in_line7   s   
z&RomanScheme.mark_off_non_indic_in_line)NN)__name__
__module____qualname__r#   r2   r<   rF   __classcell__r   r   r(   r   r      s
    r   c                   @   s   e Zd ZdddZdS )ItransSchemeFTc                 C   s   |r
| j |||dS |}dd l}|rd}nd}|d| d|}|d| d|}|d	| d
|}|d| d|}|d| d|}|sO|d| d|}|S )N)data_inomit_samomit_yrlr   z(?<!sa) z%sM( *)([kgx])z~N\1\2z%sM( *)([cCj])z~n\1\2z%sM( *)([tdn])zn\1\2z%sM( *)([TDN])zN\1\2z%sM( *)([pb])zm\1\2z%sM( *)([yvl])z\2.N\1\2)#fix_lazy_anusvaara_except_padaantasregexsub)r&   rL   rM   rN   ignore_padaantadata_outrQ   prefixr   r   r   fix_lazy_anusvaaraD   s   zItransScheme.fix_lazy_anusvaaraN)FFT)rG   rH   rI   rV   r   r   r   r   rK   C   s    rK   c                   @   s    e Zd Zd	ddZd
ddZdS )OptitransSchemeGYtc                 C   st   | j |d}|dd}|dd}|dd}|dd}|d|}|d	d
}|dkr4|d|}| }|S )Nr3   RRriRLLilriLLIjnxkshrY   )r2   r:   r@   )r&   r;   jn_replacementt_replacementr   r   r   to_lay_indianY   s   zOptitransScheme.to_lay_indianTc                 C   s>  i dddddddddd	d
ddddddddddd	ddddddddddddddddddd d!d"d#d#ddd$dd%}|  D ]
\}}|||}qKd&}td'| d(|}td)| d(|}td*d|}td+d,|}|d-d}td.d/|}|rtd0d1|}d2d3lm} |||j|j}|S )4Nu   ‘rO   u   ʼz{}u   ’oor9   eer7      ëEu   ěeu   ēou   ōu   ār5   u   īu   ūwr   u   ẕzu   żu   ẓu   ž   ću   ćhcqsshrY   hu   r̥)u   ̌u   c̱ẖchhchrn   u   ḳu   ṣu   s̱ẖu   s̱u   ẖu   ḥu   ̱u   ̠u   r̤iu   ̤u   [aāeēiīoōuū]z(%s)'z\1z'(%s)z'(?=\s|$|-)z'hz{}h'u   ṅ(?=[^kgq]|$)u   m̐z#([kghncjzftdTDpbmyrlvsq])(?=\s|$|-)z\1ar   r+   )	r%   r:   rQ   rR   r/   r,   r0   ISO	OPTITRANS)r&   r;   add_terminal_areplacementskeyvaluevowels_patternr,   r   r   r   approximate_from_iso_urduf   sb   	z)OptitransScheme.approximate_from_iso_urduN)rX   rY   )T)rG   rH   rI   re   r~   r   r   r   r   rW   W   s    
rW   c                       s*   e Zd Zd fdd	Z fddZ  ZS )CapitalizableSchemeNTc                    s   t t j|||d  fdd}| d   | d   d v r-| d   d v r9| d   |dg d S )	N)r   r   r   c                    sz   | D ]8}|  | g}| d v r0| d | dd  d | D  dd  d | D  7 }tt| d |< qd S )Nr*   c                 S      g | ]}|  qS r   )
capitalizer   yr   r   r   r          zRCapitalizableScheme.__init__.<locals>.add_capitalized_synonyms.<locals>.<listcomp>c                 S   r   r   )upperr   r   r   r   r      r   )r   r   listset)	some_listra   synonymsr&   r   r   add_capitalized_synonyms   s   <z>CapitalizableScheme.__init__.<locals>.add_capitalized_synonymsr    
consonantsextra_consonantsaccented_vowel_alternatesu   oṃ)r"   r   r#   valueskeys)r&   r   r   r   r   r(   r   r   r#      s   zCapitalizableScheme.__init__c                    s6   dd | d   }t|d|}tt| j|dS )Nu   ([%s])([̥̇¯̄]+)rO   accentsz\2\1r3   )rB   r   rQ   rR   r"   r   r2   )r&   r   patternr(   r   r   r2      s   z%CapitalizableScheme.get_standard_form)NTN)rG   rH   rI   r#   r2   rJ   r   r   r(   r   r      s    r   r   romanz.tomlrO   )	file_pathcls)*osrQ   indic_transliteration.sanscriptr   'indic_transliteration.sanscript.schemesr   r   HKHK_DRAVIDIANIASTrw   	ISO_VEDICITRANSITRANS_DRAVIDIANTITUSrx   OPTITRANS_DRAVIDIAN
KOLKATA_v2SLP1SLP1_ACCENTEDVELTHUISWXCAPITALIZABLE_SCHEME_IDSr   rK   rW   r   SCHEMESpathrB   dirname__file__	data_pathlistdirfr   r:   r   
startswithschemer   ALL_SCHEME_IDSr   r   r   r   <module>   sP    $,

