o
    wiN                     @   sT  d dl Z d dlZd dlZdZdZdZdZdZdZdZ	d	Z
d
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdd dddddd d!d"d#d$ZeeegZi dddd%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCi dDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddei dfdgdhdidjdkdldmdndodpdqdrdsdtdudvdwdxdydzd{d|d}d~dddddddddi ddddddddddddddddddddddddddddddddddi ddddddddddddddddddddddddd
dddddddddddddddddddddZ	 g g dâg dĢg dŢg dƢg dâg dŢg dŢg dŢg dŢg dŢg dŢg dǢg dȢg dɢg dʢg dŢg dŢg dɢg dʢg dŢg dŢg dŢg dˢg dˢg dˢg dŢg dŢg dˢg dŢg dˢg dŢg dŢg dˢg dˢg dˢg dŢg dŢg dˢg dˢg dˢg dŢg d̢g dŢg dˢg dˢg dˢg dŢg dŢg dŢg dʢg dŢg d͢g d΢g dϢg dˢg dƢg dŢg dŢg dâg dâg dТg dѢg dŢg dŢg dŢg dŢg dŢg dǢg dҢg dɢg dʢg dŢg dŢg dɢg dʢg dŢg dŢg dŢg dâg dâg dɢg dӢg dӢg dӢg dӢg dԢg dբg d֢g dӢg dעg dעg dעg dآg d٢g dڢg d٢g dǢg dȢg dۢg dۢg dӢg dӢg dˢg dŢg dŢg dŢg dŢg dŢg dŢg dŢg dŢg dŢg dܢg dݢg dݢg dޢg dޢg dߢg dߢg dߢg dߢg dߢg dߢg dâg dâg dâg dâg dâZ ddddddddddddddddZ!dddZ"dd Z#e# Z$dd Z%e% Z&i Z'eeeeg Z(e)dD ]Z*e+e*e(v e'e*< qdd Z,G dd de-Z.G dd dZ/dd Z0d Z1e2dkr(d Z*ze+e j3d Z*e*e)ddvre4W n e4e5fy   e0e j3d   Y nw e/ Z6e67e* dZ8d Z9	 e9rdS e j:;e1Z<e<sdZ9e8e< Z<e6=e<e9Z>e<e>d Z8e6?  qdS (      N   ip	  O   id	     iQ	  iR	  ie	           iM	  i 	  i  i                 
   i  i	  i   i                           )@   B   C   D   E   F   G   H   I   J   K   i	     i	     i	     i	     i	     i	     i	     i		     i
	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i	     i 	     i!	     i"	     i#	     i$	     i%	     i&	     i'	     i(	     i)	     i*	     i+	     i,	     i-	     i.	     i/	     i_	     i0	     i1	     i2	     i3	     i4	     i5	     i6	     i7	     i8	     i9	     i>	     i?	     i@	     iA	     iB	     iC	     iF	     iG	     iH	     iE	     iJ	     iK	     iL	     iI	  i<	              if	     ig	     ih	     ii	  ij	  ik	  il	  im	  in	  io	  )                                 )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   i	  i=	  iD	  iP	  iX	  iY	  iZ	  i[	  i\	  i]	  i^	  i`	  ia	  ib	  ic	  )r&   r	   r^   r   r3   r4   r5   r:   r?   r@   rI   r*   r'   rZ   r[   i	  i	  ))r   rO   )r   rT   c                  C   s   t tddt tdd } i }tdD ]:}i }tdD ]}|||< qtddD ]!}|| v r/q(t| }t|d@  | rI|||< ||  |d 7  < q(|||< qtD ]}t| ||d	  |d
 < qQ|S )Nrg   rj   rt      	   r   rx   r   r   r   )listrangeiscii_to_unicodevalidation_tablespecial_maps)_invalid_rangescriptsicurr_scrcht r   p/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/indic_transliteration_unmaintained/iscii2utf8.pymake_script_mapsf  s$   

r   c                  C   sH   i } t dD ]}i }t dD ]}|t| vo|tv||< q|| |< q| S )Nrz   ry   )r|   script_mapsISCII_SPECIALS)mapsr   curr_mapjr   r   r   make_invalid_maps  s   
r   ry   c                 C   s   g }| D ]k}|dk r| | q|dk r'| |d? dB  | |d@ dB  q|dk rI| |d? dB  | |d? d@ dB  | |d@ dB  q| |d	? d
B  | |d? d@  | |d? d@ dB  | |d@ dB  qdtt|S )z6
    converts an array of integers to utf8 string
    r   i   r   r@   ?   i      r_      r    )appendjoinmapchr)youtxr   r   r   to_utf8  s    r   c                   @   s   e Zd Zdd Zdd ZdS )IllegalInputc                 C   s
   || _ d S N)	exception)selfer   r   r   __init__     
zIllegalInput.__init__c                 C   s
   t | jS r   )reprr   r   r   r   r   __str__  r   zIllegalInput.__str__N)__name__
__module____qualname__r   r   r   r   r   r   r     s    r   c                   @   sf   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd ZdddZdS )Parserc                 C   s:   d| _ d| _t | _ | _| _g | _d| _dgd | _d S )Nr   r   )	delta	curr_maskNO_CHAR	prev_charsrc_char	dest_chardestposstatr   r   r   r   r     s   zParser.__init__c                 C   s    t | j}tj| g | _d S r   )r   r   sysstdoutwrite)r   r   r   r   r   write_output  s   

zParser.write_outputc                 C   sD   |t ddv r|d }ntdt| |dkr || _|t | _dS )zQ
        set the value of delta to reflect the current codepage
        
        r   r   zInvalid Value for ATR %sr   N)r|   r   hexcurr_scriptDELTAr   )r   r   nr   r   r   
set_script  s   

zParser.set_scriptc                 C   s   t |d@  | j S )Nrx   )r~   r   r   r   r   r   r   isvalid  s   zParser.isvalidc                 C   s   |t | j vS r   )invalid_charsr   )r   r   r   r   r   isvalid_iscii  s   zParser.isvalid_isciic                 C   s   t |d }|S r   )nukta_specialsget)r   r   r   r   r   r   is_nukta_special  s   zParser.is_nukta_specialc                 C   s   |  j d7  _ tdD ]+}t|krt|ks n |dvr n|dkr$t}n	|dkr+t}nt}| |r6|  S qt	t
jdtt  d S )Nr   )r?   r5   r8   r?   r5   zInvalid input after EXT %s)r   r|   EXT_RANGE_ENDEXT_RANGE_BEGINDEV_ABBR_SIGNDEV_SVARITADEV_ANUDATTAr   logginginfor   stderrr   r   )r   	curr_charar   r   r   r   
handle_ext  s"   
zParser.handle_extc                 C   s\   t tjd |t v r| t|  t tjd| nt tjd 	 |  jd7  _d S )NzHandling ATR:zsetting script toignoredr   )r   r   r   r   ISCII_SCRIPTSkeysr   r   r   r   r   r   
handle_atr  s   zParser.handle_atrc                 C   s$   |t krd}nt}|  jd7  _|S )N    r   )ISCII_HALANTZWJr   )r   r   retr   r   r   
handle_inv.  s
   zParser.handle_invc                 C   sF   d }|t kr| |}|S |tkr| |}|S |tkr!| |}|S r   )	ISCII_ATRr   	ISCII_EXTr   	ISCII_INVr   )r   r   r   r   r   r   r   post_analysis:  s   



zParser.post_analysisr   c                 C   s  | j }td| }t }}t|}d| _| j}t|D ]}|| }t}	d}
t| j	 | r@t
tjdt| |  jd7  _q|rK||d krK|}	nh|tkrR|}q|tv re| ||}|d urb|}	t}nNt| sjnI|tv rs|}	|}n@|tkr|tkrt}	t}|  jd7  _n,|tkr|tkrt}	d}
n|tkr|tkrt}	d}
n| |}|r|}	t}|  jd7  _g }|
dkr|| t}|	tkr||	 n|tkr|| |}|D ]}|dkrt| j	 | }n|}|  jd7  _| j | qq| jS )NBr   zignoring invalid iscii charr   rx   )r   arraytolistr   lenr   r   r|   r   r   r   r   r   r   r   r   r   iscii_modifyingISCII_DANDADOUBLE_DANDAr   ZWNJISCII_NUKTAr   r   r   r   )r   srcflushr   r   r   r   r   r   r   add_prevr   tmpto_addr   mr   r   r   
iscii2utf8K  s   



zParser.iscii2utf8N)r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r     s    	r   c                 C   s$   d|  }t tj| td d S )NaZ  
    Usage:

    %s script

    where script is a number between 1-9

    1 - devnag
    2 - bengali / assamese
    3 - punjabi
    4 - gujarati
    5 - oriya
    6 - tamil
    7 - telugu
    8 - kannada
    9 - malayalam

    the program reads from stdin and writes to stdout

    any msgs to the user (error msgs etc) are printed on stderr
    r   )r   r   r   r   exit)nameusager   r   r   
show_usage  s
   r   i   __main__r   )@r   r   r   r   r   ATR_MASKDANDAr   r   r   r   r   r   r   HALANTINDIC_BLOCK_BEGININDIC_BLOCK_ENDINVALID_CHARISCII_BEGINr   r   r   r   LFr   	UNI_BEGINUNI_ENDr   r   r   r   r}   r~   r   r   r   r   r   r   r   _tmpr|   r   intr   	Exceptionr   r   r   
chunk_sizer   argv
ValueError
IndexErrormyparr   r   r   stdinreadr   r   r   r   r   r   r   r   <module>   sB  

	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVd	
 !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~   	" b
