o
    i                     @   sz   d Z ddlZddlZdd Zdd Zdd Zed	kr;ejd
 Zejd Z	e
edd Ze
e	ddee dS dS )zNormalization of Vietnamese text.

IWSLT 2015
code for normalization of Vietnamese texts, kindly provided by:
Thanh-Le Ha, MSc.
Karlsruhe Institute of Technology (KIT)

author:
Thang Tat Vu, Institute of Information Technology, Vietnam.
    Nc                 C   sd  |  dd dd dd dd d	d
 dd dd dd dd dd dd dd dd dd dd dd  d!d" d#d$ d%d& d'd( d)d* d+d, d-d. d/d0 d1d2 d3d4 d5d6 d7d8 d9d: d;d< d=d> d?d@ dAdB dCdD dEdF dGdH dIdJ dKdL dMdN dOdP dQdR dSdT dUdV dWdX dYdZ d[d\ d]d^ d_d` dadb dcdd dedf dgdh didj dkdl dmdn dodp dqdr dsdt dudv dwdx dydz d{d| d}d~ dd dd dd dd dd dd dd dd dd dd dd dd ddS )zPerform UniStd L normalization.u   à   àu   ã   ãu   ảu   ảu   á   áu   ạu   ạu   ằu   ằu   ẵu   ẵu   ẳu   ẳu   ắu   ắu   ặu   ặu   ầu   ầu   ẫu   ẫu   ẩu   ẩu   ấu   ấu   ậu   ậu   ỳu   ỳu   ỹu   ỹu   ỷu   ỷu   ý   ýu   ỵu   ỵu   ì   ìu   ĩu   ĩu   ỉu   ỉu   í   íu   ịu   ịu   ù   ùu   ũu   ũu   ủu   ủu   ú   úu   ụu   ụu   ừu   ừu   ữu   ữu   ửu   ửu   ứu   ứu   ựu   ựu   è   èu   ẽu   ẽu   ẻu   ẻu   é   éu   ẹu   ẹu   ều   ều   ễu   ễu   ểu   ểu   ếu   ếu   ệu   ệu   ò   òu   õ   õu   ỏu   ỏu   ó   óu   ọu   ọu   ờu   ờu   ỡu   ỡu   ởu   ởu   ớu   ớu   ợu   ợu   ồu   ồu   ỗu   ỗu   ổu   ổu   ốu   ốu   ộu   ộu   òa   oàu   õa   oãu   ỏa   oảu   óa   oáu   ọa   oạu   òe   oèu   õe   oẽu   ỏe   oẻu   óe   oéu   ọe   oẹu   ùy   uỳu   ũy   uỹu   ủy   uỷu   úy   uýu   ụy   uỵu   aóu   áoreplacestr r"   [/home/ubuntu/.local/lib/python3.10/site-packages/vietnamese_cleaner/vietnameseNormUniStd.pyUniStd_L   s$   (((((((((((((((r$   c                 C   s\  |  dd dd dd dd d	d
 dd dd dd dd dd dd dd dd dd dd dd  d!d" d#d$ d%d& d'd( d)d* d+d, d-d. d/d0 d1d2 d3d4 d5d6 d7d8 d9d: d;d< d=d> d?d@ dAdB dCdD dEdF dGdH dIdJ dKdL dMdN dOdP dQdR dSdT dUdV dWdX dYdZ d[d\ d]d^ d_d` dadb dcdd dedf dgdh didj dkdl dmdn dodp dqdr dsdt dudv dwdx dydz d{d| d}d~ dd dd dd dd dd dd dd dd dd dd dd ddS )zPerform UniStd H normalization.u   À   Àu   Ã   Ãu   Ảu   Ảu   Á   Áu   Ạu   Ạu   Ằu   Ằu   Ẵu   Ẵu   Ẳu   Ẳu   Ắu   Ắu   Ặu   Ặu   Ầu   Ầu   Ẫu   Ẫu   Ẩu   Ẩu   Ấu   Ấu   Ậu   Ậu   Ỳu   Ỳu   Ỹu   Ỹu   Ỷu   Ỷu   Ý   Ýu   Ỵu   Ỵu   Ì   Ìu   Ĩu   Ĩu   Ỉu   Ỉu   Í   Íu   Ịu   Ịu   Ù   Ùu   Ũu   Ũu   Ủu   Ủu   Ú   Úu   Ụu   Ụu   Ừu   Ừu   Ữu   Ữu   Ửu   Ửu   Ứu   Ứu   Ựu   Ựu   È   Èu   Ẽu   Ẽu   Ẻu   Ẻu   É   Éu   Ẹu   Ẹu   Ều   Ều   Ễu   Ễu   Ểu   Ểu   Ếu   Ếu   Ệu   Ệu   Ò   Òu   Õ   Õu   Ỏu   Ỏu   Ó   Óu   Ọu   Ọu   Ờu   Ờu   Ỡu   Ỡu   Ởu   Ởu   Ớu   Ớu   Ợu   Ợu   Ồu   Ồu   Ỗu   Ỗu   Ổu   Ổu   Ốu   Ốu   Ộu   Ộu   ÒAu   OÀu   ÕAu   OÃu   ỎAu   OẢu   ÓAu   OÁu   ỌAu   OẠu   ÒEu   OÈu   ÕEu   OẼu   ỎEu   OẺu   ÓEu   OÉu   ỌEu   OẸu   ÙYu   UỲu   ŨYu   UỸu   ỦYu   UỶu   ÚYu   UÝu   ỤYu   UỴr   r    r"   r"   r#   UniStd_H&   s"   (((((((((((((((r2   c                 C   s   t t| ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<S )=zPerform UniStd normalization.u   òAr   u   õAr   u   ỏAr   u   óAr   u   ọAr   u   òEr   u   õEr   u   ỏEr   u   óEr   u   ọEr   u   ùYr   u   ũYr   u   ủYr   u   úYr   u   ụYr   u   Òau   Oàu   Õau   Oãu   Ỏau   Oảu   Óau   Oáu   Ọau   Oạu   Òeu   Oèu   Õeu   Oẽu   Ỏeu   Oẻu   Óeu   Oéu   Ọeu   Oẹu   Ùyu   Uỳu   Ũyu   Uỹu   Ủyu   Uỷu   Úyu   Uýu   Ụyu   Uỵ)r$   r2   r   r    r"   r"   r#   UniStd:   s   
((((((r3   __main__      rutf8w)__doc__codecssysr$   r2   r3   __name__argvfileinfileoutopenreaddatawriter"   r"   r"   r#   <module>   s   

