o
    Ti_                     @  s  U d Z ddlmZ ddlZddlZddlZddlZg dZedZ	edZ
dRd	d
Ze ZdSddZedZe ZdTddZdUddZe ZedZedZedZi eddeddedd ed!d"ed#d$ed%d&ed'd(ed)d*ed+d,ed-d.ed/d0ed1d2ed3d4ed5d6ed7d8ed9d:ed;d<ed=d>ed?d@edAdBedCdDedEdFiZdVdHdIZe ZdJdKdLdMdNdOZdedP< edQjdWi eejZdS )Xzi
This gives other modules access to the gritty details about characters and the
encodings that use them.
    )annotationsN)
zlatin-1zsloppy-windows-1252zsloppy-windows-1251zsloppy-windows-1250zsloppy-windows-1253zsloppy-windows-1254zsloppy-windows-1257z
iso-8859-2macromancp437u   [ʼ‘-‛]u	   [“-‟]returndict[str, re.Pattern[str]]c                  C  sX   dt di} tD ] }tttdddg }||}d| d}t || |< q	| S )a  
    ENCODING_REGEXES contain reasonably fast ways to detect if we
    could represent a given string in a given encoding. The simplest one is
    the 'ascii' detector, which of course just determines if all characters
    are between U+0000 and U+007F.
    asciiz^[ -]*$         z^[ --z]*$)recompileCHARMAP_ENCODINGSbyteslistrangedecode)encoding_regexesencoding
byte_rangecharlistregex r   A/home/ubuntu/.local/lib/python3.10/site-packages/ftfy/chardata.py_build_regexes    s   
r   dict[str, str]c                  C  sj   i } t jj D ]*\}}|dr2|| d| < || kr2| }d| }t ||kr2| | |< q| S )N;&)htmlentitieshtml5itemsendswithlowerupperunescape)r   namechar
name_upperentity_upperr   r   r   _build_html_entities>   s   
r)   z&#?[0-9A-Za-z]{1,24};textstrr   boolc                 C  s   t t| | S )z
    Given text and a single-byte encoding, check whether that text could have
    been decoded from that single-byte encoding.

    In other words, check whether it can be encoded in that encoding, possibly
    sloppily.
    )r,   ENCODING_REGEXESmatch)r*   r   r   r   r   possible_encodingV   s   r/   dict[int, None]c                  C  sJ   i } t tdddgtdddgtddd	gtd
dD ]}d| |< q| S )z
    Build a translate mapping that strips likely-unintended control characters.
    See :func:`ftfy.fixes.remove_control_chars` for a description of these
    codepoint ranges and why they should be removed.
    r   	                ij   ip   i  i  i  N)	itertoolschainr   )control_charsir   r   r   _build_control_char_mappinga   s   
	r:   se   [][ ]|[][ ][---]|[-][---][ ]|[][ ][-][-]|[][-][ ][-]|[][-][-][ ]s   [-][]|[-][?]|[-][?][-][?-]|[-][?-][-][?]|[-][?][-]|[-][-][?]|[-][?][-][-]|[-][-][?][-]|[-][-][-][?]|z[\x80-\x9f]u   ĲIJu   ĳiju   ŉu   ʼnu   ǱDZu   ǲDzu   ǳdzu   Ǆu   DŽu   ǅu   Džu   ǆu   džu   ǇLJu   ǈLju   ǉlju   ǊNJu   ǋNju   ǌnju   ﬀffu   ﬁfiu   ﬂflu   ﬃffiu   ﬄfflu   ﬅu   ſtu   ﬆstdict[int, str]c                  C  s@   ddi} t ddD ]}t|}td|}||kr|| |< q	| S )zt
    Build a translate mapping that replaces halfwidth and fullwidth forms
    with their standard-width forms.
    i 0   i  i  NFKC)r   chrunicodedata	normalize)	width_mapr9   r&   	alternater   r   r   _build_width_map   s   rT   u   ĂÂÄĀÅÃÆĆČÇĎĐÉĚÊËĖÈĒĘÐĞĢÍÎÏİÌĪĶĹĻŁŃŇŅÑÓÔÖŐÒŌØÕŘŚŠŞŢÞÚÛÜŰÙŪŲŮÝŹŽŻß×ΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯu   áăâäàāąåãæćčçďéěêëėèēęęģíîïìīįķĺļŕźΰαβγδεζηθικλμνξοабвгдежзийклмнопu   đðğóšπσруu   -¿ ĄÆĽŁØŖŚŠŞŤŸŹŽŻŒąæƒľłøŗśšşťźžżœˆˇ˘˛˜˝΄΅ΆΈΉΊΌΎΏЁЂЃЄЅІЇЈЉЊЋЌЎЏёђѓєѕіїјљњћќўџҐґ–—―‘’‚“”„†‡•…‰‹›€№™u   -¿ĄÆĽŁØŖŚŠŞŤŸŹŽŻŒąæƒľłøŗśšşťźžżœˆˇ˘˛˜˝΄΅ΆΈΉΊΌΎΏЁЂЃЄЅІЇЈЉЊЋЌЎЏёђѓєѕіїјљњћќўџҐґ†‡‰‹›€№™)utf8_first_of_2utf8_first_of_3utf8_first_of_4utf8_continuationutf8_continuation_strict
UTF8_CLUESz
    (?<! [{utf8_continuation_strict}])
    (
        [{utf8_first_of_2}] [{utf8_continuation}]
        |
        [{utf8_first_of_3}] [{utf8_continuation}]{{2}}
        |
        [{utf8_first_of_4}] [{utf8_continuation}]{{3}}
    )+
    )r   r   )r   r   )r*   r+   r   r+   r   r,   )r   r0   )r   rL   r   )__doc__
__future__r   r   r6   r   rP   r   r   SINGLE_QUOTE_REDOUBLE_QUOTE_REr   r-   r)   HTML_ENTITY_REHTML_ENTITIESr/   r:   CONTROL_CHARSALTERED_UTF8_RELOSSY_UTF8_REC1_CONTROL_REord	LIGATURESrT   	WIDTH_MAPrZ   __annotations__formatVERBOSEUTF8_DETECTOR_REr   r   r   r   <module>   s    






(
	

Ge     &		
