o
    Lεi                     @   s*   d dl mZ d dlmZ G dd dZdS )    )Rule)AbbreviationReplacerc                   @   s   e Zd Zg dZeddZeddZeddZeddZed	d
Z	eddZ
G dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZdS )Standard)   。   ．.   ！!?   ？u   (?<=[a-zA-z]°)\.(?=\s*\d+)   ∯z(?<=\s)\.(?=(jpe?g|png|gif|tiff?|pdf|ps|docx?|xlsx?|svg|bmp|tga|exif|odt|html?|txt|rtf|bat|sxw|xml|zip|exe|msi|blend|wmv|mp[34]|pptx?|flac|rb|cpp|cs|js)\s)z\n   ȹz\?(?=(\'|\"))   &ᓷ&z\s{3,} u   &⎋&'c                   @   s2   e Zd ZdZg dZg dZg dZeddZdS )zStandard.Abbreviationz:Defines the abbreviations for each language (if available))adjadmadvalalaaltaaprarcarizarkartassnasstattysaugavebartbldbldgblvdbrigbrosbtwcalcalifcaptclcmdrcocolcolocomdrconconncorpcplcresctzd.phildakdecdeldeptdetdistdrzdr.philz	dr.philosdrsze.gensespesqetcexpexpyextfebfedflaftfwyfygagengovhonhosphrhwayhwyzi.eiaididaillincindinginspisjanjrjuljunkankanskenkylaltltdmajmanmarmassmaymdmemedmessrsmexmfgmichminminnmissmllemmmmemomontmrmrsmsmsgrmssrsmtmtnnebnebrnevnonosnovnroctokoklaontopordoreppapdpdepennpennapfcphzph.dplplzppprofpvtquerdrsrefreprepsresrevrtsasksecsensenssepseptsfcsgtsrstsuptsurgtcetenntexunivusafazu.sutvavvervizvsvtwashwiswiscwywyoyukfig)!r   r   r%   r*   r,   r.   r4   r;   r=   rM   rN   rZ   rf   rh   r|   r}   r~   r   rp   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r   rE   r   r   r   r   z ([a-zA-Z0-9_])(\.)([a-zA-Z0-9_])u   \1∮\3N)	__name__
__module____qualname____doc__ABBREVIATIONSPREPOSITIVE_ABBREVIATIONSNUMBER_ABBREVIATIONSr   WithMultiplePeriodsAndEmailRule r   r   N/home/ubuntu/.local/lib/python3.10/site-packages/pysbd/lang/common/standard.pyAbbreviation   s    r   c                   @   sD   e Zd ZeddZeddZeddZeddZd	ZeeeegZ	d
S )zStandard.DoublePunctuationRulesz\?!   ☉z!\?   ☈z\?\?   ☇!!   ☄z\?!|!\?|\?\?|!!N)
r   r   r   r   	FirstRule
SecondRule	ThirdRule	ForthRuleDoublePunctuationAllr   r   r   r   DoublePunctuationRules#   s    



r   c                   @   s4   e Zd ZeddZeddZeddZeeegZdS )zStandard.ExclamationPointRulesz\!(?=(\'|\"))   &ᓴ&z\!(?=\,\s[a-z])z\!(?=\s[a-z])N)r   r   r   r   InQuotationRuleBeforeCommaMidSentenceRuleMidSentenceRuler   r   r   r   r   ExclamationPointRules+   s
    


r   c                   @   s   e Zd ZeddZeddZeddZeddZed	d
ZeddZ	eddZ
eddZeddZeddZeddZeddZeddZeddZeddZedd Zed!d"Zeeeeee	e
eeeeeeeeeegZd#S )$zStandard.SubSymbolsRulesr   r   u   ♬u   ،u   ♭:u   &ᓰ&r   u   &ᓱ&r   u   &ᓳ&r   r   r	   r   r
   u   &ᓸ&r   r   z?!r   z??r   z!?r   r   u   &✂&(u   &⌬&)u   ȸ r   
N)r   r   r   r   PeriodArabicComma	SemiColonFullWidthPeriodSpecialPeriodFullWidthExclamationExclamationPointQuestionMarkFullWidthQuestionMarkMixedDoubleQEMixedDoubleQQMixedDoubleEQMixedDoubleEE
LeftParensRightParensTemporaryEndingPunctutationNewliner   r   r   r   r   SubSymbolsRules7   s0    

















r   c                   @   sL   e Zd ZeddZeddZeddZeddZed	dZeeeeegZ	d
S )zStandard.EllipsisRulesz\.\.\.(?=\s+[A-Z])u   ☏☏.z(?<=\S)\.{3}(?=\.\s[A-Z])   ƪƪƪz(\s\.){3}\s   ♟♟♟♟♟♟♟z(?<=[a-z])(\.\s){3}\.($|\\n)   ♝♝♝♝♝♝♝z\.\.\.N)
r   r   r   r   ThreeConsecutiveRuleFourConsecutiveRuleThreeSpaceRuleFourSpaceRuleOtherThreePeriodRuler   r   r   r   r   EllipsisRulesO   s    




r  c                   @   sL   e Zd ZeddZeddZeddZeddZed	d
ZeeeeegZ	dS )zStandard.ReinsertEllipsisRulesr   z...r   z . . . r   z. . . .u   ☏☏z..u   ∮r   N)
r   r   r   r   SubThreeConsecutivePeriodSubThreeSpacePeriodSubFourSpacePeriodSubTwoConsecutivePeriodSubOnePeriodr   r   r   r   r   ReinsertEllipsisRulesd   s    




r  c                   @   s   e Zd ZddZdS )zStandard.AbbreviationReplacerzgA Being Did For He How However I In It Millions More She That The There They We What When Where Who Whyr   N)r   r   r   splitSENTENCE_STARTERSr   r   r   r   r   o   s    r   N)r   r   r   Punctuationsr   GeoLocationRuleFileFormatRuleSingleNewLineRuleQuestionMarkInQuotationRuleExtraWhiteSpaceRuleSubSingleQuoteRuleobjectr   r   r   r   r  r  r   r   r   r   r   r      s    





r   N)pysbd.utilsr   pysbd.abbreviation_replacerr   r   r   r   r   r   <module>   s   