o
    ^’×iÔC  ã                   @  s  d Z ddlmZ ddlZddlZddlZddlmZm	Z	 er$ddl
mZ e d¡Zej d¡Zej e¡Zej e¡ eejd< e d	¡e_e d
¡e_e d¡e_e d¡e_eje_e dej¡e_e dej¡e_e d¡ZG dd„ dejƒZ e e_G dd„ dejƒZ!dS )a  
This module imports a copy of [`html.parser.HTMLParser`][] and modifies it heavily through monkey-patches.
A copy is imported rather than the module being directly imported as this ensures that the user can import
and  use the unmodified library for their own needs.
é    )ÚannotationsN)ÚTYPE_CHECKINGÚSequence)ÚMarkdownz--!?>zhtml.parserÚ
htmlparserz<[a-zA-Z]|</>z</[a-zA-Z]?z\?>z&([a-zA-Z][-.a-zA-Z0-9]*);a”  
  <[a-zA-Z][^`\t\n\r\f />\x00]*       # tag name <= added backtick here
  (?:[\s/]*                           # optional whitespace before attribute name
    (?:(?<=['"\s/])[^`\s/>][^\s/=>]*  # attribute name <= added backtick here
      (?:\s*=+\s*                     # value indicator
        (?:'[^']*'                    # LITA-enclosed value
          |"[^"]*"                    # LIT-enclosed value
          |(?!['"])[^`>\s]*           # bare value <= added backtick here
         )
         (?:\s*,)*                    # possibly followed by a comma
       )?(?:\s|/(?!>))*
     )*
   )?
  \s*                                 # trailing whitespace
a  
  [a-zA-Z][^`\t\n\r\f />]*           # tag name
  [\t\n\r\f /]*                     # optional whitespace before attribute name
  (?:(?<=['"\t\n\r\f /])[^`\t\n\r\f />][^\t\n\r\f /=>]*  # attribute name
    (?:=                            # value indicator
      (?:'[^']*'                    # LITA-enclosed value
        |"[^"]*"                    # LIT-enclosed value
        |(?!['"])[^>\t\n\r\f ]*     # bare value
       )
     )?
    [\t\n\r\f /]*                   # possibly followed by a space
   )*
   >?
z^([ ]*\n){2}c                      s.   e Zd ZdZ‡ fdd„Zd	‡ fdd„Z‡  ZS )
Ú_HTMLParserz"Handle special start and end tags.c                   s‚   | j ||d … }t|d ƒ}t|ƒdk s+d|  krdks;n d|  kr*dks;n |  | j ||d … ¡ |d S tƒ  |¡S )Né   éÿÿÿÿéA   éZ   éa   éz   é   )ÚrawdataÚordÚlenÚhandle_dataÚsuperÚparse_endtag)ÚselfÚiÚstartÚc©Ú	__class__© úQ/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/markdown/htmlparser.pyr   c   s   8z_HTMLParser.parse_endtagr   ÚintÚreturnc                   sB   | j ||d … dkr|  | j ||d … ¡ |d S tƒ  |¡S )Nr   ú</>)r   r   r   Úparse_starttag©r   r   r   r   r   r    k   s   z_HTMLParser.parse_starttag©r   r   r   r   )Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r    Ú__classcell__r   r   r   r   r   `   s    r   c                      s"  e Zd ZU dZdC‡ fdd„Z‡ fdd„Z‡ fdd	„ZedDdd„ƒZdEdd„Z	dFdd„Z
dGdd„ZdHdd„ZdIdd„ZdJdd „ZdHd!d"„ZdKd$d%„ZdKd&d'„ZdId(d)„ZdId*d+„ZdId,d-„ZdId.d/„ZdL‡ fd1d2„ZdMd4d5„ZdL‡ fd6d7„ZdNdO‡ fd:d;„Zd<Zd=ed>< dPd?d@„ZdLdAdB„Z‡  ZS )QÚHTMLExtractorzû
    Extract raw HTML from text.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance passed to `md` and the remaining text
    is stored in `cleandoc` as a list of strings.
    Úmdr   c                   s@   d|vrd|d< t dgƒ| _dg| _tƒ j|i |¤Ž || _d S )NÚconvert_charrefsFÚhrr   )ÚsetÚ
empty_tagsÚlineno_start_cacher   Ú__init__r)   )r   r)   ÚargsÚkwargsr   r   r   r/      s   
zHTMLExtractor.__init__c                   s4   d| _ d| _g | _g | _g | _dg| _tƒ  ¡  dS )z1Reset this instance.  Loses all unprocessed data.Fr   N)ÚinrawÚintailÚstackÚ_cacheÚcleandocr.   r   Úreset©r   r   r   r   r7   Ž   s   zHTMLExtractor.resetc                   sv   t ƒ  ¡  t| jƒr | jr| js|  t | j¡¡ n|  | j¡ t| j	ƒr9| j
 | jj d | j	¡¡¡ g | _	dS dS )zHandle any buffered data.Ú N)r   Úcloser   r   r*   Ú
cdata_elemr   r   Úunescaper5   r6   Úappendr)   Ú	htmlStashÚstoreÚjoinr8   r   r   r   r:   ™   s   



þzHTMLExtractor.closer   r   c                 C  sj   t t| jƒd | jd ƒD ]}| j| }| j d|¡}|dkr$t| jƒ}| j |d ¡ q| j| jd  S )zHReturns char index in `self.rawdata` for the start of the current line. é   Ú
r	   )Úranger   r.   Úlinenor   Úfindr=   )r   ÚiiÚlast_line_start_posÚlf_posr   r   r   Úline_offset¨   s   

zHTMLExtractor.line_offsetÚboolc                 C  s<   | j dkrdS | j dkrdS | j| j| j| j  …  ¡ dkS )z†
        Returns True if current position is at start of line.

        Allows for up to three blank spaces at start of line.
        r   Tr   Fr9   )Úoffsetr   rI   Ústripr8   r   r   r   Úat_line_startµ   s
   

 zHTMLExtractor.at_line_startÚtagÚstrc                 C  s<   | j | j }tj | j|¡}|r| j|| ¡ … S d |¡S )z™
        Returns the text of the end tag.

        If it fails to extract the actual text from the raw data, it builds a closing tag with `tag`.
        z</{}>)rI   rK   r   Ú	endendtagÚsearchr   ÚendÚformat)r   rN   r   Úmr   r   r   Úget_endtag_textÂ   s
   
zHTMLExtractor.get_endtag_textÚattrsúSequence[tuple[str, str]]c                 C  sœ   || j v r|  ||¡ d S | j |¡r&| js|  ¡ r&| js&d| _| j d¡ |  	¡ }| jr;| j
 |¡ | j |¡ d S | j |¡ || jv rL|  ¡  d S d S )NTrB   )r-   Úhandle_startendtagr)   Úis_block_levelr3   rM   r2   r6   r=   Úget_starttag_textr4   r5   ÚCDATA_CONTENT_ELEMENTSÚclear_cdata_mode)r   rN   rV   Útextr   r   r   Úhandle_starttagÑ   s   
 
þzHTMLExtractor.handle_starttagc                 C  sÖ   |   |¡}| jrc| j |¡ || jv r!| jr!| j ¡ |krn| jst| jƒdkrat | j	| j
| j t|ƒ d … ¡rA| j d¡ nd| _d| _| j | jj d | j¡¡¡ | j d¡ g | _d S d S | j |¡ d S )Nr   rB   TFr9   ú

)rU   r2   r5   r=   r4   Úpopr   Úblank_line_reÚmatchr   rI   rK   r3   r6   r)   r>   r?   r@   )r   rN   r]   r   r   r   Úhandle_endtagæ   s$   

þ$
ózHTMLExtractor.handle_endtagÚdatac                 C  s:   | j r
d|v r
d| _ | jr| j |¡ d S | j |¡ d S )NrB   F)r3   r2   r5   r=   r6   ©r   rd   r   r   r   r     s
   zHTMLExtractor.handle_dataÚis_blockc                 C  sÆ   | j s| jr| j |¡ dS |  ¡ r[|r[t | j| j| j	 t
|ƒ d… ¡r+|d7 }nd| _| jr6| jd nd}| d¡sH| d¡rH| j d¡ | j | jj |¡¡ | j d¡ dS | j |¡ dS )z Handle empty tags (`<data>`). NrB   Tr	   r9   r_   )r2   r3   r5   r=   rM   ra   rb   r   rI   rK   r   r6   Úendswithr)   r>   r?   )r   rd   rf   Úitemr   r   r   Úhandle_empty_tag	  s   $
zHTMLExtractor.handle_empty_tagc                 C  s   | j |  ¡ | j |¡d d S )N©rf   )ri   rZ   r)   rY   )r   rN   rV   r   r   r   rX      s   z HTMLExtractor.handle_startendtagÚnamec                 C  ó   | j d |¡dd d S )Nz&#{};Frj   ©ri   rS   ©r   rk   r   r   r   Úhandle_charref#  ó   zHTMLExtractor.handle_charrefc                 C  rl   )Nz&{};Frj   rm   rn   r   r   r   Úhandle_entityref&  rp   zHTMLExtractor.handle_entityrefc                 C  rl   )Nz	<!--{}-->Trj   rm   re   r   r   r   Úhandle_comment)  s   zHTMLExtractor.handle_commentc                 C  rl   )Nz<!{}>Trj   rm   re   r   r   r   Úhandle_decl-  rp   zHTMLExtractor.handle_declc                 C  rl   )Nz<?{}?>Trj   rm   re   r   r   r   Ú	handle_pi0  rp   zHTMLExtractor.handle_pic                 C  s,   |  d¡rdnd}| jd ||¡dd d S )NzCDATA[z]]>z]>z<![{}{}Trj   )Ú
startswithri   rS   )r   rd   rR   r   r   r   Úunknown_decl3  s   zHTMLExtractor.unknown_declr   c                   s,   |   ¡ s| jrtƒ  |¡S |  d¡ |d S )Nz<?r   )rM   r3   r   Úparse_pir   r!   r   r   r   rw   7  s   
zHTMLExtractor.parse_piTc                 C  sj   | j }| d|¡sJ dƒ‚t ||d ¡}|s |  d¡ |d S |r1| ¡ }|  ||d |… ¡ | ¡ S )Nz<!--z"unexpected call to parse_comment()é   ú<rA   )r   ru   ÚcommentcloserQ   r   r   rr   rR   )r   r   Úreportr   rb   Újr   r   r   Úparse_commentA  s   
zHTMLExtractor.parse_commentc                   sŽ   |   ¡ s| jr>| j||d … dkr8| j||d … dks8|  |¡}|dkr6|  | j||d … ¡ |d S |S tƒ  |¡S |  d¡ |d S )	Nr   z<![é	   z	<![CDATA[r	   rA   z<!r   )rM   r3   r   Úparse_bogus_commentr   r   Úparse_html_declaration)r   r   Úresultr   r   r   r€   M  s   ,

z$HTMLExtractor.parse_html_declarationr   r{   c                   s6   t ƒ  ||¡}|dkrdS | j| j||… dd |S )Nr	   Frj   )r   r   ri   r   )r   r   r{   Úposr   r   r   r   ]  s
   z!HTMLExtractor.parse_bogus_commentNz
str | NoneÚ_HTMLExtractor__starttag_textc                 C  s   | j S )z)Return full source of start tag: `<...>`.)rƒ   r8   r   r   r   rZ   l  s   zHTMLExtractor.get_starttag_textc                 C  s@  | j ||d … dkr|  | j ||d … ¡ |d S d | _|  |¡}|dk r7|  | j ||d … ¡ |d S | j }|||… | _g }tj ||d ¡}|sRJ dƒ‚| ¡ }| d¡ 	¡  | _
}||k rÂtj ||¡}|snnT| ddd¡\}	}
}|
s}d }n-|d d… d  kr|dd … ks¤n |d d… d	  kr¢|dd … krªn n|dd… }|r±t |¡}| |	 	¡ |f¡ | ¡ }||k sd|||…  ¡ }|d
vrÿ|  ¡ \}}d| jv rí|| j d¡ }t| jƒ| j d¡ }n|t| jƒ }|  |||… ¡ |S | d¡r|  ||¡ |S || jv r|  |¡ |  ||¡ |S )Nr   r   r   rA   z#unexpected call to parse_starttag()r   ú'r	   ú")ú>ú/>rB   r‡   )r   r   rƒ   Úcheck_for_whole_start_tagr   Útagfind_tolerantrb   rR   ÚgroupÚlowerÚlasttagÚattrfind_tolerantr<   r=   rL   ÚgetposÚcountr   Úrfindrg   rX   r[   Úset_cdata_moder^   )r   r   Úendposr   rV   rb   ÚkrN   rT   ÚattrnameÚrestÚ	attrvaluerR   rD   rK   r   r   r   r    p  s`   
&(
ó

ÿý
zHTMLExtractor.parse_starttag)r)   r   )r   r   )r   rJ   )rN   rO   r   rO   )rN   rO   rV   rW   )rN   rO   )rd   rO   )rd   rO   rf   rJ   )rk   rO   r"   )T)r   )r   r   r{   r   r   r   )r   rO   )r#   r$   r%   r&   r/   r7   r:   ÚpropertyrI   rM   rU   r^   rc   r   ri   rX   ro   rq   rr   rs   rt   rv   rw   r}   r€   r   rƒ   Ú__annotations__rZ   r    r'   r   r   r   r   r(   x   s6   
 















r(   )"r&   Ú
__future__r   ÚreÚimportlib.utilÚ	importlibÚsysÚtypingr   r   Úmarkdownr   Úcompilerz   ÚutilÚ	find_specÚspecÚmodule_from_specr   ÚloaderÚexec_moduleÚmodulesÚstarttagopenÚ
endtagopenÚpicloseÚ	entityrefÚ
incompleteÚVERBOSEÚlocatestarttagend_tolerantÚlocatetagendra   Ú
HTMLParserr   r(   r   r   r   r   Ú<module>   s8   

òó
