o
    “^©iO@  ã                   @   sh  d dl Z d dlmZmZmZmZmZ d dlZddlm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZ ddlmZmZ e jde jd	Ze  d
¡Ze jde jd	Z e jde jd	Z!e  d¡Z"e  d¡Z#dd $e¡ d d $e¡ d Z%e  ed ¡Z&e  d¡Z'e  d¡Z(G dd„ dee ƒZ)dede*de+de+fdd„Z,dedee* de+fdd„Z-dS )é    N)ÚOptionalÚListÚTupleÚMatchÚPatterné   )ÚunikeyÚ
escape_urlÚ
expand_tabÚexpand_leading_tab)ÚParserÚ
BlockState)Ú
LINK_LABELÚHTML_TAGNAMEÚHTML_ATTRIBUTESÚ
BLOCK_TAGSÚPRE_TAGSÚunescape_charÚparse_link_hrefÚparse_link_title)Ú
parse_listÚLIST_PATTERNz^ {1,4})Úflagsz(\s+|^)#+\s*$z^ ?z^ *>z\n[ \t]*\n$z[ \t]*\nú(ú|ú)z[ \t]*>[ \t]*(?:\n|$)z( {0,3}>[^\n]*(?:\n|$))+c                       s  e Zd ZeZe dej¡Zde	 d Z
de d Zddddd	d
de d deee
dœZdZ			d5deee  deee  def‡ fdd„Zdee dedefdd„Zdee dedefdd„Zdee dedefdd„Zdee dedee fd d!„Zdee dedefd"d#„Zdee dedee fd$d%„Zdee dedee fd&d'„Zdee dedeeee f fd(d)„Zdee dedefd*d+„Z dee dedefd,d-„Z!dee dedee fd.d/„Z"dee dedee fd0d1„Z#d6ded2eee  ddfd3d4„Z$‡  Z%S )7ÚBlockParserz(^[ \t\v\f]*\n)+z^ {0,3}(</?z|<!--|<\?|<![A-Z]|<!\[CDATA\[)z^ {0,3}(?:(?:</?z.(?:[ \t]+|\n|$))|<!--|<\?|<![A-Z]|<!\[CDATA\[)z:^ {0,3}(?P<atx_1>#{1,6})(?!#+)(?P<atx_2>[ \t]*|[ \t]+.*?)$z#^ {0,3}(?P<setext_1>=|-){1,}[ \t]*$zF^(?P<fenced_1> {0,3})(?P<fenced_2>`{3,}|~{3,})[ \t]*(?P<fenced_3>.*?)$zC^(?: {4}| *\t)[^\n]+(?:\n+|$)((?:(?: {4}| *\t)[^\n]+(?:\n+|$))|\s)*z:^ {0,3}((?:-[ \t]*){3,}|(?:_[ \t]*){3,}|(?:\*[ \t]*){3,})$z^ {0,3}\[(?P<reflink_1>z)\]:z^ {0,3}>(?P<quote_1>.*?)$)Ú
blank_lineÚatx_headingÚsetex_headingÚfenced_codeÚindent_codeÚthematic_breakÚref_linkÚblock_quoteÚlistÚ
block_htmlÚraw_html)
r    r!   r   r   r"   r$   r%   r#   r'   r   Né   Úblock_quote_rulesÚ
list_rulesÚmax_nested_levelc                    s^   t tˆ ƒ ¡  |d u rtˆ jƒ}|d u rtˆ jƒ}|ˆ _|ˆ _|ˆ _‡ fdd„ˆ jD ƒˆ _	d S )Nc                    s   i | ]
}|t ˆ d | ƒ“qS )Úparse_)Úgetattr)Ú.0Úname©Úself© úO/home/ubuntu/hpml_nyu/venv/lib/python3.10/site-packages/mistune/block_parser.pyÚ
<dictcomp>o   s    z(BlockParser.__init__.<locals>.<dictcomp>)
Úsuperr   Ú__init__r%   ÚDEFAULT_RULESr)   r*   r+   ÚSPECIFICATIONÚ_methods)r1   r)   r*   r+   ©Ú	__class__r0   r3   r6   ]   s   

zBlockParser.__init__ÚmÚstateÚreturnc                 C   s   |  ddi¡ | ¡ S )zParse token for blank lines.Útyper   ©Úappend_tokenÚend©r1   r<   r=   r2   r2   r3   Úparse_blank_lineq   s   zBlockParser.parse_blank_linec                 C   s   |  ddi¡ | ¡ d S )z:Parse token for thematic break, e.g. ``<hr>`` tag in HTML.r?   r"   r   r@   rC   r2   r2   r3   Úparse_thematic_breakv   s   z BlockParser.parse_thematic_breakc                 C   sR   |  ¡ }|r|S | d¡}t|ƒ}t d|¡}| d¡}| d|ddœ¡ | ¡ S )z9Parse token for code block which is indented by 4 spaces.r   Ú Ú
Ú
block_codeÚindent)r?   ÚrawÚstyle)Úappend_paragraphÚgroupr   Ú_INDENT_CODE_TRIMÚsubÚstriprA   rB   )r1   r<   r=   Úend_posÚcoder2   r2   r3   Úparse_indent_code|   s   

zBlockParser.parse_indent_codec                 C   s"  |  d¡}|  d¡}|  d¡}|d }|r"|dkr"| |¡dkr"dS t d| d	 tt|ƒƒ d
 tj¡}| ¡ d }| |j	|¡}	|	rR|j	||	 
¡ … }
|	 ¡ }n
|j	|d… }
|j}|ru|
rut dtt|ƒƒ d tj¡}| d|
¡}
d|
d|dœ}|rŠt|ƒ}d| ¡ i|d< | |¡ |S )a9  Parse token for fenced code block. A fenced code block is started with
        3 or more backtick(`) or tilde(~).

        An example of a fenced code block:

        .. code-block:: markdown

            ```python
            def markdown(text):
                return mistune.html(text)
            ```
        Úfenced_1Úfenced_2Úfenced_3r   ú`éÿÿÿÿNz^ {0,3}Ú{z,}[ \t]*(?:\n|$)r   z^ {0,Ú}rF   rH   Úfenced)r?   rJ   rK   ÚmarkerÚinfoÚattrs)rM   ÚfindÚreÚcompileÚstrÚlenÚMrB   ÚsearchÚsrcÚstartÚ
cursor_maxrO   r   rP   rA   )r1   r<   r=   Úspacesr\   r]   ÚcÚ_endÚcursor_startÚm2rR   rQ   Ú_trim_patternÚtokenr2   r2   r3   Úparse_fenced_codeŠ   s0   


&

zBlockParser.parse_fenced_codec                 C   sX   t | d¡ƒ}| d¡ tj¡}|rt d|¡}d|d|iddœ}| |¡ | ¡ d S )	z[Parse token for ATX heading. An ATX heading is started with 1 to 6
        symbol of ``#``.Úatx_1Úatx_2rF   ÚheadingÚlevelÚatx)r?   Útextr^   rK   r   )	rc   rM   rP   ÚstringÚ
whitespaceÚ_ATX_HEADING_TRIMrO   rA   rB   )r1   r<   r=   rt   rv   ro   r2   r2   r3   Úparse_atx_heading¹   s   
zBlockParser.parse_atx_headingc                 C   sˆ   |  ¡ }|r+|d dkr+| d¡dkrdnd}d|d< d|d	< d
|i|d< | ¡ d S |  ddg¡}| |j|j¡}|rB|  ||¡S dS )zParse token for setex style heading. A setex heading syntax looks like:

        .. code-block:: markdown

            H1 title
            ========
        r?   Ú	paragraphÚsetext_1ú=r   é   rs   ÚsetextrK   rt   r^   r"   r%   N)Ú
last_tokenrM   rB   Ú
compile_scÚmatchrf   ÚcursorÚparse_method)r1   r<   r=   r€   rt   Úscrm   r2   r2   r3   Úparse_setex_headingÆ   s   zBlockParser.parse_setex_headingc                 C   sB  |  ¡ }|r|S | d¡}t|ƒ}|sdS t|j| ¡ dd\}}|du r'dS |dus-J ‚| j |j|¡}|r<| ¡ }	n|j	}	t
|j||	ƒ\}
}|r\t |j|¡}|rX| ¡ }nd}d}
|du rrt |j|¡}|rn| ¡ }nd}d}|pu|}|szdS ||jd vrŸ|dus‡J ‚t|ƒ}t|ƒ|dœ}|
r˜|
|d< ||jd |< |S )aã  Parse link references and save the link information into ``state.env``.

        Here is an example of a link reference:

        .. code-block:: markdown

            a [link][example]

            [example]: https://example.com "Optional title"

        This method will save the link reference into ``state.env`` as::

            state.env['ref_links']['example'] = {
                'url': 'https://example.com',
                'title': "Optional title",
            }
        Ú	reflink_1NT)ÚblockÚ	ref_links)ÚurlÚlabelÚtitle)rL   rM   r   r   rf   rB   Ú
BLANK_LINEre   rg   rh   r   Ú_BLANK_TO_LINEr‚   Úenvr   r	   )r1   r<   r=   rQ   r‹   ÚkeyÚhrefÚhref_posÚ_blankÚmax_posrŒ   Ú	title_posrm   Úm3Údatar2   r2   r3   Úparse_ref_linkÜ   sN   



zBlockParser.parse_ref_linkc                 C   s¦  |  d¡d }t|dƒ}t d|¡}|  g d¢¡}t| |¡ƒ}| ¡ d |_d}|rUt	 |j
|j¡}|rT|  d¡}t d|¡}t|dƒ}t d|¡}||7 }| ¡ |_nxd	}	|  g d
¢¡}
|j|jk rÍt	 |j
|j¡}|rœ|  d¡}t d|¡}t|dƒ}t d|¡}||7 }| ¡ |_| ¡ s”d}	ntt |¡ƒ}	q^|	rŸn.|
 |j
|j¡}|r²|  ||¡}|r²n| ¡ }| |¡}t|dƒ}||7 }||_|j|jk sdt|ƒ|fS )z6Extract text and cursor end position of a block quote.Úquote_1rG   é   rF   )r   r!   r    r   Nr   F)r   r"   r    r%   r&   T)rM   r   Ú_BLOCK_QUOTE_TRIMrO   r   Úboolr‚   rB   rƒ   Ú_STRICT_BLOCK_QUOTErf   Ú_BLOCK_QUOTE_LEADINGrh   rP   Ú_LINE_BLANK_ENDre   r„   Úfind_line_endÚget_textr
   )r1   r<   r=   rv   r…   Úrequire_markerrQ   rm   ÚquoteÚprev_blank_lineÚbreak_scr–   Úm4ÚposÚliner2   r2   r3   Úextract_block_quote!  s`   



€ÿ	




à$zBlockParser.extract_block_quotec                 C   s‚   |   ||¡\}}| |¡}| ¡ | jd kr!t| jƒ}| d¡ n| j}|  ||¡ d|jdœ}|r9| 	|¡ |S | 
|¡ |jS )z­Parse token for block quote. Here is an example of the syntax:

        .. code-block:: markdown

            > a block quote starts
            > with right arrows
        r   r$   )r?   Úchildren)r©   Úchild_stateÚdepthr+   r%   r)   ÚremoveÚparseÚtokensÚprepend_tokenrA   rƒ   )r1   r<   r=   rv   rQ   ÚchildÚrulesro   r2   r2   r3   Úparse_block_quotei  s   



zBlockParser.parse_block_quotec                 C   s   t | ||ƒS )z,Parse tokens for ordered and unordered list.)r   rC   r2   r2   r3   r   ‚  s   zBlockParser.parse_listc                 C   s   |   ||¡S ©N)Úparse_raw_htmlrC   r2   r2   r3   Úparse_block_html†  s   zBlockParser.parse_block_htmlc           	      C   sH  |  d¡ ¡ }|dkrt|d| ¡ ƒS |dkrt|d| ¡ ƒS |dkr+t|d| ¡ ƒS | d¡r8t|d	| ¡ ƒS d }d }| d
¡rT|dd …  ¡ }|tv rSt|| jƒS n$|dd …  ¡ }|t	v rnd
| d	 }t||| ¡ ƒS |tv rxt|| jƒS | 
¡ }|r€|S | ¡ }| ¡ }|r’t |j||¡sœ|r¢t |j||¡r¢t|| jƒS d S )Nr   z<!--z-->z<?z?>z	<![CDATA[z]]>z<!ú>z</r~   r   )rM   rP   Ú_parse_html_to_endrB   Ú
startswithÚlowerr   Ú_parse_html_to_newliner   r   rL   r    Ú_OPEN_TAG_ENDr‚   rf   Ú_CLOSE_TAG_END)	r1   r<   r=   r\   Ú	close_tagÚopen_tagÚend_tagrQ   Ú	start_posr2   r2   r3   rµ   ‰  sD   

ÿÿÿzBlockParser.parse_raw_htmlr²   c           	      C   sÔ   |   |¡}|j|jk rO| |j|j¡}|sn9| ¡ }||jkr,| |¡}| |¡ ||_|  ||¡}|r8||_n| 	¡ }| |¡}| |¡ ||_|j|jk s|j|jk rh|j|jd … }| |¡ |j|_d S d S r´   )
r   rƒ   rh   re   rf   rg   r¡   Úadd_paragraphr„   r    )	r1   r=   r²   r…   r<   rQ   rv   Úend_pos2Úend_pos3r2   r2   r3   r®   ¼  s.   





î
ýzBlockParser.parse)NNr(   r´   )&Ú__name__Ú
__module__Ú__qualname__r   Ú	state_clsr`   ra   rd   r   r   ÚRAW_HTMLÚ_BLOCK_TAGS_PATTERNÚ
BLOCK_HTMLr   r   r8   r7   r   r   rb   Úintr6   r   rD   rE   rS   rp   rz   r†   r˜   r   r©   r³   r   r¶   rµ   r®   Ú__classcell__r2   r2   r:   r3   r   %   sf    ÿÿÿ
ÿÿÿ

ïü
þ
ýü/&EH(3r   r=   Ú
end_markerrÁ   r>   c                 C   sh   | j  ||¡}|dkr| j | jd … }| j}n|  |¡}|| _|  ¡ }||  |¡7 }|  d|dœ¡ |S )NrX   r&   ©r?   rJ   )rf   r_   rƒ   rh   r¡   r    rA   )r=   rÎ   rÁ   Ú
marker_posrv   rQ   r2   r2   r3   r¸   Ù  s   
r¸   Únewlinec                 C   sR   |  | j| j¡}|r| ¡ }|  |¡}n| j| jd … }| j}|  d|dœ¡ |S )Nr&   rÏ   )re   rf   rƒ   rg   r¡   rh   rA   )r=   rÑ   r<   rQ   rv   r2   r2   r3   r»   è  s   r»   ).r`   Útypingr   r   r   r   r   rw   Úutilr   r	   r
   r   Úcorer   r   Úhelpersr   r   r   r   r   r   r   r   Úlist_parserr   r   ra   rd   rN   ry   r›   rž   rŸ   rŽ   ÚjoinrÊ   r¼   r½   r   r   rb   rÌ   r¸   r»   r2   r2   r2   r3   Ú<module>   s.    (



 

   7