o
    Ni:l                     @  s  d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZmZ ddlmZ ddlmZmZmZ dd	lmZmZmZmZmZ dd
lmZ ddlmZmZm Z m!Z! ddlm"Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z, ddl-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4 ddl5m6Z6 ddl7m8Z8 erddl9m:Z:m;Z; ddlm<Z< ddl,m=Z= ddl>m?Z? ddl@mAZA ddlBmCZC ddlDmEZE e+FeGZHeIdZJddiZKdZLdZMd ZNG d!d" d"e%ZOG d#d$ d$e)ZPG d%d& d&eZQG d'd( d(ZRG d)d* d*eZSG d+d, d,eZTG d-d. d.eZUdTd5d6ZVdUd;d<ZWG d=d> d>e	ZXdVdCdDZYG dEdF dFeZZdWdJdKZ[dXdOdPZ\dYdRdSZ]dS )Zz$The CheckExternalLinksBuilder class.    )annotationsN)
HTMLParser)path)PriorityQueueQueue)Thread)TYPE_CHECKING
NamedTuplecast)quoteunquoteurlparseurlsplit
urlunparse)nodes)ConnectionError	HTTPErrorSSLErrorTooManyRedirects)Timeout)DummyBuilder)__)SphinxPostTransform)loggingrequests)
encode_uri)darkgray	darkgreenpurplered	turquoise)rfc1123_to_epoch)get_node_line)CallableIterator)Any)Response)Sphinx)Config)_StrPath)ExtensionMetadataz([a-z]+:)?//Acceptz/text/html,application/xhtml+xml;q=0.9,*/*;q=0.8   g      N@c                   @  sN   e Zd ZdZdZedZdddZddd	ZdddZ	dddZ
d ddZdS )!CheckExternalLinksBuilderz+
    Checks for broken external links.
    	linkcheckzCLook for any errors in the above output or in %(outdir)s/output.txtreturnNonec                 C  s    d| _ d| _i | _td d S )Nr   g      @)broken_hyperlinkstimed_out_hyperlinks
hyperlinkssocketsetdefaulttimeout)self r7   M/home/ubuntu/.local/lib/python3.10/site-packages/sphinx/builders/linkcheck.pyinit>   s   zCheckExternalLinksBuilder.initc              	   C  s   t | j}td t| jd}t| jd}t|ddd/| _t|ddd| _	|
| jD ]}| | q0W d    n1 sBw   Y  W d    n1 sQw   Y  | js\| jrbd| j_d S d S )N z
output.txtzoutput.jsonwzutf-8)encodingr,   )HyperlinkAvailabilityCheckerconfigloggerinfor   joinoutdiropentxt_outfilejson_outfilecheckr3   process_resultr1   r2   app
statuscode)r6   checkeroutput_textoutput_jsonresultr7   r7   r8   finishE   s&   

 z CheckExternalLinksBuilder.finishrM   CheckResultc                 C  sD  | j |jd}t||j|j|j|j|jd}| 	| |jdkr$d S |jdkr0|jdkr0d S |jr>t
jd|j|jdd |jd	krc|jrWt
td
|j d |j  d S t
td
|j  d S |jdkrt
td|j  | d|j||j|j d S |jdkrt
td|j |j  d S |jdkr| jjrt
jd|j |j |j|jfd nt
td|j td|j   | d|j||j|jd |j  |  jd7  _d S |jdkr!| jjrt
jtd|j|j|j|jfd nt
td|j td|j   | d|j||j|jd |j  |  jd7  _d S |jdkrzdtfdtfdtfdtfdtfd|j \}}W n tyO   dt}}Y nw ||d< | jjrqt
jd|j d | d  |j |j|jfd nt
|d|j |d| d  |j   | d!| |j||j|jd  |j  d S td"|j )#NF)filenamelinenostatuscodeurir@   	uncheckedworkingoldz(%16s: line %4d) T)nonlignoredz
-ignored- z: localz
-local-   z
ok        timeoutz
timeout   )locationz - r,   brokenzbroken link: %s (%s)z
broken    
redirectedpermanentlyz
with Foundzwith See Othertemporarily)i-  i.  i/  i3  i4  zwith unknown codetextz
redirect  z to zredirected zUnknown status %s.)envdoc2pathdocnamestrrQ   rR   rS   rT   messagewrite_linkstatr?   r@   r   write_entryr   rH   quietwarningr   r2   r   r1   r   r    KeyErrorr>   linkcheck_allowed_redirects
ValueError)r6   rM   rP   linkstatra   colorr7   r7   r8   rG   U   s   


"






z(CheckExternalLinksBuilder.process_resultdatadict[str, str | int]c                 C  s"   | j t| | j d d S )N
)rE   writejsondumps)r6   rp   r7   r7   r8   rg      s   z(CheckExternalLinksBuilder.write_linkstatwhatre   rd   rP   r)   lineintrT   c              
   C  s(   | j | d| d| d| d d S )N:z: [z] rr   )rD   rs   )r6   rv   rd   rP   rw   rT   r7   r7   r8   rh      s   (z%CheckExternalLinksBuilder.write_entryNr/   r0   )rM   rO   r/   r0   )rp   rq   r/   r0   )rv   re   rd   re   rP   r)   rw   rx   rT   re   r/   r0   )__name__
__module____qualname____doc__namer   epilogr9   rN   rG   rg   rh   r7   r7   r7   r8   r-   6   s    



fr-   c                   @  s2   e Zd ZdZdZdddZdddZdddZdS )HyperlinkCollector)r.      kwargsr%   r/   r0   c                 K  s.   | j  D ]}| | }r| || qd S N)documentfindallfind_uri_add_uri)r6   r   noderT   r7   r7   r8   run   s
   zHyperlinkCollector.runr   nodes.Element
str | Nonec                 C  sp   t |tjrd|v r|d S t |tjr#|d d}|r#d|v r#|S t |tjr6|d}|r6d|v r6|S dS )a  Find a URI for a given node.

        This call can be used to retrieve a URI from a provided node. If no
        URI exists for a provided node, this call will return ``None``.

        This method can be useful for extension developers who wish to
        easily inject hyperlinks into a builder by only needing to override
        this method.

        :param node: A node class
        :returns: URI of the node
        refuri
candidates?://sourceN)
isinstancer   	referenceimagegetraw)r6   r   rT   r7   r7   r8   r      s   
zHyperlinkCollector.find_urirT   re   c                 C  s   t t| jj}|j}| jj}| jd| }r|}zt|}W n t	y*   d}Y nw ||vr>t
||| j||||< dS dS )a  Registers a node's URI into a builder's collection of hyperlinks.

        Provides the ability to register a URI value determined from a node
        into the linkcheck's builder. URI's processed through this call can
        be manipulated through a ``linkcheck-process-uri`` event before the
        builder attempts to validate.

        :param uri: URI to add
        :param node: A node class where the URI was found
        linkcheck-process-uriN)r
   r-   rH   builderr3   rb   rd   emit_firstresultr"   rm   	Hyperlinkrc   )r6   rT   r   r   r3   rd   newurirQ   r7   r7   r8   r      s   zHyperlinkCollector._add_uriN)r   r%   r/   r0   )r   r   r/   r   )rT   re   r   r   r/   r0   )r{   r|   r}   buildersdefault_priorityr   r   r   r7   r7   r7   r8   r      s    

 r   c                   @  s.   e Zd ZU ded< ded< ded< ded< dS )	r   re   rT   rd   r)   docpathrx   rQ   Nr{   r|   r}   __annotations__r7   r7   r7   r8   r     s
   
 r   c                   @  s>   e Zd ZdddZdd
dZdddZdddZdddZdS )r=   r>   r(   r/   r0   c                 C  sD   || _ i | _t | _g | _t | _|j| _t	t
tj| j j| _d S r   )r>   rate_limitsr   rqueueworkersr   wqueuelinkcheck_workersnum_workerslistmaprecompilelinkcheck_ignore	to_ignore)r6   r>   r7   r7   r8   __init__  s   
z%HyperlinkAvailabilityChecker.__init__r3   dict[str, Hyperlink]Iterator[CheckResult]c                 c  s    |    d}| D ]$}| |jr!t|j|j|jdddV  q| jt	t
|d |d7 }qd}||k rD| j V  |d7 }||k s6|   d S )Nr   rY   r:   Fr,   )invoke_threadsvaluesis_ignored_urirT   rO   rd   rQ   r   putCheckRequestCHECK_IMMEDIATELYr   r   shutdown_threads)r6   r3   total_links	hyperlinkdoner7   r7   r8   rF     s    
z"HyperlinkAvailabilityChecker.checkc                 C  s>   t | jD ]}t| j| j| j| j}|  | j	| qd S r   )
ranger    HyperlinkAvailabilityCheckWorkerr>   r   r   r   startr   append)r6   _ithreadr7   r7   r8   r   3  s   z+HyperlinkAvailabilityChecker.invoke_threadsc                 C  s.   | j   | jD ]}| j ttd d qd S NF)r   rA   r   r   r   r   )r6   _workerr7   r7   r8   r   ;  s   

z-HyperlinkAvailabilityChecker.shutdown_threadsrT   re   boolc                   s   t  fdd| jD S )Nc                 3  s    | ]}|  V  qd S r   match).0patrT   r7   r8   	<genexpr>A  s    z>HyperlinkAvailabilityChecker.is_ignored_uri.<locals>.<genexpr>)anyr   )r6   rT   r7   r   r8   r   @  s   z+HyperlinkAvailabilityChecker.is_ignored_uriN)r>   r(   r/   r0   )r3   r   r/   r   rz   )rT   re   r/   r   )r{   r|   r}   r   rF   r   r   r   r7   r7   r7   r8   r=     s    



r=   c                   @  s   e Zd ZU ded< ded< dS )r   float
next_checkzHyperlink | Noner   Nr   r7   r7   r7   r8   r   D     
 r   c                   @  s>   e Zd ZU ded< ded< ded< ded< ded< ded< d	S )
rO   re   rT   rd   rx   rQ   rR   rf   rS   Nr   r7   r7   r7   r8   rO   I  s   
 rO   c                      sT   e Zd ZdZd& fddZd'ddZd(ddZd)ddZd*ddZd+d$d%Z	  Z
S ),r   z;A worker class for checking the availability of hyperlinks.r>   r(   r   Queue[CheckResult]r   Queue[CheckRequest]r   dict[str, RateLimit]r/   r0   c                   s   || _ || _|| _tttj|j| _tttj|j	| _
tttj|j| _dd |jD | _|j| _|j| _|j| _|  |j| _|j| _|j| _|j| _|jrUd| _nd| _|j| _|j | _ |j!| _!t"# | _$t% j&dd d S )Nc                 S  s   g | ]\}}t ||fqS r7   )r   r   )r   pattern	auth_infor7   r7   r8   
<listcomp>i  s    z=HyperlinkAvailabilityCheckWorker.__init__.<locals>.<listcomp>r]   r[   T)daemon)'r   r   r   r   r   r   r   linkcheck_anchors_ignoreanchors_ignore linkcheck_anchors_ignore_for_urlanchors_ignore_for_urllinkcheck_exclude_documentsdocuments_excludelinkcheck_authauthlinkcheck_timeoutr[   linkcheck_request_headersrequest_headerslinkcheck_anchorscheck_anchorsrl   allowed_redirectslinkcheck_retriesretrieslinkcheck_rate_limit_timeoutrate_limit_timeoutlinkcheck_allow_unauthorized_allow_unauthorized#linkcheck_report_timeouts_as_broken_timeout_status
user_agent
tls_verifytls_cacertsr   _Session_sessionsuperr   )r6   r>   r   r   r   	__class__r7   r8   r   U  s@   
z)HyperlinkAvailabilityCheckWorker.__init__c              	   C  s  	 | j  \}}|d u r| j  d S |\}}}}|d u rd S t|j}tt | j	| j
}W d    n1 s:w   Y  |t krZtt | j t||d | j   q | |||\}}	}
|dkrvttd| td  n| jt|||||	|
 | j   q)NTFrate-limitedz-rate limited-   z | sleeping...)r   r   r   closer   netloc
contextlibsuppressrk   r   r   timesleepQUEUE_POLL_SECSr   r   	task_done_checkr?   r@   r   r   rO   )r6   r   r   rT   rd   _docpathrQ   r   rR   r@   rS   r7   r7   r8   r     s2   




z$HyperlinkAvailabilityCheckWorker.runrd   re   rT   r   r   tuple[str, str, int]c           
      C  s   | j D ]}||r| d|j d}d|df  S qt|dks&|dr(dS |dsGt|r4dS t|j}t	t
||rEdS d	S d
\}}}t| jD ]}	| ||\}}}|dkrb nqQ|||fS )Nz	 matched z! from linkcheck_exclude_documentsrY   r   )#zmailto:ztel:)rU   r:   r   )zhttp:zhttps:rV   r:   r   )r]   r:   r   )r:   r:   r   r]   )r   r   r   len
startswithuri_rer   dirnamer   existsrA   r   r   
_check_uri)
r6   rd   rT   r   doc_matcherr@   src_dirrR   rS   _r7   r7   r8   r     s,   





z'HyperlinkAvailabilityCheckWorker._checkr   r   anchor9Iterator[tuple[Callable[..., Response], dict[str, bool]]]c                 c  s2    |r|s| j jddifV  | j jddifV  d S )Nallow_redirectsTstream)r   headr   )r6   r   r
  r7   r7   r8   _retrieval_methods  s   z3HyperlinkAvailabilityCheckWorker._retrieval_methodsc                 C  s  | d\}}}|r.|r.| jD ]}||rd} nq| jD ]}||r)d} nqt|}z|d W n tyB   t|}Y nw | jD ]\}}||rQ nqFd }t	|| j
}	d}
d}d }}| | j|D ]Z\}}z|d|||	| jd|| j| j| jfdA}|r| jr|jrzt||}W n ty   Y W d    W  dS w |sdtd	t| d
fW  d    W   S W d    n1 sw   Y  |j}|jr|jd jnd }|jdd}|j }|  ~W  n ty } z| jt|d
fW  Y d }~  S d }~w ty' } zdt|d
fW  Y d }~  S d }~w t t!fy? } z
t|}
W Y d }~qid }~w t"y } z^t|}
|dkre| j#rVdnd}|dd
fW  Y d }~  S |dkr| $|| }r| j%&t'||d W Y d }~ dS d|
d
fW  Y d }~  S |dkrW Y d }~ dS W Y d }~qid }~w t(y } zdt|d
fW  Y d }~  S d }~ww d|
d
fS t)|j*}| j+,|d  |-d|-dkst.||| j/rdS |d urd||fS d|d
fS )Nr   r:   asciir   )urlr   headersr[   )_user_agent	_tls_info)rY   z!unable to decode response contentr   r]   zAnchor '%s' not foundr   zRetry-Afteri  rV   unauthorizedi  F)r   r:   r   i  )rY   zservice unavailabler   /r   r^   r7   )0	partitionr   r   r   r   encodeUnicodeErrorr   r   _get_request_headersr   r  r   r[   r   r   r   okcontains_anchorUnicodeDecodeErrorr   r   status_codehistoryr  r   r  raise_for_statusRequestTimeoutr   re   r   r   r   r   r   
limit_rater   r   r   	Exceptionr   r   r   poprstrip_allowed_redirectr   )r6   rT   r   req_url	delimiterr
  rexr   r   r  error_messager  response_urlretry_afterretrieval_methodr   responsefoundredirect_status_codeerrrR   r   r   r7   r7   r8   r    s   






  







z+HyperlinkAvailabilityCheckWorker._check_urir+  r,  r   float | Nonec           	      C  s   t }d }|r4zt|}W n! ty-   zt|}W n ttfy$   Y nw |t  }Y nw t | }t|j}|d u rt| j}z| j	| }W n t
yR   t }Y nw |j}d| }||  krd|krhn n|}||krnd S t | }t||| j	|< |S )Ng       @)DEFAULT_DELAYr   rm   r!   	TypeErrorr   r   r   r   r   rk   delay	RateLimit)	r6   r+  r,  r5  r   r   	max_delay
rate_limitlast_wait_timer7   r7   r8   r"  X  s@   
z+HyperlinkAvailabilityCheckWorker.limit_rate)
r>   r(   r   r   r   r   r   r   r/   r0   rz   )rd   re   rT   re   r   r   r/   r   )r   r   r
  re   r/   r  )rT   re   r   r   r/   r   )r+  re   r,  r   r/   r2  )r{   r|   r}   r~   r   r   r   r  r  r"  __classcell__r7   r7   r   r8   r   R  s    
0
#
"
	 r   rT   re   r   dict[str, dict[str, str]]r/   dict[str, str]c                 C  s\   t | }|j d|j |j d|j d| df}|D ]}||v r+i t||   S qi S )Nr   r  *)r   schemer   DEFAULT_REQUEST_HEADERS)rT   r   r  r   ur7   r7   r8   r  ~  s   r  r.  r&   r
  r   c                 C  sP   t |}| jdddD ]}t|tr| }|| |jr  nq|  |jS )z<Determine if an anchor is contained within an HTTP response.i   T)
chunk_sizedecode_unicode)AnchorCheckParseriter_contentr   bytesdecodefeedr/  r   )r.  r
  parserchunkr7   r7   r8   r    s   

r  c                      s,   e Zd ZdZd fddZdddZ  ZS )rC  z9Specialised HTML parser that looks for a specific anchor.search_anchorre   r/   r0   c                   s   t    || _d| _d S r   )r   r   rJ  r/  )r6   rJ  r   r7   r8   r     s   

zAnchorCheckParser.__init__tagr%   attrsc                 C  s0   |D ]\}}|dv r|| j krd| _ d S qd S )N)idr   T)rJ  r/  )r6   rK  rL  keyvaluer7   r7   r8   handle_starttag  s   z!AnchorCheckParser.handle_starttag)rJ  re   r/   r0   )rK  r%   rL  r%   r/   r0   )r{   r|   r}   r~   r   rP  r:  r7   r7   r   r8   rC    s    rC  r  new_urlr   &dict[re.Pattern[str], re.Pattern[str]]c                   s   t  fdd| D S )Nc                 3  s(    | ]\}}| o|  V  qd S r   r   )r   from_urlto_urlrQ  r  r7   r8   r     s
    
z$_allowed_redirect.<locals>.<genexpr>)r   items)r  rQ  r   r7   rU  r8   r&    s   r&  c                   @  s   e Zd ZU ded< ded< dS )r6  r   r5  r   Nr   r7   r7   r7   r8   r6    r   r6  rH   r'   r   c                 C  sH   t |}|jdkr"|jr"|jd}|s"d|j }t|j|dS dS )zRewrite anchor name of the hyperlink to github.com

    The hyperlink anchors in github.com are dynamically generated.  This rewrites
    them before checking and makes them comparable.
    z
github.comzuser-content-)fragmentN)r   hostnamerW  r  r   _replace)rH   rT   parsedprefixedrW  r7   r7   r8   rewrite_github_anchor  s   r\  r>   r(   r0   c                 C  s   | j j}t| D ]<\}}z2zt||t|< W n tjy9 } ztt	d|j
|j W Y d}~nd}~ww W || q
|| w dS )zFCompile patterns in linkcheck_allowed_redirects to the regexp objects.z=Failed to compile regex in linkcheck_allowed_redirects: %r %sN)r>   rl   r   rV  r   r   errorr?   rj   r   r   msgr$  )rH   r>   rl   r  r   excr7   r7   r8   #compile_linkcheck_allowed_redirects  s   r`  r*   c                 C  s  |  t | t | dg d | dg d | di d | dg d | di d | ddd | d	d
dttf | ddd | ddd | ddgd | dddttf | dddttf | ddd | dddt	 | 
d | jdtdd ddddS )Nr   r:   r   rl   r   r   r   r,   r      r      r   Tr   z^!r   r7   r   g     r@r   Fr   r   zconfig-initedr   )prioritybuiltin)versionparallel_read_safeparallel_write_safe)add_builderr-   add_post_transformr   add_config_valuerx   r   tupler   r   	add_eventconnectr`  )rH   r7   r7   r8   setup  s,   


rn  )rT   re   r   r;  r/   r<  )r.  r&   r
  re   r/   r   )r  re   rQ  re   r   rR  r/   r   )rH   r'   rT   re   r/   r   )rH   r'   r>   r(   r/   r0   )rH   r'   r/   r*   )^r~   
__future__r   r   rt   r   r4   r   html.parserr   osr   queuer   r   	threadingr   typingr   r	   r
   urllib.parser   r   r   r   r   docutilsr   requests.exceptionsr   r   r   r   r   r!  sphinx.builders.dummyr   sphinx.localer   !sphinx.transforms.post_transformsr   sphinx.utilr   r   sphinx.util._urir   sphinx.util.consoler   r   r   r   r    sphinx.util.http_dater!   sphinx.util.nodesr"   collections.abcr#   r$   r%   r&   sphinx.applicationr'   sphinx.configr(   sphinx.util._pathlibr)   sphinx.util.typingr*   	getLoggerr{   r?   r   r  r?  r   r   r3  r-   r   r   r=   r   rO   r   r  r  rC  r&  r6  r\  r`  rn  r7   r7   r7   r8   <module>   sr    

 F2	  
.

	

