o
    Vi                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ ee	jZdZdZd	Zd
ZdZdZdZdZeeeeefZeeefZeeefZh eeeZej ej!"   ej ej#"   ej ej$"   ej ej%"   eeZ&d6ddZ'd6ddZ(d7ddZ)d8ddZ*d9dd Z+d:d#d$Z,d;d'd(Z-d<d)d*Z.d=d+d,Z/e0d-ej1ej2B Z3e0d.Z4d>d0d1Z5d?d4d5Z6dS )@    )annotationsN)IO)
extensions)interpreters)licenses	directorysymlinksocketfile
executableznon-executabletextbinarypathstrreturnset[str]c              	   C  s0  zt | }W n ttfy   t|  dw |j}t|r#thS t|r+t	hS t
|r3thS th}t | t j}|rE|t n|t tt j| }t|dkr^|| n|rst| }t|dkrs|t|d  t|@ st| r|t n|t t|@ sJ |t|@ sJ ||S )N does not exist.r   )oslstatOSError
ValueErrorst_modestatS_ISDIR	DIRECTORYS_ISLNKSYMLINKS_ISSOCKSOCKETFILEaccessX_OKadd
EXECUTABLENON_EXECUTABLEtags_from_filenamer   basenamelenupdateparse_shebang_from_filetags_from_interpreterENCODING_TAGSfile_is_textTEXTBINARY	MODE_TAGS)r   srmodetagsr   tshebang r5   E/home/ubuntu/.local/lib/python3.10/site-packages/identify/identify.pytags_from_path(   s>   




r7   c                 C  s   t j| \}}t j|\}}t }|g|d D ]}|tjv r,|tj|   nqt|dkrW|dd  	 }|tj
v rJ|tj
|  |S |tjv rW|tj|  |S )N.r      )r   r   splitsplitextsetr   NAMESr(   r'   lower
EXTENSIONSEXTENSIONS_NEED_BINARY_CHECK)r   _filenameextretpartr5   r5   r6   r%   V   s    


r%   interpreterc                 C  sB   |  d\}}} | r| tjv rtj|  S |  d\} }}| s
t S )N/r8   )
rpartitionr   INTERPRETERSr<   )rF   rA   r5   r5   r6   r*   l   s   

r*   bytesio	IO[bytes]boolc                 C  s@   t g dt tdd t tdd }t| dd| S )zReturn whether the first KB of contents seems to be binary.

    This is roughly based on libmagic's binary/text detection:
    https://github.com/file/file/blob/df74b09b9027676088c797528edcaae5a9ce9ad0/src/encoding.c#L203-L228
    )      	   
                            i   N)	bytearrayrangerL   read	translate)rJ   
text_charsr5   r5   r6   is_texty   s   
r^   c                 C  sP   t j| st|  dt| d}t|W  d    S 1 s!w   Y  d S )Nr   rb)r   r   lexistsr   openr^   )r   fr5   r5   r6   r,      s
   $r,   line	list[str]c                 C  s(   zt | W S  ty   |   Y S w )N)shlexr:   r   )rc   r5   r5   r6   _shebang_split   s
   rf   cmdtuple[str, ...]c                 C  s   |  ddkrS|  }z|d}W n ty   | Y S w |D ]
}|tvr*|  S q tt| }t|d d D ]\}}|dkrDq;||d  f}q;|  ddks|S )N      #!UTF-8z-ir9   )	r[   readlinedecodeUnicodeDecodeError	printabletuplerf   strip	enumerate)rJ   rg   next_line_b	next_linecline_tokensitokenr5   r5   r6   _parse_nix_shebang   s$   rz   c                 C  s   |  ddkr	dS |  }z|d}W n
 ty   Y dS w |D ]	}|tvr* dS q!tt| }|dd dkrB|dd }n|dd dkrP|dd }|d	krYt| |S |S )
z8Parse the shebang from a file opened for reading binary.ri   rj   r5   rk   N)/usr/bin/envz-Sr9   )r{   )z	nix-shell)	r[   rm   rn   ro   rp   rq   rf   rr   rz   )rJ   first_line_b
first_linerv   rg   r5   r5   r6   parse_shebang   s(   
r~   c              
   C  s   t j| st|  dt | t jsdS zt| d}t|W  d   W S 1 s,w   Y  W dS  tyM } z|j	t	j
krHW Y d}~dS  d}~ww )z$Parse the shebang given a file path.r   r5   r_   N)r   r   r`   r   r    r!   ra   r~   r   errnoEINVAL)r   rb   er5   r5   r6   r)      s   (r)   z^\s*(Copyright|\(C\)) .*$z\s+sc                 C  s    t d| } td| } |  S )N  )COPYRIGHT_REsubWS_RErr   )r   r5   r5   r6   _norm_license   s   r   rB   
str | Nonec                 C  s   ddl }t| dd}| }W d   n1 sw   Y  t|}tj}d}tdt| }t	j
D ]6\}}	t|	}
||
krC|  S |rVtt|t|
 t| dkrVq3|||
|}||k ri||k ri|}|}q3|rr||k rr|S dS )a  Return the spdx id for the license contained in `filename`.  If no
    license is detected, returns `None`.

    spdx: https://spdx.org/licenses/
    licenses from choosealicense.com: https://github.com/choosealicense.com

    Approximate algorithm:

    1. strip copyright line
    2. normalize whitespace (replace all whitespace with a single space)
    3. check exact text match with existing licenses
    4. failing that use edit distance
    r   Nrk   )encodingr   g?)ukkonenra   r[   r   sysmaxsizemathceilr'   r   LICENSESabsdistance)rB   r   rb   contentsnormmin_edit_distmin_edit_dist_spdxcutoffspdxr   norm_license	edit_distr5   r5   r6   
license_id   s,   
$r   )r   r   r   r   )rF   r   r   r   )rJ   rK   r   rL   )r   r   r   rL   )rc   r   r   rd   )rJ   rK   rg   rh   r   rh   )rJ   rK   r   rh   )r   r   r   rh   )r   r   r   r   )rB   r   r   r   )7
__future__r   r   r   os.pathr   rere   r   stringr   typingr   identifyr   r   identify.vendorr   	frozensetrp   r   r   r   r   r#   r$   r-   r.   	TYPE_TAGSr/   r+   	_ALL_TAGSr(   r?   valuesr@   r=   rI   ALL_TAGSr7   r%   r*   r^   r,   rf   rz   r~   r)   compileI	MULTILINEr   r   r   r   r5   r5   r5   r6   <module>   sX    


.








