o
    i                     @   s`  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	m
Z
 d dlZd dlZd dlZd dlmZmZ ddlmZ ddlmZ dd	lmZmZmZmZmZ ed
ejd
ddedddedddddedddddfdejdedee defddZd&dedee deddfdd
Z dede	e ddfd d!Z!d"e
eef d#edee fd$d%Z"dS )'    N)Path)IteratorOptionalSequenceUnion)Printermsg   )Language)
load_model   )NAMEArgOptapp	debug_cliprofileT)hidden.zTrained pipeline to load)helpz&Location of input file. '-' for stdin.)r   exists
allow_dash'  z	--n-textsz-nz+Maximum number of texts to use if availablectxmodelinputsn_textsc                 C   s*   | j jjtkrtd t|||d dS )a\  
    Profile which functions take the most time in a spaCy pipeline.
    Input should be formatted as one JSON object per line with a key "text".
    It can either be provided as a JSONL file, or be read from sys.sytdin.
    If no input file is specified, the IMDB dataset is loaded via Thinc.

    DOCS: https://spacy.io/api/cli#debug-profile
    zThe profile command is now available via the 'debug profile' subcommand. You can run python -m spacy debug --help for an overview of the other available debugging commands.)r   r   N)parentcommandnamer   r   warnr   )r   r   r   r    r    E/home/ubuntu/.local/lib/python3.10/site-packages/spacy/cli/profile.pyprofile_cli   s
   r"   returnc           	      C   s<  |d urt |t}tt||}|d u rYzdd l}W n ty+   tjddd Y nw td |j	|dd\}}t
| \}}W d    n1 sKw   Y  td| d td	|  d
 t| }W d    n1 sqw   Y  td|  d tdt t d td}td | d  d S )Nr   zwThis command, when run without an input file, requires the ml_datasets library to be installed: pip install ml_datasetsr   exitsz'Loading IMDB dataset via ml_datasets...)train_limit	dev_limitzLoaded IMDB dataset and using z	 exampleszLoading pipeline 'z'...zLoaded pipeline ''zparse_texts(nlp, texts)zProfile.profzProfile statstime)_read_inputsr   list	itertoolsisliceml_datasetsImportErrorfailloadingimdbzipinfor   goodcProfilerunctxglobalslocalspstatsStatsdivider
strip_dirs
sort_statsprint_stats)	r   r   r   textsr.   
imdb_train_nlpsr    r    r!   r   -   s2   




rC   r@   c                 C   s"   | j tj|d dddD ]}qd S )N)disable   )
batch_size)pipetqdm)rC   r@   docr    r    r!   parse_textsI   s   rK   locr   c                 c   s    | dkr| d tj}dd |D }n#t| }| r!| s)|jd| dd | d|jd	   | }|D ]}t	
|}|d
 }|V  q:d S )N-zReading input from sys.stdinc                 s   s    | ]}| d V  qdS )utf8N)encode).0liner    r    r!   	<genexpr>R   s    z_read_inputs.<locals>.<genexpr>zNot a valid input data filer   r$   zUsing data from text)r4   sysstdinr   r   is_filer0   partsopensrsly
json_loads)rL   r   file_
input_pathrQ   datarT   r    r    r!   r*   N   s   

r*   )Nr   )#r6   r,   r:   rU   pathlibr   typingr   r   r   r   rZ   rI   typerwasabir   r   languager
   utilr   _utilr   r   r   r   r   r   Contextstrintr"   r   rK   r*   r    r    r    r!   <module>   s<    
 &