o
    iB                     @   sL  d dl Z d dlZd dlmZ d dlmZ d dlmZmZm	Z	 d dl
Z
d dlZd dlmZ d dlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZ ejdddddedddeddddeddddddeddddeddd d!ded"d#d$d%d&ed'd(d)d d*dedd+d,d-dfd.ejd/ed0ed1e	e  d2e!d3e d4e d5e d6e	e fd7d8Z"G d9d: d:Z#G d;d< d<Z$d=ed>ee d1e	e  d?e
j%fd@dAZ&d=ed>ee d4e d1e dBe!d?e
j%fdCdDZ'e
j(dEfd?e
j%fdFdGZ)d>ee d?e fdHdIZ*dJe
j%fdKdLZ+dJe
j%fdMdNZ,d=ed>ee d5e d1e	e  d?e
j%f
dOdPZ-dS )Q    N)islice)Path)IterableListOptional)tqdm)msg   )util)Language)Doc)Corpus   )ArgOptbenchmark_cliimport_code	setup_gpuspeedT)allow_extra_argsignore_unknown_options)context_settings.zModel name or path)helpz3Location of binary evaluation data in .spacy format)r   existsz--batch-sizez-bz Override the pipeline batch size)minr   Fz--no-shufflezDo not shuffle benchmark dataz--gpu-idz-gzGPU ID or -1 for CPU2   z	--batchesz&Minimum number of batches to benchmark   )r   r      z--warmupz-wz-Number of iterations over the data for warmupz--codez-czNPath to Python file with additional code (registered functions) to be importedctxmodel	data_path
batch_size
no_shuffleuse_gpu	n_batcheswarmup_epochs	code_pathc	                 C   s   t | t|dd t|}	|dur|n|	j}t|}
dd |
|	D }t|dkr2tjddd	 t	d
| d t
|	||| t	  t	d| d t|	|||| }t	  t| t| dS )zu
    Benchmark a pipeline. Expects a loadable spaCy pipeline and benchmark
    data in the binary .spacy format.
    F)r$   silentNc                 S   s   g | ]}|j qS  )	predicted).0egr)   r)   M/home/ubuntu/.local/lib/python3.10/site-packages/spacy/cli/benchmark_speed.py
<listcomp>.   s    z'benchmark_speed_cli.<locals>.<listcomp>r   z-Cannot benchmark speed using an empty corpus.r   )exitszWarming up for z
 epochs...zBenchmarking z batches...)r   r   r
   
load_modelr"   r   lenr   failprintwarmup	benchmarkprint_outliersprint_mean_with_ci)r   r    r!   r"   r#   r$   r%   r&   r'   nlpcorpusdocswpsr)   r)   r-   benchmark_speed_cli   s    
r<   c                   @   s    e Zd ZdZdd Zdd ZdS )time_contextz'Register the running time of a context.c                 C   s   t  | _| S N)timeperf_counterstart)selfr)   r)   r-   	__enter__C   s   
ztime_context.__enter__c                 C   s   t  | j | _d S r>   )r?   r@   rA   elapsed)rB   typevalue	tracebackr)   r)   r-   __exit__G   s   ztime_context.__exit__N)__name__
__module____qualname____doc__rC   rH   r)   r)   r)   r-   r=   @   s    r=   c                   @   sF   e Zd ZU dZeed< eed< eed< eed< dejddfd	d
ZdS )	QuartileszVCalculate the q1, q2, q3 quartiles and the inter-quartile range (iqr)
    of a sample.q1q2q3iqrsamplereturnNc                 C   s<   t |d| _t |d| _t |d| _| j| j | _d S )Ng      ?g      ?g      ?)numpyquantilerN   rO   rP   rQ   )rB   rR   r)   r)   r-   __init__T   s   zQuartiles.__init__)	rI   rJ   rK   rL   float__annotations__rT   ndarrayrV   r)   r)   r)   r-   rM   K   s   
 rM   r8   r:   rS   c                 C   s   | j t|dd d|d}g }	 t }tt||r|n| j}W d    n1 s)w   Y  t|dkr5nt|}|||j	  qt
|S )Ndoc)unitdisable)r"   Tr   )piper   r=   listr   r"   r1   count_tokensappendrD   rT   array)r8   r:   r"   r;   rD   
batch_docsn_tokensr)   r)   r-   annotate[   s   

rd   shufflec                    sJ   |r fddt || D }n fddt || D }t||S )Nc                    s   g | ]} t jqS r)   )make_docrandomchoicetextr+   _r:   r8   r)   r-   r.   u   s    zbenchmark.<locals>.<listcomp>c                    s$   g | ]}  |t   jqS r)   )rf   r1   ri   )r+   irl   r)   r-   r.   z   s    )rangerd   )r8   r:   r%   r"   re   
bench_docsr)   rl   r-   r5   m   s   

r5   i'  c                    s"   t  fddt|D t jS )z9Apply a statistic to repeated random samples of an array.c                 3   s*    | ]} t jjtd dV  qdS )T)replaceN)rT   rg   rh   r1   rj   	statisticxr)   r-   	<genexpr>   s
    
zbootstrap.<locals>.<genexpr>)rT   fromiterrn   float64)rs   rr   
iterationsr)   rq   r-   	bootstrap   s   rx   c                 C   s   t dd | D S )Nc                 s   s    | ]}t |V  qd S r>   )r1   r+   rZ   r)   r)   r-   rt      s    zcount_tokens.<locals>.<genexpr>)sum)r:   r)   r)   r-   r_      s   r_   rR   c                 C   sp   t | }t| }|  |tt|d  }|tt|d  }td|dd|| dd|| dd d S )Ng?g333333?zMean: .1fz words/s (95% CI: z +))rT   meanrx   sortintr1   r3   )rR   r}   bootstrap_meanslowhighr)   r)   r-   r7      s   
.r7   c                 C   s   t | }t| |jd|j  k | |jd|j  kB }t| |jd|j  k | |jd|j  kB }tdd| t|  ddd| t|   d d S )Ng      ?g      @z
Outliers: d   r{   z%, extreme outliers: %)rM   rT   rz   rN   rQ   rP   r3   r1   )rR   	quartiles
n_outliersn_extreme_outliersr)   r)   r-   r6      s   *r6   c                 C   s   dd || D }t | ||S )Nc                 S   s   g | ]}|  qS r)   )copyry   r)   r)   r-   r.      s    zwarmup.<locals>.<listcomp>)rd   )r8   r:   r&   r"   r)   r)   r-   r4      s   r4   ).rg   r?   	itertoolsr   pathlibr   typingr   r   r   rT   typerr   wasabir    r
   languager   tokensr   trainingr   _utilr   r   r   r   r   commandContextstrr   boolr<   r=   rM   rY   rd   r5   r}   rx   r_   r7   r6   r4   r)   r)   r)   r-   <module>   s    
	
)

