o
    i                     @   s|  d dl Z d dlmZ d dlmZ d dlZd dlZd dlmZ ddl	m
Z
 ddlmZ ddlmZmZ d	d
lmZmZmZmZmZmZmZ ededddeddddedddeddddded ddddedddddedddd ded!d"d#d$d%dedd&d'd(dd)ed*d+d,d-df
d.ed/ed0ed1ed2ed3ed4ee d5ed6ee d7efd8d9Zd:ed6ed;dfd<d=Zejd:ddd>dd?edd@dddAeddBdeddCdDdEded!d"d#d$d%deddFdGdHdfdIejdJedKedLee d5edMefdNdOZejdPddd>dQedd@dddAeddRdeddCdDdEded!d"d#d$d%deddFdGdHdfdIejdJedKedLee d5edMefdSdTZ dUdV Z!dS )W    N)Path)Optional)msg   )util)Language)convert_vectorsinit_nlp   )ArgOptimport_codeinit_cliparse_config_overrides	setup_gpushow_validation_errorvectors.z(The language of the nlp object to create)helpzVectors file in Word2Vec formatT)r   existszPipeline output directoryz--prunez-pz&Optional number of vectors to prune toz
--truncatez-tzFOptional number of vectors to truncate to when reading in vectors filedefaultz--modez-mzVectors mode: default or floretz--namez-nz?Optional name for the word vectors, e.g. en_core_web_lg.vectorsFz	--verbosez-Vz-VVz/Display more information for debugging purposesz--lexemes-jsonlz-jz+Location of JSONL-formatted attributes file)r   hiddenORTHz--attrz-az?Optional token attribute to use for vectors, e.g. LOWER or NORMlangvectors_loc
output_dirprunetruncatemodenameverbose	jsonl_locattrc
              	   C   s   |r	t jtj td|  d t |  }
|dur!t|
| t	|
||||||	d t
dt|
jj d |
| t
d|  dS )zConvert word vectors for use with spaCy. Will export an nlp object that
    you can use in the [initialize] block of your config to initialize
    a model with vectors.
    z(Creating blank nlp object for language ''N)r   r   r   r   r"   zSuccessfully converted z vectorszSaved nlp object with vectors to output directory. You can now use the path to it in your config as the 'vectors' setting in [initialize].)r   loggersetLevelloggingDEBUGr   infoget_lang_classupdate_lexemesr   goodlenvocabr   to_diskresolve)r   r   r   r   r   r   r   r    r!   r"   nlp r1   K/home/ubuntu/.local/lib/python3.10/site-packages/spacy/cli/init_pipeline.pyinit_vectors_cli   s*   
	
r3   r0   returnc                 C   s@   t |}|D ]}d|v rq| j|d  }|jdi | qd S )Nsettingsorthr1   )srsly
read_jsonlr-   	set_attrs)r0   r!   	lex_attrsattrslexemer1   r1   r2   r*   B   s   
r*   )allow_extra_argsignore_unknown_options)context_settingsr   zPath to config file)r   r   
allow_dashz&Output directory for the prepared dataz--codez-czNPath to Python file with additional code (registered functions) to be importedz--gpu-idz-gzGPU ID or -1 for CPUctxconfig_pathoutput_path	code_pathuse_gpuc           	      C   s   |r	t jtj t| j}t| t| t	| t j
||d}W d    n1 s,w   Y  t	dd t||d}W d    n1 sGw   Y  || td|  d S )N	overridesF	hint_fillrE   zSaved initialized pipeline to )r   r$   r%   r&   r'   r   argsr   r   r   load_configr	   r.   r   r+   	rA   rB   rC   rD   r    rE   rG   configr0   r1   r1   r2   init_pipeline_cliL   s   


rO   labels)r?   zOutput directory for the labelsc           	      C   s   |r	t jtj | s|jdd t| j}t	| t
| t| t j||d}W d   n1 s6w   Y  tdd t||d}W d   n1 sQw   Y  t|| dS )zGenerate JSON files for the labels in the data. This helps speed up the
    training process, since spaCy won't have to preprocess the data to
    extract the labels.T)parentsrF   NFrH   rJ   )r   r$   r%   r&   r'   r   mkdirr   rK   r   r   r   rL   r	   _init_labelsrM   r1   r1   r2   init_labels_clih   s   

rT   c                 C   sj   | j D ]/\}}t|dd d ur)|| d }t||j td| d|  qtd| d qd S )N
label_dataz.jsonz!Saving label data for component 'z' to z#No label data found for component 'r#   )pipelinegetattrr7   
write_jsonrU   r   r+   r(   )r0   rC   r   	componentoutput_filer1   r1   r2   rS      s   rS   )"r&   pathlibr   typingr   r7   typerwasabir    r   languager   training.initializer   r	   _utilr   r   r   r   r   r   r   commandstrintboolr3   r*   ContextrO   rT   rS   r1   r1   r1   r2   <module>   s    $

	
*


