o
    i"                     @   s  d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	m
Z
mZmZmZ d dlmZ d dlmZ d dlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZm Z  ej!dddddedddddedddedddddedddddeddd d!dedd"d#d$dedd%d&d'dedd(d)d*dedd+d,d-dedd.d/d0dedd1d2d3ded4d5d6d7dfd8ej"d9ed:e#d;e#d<e$d=e$d>e$d?e$d@e$dAe$dBe$dCe$dDe%fdEdFZ&ddGdHeee#ef  fdIdJZ'dKdL Z(dWdNe#fdOdPZ)dQdR Z*dSdT Z+dUdV Z,dS )X    N)Path)AnyDictOptional)Modeldata_validationfix_random_seedset_dropout_rateset_gpu_allocator)msg)Example)resolve_dot_names   )util)ConfigSchemaTraining)registry   )ArgOpt	debug_cliparse_config_overrides	setup_gpushow_validation_errorstring_to_listmodelT)allow_extra_argsignore_unknown_options)context_settings.zPath to config file)helpexists
allow_dashzDName of the pipeline component of which the model should be analysed)r    z--layersz-lz+Comma-separated names of layer IDs to printFz--dimensionsz-DIMzShow dimensionsz--parametersz-PARzShow parametersz--gradientsz-GRADzShow gradientsz--attributesz-ATTRzShow attributesz--print-step0z-P0zPrint model before trainingz--print-step1z-P1z Print model after initializationz--print-step2z-P2zPrint model after trainingz--print-step3z-P3zPrint final predictionsz--gpu-idz-gzGPU ID or -1 for CPUctxconfig_path	componentlayers
dimensions
parameters	gradients
attributesP0P1P2P3use_gpuc              
   C   s,  t | t|dd}|||||||	|
|d	}t| j}t| tj||dd}W d   n1 s2w   Y  | }|d d }|d	krK|rKt| t| t	|}|j
 }tj|d td
}W d   n1 smw   Y  |d }|durtd|  t| ||}t|||||d dS )z
    Analyze a Thinc model implementation. Includes checks for internal structure
    and activations during training.

    DOCS: https://spacy.io/api/cli#debug-model
    T)intify)	r'   r(   r)   r*   r&   print_before_trainingprint_after_initprint_after_trainingprint_predictionF)	overridesinterpolateNtraininggpu_allocatorr   )schemaseedzFixing random seed: print_settings)r   r   r   argsr   r   load_configr6   r
   load_model_from_configconfigr   resolver   r   infor   get_pipedebug_model)r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r<   config_overrides
raw_configr@   	allocatornlpTr:   pipe rK   I/home/ubuntu/.local/lib/python3.10/site-packages/spacy/cli/debug_model.pydebug_model_cli    sB   





rM   r;   r<   c          
         s  t |dstjd| ddd |j}t|ts%tjdt| ddd |d u r+i }td|j  |	d	rCt
d
 t|| td z9|d g}t  t| |\fdd W d    n1 slw   Y  td ttd W nK ty   z1td|d t  dd t D   fdd W d    n1 sw   Y  td W n ty   tjddd Y nw Y nw W d    n1 sw   Y  |	drt
d t|| t|d d }|drd|djv rd}|drd|djv rd}tdD ]}|r(|  |  q|	d r?t
d! t|| |d"d  D }	|	d#r[t
d$ tt|	 t d% d S )&Nr   zThe component 'z0' does not specify an object that holds a Model.r   )exitsz1Requires a Thinc Model to be analysed, but found z	 instead.zAnalysing model with ID r1   zSTEP 0 - before trainingFtrain_corpusc                      s    S NrK   rK   )rH   rO   rK   rL   <lambda>   s    zdebug_model.<locals>.<lambda>z/Initialized the model with the training corpus.      )nOr   c                 S   s   g | ]}t |i qS rK   )r   	from_dict).0xrK   rK   rL   
<listcomp>   s    zdebug_model.<locals>.<listcomp>c                      s    S rP   rK   rK   )examplesrK   rL   rQ      s    z&Initialized the model with dummy data.zjCould not initialize the model: you'll have to provide a valid 'train_corpus' argument in the config file.r2   zSTEP 1 - after initializationg?tok2vecztok2vec-listenerztransformer-listenertransformer   r3   zSTEP 2 - after trainingc                 S   s   g | ]}|j qS rK   )	predicted)rV   exrK   rK   rL   rX      s    r4   zSTEP 3 - predictionz/Successfully ended analysis - model looks good.)!hasattrr   failr   
isinstancer   typerB   idgetdivider_print_modelr   r   r   
initializelist	itertoolsislice
ValueError_set_output_dim	_get_docs	Exceptionr	   has_refget_refnamerC   rangeupdatepredictstrgood)
r@   resolved_train_configrH   rJ   r<   r   	dot_namesupstream_componente
predictionrK   )rY   rH   rO   rL   rD   ^   s   



















rD   c                   C   s   g dS )N)z6Apple is looking at buying U.K. startup for $1 billionz>Autonomous cars shift insurance liability toward manufacturersz8San Francisco considers banning sidewalk delivery robotsz+London is a big city in the United Kingdom.rK   rK   rK   rK   rL   
_sentences   s   r|   enlangc                 C   s   t |  }t|t S rP   )r   get_lang_classrh   rJ   r|   )r~   rH   rK   rK   rL   rm      s   rm   c                 C   sV   |  dd u r| d| | dr'| d dd u r)| dd| d S d S d S )NrT   output_layer)has_dimset_dimro   rp   )r   rT   rK   rK   rL   rl      s   
rl   c              
   C   s  | dd}| dd}| dd}| dd}| dd}t|  D ]\}}|r.||v rtd| d	|j d
|j d |rU|jD ]}	td|	 d||	  qD|r|j	D ](}	|
|	rtt||	}
td|	 d|
  qZtd|	 d|
|	  qZ|r|j	D ](}	||	rt||	}
td|	 d|
  qtd|	 d||	  q|r|j}| D ]\}	}td|	 d|  qq$d S )Nr&   r!   r(   Fr'   r)   r*   zLayer z: model ID z: ''z - dim z: z	 - param z - grad z - attr )rd   	enumeratewalkr   rB   rc   rq   	dim_namesmaybe_get_dimparam_names	has_param_print_matrix	get_paramhas_gradget_gradattrsitems)r   r<   r&   r(   r'   r)   r*   inoderq   print_valuer   valuerK   rK   rL   rf      s:   "




rf   c                 C   s`   | d u s	t | tr| S t| jd }| }t| jd D ]}|d }q|dd }|t| }|S )Nz - sample: r   r   rR   )ra   boolru   shaperr   ndim)r   resultsample_matrixdrK   rK   rL   r      s   
r   )r}   )-ri   pathlibr   typingr   r   r   typer	thinc.apir   r   r   r	   r
   wasabir   spacy.trainingr   
spacy.utilr   r!   r   schemasr   r   _utilr   r   r   r   r   r   r   commandContextru   r   intrM   rD   r|   rm   rl   rf   r   rK   rK   rK   rL   <module>   s    $
	
@
R		"