o
    im                     @   s   d dl mZmZmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZ dd	 Zejjejd
ddddfddddfdddddfgdd ZdS )    )CallableDictIterableN)Configfix_random_seedLanguage)ConfigSchemaTraining)Example)load_model_from_configregistryresolve_dot_namesc                     s8  d} t ddttttgtt f f fdd}t 	| }t
|dd t j j d td	}|d
 |d g}t j|\}ttsHJ |d } j fdd|d  D ]
} j|g|d q\ t| }|d dksxJ  d}	|	jsJ d j d i}
t |
d d }t|tsJ d S )Nz
    [training]

    [corpora]
    @readers = "myreader.v1"

    [nlp]
    lang = "en"
    pipeline = ["tok2vec", "textcat"]

    [components]

    [components.tok2vec]
    factory = "tok2vec"

    [components.textcat]
    factory = "textcat"
    zmyreader.v1returnc                     s.   ddddi dt f fdd} | | | | dS )	Ncats      ?        )POSNEGnlpc                    s   |  d}t| gS )NzThis is an example)make_docr
   	from_dict)r   docannots U/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/training/test_readers.pyreader$   s   
z.test_readers.<locals>.myreader.<locals>.reader)traindevextra	somethingr   )r   r   r   r   myreader    s   ztest_readers.<locals>.myreaderT	auto_filltrainingschematrain_corpus
dev_corpus	optimizerc                          S Nr   r   r   r'   r   r   <lambda>4       ztest_readers.<locals>.<lambda>sgdcats_macro_aucr   
Quick testcorporar   )r   readersr   strr   r   r   r
   r   from_strr   resolveconfiginterpolater	   r   
isinstance
initializeupdateevaluatelistr   )config_stringr!   r8   T	dot_namesr(   r)   examplescoresr   r3   extra_corpusr   r,   r   test_readers   s,   &	
rE   zreader,additional_configzml_datasets.imdb_sentiment.v1
   )train_limit	dev_limitzml_datasets.dbpedia.v1zml_datasets.cmu_movies.v1   g?)limitfreq_cutoffsplitc                    sN  d}t  |}t|d d  | |d d< |d | t|dd tj jd td}|d	 |d
 g}t	 j|\}|d } j
 fdd|d  D ]"}|jjsYJ ttt|jj ddgkskJ  j|g|d qQt| }	|	D ]}ttt|jj ddgksJ q| |	}
|
d sJ  d}|jsJ d S )Na\  
    [training]
    seed = 0

    [training.score_weights]
    cats_macro_auc = 1.0

    [corpora]
    @readers = "PLACEHOLDER"

    [nlp]
    lang = "en"
    pipeline = ["tok2vec", "textcat_multilabel"]

    [components]

    [components.tok2vec]
    factory = "tok2vec"

    [components.textcat_multilabel]
    factory = "textcat_multilabel"
    r$   seedr3   z@readersTr"   r%   r'   r(   r)   c                      r*   r+   r   r   r,   r   r   r-   k   r.   z"test_cat_readers.<locals>.<lambda>r/   r   r   
cats_scorer2   )r   r6   r   r<   r   r   r7   r8   r	   r   r;   yr   sortedr>   setvaluesr=   )r   additional_confignlp_config_stringr8   r@   rA   r(   r)   rB   dev_examplesrC   r   r   r,   r   test_cat_readersA   s,   
$&
rV   )typingr   r   r   pytest	thinc.apir   r   spacyr   spacy.schemasr	   spacy.trainingr
   
spacy.utilr   r   r   rE   markslowparametrizerV   r   r   r   r   <module>   s"    5