o
    Si3                     @   sV   d Z ddlZddlZddlZddlZddlmZ 				d	ddZG dd deZ	dS )
z/
The file_reader converts raw corpus to input.
    NF	c           
      C   s   i }t j| dddD ]6}|d|}t|dkrq
|r"|\}}	n|\}	}|	|v r0td|	 |r6||	}	|r<||}|||	< q
|S )z'
    Load key-value dict from file
    rutf8)encoding
   zkey duplicated with [%s])ioopenstripsplitlenKeyError)
	dict_pathreverse	delimiterkey_func
value_funcresult_dictlinetermsvaluekey r   P/home/ubuntu/.local/lib/python3.10/site-packages/jieba/lac_small/reader_small.pyload_kv_dict   s    

r   c                   @   sH   e Zd ZdZdd Zedd Zedd Zdd	 Zd
d Z	dd Z
dS )Datasetzdata readerc                 C   sl   t jt}t j|}t j|d}t j|d}t|dtd| _t|| _	t|dtd| _
t|| _d S )Nzword.dicztag.dicT)r   r   )ospathabspath__file__dirnamejoinr   intword2id_dictid2word_dictlabel2id_dictid2label_dict)selfbasepathfolderword_dict_pathlabel_dict_pathr   r   r   __init__4   s   
zDataset.__init__c                 C      t | j d S )zvocabulary size   )maxr#   valuesr'   r   r   r   
vocab_sizeA      zDataset.vocab_sizec                 C   r-   )
num_labelsr.   )r/   r%   r0   r1   r   r   r   r4   F   r3   zDataset.num_labelsc                 C   4   g }|D ]}|| j vrd}| j | }|| q|S )zconvert word to word indexOOV)r#   append)r'   wordsword_idswordword_idr   r   r   word_to_idsK      

zDataset.word_to_idsc                 C   r5   )zconvert label to label indexO)r%   r7   )r'   labels	label_idslabellabel_idr   r   r   label_to_idsU   r=   zDataset.label_to_idsc                 C   s   |  }| |}|S )N)r
   r<   )r'   str1r8   r9   r   r   r   get_vars_   s   
zDataset.get_varsN)__name__
__module____qualname____doc__r,   propertyr2   r4   r<   rC   rE   r   r   r   r   r   2   s    



r   )Fr   NN)
rI   r   
__future__r   paddlepaddle.fluidfluidr   objectr   r   r   r   r   <module>   s   
