o
    toi(1                     @  s   d dl mZ d dlZd dlmZ d dlZd dlmZ G dd dej	Z
G dd dZG d	d
 d
eZdddZG dd deZdS )    )annotationsN)IntEnum)OpRunc                      s8   e Zd Z fddZdd Zdd Zedd Z  ZS )	IntMapc                   s   t    g | _d S N)super__init__
added_keysself	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/onnx/reference/ops/op_tfidf_vectorizer.pyr      s   

zIntMap.__init__c                 C  sh   t |ttfstdt| dt |ts tdt| d|| vr/| j| || j|< | j| S )Nzkey must be a int or str not .zvalue must be a NGramPart not )	
isinstanceintstr	TypeErrortype	NgramPartr	   appenddatar   keyvaluer   r   r   emplace   s   


zIntMap.emplacec              	   C  s   dd |   D }dg}t|  D ]]\}}d|v rc|d}t|D ]>\}}|dkrH|dkr;|d| d|  q#|d| d| d q#|t|d	 krY|d|  q#|d
|  q#q|d| d| d q|d d|S )Nc                 S  s   i | ]	\}}|t |qS r   )repr).0kvr   r   r   
<dictcomp>   s    z#IntMap.__repr__.<locals>.<dictcomp>{
r   z  =,   z    })itemssortedsplit	enumerater   lenjoin)r   valsrowsr   r    vsiliner   r   r   __repr__   s"   


zIntMap.__repr__c                 C  s   t | dkr
td| jd S )Nr   zIntMap is empty.)r,   
ValueErrorr	   r
   r   r   r   	first_key2   s   
zIntMap.first_key)	__name__
__module____qualname__r   r   r3   propertyr5   __classcell__r   r   r   r   r      s    
r   c                   @  sZ   e Zd ZdddZdd Zdd Zd	d
 Zdd Zedd Z	dd Z
dd Zdd ZdS )r   nidr   c                 C  s   || _ d | _d S r   )id__leafs_)r   r;   r   r   r   r   :   s   
zNgramPart.__init__c                 C  s   t  | _d S r   )r   r=   r
   r   r   r   init>   s   zNgramPart.initc                 C  s,   |   rd| j dS d| j d| jdS )Nz
NgramPart()z, )emptyr<   leafs_r
   r   r   r   r3   A   s   zNgramPart.__repr__c                 C  s
   | j d u S r   r=   r
   r   r   r   r@   F      
zNgramPart.emptyc                 C  s   | j d uot| j dkS )Nr   )r=   r,   r
   r   r   r   
has_leavesI   s   zNgramPart.has_leavesc                 C  s   | j d u r	td| j S )NzNgramPart was not initialized.)r=   RuntimeErrorr
   r   r   r   rA   L   s   
zNgramPart.leafs_c                 C  s   |   sd S || jv r|S d S r   )rD   r=   r   r   r   r   r   findR   s
   
zNgramPart.findc                 C  s   | j ||S r   )rA   r   r   r   r   r   r   Y   s   zNgramPart.emplacec                 C  s
   | j | S r   rB   rF   r   r   r   __getitem__\   rC   zNgramPart.__getitem__N)r;   r   )r6   r7   r8   r   r>   r3   r@   rD   r9   rA   rG   r   rH   r   r   r   r   r   9   s    

r   c                   @  s   e Zd ZdZdZdZdZdS )WeightingCriteriar   r&         N)r6   r7   r8   NONETFIDFTFIDFr   r   r   r   rI   `   s
    rI   n_ngramsr   
ngram_sizengram_idc           
      C  s   t |ddD ]?}d}|}|t| k rE|| | td}	||kr,||	_|d7 }|d7 }n|	 r4|	  |	j}|d7 }|d7 }|t| k sq|S )Nr   r&   )ranger,   r   r   r<   r@   r>   rA   )
els	els_indexrP   rQ   rR   c_ngramsnmpr   r   r   populate_gramsg   s$   r\   c                   @  sb   e Zd Zdd Zdd
dZdddZ									ddddZ									dddZdS )TfIdfVectorizerc                 C  sl  t | || | j}|dkrtj| _n|dkrtj| _n|dkr$tj| _| j| _	| j
| _| j| _| j| _| j
| _| j| _t| jd | _| j| _| j| _| j| _td| _| j  t| jp`| j}d}d}tt| jD ]F}| j| }|d t| jk r| j|d  n|}	|	| }
|
dkr|
| }|| j	kr|| jkrt| jp| j||||| j}n||7 }|d7 }qmd S )NrM   rN   rO   r&   ir   ) r   r   moderI   rM   weighting_criteria_rN   rO   min_gram_lengthmin_gram_length_max_gram_lengthmax_gram_length_max_skip_countmax_skip_count_ngram_countsngram_counts_ngram_indexesngram_indexes_maxoutput_size_weightsweights_pool_int64spool_int64s_pool_stringspool_strings_r   
int64_map_r>   r,   rT   r\   )r   	onnx_node
run_paramsr^   total_itemsrR   rQ   r1   	start_idxend_idxr(   ngramsr   r   r   r      sZ   







	
zTfIdfVectorizer.__init__rR   r   row_numfrequencies	list[int]returnNonec                 C  s0   |d8 }|| j  | j|  }||  d7  < d S )Nr&   )rk   ri   )r   rR   ry   rz   
output_idxr   r   r   increment_count   s   zTfIdfVectorizer.increment_countB
np.ndarrayc                 C  s  g }|dkr| | j d}n| | | | j t|}| j}t|}tj|ftjd}| j}| jt	j
krFt|D ]\}	}
|
||	< q<n| jt	jkrt|dkrud}t|D ]}t|D ]}	|| dkrj||	 nd||< |d7 }q^qXnUd}|D ]}
|
dkrdnd||< |d7 }qyn?| jt	jkrt|dkrd}t|D ]}t|D ]}	||	 ||  ||< |d7 }qqnd}|D ]
}
|
||< |d7 }qntd||S )Nr   r&   dtypezUnexpected weighting_criteria.)r   rk   tuplenpprodr@   float32rm   r_   rI   rM   r+   rN   r,   rT   rO   rE   reshape)r   r   rz   l_output_dimsoutput_dimsrow_size
total_dimsYwr1   fr[   _batchr   r   r   output_result   sZ   







zTfIdfVectorizer.output_resultNXr   c                 C  s>  t |jdkr|| }n|}d}|| }|d }|}td|d D ]{}|}|}||k r|||d   }||kr8nU|}| j}d}| r||kr||k r|| }||}|d u rYn,|| j}||krm|dkrm| ||| || }|d7 }||7 }| r||kr||k sK|d7 }||k s+|dkr|d7 }||kr d S q!d S )Nr&   r   )r,   shaperT   rr   rD   rG   r<   r   )r   r   ry   r   rz   rb   rd   r`   r^   rf   rh   rn   rp   rl   X_flat	row_beginrow_endmax_skip_distancestart_ngram_sizeskip_distancengram_startngram_row_endat_least_this
ngram_itemint_maprQ   valhitr   r   r   compute_impl   sV   


zTfIdfVectorizer.compute_implc                   sb  t j}d}d}d j}t|dkr&d}d |dkr%td| dn8t|dkr3d}|d  n+t|dkrS|d }|d  |}|dk rRtd| d  dntd| d  d|  |krqtd|   d	| dt j|j ft jd
|dksj	 r
|fS  	
fdd}t|D ]}|| q
|fS )Nr   r&   zUnexpected total of items r   rJ   zCInput shape must have either [C] or [B,C] dimensions with B > 0, B=z, C=z*Unexpected total of items, num_rows * C = z != total_items = r   c                   s(   j |  	
d d S )N)	rb   rd   r`   r^   rf   rh   rn   rp   rl   )r   )ry   Cr   rz   rb   rd   r`   r^   rf   rh   rn   rp   r   rl   r   r   fnk  s   
z TfIdfVectorizer._run.<locals>.fn)r   r   r   r,   r4   zerosrk   int64rr   r@   r   rT   )r   r   rb   rd   r`   r^   rf   rh   rn   rp   rl   ru   num_rowsr   
input_dimsr   r1   r   r   r   _run0  sJ   
$
zTfIdfVectorizer._run)rR   r   ry   r   rz   r{   r|   r}   )r   r   rz   r{   r|   r   )	NNNNNNNNN)
r   r   ry   r   r   r   rz   r{   r|   r}   )r6   r7   r8   r   r   r   r   r   r   r   r   r   r]      s0    
7
	5Br]   )rP   r   rQ   r   rR   r   )
__future__r   collectionsenumr   numpyr   onnx.reference.op_runr   UserDictr   r   rI   r\   r]   r   r   r   r   <module>   s   +'
