o
    Ni                     @  s   d dl mZ d dlmZ d dlZd dlmZ d dlm  mZ	 d dl
mZ d dlmZ d dlZd dlmZmZmZmZ dd	 Zd
d ZdddZdd ZG dd deZdS )    )annotations)zip_longestN)tensor)Module)pad_sequence)repeat	rearrangepackunpackc                 C  s   | d uS N )vr   r   Z/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/entropy_based_tokenizer.pyexists   s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default      r   #B;c                 C  s   | j |d S )N)min)clamplog)tepsr   r   r   r      r   r   c                 C  s"   | j dd}|t| jdd S )Ndim)softmaxr   sum)logitsprobr   r   r   calc_entropy_from_logits   s   r    c                      s>   e Zd Z	dd fdd	Ze dd
e fdddZ  ZS )EntropyBasedTokenizerNdecoderr   entropy_thresholdfloatmax_token_size
int | Nonec                   s    t    || _|| _|| _d S r   )super__init__r"   r#   r%   )selfr"   r#   r%   	__class__r   r   r(   "   s   

zEntropyBasedTokenizer.__init__Fdecoder_forward_kwargsdictc           %      C  sp  |j dk}t|fd\}}| j  t|}g |j|j| jR \}}	}
}tj	|	|
d}| j|fi |}t
|}td|
d}|rItd||}|| jk|@ }|d }t|d|d}| }|sfd|d< nt|d d	}|d
|d t|r|jd
d}tj|ddd}|jd
d  }tj	||
d}td||}|jd
d}|| dk}||@ jdd}||B }t|r||@ }|jd
d}|| | }g }|D ]}tj|ddd}|dd  |d d
  }|| qt|dd}|st||d\}|S t |d}g } t!|||D ]#\}!}"}#t|"r|!d |" }!|#|#dk }#|!|# }$| |$ q|r6| d } | S )N   z* n)deviceTzn, b -> b nzn -> b n)b).r   zb -> b 1r   r   )r.   r   r   )valuezj, b i -> b j i)r.   r   g        )batch_firstz* num_tokensr   )"ndimr	   r"   evalr   shaper/   r%   torcharanger    r   einxlessr#   r   cloner   scatter_cumsumFpadr   amaxitemequalanysplittolistappendr   r
   r   r   )%r)   seqlensreturn_segmented_seqr,   no_batch_dimmaybe_batch_psis_var_lengthbatchseq_lenr/   r%   r7   r   	entropiesmaskover_thres_maskarange_plus_one
boundariesscatter_indices	token_idsmax_num_tokenstoken_ids_seq
token_masktoken_sub_seq_arangesub_seq_boundaries
num_tokensindicestoken_lengthsone_indicespadded_indicesone_token_lengthssegmented_seqone_seqone_lenone_token_lengthsplitted_seqr   r   r   forward.   sj   

 


zEntropyBasedTokenizer.forwardr   )r"   r   r#   r$   r%   r&   )r,   r-   )	__name__
__module____qualname__r(   r6   no_gradr-   re   __classcell__r   r   r*   r   r!   !   s    r!   )r   )
__future__r   	itertoolsr   r6   r   torch.nn.functionalnn
functionalr=   torch.nnr   torch.nn.utils.rnnr   r8   einopsr   r   r	   r
   r   r   r   r    r!   r   r   r   r   <module>   s    
	