"""
regular transformer with discrete tokens, but continuous for number
generalizes better for arithmetic
https://arxiv.org/abs/2310.02989
"""

import torch
from torch import nn, Tensor
import torch.nn.functional as F

from typing import Callable
from collections import namedtuple

from einops import rearrange, repeat, pack, unpack
from einops.layers.torch import Rearrange

from x_transformers.x_transformers import (
    AttentionLayers,
    TokenEmbedding,
    ScaledSinusoidalEmbedding,
    AbsolutePositionalEmbedding,
    always,
    pad_at_dim  # used below when memory tokens are prepended to a mask
)

from x_transformers.autoregressive_wrapper import top_k, top_p

# constants

LossBreakdown = namedtuple('LossBreakdown', [
    'cross_entropy_loss',
    'numerical_mse_loss'
])

GenerateReturn = namedtuple('GenerateReturn', [
    'sampled_token_ids',
    'sampled_numbers',
    'is_number_mask'
])

# helper functions

def exists(val):
    return val is not None

def default(val, d):
    if exists(val):
        return val
    return d() if callable(d) else d
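# how inputs are laid out (an illustrative sketch, not part of the library API):
# a sequence is passed as two aligned tensors - discrete token ids in `x`, and a
# parallel float tensor `x_num` carrying the actual value wherever `x` holds the
# reserved numerical token id. e.g. with numerical_token_id = 4 and an otherwise
# hypothetical vocabulary, "2 * 4 = 8" could be encoded as
#
#   x     = [4,   10,  4,   11,  4  ]     # [NUM] '*' [NUM] '=' [NUM]
#   x_num = [2.,  1.,  4.,  1.,  8. ]     # values read only at [NUM] positions
#
# non-number positions of `x_num` are ignored (treated as a scale of 1)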
edefddZ  ZS )XValTransformerWrapperNFr           T)emb_dim
logits_dimtie_embeddingmax_mem_lennum_memory_tokensemb_dropoutuse_abs_pos_embscaled_sinu_pos_embattn_layersc                   s   t    |j}t||}| _t|| _| _| _| _	|r$|j
r*td _n|r2t| _nt|| _t|
 _t|	d}	|	dk _|	dkrVtt|	| _| _t||}|sft||n fdd _tt|dtd _d S )Nr   c                    s   |  j jj  S r   )	token_embembweightt)r/   selfr   r   <lambda>f   s    z1XValTransformerWrapper.__init__.<locals>.<lambda>   z... 1 -> ...)super__init__dimr    r#   r   r,   numerical_token_idmax_seq_lenr&   disable_abs_pos_embr   pos_embr   r   r   Dropoutr(   has_memory_tokens	Parametertorchrandnmemory_tokensr+   Linear	to_logits
Sequentialr
   to_numerical_output)r1   
num_tokensr8   r7   r+   r#   r$   r%   r&   r'   r(   r)   r*   r6   	__class__r0   r   r5   1   s2   







zXValTransformerWrapper.__init__xx_numc                    s  |j |j ksJ |j d }| jk} |}t||d}t|d}|| }| j||	d } jrWt j	d|d}t
||gd\}}t|rW|j d }t||dfd	d
d}t|
rx|
j dd  \}}||j d	 ksoJ dtj|
|fdd} |} j|f||d
d|\}} jrt||d\}}||_	|s |} |}||f}n|}|r||fS |r|j}t fdd|D }||fS |rtdd |jD }||fS |S )Nr         ?z... -> ... 1)poszm d -> b m d)bzb * dT)r6   valuer3   zEprepended embeddings need to have same dimensions as model dimensionsr6   )maskmemsreturn_hiddensc                 3   s.    | ]}|d  j  dddf  V  qdS ).N)r&   detach.0r/   r0   r   r   	<genexpr>   s   , z1XValTransformerWrapper.forward.<locals>.<genexpr>c                 s   s    | ]}|j V  qd S r   )post_softmax_attnrU   r   r   r   rW      s    )shaper7   r,   r>   wherer   r:   r<   r   r@   r   r   
pad_at_dimcatr(   r+   r	   rB   rD   hiddenstupleattn_intermediates)r1   rH   rI   return_embeddingsreturn_intermediatesreturn_memsrQ   return_attnrR   rK   prepend_embedskwargsbatchr   scalemmem_psnum_mems_prepend_dimintermediateslogitsnumerical_predoutr]   new_mems	attn_mapsr   r0   r   forwardm   sL   








zXValTransformerWrapper.forward)FFFNFNNN)__name__
__module____qualname__r   r5   r   rs   __classcell__r   r   rF   r   r!   0   s2    @r!   c                
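# a worked sketch of the scaling step above (values made up for illustration):
# if the learned [NUM] embedding is e, a position holding the number 3.14
# contributes 3.14 * e to the sequence, while ordinary tokens keep their
# embeddings as-is (scale 1). the number's magnitude therefore lives on a
# single learned direction, continuous in the value with no binning, e.g.
#
#   emb = token_emb([NUM])        # (d,)
#   emb_for_3_14 = 3.14 * emb     # what the attention layers actually see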
class XValAutoregressiveWrapper(nn.Module):
    def __init__(
        self,
        net: XValTransformerWrapper,
        ignore_index = -100,
        pad_value = 0,
        numerical_loss_weight = 1.
    ):
        super().__init__()
        self.net = net
        self.max_seq_len = net.max_seq_len
        self.numerical_loss_weight = numerical_loss_weight
        self.ignore_index = ignore_index

    @torch.no_grad()
    def generate(
        self,
        start_tokens: Tensor,
        start_numbers: Tensor,
        seq_len,
        filter_logits_fn: Callable = top_k,
        filter_kwargs: dict = dict(),
        temperature = 1.,
        **kwargs
    ):
        was_training = self.net.training
        num_dims = len(start_tokens.shape)

        assert num_dims >= 2, 'number of dimensions of your start tokens must be greater or equal to 2'
        assert start_tokens.shape == start_numbers.shape

        *_, t, device = *start_tokens.shape, start_tokens.device

        self.net.eval()
        out = start_tokens
        num_out = start_numbers

        for _ in range(seq_len):
            x = out[:, -self.max_seq_len:]
            x_num = num_out[:, -self.max_seq_len:]

            logits, numerical_pred = self.net(x, x_num, **kwargs)

            last_logits = logits[:, -1]
            last_num_pred = numerical_pred[:, -1:]

            filtered_logits = filter_logits_fn(last_logits, **filter_kwargs)

            probs = F.softmax(filtered_logits / temperature, dim = -1)

            sample = torch.multinomial(probs, 1)

            # append the sampled token id, and the predicted number alongside it

            out = torch.cat((out, sample), dim = -1)
            num_out = torch.cat((num_out, last_num_pred), dim = -1)

        out = out[:, t:]
        num_out = num_out[:, t:]

        # numbers are only meaningful where the numerical token was sampled

        is_number = out == self.net.numerical_token_id
        num_out = torch.where(is_number, num_out, float('nan'))

        self.net.train(was_training)
        return GenerateReturn(out, num_out, is_number)
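    # teacher-forced training sketch (illustrative values): both streams are
    # shifted together, so position i predicts token i + 1 and, when that
    # token is [NUM], also regresses its value
    #
    #   x     = [a,  b,  [NUM], c ]   ->   inp = x[:, :-1], target = x[:, 1:]
    #   x_num = [1., 1., 2.5,   1.]   ->   same shift applied in parallel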
z"XValAutoregressiveWrapper.generateFrH   rI   c                 K   sT  |d d d df |d d dd f }}|d d d df |d d dd f }}|| j k}	|dd }
t|
rU|	|
M }	|
jd |jd krU|
d d d df }
|
|d< | j||fi |\}}t|d}tj||d| j d}|| jjk}|	| d}tj
||dd}||	 }|	| d}||| j  }||	 }| }|s|S |t||fS )	NrN   r3   rQ   zb n c -> b c nnone)	reductionr|   r"   )r   )r|   getr   rY   rz   r   r   cross_entropyr7   masked_fillmse_lossr{   meanr   )r1   rH   rI   return_loss_breakdownre   inptarget	x_num_inpx_num_targettarget_maskrQ   rn   ro   r   target_is_number_maskr   lossr   r   r   rs      s.   **

z!XValAutoregressiveWrapper.forward)ry   r   rJ   )F)rt   ru   rv   r!   r5   r>   no_gradr   dictr   r   r   rs   rw   r   r   rF   r   rx      s4    6rx   )"__doc__r>   r   r   torch.nn.functional
functionalr   typingr   collectionsr   einopsr   r   r   r	   einops.layers.torchr
   x_transformers.x_transformersr   r   r   r   r   %x_transformers.autoregressive_wrapperr   r   r   r   r   r    Moduler!   rx   r   r   r   r   <module>   s"     
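# a minimal smoke test, only run when this file is executed directly. the
# hyperparameters below are arbitrary illustrative choices, and using
# `AttentionLayers(causal = True)` as the decoder is an assumption of this
# sketch - any causal AttentionLayers instance should work

if __name__ == '__main__':
    model = XValTransformerWrapper(
        num_tokens = 256,
        max_seq_len = 64,
        numerical_token_id = 255,  # reserve one id as the [NUM] placeholder
        attn_layers = AttentionLayers(
            dim = 64,
            depth = 2,
            heads = 4,
            causal = True
        )
    )

    wrapper = XValAutoregressiveWrapper(model)

    ids = torch.randint(0, 256, (2, 64))
    nums = torch.randn(2, 64)

    # one training step worth of loss

    loss = wrapper(ids, nums)
    loss.backward()

    # sample a continuation - sampled numbers are nan at non-number positions

    sampled = wrapper.generate(ids[:, :16], nums[:, :16], seq_len = 8)
    print(sampled.sampled_token_ids.shape, sampled.sampled_numbers.shape)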