o
    Ni5                     @  s  d dl mZ d dlZd dlmZmZmZmZ d dlmZ d dl	m  m
Z d dlmZ d dlZd dlmZmZmZmZmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d	d
 Z dd Z!		dddZ"dd ZG dd deZ#G dd deZ$G dd deZ%dS )    )annotationsN)nncatstackarange)Module)Normal)	rearrangereducepackrepeatunpack)align_right)	AttentionAttentionLayersScaledSinusoidalEmbeddingAbsolutePositionalEmbedding	LayerNormmasked_meanalways
pad_at_dimc                 C  s   | d uS N )valr   r   M/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/continuous.pyexists   s   r   c                 C  s(   t | r| S t|tst|r| S |S r   )r   
isinstancer   callable)r   dr   r   r   default   s   r   h㈵>      ?c                 C  s    |j |d }t| || S )Nmin)clampsqrttorchnormal)meanvarianceepstemperaturestdr   r   r   sample_from_mean_variance#   s   r-   c                 C  s<   t d|| d} t| dd}|jdd}||jdd }|S )	Nzb n, b n d, -> b n d        z
b n d -> bsumdimr!   r"   )einxwherer
   r/   r$   )tmasknumdenmasked_averager   r   r   r   ,   s
   r   c                   @  s   e Zd Zdd ZdS )GaussianNLLc                 C  s   |\}}t j|||ddS Nnone)	reduction)Fgaussian_nll_loss)selfpredtargetr(   varr   r   r   forward8   s   zGaussianNLL.forwardN)__name__
__module____qualname__rD   r   r   r   r   r:   7   s    r:   c                      sf   e Zd Zddddddddddddddd fddZ															ddddZ  ZS )ContinuousTransformerWrapperNr   Fr.   T)max_seq_lendim_indim_out
project_inproject_outmax_mem_lennum_memory_tokenspost_emb_normemb_dropoutuse_abs_pos_embscaled_sinu_pos_embaverage_pool_embedprobabilisticattn_layersr   rL   Module | NonerM   c                  sF  t    |j || _|| _t| p|dkp|o|j  }|r&td| _n|r.t	 | _nt
 || _|	r:t nt | _t|
| _t|d}|dk| _|dkr]tt| | _|| _|| _trot|roJ dtr{t|r{J dt| fdd| _| _t| fdd| _tdd |  D | _d S )	Nr   z>either `dim_in` or `project_in` can be passed in, but not bothz@either `dim_out` or `project_out` can be passed in, but not bothc                     s    t rtj ddS t S )NFbiasr   r   LinearIdentityr   )r2   rJ   r   r   <lambda>y   s     z7ContinuousTransformerWrapper.__init__.<locals>.<lambda>c                     s,   t rtj rdnd ddS t S )N      FrX   rZ   r   )r2   rK   rU   r   r   r]      s   , c                 S  s   g | ]
}t |tr|jqS r   )r   r   can_cache_kv).0moduler   r   r   
<listcomp>   s    z9ContinuousTransformerWrapper.__init__.<locals>.<listcomp>)super__init__r2   rI   rN   r   disable_abs_pos_embr   pos_embr   r   r   r   r\   rP   DropoutrQ   r   has_memory_tokens	Parameterr&   randnmemory_tokensrV   rT   rL   rU   rM   allmodulesr`   )r@   rI   rV   rJ   rK   rL   rM   rN   rO   rP   rQ   rR   rS   rT   rU   no_abs_pos_emb	__class__)r2   rJ   rK   rU   r   re   ?   s0   
 

z%ContinuousTransformerWrapper.__init__cacheLayerIntermediates | Nonec           !   
     s  g |j d d ||jR \ }t|r,t|rJ dtd}td||}d}t|r7|r7|j}|}|j||
||d }t|rO|| }	|}j
r{tjd d}t||gd	\}}t|r{|j d
 }t||dfddd}t|r|j dd  \}||j d ksJ dt||fd
d}t|st|rt| fdd}t| fdd}t||fdd}|}j|f|||	|||dd|\}}j
rt||d	\}}||_jrt||d}|s|n|}|sjrt|ddd\}}| }t||f}|r||fS |r/|j}tfdd|D }||fS |r@tdd |jD } || fS |S )Nr^   /either `mask` or `lens` passed in, but not bothdevicej, i -> i jr   )posseq_start_posoffsetzm d -> b m d)bzb * dr0   T)r2   valuer_   zEprepended embeddings need to have same dimensions as model dimensionsr1   c                        t j ft jdS N)rv   dtyper&   onesboolr   )batchrv   seqr   r   r]          z6ContinuousTransformerWrapper.forward.<locals>.<lambda>c                     r~   r   r   r   )r   rv   prepend_seqr   r   r]      r   )r6   mems	mem_masksrr   input_not_include_cacheseq_pos_offsetreturn_hiddens)r6   z*... (d mean_log_var) -> mean_log_var ... d)mean_log_varc                 3  s.    | ]}|d  j  dddf  V  qdS ).N)rN   detachra   r5   )r@   r   r   	<genexpr>   s   , z7ContinuousTransformerWrapper.forward.<locals>.<genexpr>c                 s  s    | ]}|j V  qd S r   )post_softmax_attnr   r   r   r   r      s    )shaperv   r   r   r3   lesscache_lengthrL   rg   rP   ri   r   rl   r   r   r   r   rQ   rV   r   rT   r   rM   rU   r	   expr   hiddenstupleattn_intermediates)!r@   xreturn_embeddingsreturn_intermediatesreturn_memsr6   lensreturn_attnr   r   rx   
sum_embedsprepend_embedsprepend_maskrr   r   ry   kwargs	orig_mask
seq_aranger   mmem_psnum_memsprepend_dimintermediatesoutr(   log_varr)   r   new_mems	attn_mapsr   )r   rv   r   r@   r   r   rD      sb   &



&z$ContinuousTransformerWrapper.forward)rV   r   rL   rW   rM   rW   )FFFNNFNNNNNNNFN)rr   rs   )rE   rF   rG   re   rD   __classcell__r   r   rp   r   rH   >   s>    IrH   c                      sT   e Zd Z			dd fddZe 			
dddZ	dddZ	dddZ  Z	S )ContinuousAutoregressiveWrapperNFnetrH   loss_fnrW   c                   sf   t    || _|j| _|j}|| _t|s+|rt }n|r%tjdd}ntj	dd}|| _
|| _d S r;   )rd   re   r   rI   rU   r   r:   r   L1LossMSELossr   equal_loss_weight_batch)r@   r   r   use_l1_lossr   rU   rp   r   r   re      s   

z(ContinuousAutoregressiveWrapper.__init__r!   Tc                 K  s>  |o| j j}|j}| j j}|j}	|	dksJ d|	dk}
|
r#t|d}g |j|jR \}}}}| j   |}d }t|D ]H}|}t	| j
rR|d d | j
 d f }| j |f|dd|\}}|ddd d d f }| jry|\}}t|||d}t||fd	d
}|r|}q=|d d |d f }|
rt|d}| j | |S )Nr^   zGnumber of dimensions of your start tokens must be greater or equal to 2zn d -> 1 n dT)rr   r   .r0   )r+   r|   r1   z1 n d -> n d)r   r`   rv   trainingndimr	   r   evalranger   rI   rU   r-   r   train)r@   start_tokensseq_lenr+   cache_kvr   should_cache_kvrv   was_trainingnum_dimsno_batchr{   r5   _r   rr   r   net_out	new_cachelast_outputr(   rC   r   r   r   generate  s<   	



z(ContinuousAutoregressiveWrapper.generater^   c                 K  s  |dksJ |}|j }d|vsJ |dd }t|r=d|vs#J d|jd |j }}t||d}	td|	|}
|
|d< t|sS|jd d \}}tj|f||d}|dd }
|| j	d	d
}|d	k}|| }t|
rq|
| }
|jd	 }tj
|f|d|   }tj||d}t|d}|| }|  }|d d d |f }t||}|d d d | f |d d | d f }}d }g }t|D ]/}| j|f|dd|\}}|ddd d d f }| jr|\}}t||}n|}|| qt|dd}| ||}| S )Nr_   r   r   r6   rt   ru   rw   r^   r   r"   zb -> b 1T)ry   r   .r0   r1   )rv   popr   r   r   r3   r   r&   fullr$   randfloorlongr	   amaxitemr   r   r   rU   r-   appendr   r   r(   )r@   r   rollout_stepsr   stepsrv   r   inpr   r   r6   r   valid_tokens_for_rolloutvalid_samplery   batch_arangeseq_end_posmax_end_postargetsrr   predsr   r   	last_predr(   rC   lossr   r   r   forward_rolloutE  sb   


.
z/ContinuousAutoregressiveWrapper.forward_rolloutr_   c                 K  sR  |dkr| j |fd|i|S |d d d df |d d dd f }}d|vs*J |dd }t|rWd|vs<J d|jd |j}}tj||d}	td	|	|}
|
|d< |	dd }
t|
ry|
jd |jd kry|
d d d df }
|
|d< | j
|fi |}| ||}t|
r|jdksJ d
| jrt||
}| S ||
 }| S )Nr_   r   r0   r   r   r6   rt   ru   rw   z/loss should not be reduced if mask is passed in)r   r   r   r   rv   r&   r   r3   r   getr   r   r   r   r   r(   )r@   r   r   r   r   rB   r   r   rv   r   r6   r   r   r   r   r   rD     s0   *
z'ContinuousAutoregressiveWrapper.forward)NFF)r   rH   r   rW   )r!   T)r^   )r_   )
rE   rF   rG   re   r&   no_gradr   r   rD   r   r   r   rp   r   r      s    8
dr   )r    r!   )&
__future__r   r&   r   r   r   r   torch.nnr   torch.nn.functional
functionalr>   torch.distributionsr   r3   einopsr	   r
   r   r   r   %x_transformers.autoregressive_wrapperr   x_transformers.x_transformersr   r   r   r   r   r   r   r   r   r   r-   r:   rH   r   r   r   r   r   <module>   s(    (
	 6