o
    Ni                     @  s   d dl mZ d dlZd dlmZmZmZmZ d dlm  mZ	 d dl
mZmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZmZmZ d	d
 Zdd ZG dd deZdS )    )annotationsN)nnTensor	is_tensortensor)Module
ModuleList)EncoderDecoderTransformerWrapper)AutoregressiveWrapper)	Rearrange)	rearrangereducerepeatc                 C  s   | d uS N )vr   r   J/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/gpt_vae.pyexists   s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default   s   r   c                      sv   e Zd Zddde e dddddd
d fddZedd Z	dddZe	 		dddZ
		dddZ  ZS )GPTVAEN@            ?g              ?)

dim_latentattn_dim_headheads
enc_kwargs
dec_kwargsvae_kl_loss_weightvae_kl_div_floorlatents_dropout_probpad_idencoderr!   dictr"   r'   Module | Nonec                  s   t    t||}t|s%t||d ddtd||||d||	d}|| _tt	||d t
ddd| _tt	||t
d| _t||td||||d||
d	| _t| j|d
| _|| _|| _|| _t|| _d S )N   T)dimdepthr   r    )
num_tokensmax_seq_lenreturn_only_embedaverage_pool_embedattn_layers   zb (two d) -> two b d)twozb d -> b 1 d)r-   r.   r1   )ignore_indexr   )super__init__r   r   r   r	   r'   r   
SequentialLinearr   to_latent_mean_log_variancefrom_latent_to_prepend_tokenr
   decoderr   ar_wrapped_decoderr&   r$   r#   Dropoutlatents_dropout)selfr-   r+   r,   	enc_depthr.   r   r   r    r!   r"   r#   r$   r%   r&   r'   kwargs	__class__r   r   r6   !   s^   



zGPTVAE.__init__c                 C  s   t |  jS r   )next
parametersdevice)r?   r   r   r   rF   l   s   zGPTVAE.deviceFc           	      C  sX   || j k}| j||d}| |\}}d|  }||t|  }|s&|S |||ffS )N)maskr   )r&   r'   r9   exptorch
randn_like)	r?   seqreturn_mean_log_varrG   pooledlatents_meanlatents_log_varlatents_stdlatentsr   r   r   encode_to_latentsp   s   
zGPTVAE.encode_to_latentsc           	      K  s   |j dv sJ |j dkr|jd nd}t|r$t|rJ d| |}d }t|rFt|s5t|| jd}|j dkrAt|d|d}| |}| j	j
||fd	|i|}|S )
N>   r*   r2   r2   r   r*   zJlatents should not be passed in if given the seq from which to derive themrF   zd -> b d)bprepend_embeds)ndimshaper   rR   r   r   rF   r   r:   r<   generate)	r?   promptsseq_lenrQ   seq_for_latentsgenerate_kwargsbatchrU   	generatedr   r   r   rX      s*   	


zGPTVAE.generatec                 C  s   |j d |j}}t||}| j|dd\}\}}| tj|f|d  }	| |}
| j	||
|	
 d}d| |  | d  }t|| j }|jdd	 }||| j  }|s_|S ||f}||fS )
Nr   T)rL   rS   )rU   seq_start_posr   r   r   )r+   )rW   rF   r   rR   r>   rI   onesboolr:   r<   longrH   squareFrelur$   summeanr#   )r?   rK   r[   return_all_lossesr]   rF   rQ   rN   rO   dropped_latentsrU   ar_lossvae_kl_loss
total_losslossesr   r   r   forward   s:   

zGPTVAE.forward)r!   r(   r"   r(   r'   r)   )F)NN)NF)__name__
__module____qualname__r(   r6   propertyrF   rR   rI   no_gradrX   rn   __classcell__r   r   rB   r   r       s.    	K

,r   )
__future__r   rI   r   r   r   r   torch.nn.functional
functionalrd   torch.nnr   r   x_transformers.x_transformersr	   r
   r   %x_transformers.autoregressive_wrapperr   einops.layers.torchr   einopsr   r   r   r   r   r   r   r   r   r   <module>   s    