o
    Ni>4                     @  s  d dl mZ d dlmZ d dlZd dlmZ d dlmZmZ d dlm	Z	m
Z
mZmZmZmZmZ d dlm	  mZ d dlmZmZ d dlmZmZ d dlZd d	lmZmZmZmZ d d
lm Z  dd Z!dd Z"dddZ#G dd deZ$e$j%Z&G dd deZ'dS )    )annotations)randomN)Function)Module
ModuleList)nncatstacktensorTensorarangecartesian_prod)eval_decoratormin_p)DecoderTransformerWrapper)	rearrangerepeatpackunpack)	Rearrangec                 C  s   | d uS N )vr   r   W/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/belief_state_wrapper.pyexists    s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default#   s   r      c           
      C  s   t |s	| |S g | jd d | jR \}}}t||d}td||}td||d}|jddd}	| j	dkrDt
|	d	| jd d
}	| ||	S )N   devicej, i -> i jzi j, j,T)dim
descending   z... -> ... d)r   )r   flipshaper!   r   einxlesswhereargsortndimr   gather)
xr$   lensbatchseq_lenr!   seqmask
masked_seqflip_indicesr   r   r   r'   (   s   
 
r'   c                   @  s$   e Zd Zedd Zedd ZdS )DetachMultiplec                 G  s(   t dd |D }|D ]}|  q|S )Nc                 s  s    | ]}|  V  qd S r   )detach).0tr   r   r   	<genexpr>?   s    z)DetachMultiple.forward.<locals>.<genexpr>)tuplerequires_grad_)selfctxtensorsdetached_tensorsdetached_tensorr   r   r   forward=   s   
zDetachMultiple.forwardc                 G  s   |S r   r   )r>   r?   gradsr   r   r   backwardF   s   zDetachMultiple.backwardN)__name__
__module____qualname__classmethodrC   rE   r   r   r   r   r7   ;   s
    
r7   c                	      sp   e Zd ZdZ									dd fddZe edddee	dddfd ddZ
		d!d"ddZ  ZS )#BeliefStateWrapperz8
    Figure 13. in https://arxiv.org/abs/2410.23506
    N      ?F      ?forward_decoderr   backward_decoderTransformerWrapper | None!train_frac_forward_backward_pairsfloat	text_headModule | Nonebackward_ar_loss_weightpred_distance_loss_weightc              
     s  t    t||}|j|jksJ d|j|jksJ d|j}|j}|j}|| _tt	|| _
tjj| j
dd t|sUtt|d |t t||d }|| _t|
|| _|rttt|d |t t|| jnd | _|| _d|	  k rdk sJ  J || _|	| _|rttdtd	|t t||d | _|| _|| _d
|  k rdksJ  J || _|dk | _|| _|dk| _| dt d| jg | jj| _d S )NzBforward and backwards model must have the same embedding dimensionz?forward and backwards model must have the same number of tokensg{Gz?)stdr           rK   z... -> ... 1r   r   loss_weights)!super__init__r   emb_dim
num_tokensmax_seq_lenr   	Parametertorchzerossuffix_tokeninitnormal_r   
SequentialLinear	LeakyReLUrR   max_pred_distanceto_distance_logitsrU   cond_on_distancecond_on_distance_probr   to_distance_condrM   rN   train_frac_fb_pairsneeds_subsample_fb_pairsrT   needs_loss_weightregister_bufferr
   )r>   rM   rN   rP   rR   rT   pred_distancerU   ri   rj   rg   r$   r\   r]   	__class__r   r   rZ   T   s\   


	

zBeliefStateWrapper.__init__g      ?g?)r   suffixTensor | Nonec	                  K  s  | j |dk|j}
}}t|gd\}}|j\}}| j}|r&|d}| j}|}d }t| jd}t	|d|d}|sat
|rN|jdkrIt	|d|d}|d}| j||dd	}|d d d
d f }ntjd| j|df|d}| j|dd}t|D ]~}||f|r|nd dd|d|	\}}|d d d
d f }|st||fd
d}nt||fd
d}|r| jjr|}| |jdd
d\}}|s|n|}|d d d
f }|r|jd
dd}n||fi |}tj|| d
d}t|d}tj||fd
d}qx|d d |d f }t||d\}|S )NrW   z* dr   z
d -> 1 1 dz1 1 d -> b 1 dbn -> b nTprepend_embedsreturn_embeddingsr#   r   r    rz   )ry   return_intermediatesrz   cacher$   r   )r$   keepdimz* n)r]   r!   r   r(   rM   r'   rN   r   ra   r   r   r-   r_   randintr\   ranger   can_cache_kvrR   chunkargmaxFsoftmaxmultinomialr   ) r>   promptsr2   temperaturecache_kvrs   filter_logits_fnfilter_kwargsdecode_backwardskwargsr]   greedyr!   batch_psr1   orig_seq_lenmain_decoderoutr}   suffix_sos_tokenssuffix_embedprefix_embed_embeds	new_cachelast_embedsforward_logitsbackward_logitslogitssamplefiltered_logitsprobsr   r   r   generate_with_suffix_cond   sj   





	z,BeliefStateWrapper.generate_with_suffix_condr0   loss_weight_by_fb_indicescallable | Nonec           %      C  s  g |j |jR \}}}|}t|r$tdt||d|}t||d}| j|dd}	t	||d}
t
| jd|d}| j|
|dd	}t	||d}t|	|\}	}t||d}t||d
 }|jdd\}}|| dk}|| }| jr|j d }tt|| j d
}tj||dd | }|| }|jdd\}}|d
 |d
 }}|d d |f |d d |f }}t||fdd}t|	d d |f |d d |f fdd}| |}tjt|ddd|| jrdnddd}| jot | jk }|r||  }| |}|| }t| j r.|s.|  |} || j!| j"d
 d}!t
|!d|d}!tt| d|!}"||"| j#  }t$| jt|}#|#ryt|ddd}| jrLt%d|| j&}t|ru||}$|$j'd
krct%d||$}n|$j'dkrqt%d||$}nt(d|) }|S )Nr"   r    r#   Tr{   )r0   z
d -> b 1 dru   rx   r   r~   r   r   zb n (fb l) -> b l (fb n))fbnonemean)	reductionignore_index)maxrw   zb n l -> b l nzb (fb n) -> b fb nz
b fb n, fbz	b fb n, nzb fb n, n fbzinvalid loss weight dims)*r(   r!   r   r)   r*   r   r_   r+   rM   r'   r   ra   rN   detach_multipler   unbindrm   r   intrl   randpermr   rR   r   cross_entropyr   rn   ri   r   rj   rQ   rk   rh   clamprg   rU   r   multiplyrX   r-   
ValueErrorr   )%r>   r3   r0   r   r1   r2   r!   seq_for_labelsr4   forward_embedsbackward_seqsuffix_tokensbackward_embeds
seq_arangefb_pairsfibi
valid_mask	num_pairsnum_subsampledrand_subsampled_indices	labels_fi	labels_biforward_labelsbackward_labelslabels	fb_embedsr   lossri   distancedistance_conddistance_logitsdistance_labelspred_dist_lossrn   loss_weightr   r   r   rC     s   
"
	


zBeliefStateWrapper.forward)	NrK   NrK   FrK   FrL   N)rM   r   rN   rO   rP   rQ   rR   rS   rT   rQ   rU   rQ   )rs   rt   )NN)r0   rt   r   r   )rF   rG   rH   __doc__rZ   r_   no_gradr   r   dictr   rC   __classcell__r   r   rq   r   rJ   O   s4    [qrJ   )r   N)(
__future__r   r   r_   torch.autogradr   torch.nnr   r   r   r   r	   r
   r   r   r   torch.nn.functional
functionalr   %x_transformers.autoregressive_wrapperr   r   x_transformers.x_transformersr   r   r)   einopsr   r   r   r   einops.layers.torchr   r   r   r'   r7   applyr   rJ   r   r   r   r   <module>   s$   $
