o
    Ni-                     @  s<  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlmZ d dlmZmZ d dlm	  mZ d dlZd d	lmZmZmZmZ d d
lmZ edg dZdd Zdd Zd+ddZd,ddZdd Z d-ddZ!dd Z"dd  Z#d.d!d"Z$d#d$ Z%d%d& Z&G d'd( d(eZ'G d)d* d*eZ(dS )/    )annotationsNrandom)nullcontext)
namedtuple)nnpi)Module)grad_and_valuevmap)	rearrangerepeatpackunpack)TransformerWrapperLosses)lossgenerator_losscritic_lossc                 C  s   | d uS N )valr   r   \/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/nonautoregressive_wrapper.pyexists      r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default   s   r   ?c                 C  sL   t d| | jd  }| j|dd\}}t| td}|d|| |S )N   dimz-inf   )mathceilshapetopktorch	full_likefloatscatter_)logitsthreskr   indprobsr   r   r   top_k!   s
   r0   绽|=c                 C  s   t | | S r   )r'   log)tepsr   r   r   r2   (   s   r2   c                 C  s"   t | dd}tt|  S )Nr   r   )r'   
zeros_likeuniform_r2   )r3   noiser   r   r   gumbel_noise+   s   r8         ?r   c                 C  s   | t |d t|  j|dS )Nr1   r    )maxr8   argmax)r3   temperaturer!   r   r   r   gumbel_sample/   s   r=   c                 C  s
   t  | k S r   r   )probr   r   r   sample_prob4   s   
r?   c                   C  s   t dS )Ng      ?)r?   r   r   r   r   	coin_flip7   r   r@   c                 C  s   g | j | jR \}}}| jddd| j|d}tj||f|d}||  d}|jddjdd }|  jddd}	||	8 }||k }
|
	|  d |
S )Nr   T)r!   keepdimmindevicer    F)
r%   rE   sumclampr'   randmasked_fillargsortr)   masked_fill_)maskr>   min_maskbatchseqrE   num_to_maskr+   randpermnum_paddingsubset_maskr   r   r   get_mask_subset_prob<   s   rT   c                 C  s   d|  S Nr   r   r3   r   r   r   linear_scheduleM   r   rW   c                 C  s   t | t d S )z" https://arxiv.org/abs/2202.04200 r"   )r'   cosr   rV   r   r   r   cosine_scheduleP   s   rY   c                      s$   e Zd Z fddZdd Z  ZS )
SelfCriticc                   s*   t    || _|jj}t|d| _d S rU   )super__init__netattn_layersr!   r   Linear	to_logits)selfr]   r!   	__class__r   r   r\   X   s   
zSelfCritic.__init__c                 C  s   | j |dd}| |S )NT)return_embeddings)r]   r`   )ra   xembedr   r   r   forward_   s   
zSelfCritic.forward)__name__
__module____qualname__r\   rg   __classcell__r   r   rb   r   rZ   W   s    rZ   c                      sf   e Zd ZdZdddddddddd	d
dd fddZe 						dddZ			dddZ  Z	S )NonAutoregressiveWrapperzO
    https://arxiv.org/abs/1904.09324
    https://arxiv.org/abs/2202.04200
       Fg      ?g333333?g?linearNr9   T)steps	self_condself_cond_train_probno_replace_probrandom_token_probschedulecan_mask_prev_unmaskedtoken_criticself_token_criticcritic_loss_weightuse_simple_mdlm_loss_weightrv   TransformerWrapper | Nonec                  s  t    |rt|
rJ || _|j}|| _|j| _|| _|| _|| _	|j
| _
|| _t|r1|| _|dkr9t| _n|dkrAt| _ntd| d | _|r]tt| j  fdd}|| _|	| _|| _|r~tt|| _|rxtj||ddnd | _|| _|
| _|rt|| _|| _ d S )Nrn   cosinezinvalid schedule c                   s    | \}}|d|  S )Nr9   r   )timesgradvaluegrad_and_value_schedule_fnr   r   loss_weight_fn   s   z9NonAutoregressiveWrapper.__init__.<locals>.loss_weight_fnF)bias)!r[   r\   r   r]   emb_dimr!   
num_tokensmask_idrr   rs   max_seq_lenro   callableschedule_fnrW   rY   
ValueErrorr   r   r
   ru   rp   r   	Parameterr'   randn
null_embedr_   to_self_condrq   rv   rZ   rx   )ra   r]   r   ro   rp   rq   rr   rs   rt   ru   rv   rw   rx   ry   r!   r   rb   r   r   r\   i   sB   


z!NonAutoregressiveWrapper.__init__ffffff?c                 K  s4  t | }t|d}t| j j}| j}|   t	dd| j
d }	|| jf}
tj|
| j|d}tj|
d|d}| |	dd  | j  }| j}|rO| jnd }t| tt| j
D ]\}}|rh| |nd }| j|f|dd|\}}|r||}t |rt||}|| j
 }|| }|t|d jdd	}t|t|dd
}t|||}t | jr| |}t|d}||t| |  }nd|jdd	 }|dt|d}t|d}|dkr	 | j s|!| t"|j#j }|j$|dd	j%}tj&|tj'd(d|d}|!|| j}q]| )| |rt|d}|S )Nr           r9   rD   T)
sum_embedsreturn_logits_and_embeddingsgMbP?r   r    r<   zb n 1 -> b nr"   zb n -> b n 1r   )dtypez1 n -> n)*r   r   nextr]   
parametersrE   trainingevalr'   linspacero   r   fullr   r   longrp   r   ziptolistreversedranger   r0   r:   softmaxr=   whererv   r   r8   gatherru   rI   finfor   r&   indicesr5   boolscattertrain)ra   
batch_sizestart_temperaturefilter_thresnoise_level_scalekwargs
sample_onerE   was_trainingr|   r%   rO   rL   all_mask_num_tokenshas_self_cond
last_embedmask_num_tokenssteps_until_x0rp   r+   embedsannealing_scaler<   r/   sampled_idsscoresmask_indicesr   r   r   generate   sb   
	

 








z!NonAutoregressiveWrapper.generatec           !      K  s  g |j |jR \}}}|| jksJ | }	tj||ddd}
tj||f|djdd	 }| 
|
}|| jdd}|t|dk }| }d}| jd	krct rc|| j8 }t|| j}|| M }| jd	krt rt|| j| }tjd| j||f|d}t|||}|| M }t|| j|}| jr| j}t| jrt  | j|fd
di| }W d    n1 sw   Y  |j| |d |rtjnt}|  | j|fi |}W d    n1 sw   Y  | jjst j!nt j"}t#| j$r|t|d|	dd}| $|
}t%&d||}|| ' }n	||| |	| }t#| j(r*|r0t)||d S t*|t+|t, d}t|||	}| (|}||	k	 }t -t|d|}|r]|} d }n||| j.  } t)| ||S )NrD   r   r   r   r    r9   rB   zb -> b 1r   rd   T)r   zb n l -> b l nnone)	reductionzb n, br   z... 1 -> ...)/r%   rE   r   cloner'   emptyr6   rH   rJ   r)   r   rG   r   rr   r@   rT   rs   randintr   r   r   rp   r   r?   rq   no_gradr]   detachupdater   r   output_is_log_probFcross_entropynll_lossr   r   einxmultiplymeanrv   r   r=   r   r    binary_cross_entropy_with_logitsrx   )!ra   re   only_train_generatoronly_train_criticgenerator_sample_temperaturer   bnrE   orig_seq
rand_timesbatched_randperm
rand_probsnum_tokens_maskrL   replace_mask_id_maskfrac_seq_leftno_replace_prob_maskrandom_token_prob_maskrandom_tokensmaskedrp   contextr+   loss_fnr   loss_weightsr   	generatedcritic_logitscritic_labelsr   
total_lossr   r   r   rg     sx   







z NonAutoregressiveWrapper.forward)rv   rz   )Nr9   r   r9   )FFN)
rh   ri   rj   __doc__r\   r'   r   r   rg   rk   r   r   rb   r   rl   c   s0    
SYrl   )r   )r1   )r9   r   )r   ))
__future__r   r#   r   
contextlibr   collectionsr   r'   r   r   torch.nnr	   
torch.funcr
   r   torch.nn.functional
functionalr   r   einopsr   r   r   r   x_transformers.x_transformersr   r   r   r   r0   r2   r8   r=   r?   r@   rT   rW   rY   rZ   rl   r   r   r   r   <module>   s6    



