o
    Ni-                     @  s
  d dl mZ d dlZd dlZd dlmZmZmZmZmZ d dl	m  m
Z d dlmZmZ d dlmZmZmZ d dlmZmZmZ d dlmZmZ d dlmZmZmZmZmZm Z  d	d
 Z!dd Z"dddZ#dd Z$e#dZ%dd Z&G dd deZ'G dd deZ(dS )    )annotationsN)nnTensor	is_tensortensorarange)Module
ModuleList)EncoderDecoderTransformerWrapper)gumbel_sampletop_ptop_k)	RearrangeReduce)	rearrangereducerepeateinsumpackunpackc                 C  s   | d uS N )vr   r   S/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/free_transformer.pyexists   s   r   c                 C  s   t | r| S |S r   )r   )r   dr   r   r   default"   s   r   #B;c                 C  s   |  | S r   )	clamp_minlog)tepsr   r   r   r!   %   s   r!   c                   s(   t | g \}d fdd	}||fS )Nc                   s   t | }t| |\}|S r   )r   r   )outinv_patternunpackedpatternpsr   r   inverse+   s   
z"pack_with_inverse.<locals>.inverser   )r   )r"   r(   packedr*   r   r'   r   pack_with_inverse(   s   r,      c                 C  s8   |   }d| }|t|  |t|    jdd S )N      ?dim)sigmoidF
logsigmoidsum)logitsprobnot_probr   r   r   binary_entropy6   s   (r9   c                      s2   e Zd Zdef fdd	Z			dddZ  ZS )	BinaryMapper   c                   s   t    || _d| | _dt| }t| jd d d f |dk  }| jd|dd | jd|dd || _	| jdt
ddd d S )	Nr-   r   	power_twoF)
persistentcodeszero        )super__init__bits	num_codesr   bitwise_andbyteboolregister_bufferkl_loss_thresholdr   )selfrC   rI   r<   r>   	__class__r   r   rB   <   s   

(zBinaryMapper.__init__r.   Nc                 C  s   t || j}t || j}|jd | jksJ d| j ||  }t||k }| j| j	dd}t
|| j }| j}	|rU| jt t| }
t
|
| j  }	|r{tt
|| j dtt
| | j  d  }|| |  }||	fS )Nr/   z%logits must have a last dimension of r0   z!... bits, codes bits -> ... codes)r   trainingshaperC   r2   torch	rand_likelongr<   r5   r3   one_hotrD   floatr?   NATr9   relurI   meanr   r4   r>   expdetach)rJ   r6   temperaturestraight_throughcalc_aux_lossprob_for_samplesampled_bitsindicesrR   aux_kl_losskl_divsoft_Gr   r   r   forwardQ   s&    zBinaryMapper.forward)r.   NN)__name__
__module____qualname__rT   rB   rb   __classcell__r   r   rK   r   r:   ;   s    r:   c                      s   e Zd Zddddddee e e ddd	d
d fddZedd Z			dddZe	
 deedddfdddZ		d ddZ  ZS )!FreeTransformerr;   N@         Tr.   r@   r/   )	enc_depth
dim_latentattn_dim_headheadslatent_bitsper_token_latentsrI   binary_mapper_kwargs
enc_kwargs
dec_kwargskl_loss_weightlatent_dropout_probpad_idrq   dictrr   rs   c                  s0  t    t||}t||| _tj||dd| _tt	
|d | _|| _td||||	dddddd	||| _tj||
dd| _t|
|fi || _tj| jj|dd| _t|| _|dkrutd||||	dddd||nd | _|dks~J td||||	dddd||| _|| _|| _d S )	NF)biasg{Gz?T)	r1   depthrm   rn   
only_crosscross_attenduse_rmsnormrotary_pos_embpre_norm_has_final_normr   )r1   ry   rm   rn   r}   r|   r~   r   )rA   rB   r   r   	Embedding	token_embLineartoken_unembed	ParameterrO   randnquery_token_for_latentsrp   r
   encoderto_latent_bit_logitsr:   binary_mapperrD   from_latent_to_conditionDropoutlatent_dropoutr   decoder_headdecoder_tailrv   rt   )rJ   
num_tokensr1   dec_head_depthdec_tail_depthmax_seq_lenrk   rl   rm   rn   ro   rp   rI   rq   rr   rs   rt   ru   rv   kwargsrK   r   r   rB      s|   


	
	
zFreeTransformer.__init__c                 C  s   t |  jS r   )next
parametersdevice)rJ   r   r   r   r      s   zFreeTransformer.deviceFc                 C  s   t || j}g |jd d |jR \}}}t| jd|d}t }	|r8t|d|d}tj||d}
|	j	|
|
d | j
|f||d|	}| |}| j||d	\}}|sV|S ||fS )
Nr-   
d -> b 1 dbzb 1 d -> b n d)nr   )poscontext_pos)contextcontext_mask)r[   )r   rp   rN   r   r   r   rw   rO   r   updater   r   r   )rJ   decoder_head_embedsmaskreturn_kl_lossrp   batchseq_lenr   query_tokensencoder_kwargs
rotary_pospooled
bit_logitsone_hot_latentskl_lossr   r   r   encode_to_latents   s0    
z!FreeTransformer.encode_to_latentsg?)threslogit_filter_kwargsc                 C  s  t |d\}}|jd }d }	t|rLt|st|| jd}|jtjtj	fv r0t
|| jj }|jdkr=t|d|d}n
|jdkrGt|d}| |}	d  }
}|jd	 }|}| |}ttd|| D ]`}t| jrw| j||
d
d\}}n|d }}t|
r|
jnd}| j||||	d
d\}}|d d d	f }| |}||fi |}t|}t||fd\}}t|| |fd\}}|r|}
|}qe||S )Nz* nr   r   r;   r   r   r-   zb d -> b 1 dr/   T)cachereturn_hiddens)r   seq_pos_offsetself_attn_kv_residualsr   zb *zb * d)r,   rN   r   r   r   r   dtyperO   intrQ   r3   rR   r   rD   rS   ndimr   r   r   r   rangemaxr   cache_lengthr   r   r   r   )rJ   promptsr   latentsfilter_logits_fnr   use_kv_cacheinverse_packr   	condition
head_cache
tail_cache
prompt_len	generatedtokens_
head_embednext_head_cacher   
tail_embednext_tail_cacher6   sampledr   r   r   generate  sD   










zFreeTransformer.generatec                 C  s"  |j d |j}}|d d d df |d d dd f }}| |}t| jr-| |}t|rH| |}t| jr@| |}|| jk}	d}
n	|}|| jk}	d }
| j||	|
dd\}}| |}| |}| j	||d}| 
|}tjt|d|| jd	}||| j  }|s|S ||f}||fS )
Nr   r/   r;   FT)r   rp   r   )r   zb n l -> b l n)ignore_index)rN   r   r   r   r   rv   r   r   r   r   r   r3   cross_entropyr   rt   )rJ   seqseq_for_latentsreturn_all_lossesr   r   labelsr   tokens_for_latentsencoder_maskrp   r   r   r   r6   ar_loss
total_losslossesr   r   r   rb   \  s@   *










	zFreeTransformer.forward)rq   rw   rr   rw   rs   rw   )NFN)r   rw   )NF)rc   rd   re   rT   rw   rB   propertyr   r   rO   no_gradr   r   rb   rf   r   r   rK   r   rg      s<    	Z

+Prg   )r   ))
__future__r   mathrO   r   r   r   r   r   torch.nn.functional
functionalr3   torch.nnr   r	   x_transformers.x_transformersr
   r   r   %x_transformers.autoregressive_wrapperr   r   r   einops.layers.torchr   r   einopsr   r   r   r   r   r   r   r   r!   r,   rT   r9   r:   rg   r   r   r   r   <module>   s$     

I