o
    Ni                     @   s   d dl mZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 d dlZd dlmZ dd Zdd	 Zd
d ZdddZdd ZG dd deZdS )    )deepcopyN)Module)TransformerWrapper)	rearrangec                 C   s   | d uS N )vr   r   F/home/ubuntu/.local/lib/python3.10/site-packages/x_transformers/dpo.pyexists   s   r
   c                 C   s   |   D ]}d|_qd S )NF)
parametersrequires_grad)moduleparamr   r   r	   freeze_all_layers_   s   r   c                 C   sL   |d d d df |d d dd f }}| |}|j dd}td||S )N   dimzb n [l], b n -> b n)log_softmaxeinxget_at)modelseqsrc_seqtgt_seqlogitslog_probr   r   r	   log_prob_from_model_and_seq   s   *r   c                 C   sv   t |s
| jddS |jd | jd d kr |d d d df }| | d} | jdd}|jdd}||jdd S )Nr   r   r   g        gh㈵>)min)r
   meanshapemasked_fillsumclamp)	log_probsmasknumdenr   r   r	   masked_mean   s   r(   c                  G   s<   g t t| } t| dkrd S | ^}}|D ]}||@ }q|S )Nr   )filterr
   len)masksr%   
rest_masks	rest_maskr   r   r	   maybe_and_mask&   s   
r.   c                       sB   e Zd Zddddef fddZdd Zddd	d
dZ  ZS )DPOg?N)betapad_idr   c                   s4   t    || _t|| _t| j || _|| _d S r   )super__init__policy_modelr   	ref_modelr   r0   r1   )selfr   r0   r1   	__class__r   r	   r3   4   s   



zDPO.__init__c                 C   s
   | j  S r   )r4   r   )r6   r   r   r	   r   D   s   
zDPO.parameterspreferred_seq_maskunpreferred_seq_maskc                   s*  |j dksJ |j|jksJ t| jr&t s|| jk ts&|| jk	 t  | j  t| j|}t| j|}W d    n1 sGw   Y  t| j	|}t| j	|}	t
|   t
| t fdd||f\}}tfdd||	f\}}	||	 }
|| }t| j|
|   }| S )N   c                    
   t |  S r   r(   t)r:   r   r	   <lambda>k      
 zDPO.forward.<locals>.<lambda>c                    r=   r   r>   r?   )r;   r   r	   rA   l   rB   )ndimr    r
   r1   torchno_gradr5   evalr   r4   r.   mapF
logsigmoidr0   r   )r6   preferred_sequnpreferred_seqprompt_maskr:   r;   ref_preferred_logprobref_unpreferred_logprobpolicy_preferred_logprobpolicy_unpreferred_logprobpolicy_logratiosref_logratioslossesr   r9   r	   forwardG   s.   	




zDPO.forward)__name__
__module____qualname__r   r3   r   rT   __classcell__r   r   r7   r	   r/   3   s    	r/   r   )copyr   rD   torch.nnr   torch.nn.functionalnn
functionalrH   x_transformers.x_transformersr   r   einopsr   r
   r   r   r(   r.   r/   r   r   r   r	   <module>   s    
