o
    	TiZ                     @   sV  d dl Z d dlZd dlmZ d dlmZ d dlmZmZ d dl	Z
d dlZd dlmZmZ d#deded	efd
dZd$dejdejdee d	ejfddZd%dejdejded	ejfddZd%dejdejded	ejfddZG dd dZG dd dZ				d&deeef deeeej ejf  deej deej d eej d	ejfd!d"ZdS )'    N)Mapping)contextmanager)OptionalUnion)is_torch_npu_availableis_torch_xpu_available/nestedsepreturnc                    s4   dt dtdt ddf fdd i } | d| |S )	z>Flatten dictionary and concatenate nested keys with separator.nestprefixintor   Nc                    s`   |   D ])\}}|v rtd d| dt|tr' |||  | q|||| < qd S )Nzseparator 'z' not allowed to be in key '')items
ValueError
isinstancer   )r   r   r   kvrecurser
    </home/ubuntu/.local/lib/python3.10/site-packages/trl/core.pyr      s   
zflatten_dict.<locals>.recurse )dictstr)r	   r
   flatr   r   r   flatten_dict   s    	r   valuesmaskaxisc                 C   s8   |dur| | j |d|j |d S | |   |   S )z,Compute mean of tensor with a masked values.N)r    )sum)r   r   r    r   r   r   masked_mean+   s   r"   Tunbiasedc                 C   sT   t | |}| | }t |d |}|r(| }|dkrtd||d  }|| }|S )z.Compute variance of tensor with masked values.   r   zThe sum of the mask is zero, which can happen when `mini_batch_size=1`;try increase the `mini_batch_size` or `gradient_accumulation_steps`   )r"   r!   r   )r   r   r#   meancentered_valuesvariancemask_sumbessel_correctionr   r   r   
masked_var3   s   
r+   
shift_meanc                 C   s<   t | |t| |}}| | t|d  }|s||7 }|S )z!Whiten values with masked values.g:0yE>)r"   r+   torchrsqrt)r   r   r,   r&   varwhitenedr   r   r   masked_whitenF   s
   r1   c                   @   s0   e Zd ZdZdedefddZdefddZd	S )
LengthSamplerz
    Samples a length
    	min_value	max_valuec                 C   s   t t||| _d S N)listranger   )selfr3   r4   r   r   r   __init__T   s   zLengthSampler.__init__r   c                 C   s   t j| jS r5   )nprandomchoicer   )r8   r   r   r   __call__W   s   zLengthSampler.__call__N)__name__
__module____qualname____doc__intr9   r=   r   r   r   r   r2   O   s    r2   c                   @   s    e Zd ZdZeedd ZdS )PPODecoratorsFc                 c   s    d V  | j r?t rt  tj  t  d S t r+t  tj  t  d S tj	
 rAt  tj	  t  d S d S d S r5   )optimize_device_cacher   gccollectr-   xpuempty_cacher   npucudais_available)clsr   r   r   empty_device_cache^   s"   



	z PPODecorators.empty_device_cacheN)r>   r?   r@   rD   classmethodr   rM   r   r   r   r   rC   [   s
    rC   shape	generatordevicedtypelayoutc              	      s>  |d }p
t j|pt d}durYttsjjnd jj}||jkrE|dkrEd|dkrDtd| d| d| dt n||jkrY|d	krYt	d
| d| dttrht
dkrhd ttrddd   fddt|D }t j|dd|}|S t j d|}|S )zA helper function to create random tensors on the desired `device` with the desired `dtype`. When
    passing a list of generators, you can seed each batch size individually. If CPU generators are passed, the tensor
    is always created on the CPU.
    r   cpuNmpszBThe passed generator was created on 'cpu' even though a tensor on zB was expected. Tensors will be created on 'cpu' and then moved to zk. Note that one can probably slighly speed up this function by passing a generator that was created on the z device.rJ   zCannot generate a z! tensor from a generator of type .r%   )r%   c              	      s$   g | ]}t j|  d qS )rP   rQ   rR   rS   )r-   randn).0irR   rP   rS   rand_devicerO   r   r   
<listcomp>   s    z randn_tensor.<locals>.<listcomp>)dimrW   )r-   stridedrQ   r   r6   typewarningswarnUserWarningr   lenr7   cattorX   )rO   rP   rQ   rR   rS   
batch_sizegen_device_typelatentsr   r[   r   randn_tensorq   s>   

rj   )r   r5   )T)NNNN) rE   ra   collections.abcr   
contextlibr   typingr   r   numpyr:   r-   transformersr   r   r   r   r   Tensorboolr"   r+   r1   r2   rC   tupler6   	GeneratorrQ   rR   rS   rj   r   r   r   r   <module>   s>   &""	
