o
    	Ti                     @   sr  d dl Z d dlmZ d dlZd dlZd dlZd dlmZ d dl m	Z	m
Z
 d dlmZ d dlmZmZmZmZ d dlZd dlZd dlZd dlm  mZ d dlZd dlmZmZ d dlm Z  d dl!m"Z"m#Z# d d	l$m%Z% d d
l&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- d dl.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4 ddl5m6Z6 e1 rd dl7m8Z8 d dl9m:Z: d dl;m<Z< d dl=m>Z> e- rd dl?Z?e0 rd dl@mAZAmBZB e	G dd dZCe	G dd dZD	 		ddeEejF deGdeHdeeG dejFf
ddZIe	G dd  d ZJe	G d!d" d"ZKeL 	#dd$ejFdeMejFejFeGf fd%d&ZNd'e(deOeHePf fd(d)ZQdd+ejFd,eGd-eeGePf d.eGdejFf
d/d0ZRd1ejjSddfd2d3ZTdd5d6ZUG d7d8 d8ZVd9d: ZWd;e6dee' fd<d=ZXdeeOeHeGf  fd>d?ZYd;e6dd@fdAdBZZddDdEZ[ddFdGZ\dHej]ddfdIdJZ^dKZ_dLZ`e	G dMdN dNe+Zae	G dOdP dPe,ZbejcfdQejFdejFfdRdSZdd1ejjSdTejFdUeGdVeGdeMejFejFejFf f
dWdXZed1ejjSdTejFdUeGde/fdYdZZf	[dd1ejjSd\eGd]egd^egdejjSf
d_d`ZhdaeGdUeGdbejFdejFfdcddZideejjSdfejFdUeGdge)deMejFejFf f
dhdiZjeL d1ejjSdfejFdjeGdUeGdge)f
dkdlZkdmeeG dneGdoeOeHeEeG f dpeGdqeOeHeEeG f dreGdseOeHeEeG f fdtduZldveGdqeOeHeEeG f dseOeHeEeG f fdwdxZmdyejFdaeGdUeGdeMejFejFf fdzd{Zndd|d}Zod~ejFde*deEeH fddZp				ddeeH deHdeHdeeH deEeH deeH deHdeeH deeH deeH deeH de"fddZqdeeH fddZrdeHdej]ddfddZsdejFdejFdeejFeMejFdf f fddZtdejFdejFdeejFeMejFdf f fddZudejFfddZvddeGdejFfddZw	ddeEeH deEeH deOeHeEeP f deEeP deGdeGddfddZxdS )    N)deque)	dataclassfield)version)AnyLiteralOptionalUnion)AcceleratorPartialState)AcceleratorState)	ModelCardModelCardData)pad_sequence)BitsAndBytesConfigEvalPredictionGenerationConfigPreTrainedTokenizerBaseTrainerStateTrainingArgumentsis_comet_available)ModelOutputis_peft_availableis_rich_availableis_torch_mlu_availableis_torch_npu_availableis_torch_xpu_available   )ModelConfig)Console)Panel)Table)Text)
LoraConfig
PeftConfigc                   @   sz   e Zd ZU dZeed< dZeed< dZeed< dZ	e
ed< d	Ze
ed
< dd Zdeee
ef  dee
ejf fddZdS )DataCollatorForChatMLz3
    Data collator for ChatML format datasets.
    	tokenizerignore_indexN
max_lengthprompt
prompt_keymessagesmessages_keyc                 C   s6   | j jd u r
td| jd u rt| j jd| _d S d S )NzTThe tokenizer does not have a pad token. Please set `pad_token_id` in the tokenizer.i   )r&   pad_token_id
ValueErrorr)   minmodel_max_lengthself r4   E/home/ubuntu/.local/lib/python3.10/site-packages/trl/trainer/utils.py__post_init__P   s
   
z#DataCollatorForChatML.__post_init__examplesreturnc              	   C   s4  g }g }g }g }g }|D ]}| | jd }|d u r+|| j d d }	| jj|	ddd}d|vri|| j }
| jj|
ddd}| j|d| jdd dd}||d  d|v r\||d  n,|dgt|d   n||d  d|v r|||d  n|dgt|d   | j|dt|d dd dd}||d  ||d  | jgt|d  }t|d }|d |d  ||d < || qd	d
 |D }dd
 |D }dd
 |D }t	|d| jj
d}t	|ddd}t	|d| jd}dd
 |D }dd
 |D }t	|d| jj
d}t	|ddd}|||||dS )NFT)tokenizeadd_generation_prompt	input_ids)
truncationr)   paddingreturn_tensorsadd_special_tokensattention_mask   c                 S      g | ]
}t j|t jd qS dtypetorchtensorlong.0idsr4   r4   r5   
<listcomp>       z2DataCollatorForChatML.__call__.<locals>.<listcomp>c                 S   rC   rD   rG   rL   maskr4   r4   r5   rN      rO   c                 S   rC   rD   rG   )rL   labelr4   r4   r5   rN      rO   left)padding_sidepadding_valuer   c                 S   rC   rD   rG   rK   r4   r4   r5   rN      rO   c                 S   rC   rD   rG   rP   r4   r4   r5   rN      rO   )r<   rA   labelspromptsprompt_attention_mask)getr+   r-   r&   apply_chat_templater)   appendlenr(   padr.   )r3   r7   r<   rA   prompts_input_idsrX   rV   exampleformatted_promptr*   messageformatted_messagetokenized_messagetokenized_promptrR   completion_start_idxr4   r4   r5   __call__W   s~   

	zDataCollatorForChatML.__call__)__name__
__module____qualname____doc__r   __annotations__r(   intr)   r+   strr-   r6   listdictr   rH   Tensorrf   r4   r4   r4   r5   r%   D   s   
 ,r%   c                   @   sp   e Zd ZU dZeed< dZeee	f ed< dZ
ee ed< dZe	ed< d	eee	ef  d
ee	ef fddZdS )RewardDataCollatorWithPaddinga\  
    Reward DataCollator class that pads the inputs to the maximum length of the batch.

    Args:
        tokenizer (`PreTrainedTokenizerBase`):
            The tokenizer used for encoding the data.
        padding (`Union[bool, str, `PaddingStrategy`]`, `optional`, defaults to `True`):
            padding_strategy to pass to the tokenizer.
        pad_to_multiple_of (`int` or `None`, `optional`, defaults to `None`):
            If set will pad the sequence to a multiple of the provided value.
        return_tensors (`str`, `optional`, defaults to `"pt"`):
            The tensor type to use.
    r&   Tr>   Npad_to_multiple_ofptr?   featuresr8   c           
      C   s   g }g }g }d|d v }|D ]7}d|vs d|vs d|vs d|vr$t d||d |d d ||d |d d |rE||d  q| jj|| j| j| jd	}| jj|| j| j| jd	}|d
 |d |d
 |d dd}	|r~tj|tj	d}||	d< |	S )Nmarginr   input_ids_choseninput_ids_rejectedattention_mask_chosenattention_mask_rejectedz{The features should include `input_ids_chosen`, `attention_mask_chosen`, `input_ids_rejected` and `attention_mask_rejected`)r<   rA   )r>   rr   r?   r<   rA   T)rv   rx   rw   ry   return_lossrE   )
r/   r[   r&   r]   r>   rr   r?   rH   rI   float)
r3   rt   features_chosenfeatures_rejectedru   
has_marginfeaturebatch_chosenbatch_rejectedbatchr4   r4   r5   rf      s^   z&RewardDataCollatorWithPadding.__call__)rg   rh   ri   rj   r   rk   r>   r	   boolrm   rr   r   rl   r?   rn   ro   r   rf   r4   r4   r4   r5   rq      s   
 *rq   righttensorsrU   rT   rr   r8   c                 C   s   t dd | D d }|dur%|d | }|dkr%|d  || 7  < tjt| g|R || d j| d jd}t| D ]>\}}|dkrP|d |j	d  }	n|dkrWd}	nt
dt|	|	|j	d  }
|
ftd	d
 |j	dd D  }||| |< q>|S )a  
    Pads a list of tensors to the same shape along the first dimension.

    Args:
        tensors (`list[torch.Tensor]`):
            List of input tensors to pad.
        padding_value (`int`):
            Value to use for padding. Default is 0.
        padding_side (`str`):
            Side on which to add padding. Must be 'left' or 'right'. Default is 'right'.
        pad_to_multiple_of (`int`, *optional*, defaults to `None`):
            If set will pad the sequence to a multiple of the provided value.

    Returns:
        `torch.Tensor`:
            A single tensor containing the padded tensors.

    Examples:
    ```python
    >>> import torch

    >>> pad([torch.tensor([1, 2, 3]), torch.tensor([4, 5])])
    tensor([[1, 2, 3],
            [4, 5, 0]])

    >>> pad([torch.tensor([[1, 2], [3, 4]]), torch.tensor([[5, 6]])])
    tensor([[[1, 2],
            [3, 4]],
            [[5, 6],
            [0, 0]]])
    ```
    c                 S   s   g | ]}|j qS r4   )shaperL   tr4   r4   r5   rN     s    zpad.<locals>.<listcomp>r   NrF   devicerS   r   z&padding_side must be 'left' or 'right'c                 s   s    | ]}t d |V  qdS )r   N)slice)rL   sr4   r4   r5   	<genexpr>1  s    zpad.<locals>.<genexpr>rB   )npmaxtolistrH   fullr\   rF   r   	enumerater   r/   r   tuple)r   rU   rT   rr   output_shape	remainderoutputir   	seq_start	seq_sliceslicesr4   r4   r5   r]      s    '*"r]   c                   @   s`   e Zd ZU dZdZeed< dZeed< dZe	e
 ed< deeeef  d	eeef fd
dZdS )DPODataCollatorWithPaddinga  
    DPO DataCollator class that pads the tokenized inputs to the maximum length of the batch.

    Args:
        pad_token_id (`int` defaults to 0):
            The tokenizer's pad_token_id.
        label_pad_token_id (`int`, defaults to -100):
            The label used for masking.
        is_encoder_decoder (`bool` or `None`, `optional`, defaults to `None`):
            Whether you model has an encoder_decoder architecture.
    r   r.   r'   label_pad_token_idFis_encoder_decoderrt   r8   c                    s  i }|d   D ]̉dr| jrYfdd|D }dr2dr2| jd u r.td| j}ndr:d}nd	sCd
v rG| j}ntd dt|d|d|< qdrk| jd u rgtd| j}n!drt| j}ndr|d}ndrd}ntd ddv rd}nd}drtj	 ntj
  fdd|D }t|||d|< qdrtfdd|D |< qfdd|D |< q|S )Nr   )
_input_ids_attention_mask_labels_pixel_valuesc                    s   g | ]	}t |  qS r4   )rH   
LongTensorrL   exkr4   r5   rN   O      z7DPODataCollatorWithPadding.__call__.<locals>.<listcomp>r*   r<   zPadding is enabled, but the tokenizer is not configured with a padding token. Explicitly set `tokenizer.pad_token` (e.g. `tokenizer.pad_token = tokenizer.eos_token`) before calling the trainer.r   )chosenrejected
completiondecoderzUnexpected key in batch ''T)batch_firstrU   r   r   r   )prompt_input_idsrX   rS   r   c                    s   g | ]}t j|  d qS rD   )rH   rI   r   rF   r   r4   r5   rN         rU   rT   _logpsc                       g | ]}|  qS r4   r4   r   r   r4   r5   rN         c                    r   r4   r4   r   r   r4   r5   rN     r   )keysendswithr   
startswithr.   r/   r   r   rH   float32int64r]   rI   )r3   rt   padded_batchto_padrU   rT   r4   r   r5   rf   I  sT   









z#DPODataCollatorWithPadding.__call__N)rg   rh   ri   rj   r.   rl   rk   r   r   r   r   rn   ro   rm   r   rf   r4   r4   r4   r5   r   7  s   
 *r   c                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZ	eed< dZ
eed	< e d
ejdeeef fddZdefddZededefddZdS )RunningMomentsz
    Calculates the running mean and standard deviation of a data stream. Reference:
    https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L75
    acceleratorr   meanrB   stdvargW:countxsr8   c                 C   s  | j jrt| j |\}}}n| }tj|dd\}}| | }}|| j }| j| }|| }| j	| j |d | j | |  }|| }	|  j|| | 
 7  _|	| }
|
| |d    
 | _|

 | _	|| _|
 || |d    
 fS )zT
        Updates running moments from batch's moments computed across ranks
        F)unbiasedr   rB   )r   use_distributedget_global_statisticsnumelrH   var_meanr{   r   r   r   itemsqrtr   )r3   r   xs_meanxs_varxs_countdelta	tot_countnew_sumold_sumtot_sumnew_varr4   r4   r5   update  s    

"
$zRunningMoments.update	json_pathc                 C   sp   | j jr6tj| dd d}tj|dddd }t|dd	d
}|| W d   dS 1 s/w   Y  dS dS )zDSave the content of this instance in JSON format inside `json_path`.c                 S   s   dd | D S )Nc                 S   s   i | ]\}}|d kr||qS )r   r4   rL   r   vr4   r4   r5   
<dictcomp>  r   zARunningMoments.save_to_json.<locals>.<lambda>.<locals>.<dictcomp>r4   )xr4   r4   r5   <lambda>  s    z-RunningMoments.save_to_json.<locals>.<lambda>)dict_factoryr   T)indent	sort_keys
wutf-8encodingN)r   is_main_processdataclassesasdictjsondumpsopenwrite)r3   r   	save_dictjson_stringfr4   r4   r5   save_to_json  s   "zRunningMoments.save_to_jsonc                 C   sL   t |dd}| }W d   n1 sw   Y  | dd|it|S )z3Create an instance from the content of `json_path`.r   r   Nr   r4   )r   readr   loads)clsr   r   r   textr4   r4   r5   load_from_json  s   
zRunningMoments.load_from_jsonN)rg   rh   ri   rj   r
   rk   r   r{   r   r   r   rH   no_gradrp   r   r   rm   r   classmethodr   r4   r4   r4   r5   r     s   
 	r   cpur   c           
      C   s   | | j}tj| |du r| n| g|jd}| |}|\}}|| }t|| d |du r7dn|}| |}|| }	| ||	 || fS )z
    Computes element-wise mean and variance of the tensor across processes. Reference:
    https://github.com/OpenLMLab/MOSS-RLHF/blob/40b91eb2f2b71b16919addede0341d2bef70825d/utils.py#L57C1-L73C75
    Nr   r   rB   )	tor   rH   rI   sumr   reducemulr   )
r   r   rQ   r   sum_and_count
global_sumr   global_meansum_var
global_varr4   r4   r5   r     s   ,
$
r   	eval_predc              	   C   s   | \}}|j dkr(tj|dd}tdd t||D }tdd |D }n@|d d df |d d df k}t| }|dkrWtd	| d
t	|d d df  dt
 ||  }||  }tj|dd}tj||ktd  }d|iS )N   r   )axisc                 S   s0   g | ]\}}t ||D ]
\}}|d kr|qqS r'   )zip)rL   
predictionrR   plblr4   r4   r5   rN     s   0 z$compute_accuracy.<locals>.<listcomp>c                 S   s"   g | ]}|D ]}|d kr|qqS r  r4   )rL   rR   r  r4   r4   r5   rN     s   " r   rB   z
There are z out of zu instances where the predictions for both options are equal. These instances are ignored in the accuracy computation.rE   accuracy)ndimr   argmaxarrayr  rl   r   warningswarnr\   UserWarningr{   r   r   )r   predictionsrV   
equal_maskequal_predictions_countr  r4   r4   r5   compute_accuracy  s&   
  

r  r9   rI   length	pad_valuedimc                 C   sV   |  ||kr	| S t| j}||  | ||< tj| |tj|| j| jd g|dS )Nr   r  )sizern   r   rH   catonesrF   r   )rI   r  r  r  pad_sizer4   r4   r5   pad_to_length   s   
r  modelc                 C   s&   |   D ]}t|tjjrd|_qd S )Nr   )modules
isinstancerH   nnDropoutr  r  moduler4   r4   r5   disable_dropout_in_model  s
   r"   c              	   C   s:   | | }| || krt | d|  d| d| |  |S )Nz, inexact division: z / z = )r/   )abcustom_error_messageqr4   r4   r5   	exact_div  s   "r(  c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	PerPromptStatTrackeraI  
    Class for tracking statistics per prompt. Mainly used to calculate advantage for the DPPO algorithm

    Args:
        buffer_size (`int`):
            Size of the buffer to keep for each prompt.
        min_count (`int`):
            Minimum number of samples to keep in the buffer before calculating the mean and std.
    c                 C   s   || _ || _i | _d S N)buffer_size	min_countstats)r3   r+  r,  r4   r4   r5   __init__(  s   
zPerPromptStatTracker.__init__c           	      C   s   t |}t |}t |}t |}|D ]Q}|||k }|| jvr,t| jd| j|< | j| | t| j| | j	k rKt 
|}t |d }nt 
| j| }t | j| d }|| | |||k< q|S )N)maxlengư>)r   r
  unique
empty_liker-  r   r+  extendr\   r,  r   r   )	r3   rW   rewardsr0  
advantagesr*   prompt_rewardsr   r   r4   r4   r5   r   -  s    





zPerPromptStatTracker.updatec                 C   s   dd | j  D S )Nc                 S   s.   i | ]\}}|t |t |t|d qS ))r   r   r   )r   r   r   r\   r   r4   r4   r5   r   C  s   . z2PerPromptStatTracker.get_stats.<locals>.<dictcomp>)r-  itemsr2   r4   r4   r5   	get_statsB  s   zPerPromptStatTracker.get_statsN)rg   rh   ri   rj   r.  r   r7  r4   r4   r4   r5   r)    s
    
r)  c                    st   |   D ]3\ }t|tjjsd v r|tj}qt fdddD r7t|dr7|j	j
tjkr7|tj}qd S )Nnormc                 3   s    | ]}| v V  qd S r*  r4   )rL   r   namer4   r5   r   J  s    z.peft_module_casting_to_bf16.<locals>.<genexpr>)lm_headembed_tokenswtewpeweight)named_modulesr  rH   r  	LayerNormr   r   anyhasattrr?  rF   bfloat16r   r4   r9  r5   peft_module_casting_to_bf16F  s   
rE  
model_argsc                 C   s@   | j rtd| j| j| j| jd}|S | jrtdd}|S d }|S )NT)load_in_4bitbnb_4bit_compute_dtypebnb_4bit_quant_typebnb_4bit_use_double_quantbnb_4bit_quant_storage)load_in_8bit)rG  r   torch_dtyperI  use_bnb_nested_quantrL  )rF  quantization_configr4   r4   r5   get_quantization_configP  s    rP  c                   C   s    t j st rdt jiS d S )Nr#  )rH   cudais_availabler   r   local_process_indexr4   r4   r4   r5   get_kbit_device_mapc  s   rT  zOptional[PeftConfig]c                 C   sJ   | j du rd S t stdt| j| j| j| j| jd| j	| j
| jd	}|S )NFzYou need to have PEFT library installed in your environment, make sure to install `peft`. Make sure to run `pip install -U peft`.none)		task_typertarget_modules
lora_alphalora_dropoutbias
use_rslorause_doramodules_to_save)use_peftr   r/   r#   lora_task_typelora_rlora_target_modulesrY  rZ  r\  r]  lora_modules_to_save)rF  peft_configr4   r4   r5   get_peft_configj  s$   
re     c                 C   sZ   t dg| jt | jj }t || j}|dkr+t |d|  d|  S |S )a1  
    Get the exponent cap of a value. This is used to cap the exponent of a value to avoid overflow. The formula is :
    log(value.dtype.max) E.g.
      For float32 data type, the maximum exponent value is 88.7228 to 4 decimal points.

    Args:
        value (`torch.Tensor`):
            The input tensor to obtain the data type
        decimal (`int`):
            The number of decimal points of the output exponent cap. eg: direct calling exp(log(torch.float32.max))
            will result in inf so we cap the exponent to 88.7228 to avoid overflow.
    rB   r   
   )	rH   zerosr   rF   finfor   logr   floor)valuedecimal
vdtype_maxvdtype_log_maxr4   r4   r5   get_exp_cap  s   "&rp  c                 C   s(   |dk rt | n|}ttj| |dS )Nr   )r   )rp  rH   expclamp)rl  capr4   r4   r5   cap_exp  s   rt  dfc                 C   sh   t  stdt }tdd}| jD ]}|| q|  D ]\}}|j|t	
   q|| d S )NzgThe function `print_rich_table` requires the `rich` library. Please install it with `pip install rich`.T)
show_lines)r   ImportErrorr   r!   columns
add_columniterrowsadd_rowastyperm   r   print)ru  consoletablecolumn_rowr4   r4   r5   print_rich_table  s   

r  zT{% for message in messages %}{{' ' + message['content']}}{% endfor %}{{ eos_token }}z{% for message in messages %}{{message['role'].capitalize() + ': ' + message['content'] + '

'}}{% endfor %}{% if add_generation_prompt %}{{ 'Assistant:' }}{% endif %}c                   @   s   e Zd ZU dZeed< dS )OnlineTrainerStater   episodeN)rg   rh   ri   r  rl   rk   r4   r4   r4   r5   r    s   
 r  c                       sn  e Zd ZU dZedddidZeed< edddidZe	e
 ed	< eddd
idZe	e ed< edddidZe	e ed< edddidZeed< edddidZe	e ed< edddidZeed< edddidZeed< edddidZeed< edddidZe	ed  ed< edddidZe	e ed< ed dd!idZeed"< eddd#idZe	e ed$< ed%dd&idZeed'< eddd(idZe	e ed)< eddd*idZe	e ed+< eddd,idZe	e ed-< eddd.idZe	e ed/< eddd0idZe	e ed1< eddd2idZe	e ed3< eddd4idZ e	e ed5< ed6dd7idZ!e
ed8<  fd9d:Z"  Z#S );OnPolicyConfiga  
    Base configuration class for on-policy trainers.

    This class includes only the parameters that are specific to some on-policy training. For a full list of training
    arguments, please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this
    class may differ from those in [`~transformers.TrainingArguments`].

    Using [`~transformers.HfArgumentParser`] we can turn this class into
    [argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
    command line.

    Parameters:
        run_name (`str` or `None`, *optional*, defaults to `None`):
            Name of the run.
        dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
            Number of processes to use for processing the dataset.
        num_mini_batches (`int`, *optional*, defaults to `1`):
            Number of minibatches to split a batch into.
        total_episodes (`int` or `None`, *optional*, defaults to `None`):
            Total number of episodes in the dataset.
        local_rollout_forward_batch_size (`int`, *optional*, defaults to `64`):
            Per rank no grad forward pass in the rollout phase.
        num_sample_generations (`int`, *optional*, defaults to `10`):
            Number of debugging samples generations (i.e., `generate_completions` calls) throughout training.
        response_length (`int`, *optional*, defaults to `53`):
            Length of the response.
        stop_token (`str` or `None`, *optional*, defaults to `None`):
            Specifies the stop token to use for text generation. This parameter is mutually exclusive with
            `stop_token_id`.

            - `None`: No stop token is applied, unless `stop_token_id` is specified.
            - `'eos'`: Uses the tokenizer's `eos_token`.

        stop_token_id (`int` or `None`, *optional*, defaults to `None`):
            Specifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied,
            unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.
        temperature (`float`, *optional*, defaults to `0.7`):
            Sampling temperature.
        missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
            Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage to
            generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
            value.
        sft_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
            Path to the SFT model.
        world_size (`int` or `None`, *optional*, defaults to `None`):
            Number of processes (GPUs) to use for the training.
        num_total_batches (`int` or `None`, *optional*, defaults to `None`):
            Number of total batches to train.
        micro_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Micro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).
        local_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Batch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).
        batch_size (`int` or `None`, *optional*, defaults to `None`):
            Batch size across devices (HF's `per_device_train_batch_size` * `world_size` *
            `gradient_accumulation_steps`).
        local_mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Mini batch size per GPU.
        mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
            Mini batch size across GPUs.
        push_to_hub (`bool`, *optional*, defaults to `False`):
            Whether to push the model to the Hub after training.
    rg  helpzLog every X updates steps. Should be an integer or a float in range `[0,1)`. If smaller than 1, will be interpreted as ratio of total training steps.)defaultmetadatalogging_stepsNzWhether to use bf16 (mixed) precision instead of 32-bit. Requires Ampere or higher NVIDIA architecture or Intel XPU or using CPU (use_cpu) or Ascend NPU. If not set, it defaults to `True` if `fp16` is not set.bf16zName of the run.run_namez6Number of processes to use for processing the dataset.dataset_num_procrB   z,Number of minibatches to split a batch into.num_mini_batchesz(Total number of episodes in the dataset.total_episodes@   z3Per rank no grad forward pass in the rollout phase. local_rollout_forward_batch_sizezaNumber of debugging samples generations (i.e., `generate_completions` calls) throughout training.num_sample_generations5   zLength of the response.response_lengthzoSpecifies the stop token to use for text generation. This parameter is mutually exclusive with `stop_token_id`.eos
stop_tokenzSpecifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied, unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.stop_token_idgffffff?zSampling temperature.temperaturezPenalty applied to the score when the model fails to generate an EOS token. This is useful to encourage to generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive value.missing_eos_penaltyzEleutherAI/pythia-160mzPath to the SFT model.sft_model_pathz3Number of processes (GPUs) to use for the training.
world_sizez!Number of total batches to train.num_total_batcheszTMicro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).micro_batch_sizezXBatch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).local_batch_sizeznBatch size across devices (HF's `per_device_train_batch_size` * `world_size` * `gradient_accumulation_steps`).
batch_sizezMini batch size per GPU.local_mini_batch_sizezMini batch size across GPUs.mini_batch_sizeFz4Whether to push the model to the Hub after training.push_to_hubc                    s(   | j d u r	| j n| j | _ t   d S r*  )r  fp16superr6   r2   	__class__r4   r5   r6   f  s   zOnPolicyConfig.__post_init__)$rg   rh   ri   rj   r   r  r{   rk   r  r   r   r  rm   r  rl   r  r  r  r  r  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r6   __classcell__r4   r4   r  r5   r    s   
 @	r  boolsc                 C   s<   |  d}||  | tj||| jd }tj|ddjS )a  
    Takes an N-dimensional bool tensor and returns an (N-1)-dimensional tensor of integers giving the position of the
    first True in each "row".

    Returns the length of the rows (bools.size(-1)) if no element is True in a given row.

    Args:
        bools (`torch.Tensor`):
            An N-dimensional boolean tensor.
        dtype (`torch.dtype`, optional):
            The desired data type of the output tensor. Defaults to `torch.long`.

    Returns:
        `torch.Tensor`:
            An (N-1)-dimensional tensor of integers indicating the position of the first True in each row. If no True
            value is found in a row, returns the length of the row.
    r9   r   r  )r  typerH   aranger   r0   values)r  rF   row_lenzero_or_indexr4   r4   r5   first_true_indicesl  s   
"r  query_responsesr.   context_lengthc                 C   s   ||k}| d|  }t| | j}t|| d}||||dddd}| |jd }	t|dd|df |kd | }
|	|	tj	|	
d|	jd|
f d|
fS )	a5  
    Computes the reward logits and the rewards for a given model and query responses.

    Args:
        model (`torch.nn.Module`):
            The model used to compute the reward logits.
        query_responses (`torch.Tensor`):
            The tensor containing the query responses.
        pad_token_id (`int`):
            The token ID representing the pad token.
        context_length (`int`):
            The length of the context in the query responses.

    Returns:
        tuple:
            - `reward_logits` (`torch.Tensor`):
                The logits for the reward model.
            - `final_rewards` (`torch.Tensor`):
                The final rewards for each query response.
            - `sequence_lengths` (`torch.Tensor`):
                The lengths of the sequences in the query responses.
    rB   r   TF)r<   rA   position_idsreturn_dictoutput_hidden_states	use_cacher9   Nr   )cumsumrJ   getattrbase_model_prefixrH   masked_fillscorehidden_statesr  r  r  r   squeeze)r  r  r.   r  rA   r  lm_backboner<   r   reward_logitssequence_lengthsr4   r4   r5   
get_reward  s.   $r  c                 C   s<   ||k}| d|  }t|| d}| |||dddS )a  
    Performs a forward pass through the model with the given query responses and pad token ID.

    Args:
        model (`torch.nn.Module`):
            The model to perform the forward pass.
        query_responses (`torch.Tensor`):
            The tensor containing the query responses.
        pad_token_id (`int`):
            The token ID representing the pad token.

    Returns:
        `ModelOutput`:
            The output of the model, including hidden states.
    rB   r   T)r<   rA   r  r  r  )r  rJ   rH   r  )r  r  r.   rA   r  r<   r4   r4   r5   forward  s   r  Fper_device_train_batch_sizer  r  c           	      C   s   ddl }t j}|j}|d d dkr1||d< |d ddd}|r(d	d
i|d< n;|r0d	d
i|d< n2t| drct| jddrCt| jjnt| jdd}|durc|d d dkrc|	|| d| dd |j
| |d^} }|   | S )a  
    Prepares the model for training with DeepSpeed (both for stage 2 and 3), configuring the appropriate settings based
    on the model and batch size.

    Args:
        model (`torch.nn.Module`):
            The model to be prepared for DeepSpeed training.
        per_device_train_batch_size (`int`):
            The training batch size per device.

    Returns:
        `torch.nn.Module`:
            The model initialized and configured with DeepSpeed for training.
    r   Nzero_optimizationstager   train_micro_batch_size_per_gpuF)r  prescale_gradientswall_clock_breakdownenabledTr  r  confighidden_sizeshidden_sizerg  )z$zero_optimization.reduce_bucket_sizez4zero_optimization.stage3_param_persistence_thresholdz-zero_optimization.stage3_prefetch_bucket_size)r  r  )	deepspeedr   deepspeed_plugindeepspeed_configrC  r  r  r   r  r   
initializeeval)	r  r  r  r  r  r  config_kwargsr  r  r4   r4   r5   prepare_deepspeed  s:   
r  r  	responsesc                 C   sf   t || kd}dgt| d  |jd g }tj|jd |jdj| }t	|||k|}|S )aA  
    Truncates the responses at the first occurrence of the stop token, filling the rest with pad tokens.

    Args:
        stop_token_id (`int`):
            The token ID representing the stop token where truncation occurs.
        pad_token_id (`int`):
            The token ID representing the pad token used to fill the truncated responses.
        responses (`torch.Tensor`):
            The tensor containing the responses to be truncated.

    Returns:
        `torch.Tensor`:
            The truncated responses tensor with pad tokens filled after the stop token.
    r9   rB   r   )
r  	unsqueezer\   r  r   rH   r  r   viewr  )r  r.   r  
trunc_idxsnew_sizeidxspostprocessed_responsesr4   r4   r5   truncate_response  s
   "r  r  queriesgeneration_configc           	      C   sl   |j d }||k}t|| d}| j|||ddd}t|jd}tj||jdd|df fdd|fS )a  
    Generates sequences from the language model backbone in a way that does not affect padding tokens.

    Args:
        lm_backbone (`torch.nn.Module`):
            The language model backbone used for generation.
        queries (`torch.Tensor`):
            The tensor containing the input queries.
        pad_token_id (`int`):
            The token ID representing the pad token.
        generation_config (`GenerationConfig`):
            The configuration for the generation process.

    Returns:
        tuple:
            - `generated_sequences` (`torch.Tensor`):
                The concatenated tensor of input queries and generated sequences.
            - `logits` (`torch.Tensor`):
                The logits output from the generation process.
    rB   r   T)r<   rA   r  return_dict_in_generateoutput_scoresNr  )r   rH   r  generatestackscoresr  	sequences)	r  r  r.   r  r  rA   r<   r   logitsr4   r4   r5   r  "  s   
	(r  r  c                 C   s   g }g }|j d }td||D ]}||||  }	t| |	||\}
}||
 || qt||dd}t|ddd}|d|j d d | }|jdg|j dd  R  d | }||fS )Nr   r   r   r9   r   )r   ranger  r[   r]   r  )r  r  r  r.   r  r  logitssr  r   queryquery_responser  padded_query_responsespadded_logitssr4   r4   r5   batch_generationI  s$   

$r  bos_token_idprompt_len_input_idsprompt_tokenschosen_prompt_len_input_idschosen_tokensrejected_prompt_len_input_idsrejected_tokensc                 C   s   | d ur^|dks| |d d kr"| g|d  |d< dg|d  |d< |dks.| |d d kr@| g|d  |d< dg|d  |d< |dksL| |d d kr^| g|d  |d< dg|d  |d< |||fS )Nr   r   rB   rX   r4   )r  r  r  r  r  r  r  r4   r4   r5   add_bos_token_if_neededj  s   	
r  eos_token_idc                 C   s   t |d dks| |d d kr|d |  |d d t |d dks.| |d d kr<|d |  |d d ||fS )Nr<   r   r9   rA   rB   )r\   r[   )r  r  r  r4   r4   r5   add_eos_token_if_needed  s     r  r<   c                 C   s   t | |kd}dgt|  d  | jd g }tj| jd | jdj| }t	| ||k|}t	t
| ||kd}||fS )a  
    Truncates the input tensor from the right side after the first occurrence of the stop token.

    Args:
        input_ids (`torch.Tensor`):
            The tensor containing the responses to be truncated
        stop_token_id (`int`):
            The token ID representing the stop token where truncation occurs
        pad_token_id (`int`):
            The token ID representing the pad token used to fill the truncated responses

    Returns:
        tuple:
            - `output_ids` (`torch.Tensor`):
                The truncated responses tensor with pad tokens filled after the stop token
            - `mask` (`torch.Tensor`):
                The mask tensor to indicate the padding tokens
    r9   rB   r   r   )r  r  r\   r  r   rH   r  r   r  r  	ones_like)r<   r  r.   r  r  r  
output_idsrQ   r4   r4   r5   truncate_right  s   "r  c                   C   sJ   t  r
tj  dS t rtj  dS t rtj  dS tj  dS )a3  Empties the cache of the available torch device.

    This function checks for the availability of different torch devices (XPU, MLU, NPU, CUDA) and empties the cache of
    the first available device it finds.

    If none of the specific devices are available, it defaults to emptying the CUDA cache.
    N)	r   rH   xpuempty_cacher   mlur   npurQ  r4   r4   r4   r5   r    s   r  inputsr&   c                    s     j | dd} fdd|D S )ay  
    Decodes the input tensor and strips the padding tokens.

    Args:
        inputs (`torch.Tensor`):
            The input tensor to be decoded.
        tokenizer (`transformers.PreTrainedTokenizerBase`):
            The tokenizer used to decode the input tensor.

    Returns:
        `list[str]`:
            The list of decoded strings with padding tokens stripped.
    F)skip_special_tokensc                    s   g | ]	}|  jd qS r#  )replace	pad_token)rL   dr&   r4   r5   rN     r   z,decode_and_strip_padding.<locals>.<listcomp>)batch_decode)r  r&   decodedr4   r  r5   decode_and_strip_padding  s   r  
base_model
model_namehub_model_iddataset_nametags	wandb_urltrainer_nametrainer_citationpaper_titlepaper_id	comet_urlc                 C   s   t | |dd|dg|d}tj|fi dttddd| d	|d
|d|d|d|
d|d|d|d|	dtddtddtddtddtd}|S )a  
    Generate a `ModelCard` from a template.

    Args:
        base_model (`str` or `None`):
            Base model name.
        model_name (`str`):
            Model name.
        hub_model_id (`str`):
            Hub model ID as `username/model_id`.
        dataset_name (`str` or `None`):
            Dataset name.
        tags (`list[str]`):
            Tags.
        wandb_url (`str` or `None`):
            Weights & Biases run URL.
        comet_url (`str` or `None`):
            Comet experiment URL.
        trainer_name (`str`):
            Trainer name.
        trainer_citation (`str` or `None`, defaults to `None`):
            Trainer citation as a BibTeX entry.
        paper_title (`str` or `None`, defaults to `None`):
            Paper title.
        paper_id (`str` or `None`, defaults to `None`):
            ArXiv paper ID as `YYMM.NNNNN`.

    Returns:
        `ModelCard`:
            A ModelCard object.
    transformerslicensegenerated_from_trainer)r  datasetslibrary_namelicencer  r	  template_pathtrlztemplates/lm_model_card.mdr  r  r  r  r
  r  r  r  r  r  trl_versiontransformers_versionpytorch_versionrH   datasets_versionr  tokenizers_version
tokenizers)r   r   from_templaterm   pkg_resourcesfilesjoinpathr   )r  r  r  r  r	  r
  r  r  r  r  r  	card_datacardr4   r4   r5   generate_model_card  sX   ,	
r$  c                   C   s$   t  sdS t durt jS dS )zt
    If Comet integration is enabled, return the URL of the current Comet experiment; otherwise, return `None`.
    N)r   comet_mlget_running_experimenturlr4   r4   r4   r5   get_comet_experiment_url  s
   
r(  r:  r  c                 C   s4   t  stdt }|dur|j|| d dS dS )a  
    If Comet integration is enabled logs a table to the Comet experiment if it is currently running.

    Args:
        name (`str`):
            Table name.
        table (`pd.DataFrame`):
            The Pandas DataFrame containing the table to log.
    zLThe comet-ml is not installed. Please install it first: pip install comet-mlN)tabular_datafilename)r   ModuleNotFoundErrorr%  r&  	log_table)r:  r  
experimentr4   r4   r5   log_table_to_comet_experiment$  s   
r.  rQ   .c                    s   | j \}}|  }dd |D }|jdd}tj||jdd}||d | |d}fdd|D }|jdd}	|	dk}
|
	 rQt
|
tj n| |ddd f } fd	d|D }|sj|S |g|R S )
a  
    Shift non-zero elements in the mask and corresponding tensors to the left.

    This function operates on a binary mask and any number of additional tensors with the same dimensions as the mask.
    For each row, non-zero values are shifted to the leftmost positions. Then, columns that contain only zeros across
    all rows are truncated from the mask and tensors. Visually, this operation can be represented as follows:

    ```
    [[0, 0, x, x, x, x],  ->  [[x, x, x, x],
     [0, x, x, x, 0, 0]]       [x, x, x, 0]]
    ```

    Args:
        mask (`torch.Tensor`):
            2D tensor (binary mask) with shape `(N, M)`.
        *tensors (`torch.Tensor`)
            One or more 2D tensors with the same shape as `mask`. These tensors will be processed alongside `mask`,
            with non-zero values shifted and excess zero columns truncated in the same manner.

    Returns:
        `torch.Tensor`:
            Updated binary mask with non-zero values flushed to the left and trailing zero columns removed.
        `*torch.Tensor`
            Updated tensors, processed in the same way as the mask.

    Example:
    ```python
    >>> mask = torch.tensor([[0, 0, 1, 1, 1], [0, 1, 1, 0, 0]])
    >>> tensor = torch.tensor([[9, 9, 2, 3, 4], [9, 5, 6, 9, 9]])
    >>> new_mask, new_tensor = flush_left(mask, tensor)
    >>> print(new_mask)
    tensor([[1, 1, 1],
            [1, 1, 0]])

    >>> print(new_tensor)
    tensor([[2, 3, 4],
            [5, 6, 0]])
    ```
    c                 S      g | ]}|  qS r4   cloner   r4   r4   r5   rN   b  r   zflush_left.<locals>.<listcomp>rB   r  r   r   c                       g | ]}| d  qS rB   gatherr   idx_rollr4   r5   rN   i      Nc                    s    g | ]}|d d d  f qS r*  r4   r   )first_empty_colr4   r5   rN   p       )r   r1  r	  rH   r  r   r  r5  r   rB  rl   r   int8)rQ   r   r  M	mask_copyfirst_non_zeropos	mask_rollrolled_tensorscol_sums
empty_colsflushed_maskflushed_tensorsr4   )r9  r7  r5   
flush_left6  s    
( rF  c                    s   | j \}}|  }dd |D }t|}|jdd}tj||jdd}||d | |d}fdd|D }	|j	dd}
|
dk}|
 rVt|tj n| |dd df } fd	d|	D }|so|S |g|R S )
zs
    Shift non-zero elements in the mask and corresponding tensors to the right. See `flush_left` for details.
    c                 S   r/  r4   r0  r   r4   r4   r5   rN     r   zflush_right.<locals>.<listcomp>rB   r  r   r   c                    r2  r3  r4  r   r6  r4   r5   rN     r8  Nc                    s    g | ]}|d d  d f qS r*  r4   r   )first_non_empty_colr4   r5   rN     r:  )r   r1  rH   fliplrr	  r  r   r  r5  r   rB  rl   r   r;  )rQ   r   r  r<  r=  flipped_maskr>  r?  r@  rA  rB  non_empty_colsrD  rE  r4   )rG  r7  r5   flush_rightw  s"   

 rK  c           	      C   s   | j tjtjfv r'tj| d|ddd}tdd | D }|| }|S g }t| |D ]\}}t	j
|dd}|jd|ddd}|| q.t|}|S )aw  
    A memory-efficient implementation of the common `log_softmax -> gather` operation.

    This function is equivalent to the following naive implementation:
    ```python
    logps = torch.gather(logits.log_softmax(-1), dim=-1, index=index.unsqueeze(-1)).squeeze(-1)
    ```

    Args:
        logits (`torch.Tensor`):
            Logits tensor of shape `(..., num_classes)`.
        index (`torch.Tensor`):
            Index tensor of shape `(...)`, specifying the positions to gather from the log-softmax output.

    Returns:
        `torch.Tensor`:
            Gathered log probabilities with the same shape as `index`.
    r9   )r  indexc                 S   s   g | ]	}t j|d dqS )r9   r  )rH   	logsumexp)rL   lgr4   r4   r5   rN     r   z)selective_log_softmax.<locals>.<listcomp>r  )rF   rH   r   float64r5  r  r  r  r  Flog_softmaxr[   )	r  rL  selected_logitslogsumexp_valuesper_token_logps
row_logits
row_labels	row_logpsrow_per_token_logpsr4   r4   r5   selective_log_softmax  s   	
rY  rB   
chunk_sizec                 C   sT   g }| j |ddD ]}tj|dd}t|| d }|| q	t|}|S )a  
    Compute the Shannon entropy (in nats) for each row of *logits* without
    materialising the full soft-max in memory.
    The batch dimension is processed in chunks of size `chunk_size` so that
    only a subset of rows is expanded to probabilities at any one time.

    Args:
        logits (`torch.Tensor`):
            Logits tensor of shape `(..., num_classes)`. Entropy is taken along the last axis; all
            leading dimensions are preserved.
        chunk_size (`int`, *optional*, defaults to `1`):
            Number of rows to process per iteration.

    Returns:
        `torch.Tensor`:
            Entropy values with shape `logits.shape[:-1]`.
    r   r  r9   )splitrP  rQ  rH   rq  r   r2  r  )r  rZ  per_token_entropieslogits_chunklogpschunk_entropyr4   r4   r5   entropy_from_logits  s   
r`  rW   completionsr3  r4  stepnum_samplesc                    s  t  stdt }tdddd}|jddd |jdd	d  D ]
}|j|d
dd q#|jdddd |durI|tkrCd}n|dkrIdS |dur}tt	t|fddD fddD fdd
 D  fddD  t	tD ]*fdd D }	|jt t g|	  dR   |  qt|dd| dd}
||
 dS )u  
    Print out a sample of model completions to the console with multiple reward metrics.

    This function creates a nicely formatted table showing prompt-completion pairs, useful for monitoring model outputs
    during training. It requires the `rich` library to be installed.

    Args:
        prompts (`list[str]`):
            List of prompts.
        completions (`list[str]`):
            List of completions corresponding to the prompts.
        rewards (`dict[str, list[float]]`):
            Dictionary where keys are reward names and values are lists of rewards.
        advantages (`list[float]`):
            List of advantages corresponding to the prompts and completions.
        step (`int`):
            Current training step number, used in the output title.
        num_samples (`int` or `None`, *optional*, defaults to `None`):
            Number of random samples to display. If `None` (default), all items will be displayed.

    Example:
    ```python
    >>> from trl.trainer.utils import print_prompt_completions_sample

    >>> prompts = ["The sky is", "The sun is"]
    >>> completions = [" blue.", " in the sky."]
    >>> rewards = {"Correctness": [0.123, 0.456], "Format": [0.789, 0.101]}
    >>> advantages = [0.987, 0.654]
    >>> print_prompt_completions_sample(prompts, completions, rewards, advantages, 42)
    ╭──────────────────────────── Step 42 ─────────────────────────────╮
    │ ┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┓ │
    │ ┃ Prompt     ┃ Completion   ┃ Correctness ┃ Format ┃ Advantage ┃ │
    │ ┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━┩ │
    │ │ The sky is │  blue.       │        0.12 │   0.79 │      0.99 │ │
    │ ├────────────┼──────────────┼─────────────┼────────┼───────────┤ │
    │ │ The sun is │  in the sky. │        0.46 │   0.10 │      0.65 │ │
    │ └────────────┴──────────────┴─────────────┴────────┴───────────┘ │
    ╰──────────────────────────────────────────────────────────────────╯
    ```
    zvThe function `print_prompt_completions_sample` requires the `rich` library. Please install it with `pip install rich`.Tz
bold white)show_headerheader_styleexpandPromptbright_yellow)style
Completionbright_greenz	bold cyanr   )ri  justify	Advantagezbold magentaNr   c                       g | ]} | qS r4   r4   rL   r   )rW   r4   r5   rN     r   z3print_prompt_completions_sample.<locals>.<listcomp>c                    rn  r4   r4   ro  )ra  r4   r5   rN     r   c                    s$   i | ]\} | fd dD qS )c                    rn  r4   r4   ro  valr4   r5   rN     r   z>print_prompt_completions_sample.<locals>.<dictcomp>.<listcomp>r4   rL   key)indicesrp  r5   r     s   $ z3print_prompt_completions_sample.<locals>.<dictcomp>c                    rn  r4   r4   ro  )r4  r4   r5   rN      r   c                    s   g | ]
}|   d qS ).2fr4   rr  )r   r3  r4   r5   rN   #  rO   ru  FzStep )rf  titleborder_style)r   rw  r   r!   ry  r   r\   randomsampler  r6  r{  r"   add_sectionr    r}  )rW   ra  r3  r4  rb  rc  r~  r  reward_namereward_valuespanelr4   )r4  ra  r   rt  rW   r3  r5   print_prompt_completions_sample  s8   00
r~  )r   r   N)Nr   )r9   r  )rf  )FF)r8   N)NNNNr3  r*  )yr   importlib.resources	resourcesr  r   rx  r  collectionsr   r   r   importlib.metadatar   typingr   r   r   r	   numpyr   pandaspdrH   torch.nn.functionalr  
functionalrP  torch.utils.data
accelerater
   r   accelerate.stater   huggingface_hubr   r   torch.nn.utils.rnnr   r  r   r   r   r   r   r   r   transformers.utilsr   r   r   r   r   r   trainer.model_configr   rich.consoler   
rich.panelr    
rich.tabler!   	rich.textr"   r%  peftr#   r$   r%   rq   rn   rp   rl   rm   r]   r   r   r   r   r   ro   r{   r  r  Moduler"  r(  r)  rE  rP  rT  re  rp  rt  	DataFramer  SIMPLE_SFT_CHAT_TEMPLATESIMPLE_CHAT_TEMPLATEr  r  rJ   r  r  r  r   r  r  r  r  r  r  r  r  r  r$  r(  r.  rF  rK  rY  r`  r~  r4   r4   r4   r5   <module>   s  $ 		bO
BS:,%
)


 8
2
!
6
' 



	

J..A#"