import torch
import torch.nn as nn
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    is_torch_npu_available,
    is_torch_xpu_available,
)

from .modeling_base import PreTrainedModelWrapper


class ValueHead(nn.Module):
    r"""
    The ValueHead class implements a head for GPT2 that returns a scalar for each output token.
    """

    def __init__(self, config, **kwargs):
        super().__init__()
        if not hasattr(config, "summary_dropout_prob"):
            summary_dropout_prob = kwargs.pop("summary_dropout_prob", 0.1)
        else:
            summary_dropout_prob = config.summary_dropout_prob

        self.dropout = nn.Dropout(summary_dropout_prob) if summary_dropout_prob else nn.Identity()

        # some models store the hidden size under `word_embed_proj_dim` (e.g. OPT),
        # and encoder-decoder configs may keep it on the decoder sub-config
        if hasattr(config, "hidden_size"):
            hidden_size = config.hidden_size
        if hasattr(config, "word_embed_proj_dim"):
            hidden_size = config.word_embed_proj_dim
        elif hasattr(config, "is_encoder_decoder"):
            if config.is_encoder_decoder and hasattr(config, "decoder"):
                if hasattr(config.decoder, "hidden_size"):
                    hidden_size = config.decoder.hidden_size

        self.summary = nn.Linear(hidden_size, 1)
        self.flatten = nn.Flatten()

    def forward(self, hidden_states):
        output = self.dropout(hidden_states)

        # upcast the hidden states if they do not match the dtype of the
        # summary layer (e.g. when the base model runs in half precision)
        if output.dtype != self.summary.weight.dtype:
            output = output.to(self.summary.weight.dtype)

        output = self.summary(output)
        return output
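
# A minimal sketch of what `ValueHead` computes: it maps final hidden states of
# shape (batch, seq_len, hidden_size) to one scalar per token. The
# `SimpleNamespace` config below is a stand-in assumption for illustration;
# real callers pass a `transformers` model config that exposes `hidden_size`.
def _example_value_head():
    from types import SimpleNamespace

    config = SimpleNamespace(hidden_size=8, summary_dropout_prob=0.0)
    head = ValueHead(config)
    hidden_states = torch.randn(2, 5, 8)  # (batch, seq_len, hidden_size)
    values = head(hidden_states)
    assert values.shape == (2, 5, 1)  # one scalar value per output token
    return values
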
class AutoModelForCausalLMWithValueHead(PreTrainedModelWrapper):
    r"""
    An autoregressive model with a value head in addition to the language model head. This class inherits from
    `~trl.PreTrainedModelWrapper` and wraps a `transformers.PreTrainedModel` class. The wrapper class supports classic
    functions such as `from_pretrained`, `push_to_hub` and `generate`. To call a method of the wrapped model, simply
    manipulate the `pretrained_model` attribute of this class.

    Class attributes:
        - **transformers_parent_class** (`transformers.PreTrainedModel`) -- The parent class of the wrapped model. This
            should be set to `transformers.AutoModelForCausalLM` for this class.
        - **supported_args** (`tuple`) -- A tuple of strings that are used to identify the arguments that are supported
            by the `ValueHead` class. Currently, the supported args are:
            - **summary_dropout_prob** (`float`, `optional`, defaults to `None`) -- The dropout probability for the
                `ValueHead` class.
            - **v_head_initializer_range** (`float`, `optional`, defaults to `0.2`) -- The initializer range for the
                `ValueHead` if a specific initialization strategy is selected.
            - **v_head_init_strategy** (`str`, `optional`, defaults to `None`) -- The initialization strategy for the
                `ValueHead`. Currently, the supported strategies are:
                - **`None`** -- Initializes the weights of the `ValueHead` with a random distribution. This is the
                  default strategy.
                - **"normal"** -- Initializes the weights of the `ValueHead` with a normal distribution.
    """

    transformers_parent_class = AutoModelForCausalLM
    supported_args = (
        "summary_dropout_prob",
        "v_head_initializer_range",
        "v_head_init_strategy",
    )

    def __init__(self, pretrained_model, **kwargs):
        r"""
        Initializes the model.

        Args:
            pretrained_model (`transformers.PreTrainedModel`):
                The model to wrap. It should be a causal language model such as GPT2, or any model mapped inside the
                `AutoModelForCausalLM` class.
            kwargs (`dict`, `optional`):
                Additional keyword arguments that are passed to the `ValueHead` class.
        """
        super().__init__(pretrained_model, **kwargs)
        v_head_kwargs, _, _ = self._split_kwargs(kwargs)
        self.v_head = ValueHead(self.pretrained_model.config, **v_head_kwargs)
        self._init_weights(**v_head_kwargs)

    def _init_weights(self, **kwargs):
        r"""
        Initializes the weights of the value head. The default initialization strategy is random. Users can pass a
        different initialization strategy by passing the `v_head_init_strategy` argument when calling
        `.from_pretrained`. Supported strategies are:
        - `normal`: initializes the weights with a normal distribution.

        Args:
            **kwargs (`dict`, `optional`):
                Additional keyword arguments that are passed to the `ValueHead` class. These arguments can contain the
                `v_head_init_strategy` argument as well as the `v_head_initializer_range` argument.
        """
        initializer_range = kwargs.pop("v_head_initializer_range", 0.2)
        init_strategy = kwargs.pop("v_head_init_strategy", None)
        if init_strategy is None:
            # do nothing -- keep the default (random) initialization
            pass
        elif init_strategy == "normal":
            self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range)
            self.v_head.summary.bias.data.zero_()

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        return_past_key_values=False,
        **kwargs,
    ):
        r"""
        Applies a forward pass to the wrapped model and returns the logits of the value head.

        Args:
            input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`):
                Indices of input sequence tokens in the vocabulary.
            past_key_values (`tuple(tuple(torch.FloatTensor))`, `optional`):
                Contains pre-computed hidden-states (key and values in the attention blocks) as computed by the model
                (see `past_key_values` input) to speed up sequential decoding.
            attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, `optional`):
                Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:
                - 1 for tokens that are **not masked**,
                - 0 for tokens that are **masked**.
            return_past_key_values (`bool`): A flag indicating if the computed hidden-states should be returned.
            kwargs (`dict`, `optional`):
                Additional keyword arguments that are passed to the wrapped model.
        """
        kwargs["output_hidden_states"] = True  # the value head needs the hidden states
        kwargs["past_key_values"] = past_key_values

        if self.is_peft_model and self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING":
            kwargs.pop("past_key_values")

        base_model_output = self.pretrained_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            **kwargs,
        )

        last_hidden_state = base_model_output.hidden_states[-1]
        lm_logits = base_model_output.logits
        loss = base_model_output.loss

        if last_hidden_state.device != self.v_head.summary.weight.device:
            last_hidden_state = last_hidden_state.to(self.v_head.summary.weight.device)

        value = self.v_head(last_hidden_state).squeeze(-1)

        # force upcast in fp32 if logits are in half-precision
        if lm_logits.dtype != torch.float32:
            lm_logits = lm_logits.float()

        if return_past_key_values:
            return (lm_logits, loss, value, base_model_output.past_key_values)
        else:
            return (lm_logits, loss, value)

    def generate(self, *args, **kwargs):
        r"""
        A simple wrapper around the `generate` method of the wrapped model. Please refer to the
        [`generate`](https://huggingface.co/docs/transformers/internal/generation_utils) method of the wrapped model
        for more information about the supported arguments.

        Args:
            *args (`list`, *optional*):
                Positional arguments passed to the `generate` method of the wrapped model.
            **kwargs (`dict`, *optional*):
                Keyword arguments passed to the `generate` method of the wrapped model.
        """
        return self.pretrained_model.generate(*args, **kwargs)

    def state_dict(self, *args, **kwargs):
        r"""
        Returns the state dictionary of the model. We add the state dictionary of the value head to the state
        dictionary of the wrapped model by prepending the key with `v_head.`.
        """
        if not self.is_peft_model:
            pretrained_model_state_dict = self.pretrained_model.state_dict(*args, **kwargs)
        else:
            # if it is a peft model, only save the value head
            pretrained_model_state_dict = {}

        v_head_state_dict = self.v_head.state_dict(*args, **kwargs)
        for k, v in v_head_state_dict.items():
            pretrained_model_state_dict[f"v_head.{k}"] = v
        return pretrained_model_state_dict

    def push_to_hub(self, *args, **kwargs):
        self.pretrained_model.v_head = self.v_head
        return self.pretrained_model.push_to_hub(*args, **kwargs)

    def post_init(self, state_dict):
        r"""
        We add the state dictionary of the value head to the state dictionary of the wrapped model by prepending the
        key with `v_head.`. This function removes the `v_head.` prefix from the keys of the value head state
        dictionary.
        """
        for k in list(state_dict.keys()):
            if "v_head." in k:
                state_dict[k.replace("v_head.", "")] = state_dict.pop(k)
        self.v_head.load_state_dict(state_dict, strict=False)
        del state_dict

        if hasattr(self.pretrained_model, "hf_device_map"):
            if (
                "cpu" in self.pretrained_model.hf_device_map.values()
                or "disk" in self.pretrained_model.hf_device_map.values()
            ):
                raise ValueError(
                    "The model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models."
                )

            first_device = list(set(self.pretrained_model.hf_device_map.values()))[0]
            if isinstance(first_device, int):
                if is_torch_npu_available():
                    first_device = f"npu:{first_device}"
                elif is_torch_xpu_available():
                    first_device = f"xpu:{first_device}"
                else:
                    first_device = f"cuda:{first_device}"
            self.v_head = self.v_head.to(first_device)

            def set_device_hook(module, input, outputs):
                # move every tensor output back to the device of the value head
                new_output = ()
                for output in outputs:
                    if isinstance(output, torch.Tensor):
                        new_output += (output.to(first_device),)
                    else:
                        new_output += (output,)
                return new_output

            self.register_forward_hook(set_device_hook)
            self.is_sequential_parallel = True
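
# A minimal usage sketch for the wrapper above, assuming the "gpt2" checkpoint
# is available (fetched from the Hugging Face Hub on first use). The forward
# pass returns `(lm_logits, loss, value)`; `loss` is None here because no
# labels are provided.
def _example_causal_lm_with_value_head():
    from transformers import AutoTokenizer

    model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
    tokenizer = AutoTokenizer.from_pretrained("gpt2")

    inputs = tokenizer("Reward models score each token:", return_tensors="pt")
    lm_logits, loss, value = model(**inputs)
    # `value` has shape (batch, seq_len): one scalar estimate per token,
    # as consumed by e.g. the PPO trainer.
    return lm_logits, loss, value
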
class AutoModelForSeq2SeqLMWithValueHead(PreTrainedModelWrapper):
    r"""
    A seq2seq model with a value head in addition to the language model head. This class inherits from
    `~trl.PreTrainedModelWrapper` and wraps a `transformers.PreTrainedModel` class. The wrapper class supports classic
    functions such as `from_pretrained` and `push_to_hub` and also provides some additional functionalities such as
    `generate`.

    Args:
        pretrained_model (`transformers.PreTrainedModel`):
            The model to wrap. It should be a seq2seq language model such as T5, or any model mapped inside the
            `AutoModelForSeq2SeqLM` class.
        kwargs:
            Additional keyword arguments passed along to the `ValueHead` class.
    """

    transformers_parent_class = AutoModelForSeq2SeqLM
    lm_head_namings = ["lm_head", "embed_out", "output_projection"]
    supported_args = (
        "summary_dropout_prob",
        "v_head_initializer_range",
        "v_head_init_strategy",
    )

    def __init__(self, pretrained_model, **kwargs):
        super().__init__(pretrained_model, **kwargs)
        v_head_kwargs, _, _ = self._split_kwargs(kwargs)
        self.is_encoder_decoder = True

        if not self._has_lm_head():
            raise ValueError("The model does not have a language model head, please use a model that has one.")

        self.v_head = ValueHead(self.pretrained_model.config, **v_head_kwargs)
        self._init_weights(**v_head_kwargs)

    def _has_lm_head(self):
        # check module names of all modules inside `pretrained_model` to find the language model head
        for name, _module in self.pretrained_model.named_modules():
            if any(attribute in name for attribute in self.lm_head_namings):
                return True
        return False

    def post_init(self, state_dict):
        r"""
        We add the state dictionary of the value head to the state dictionary of the wrapped model by prepending the
        key with `v_head.`. This function removes the `v_head.` prefix from the keys of the value head state
        dictionary.
        """
        for k in list(state_dict.keys()):
            if "v_head." in k:
                state_dict[k.replace("v_head.", "")] = state_dict.pop(k)
        self.v_head.load_state_dict(state_dict, strict=False)
        del state_dict

        if hasattr(self.pretrained_model, "hf_device_map"):
            if (
                "cpu" in self.pretrained_model.hf_device_map.values()
                or "disk" in self.pretrained_model.hf_device_map.values()
            ):
                raise ValueError(
                    "The model is offloaded on CPU or disk - CPU & disk offloading is not supported for ValueHead models."
                )

            # find the device of the language model head
            for name, module in self.pretrained_model.named_modules():
                if any(attribute in name for attribute in self.lm_head_namings):
                    lm_head_device = module.weight.device
                    break

            # put v_head on the same device as the lm_head to avoid issues
            self.v_head = self.v_head.to(lm_head_device)

            def set_device_hook(module, input, outputs):
                r"""
                A hook that sets the device of the output of the model to the device of the first parameter of the
                model.

                Args:
                    module (`nn.Module`):
                        The module to which the hook is attached.
                    input (`tuple`):
                        The input to the module.
                    outputs (`tuple`):
                        The output of the module.
                """
                new_output = ()
                for output in outputs:
                    if isinstance(output, torch.Tensor):
                        new_output += (output.to(lm_head_device),)
                    else:
                        new_output += (output,)
                return new_output

            self.register_forward_hook(set_device_hook)
            self.is_sequential_parallel = True

    def state_dict(self, *args, **kwargs):
        r"""
        Returns the state dictionary of the model. We add the state dictionary of the value head to the state
        dictionary of the wrapped model by prepending the key with `v_head.`.
        """
        if not self.is_peft_model:
            pretrained_model_state_dict = self.pretrained_model.state_dict(*args, **kwargs)
        else:
            # if it is a peft model, only save the value head
            pretrained_model_state_dict = {}

        v_head_state_dict = self.v_head.state_dict(*args, **kwargs)
        for k, v in v_head_state_dict.items():
            pretrained_model_state_dict[f"v_head.{k}"] = v
        return pretrained_model_state_dict

    def push_to_hub(self, *args, **kwargs):
        self.pretrained_model.v_head = self.v_head
        return self.pretrained_model.push_to_hub(*args, **kwargs)

    def _init_weights(self, **kwargs):
        r"""
        We initialize the weights of the value head.
        """
        initializer_range = kwargs.pop("v_head_initializer_range", 0.2)
        init_strategy = kwargs.pop("v_head_init_strategy", None)
        if init_strategy is None:
            # do nothing -- keep the default (random) initialization
            pass
        elif init_strategy == "normal":
            self.v_head.summary.weight.data.normal_(mean=0.0, std=initializer_range)
            self.v_head.summary.bias.data.zero_()

    def forward(
        self,
        input_ids=None,
        past_key_values=None,
        attention_mask=None,
        return_past_key_values=False,
        **kwargs,
    ):
        kwargs["past_key_values"] = past_key_values
        if self.is_peft_model and self.pretrained_model.active_peft_config.peft_type == "PREFIX_TUNING":
            kwargs.pop("past_key_values")

        base_model_output = self.pretrained_model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,  # we force the model to output hidden states
            **kwargs,
        )

        last_hidden_state = base_model_output.decoder_hidden_states[-1]
        lm_logits = base_model_output.logits
        loss = base_model_output.loss

        value = self.v_head(last_hidden_state).squeeze(-1)

        # force upcast in fp32 if logits are in half-precision
        if lm_logits.dtype != torch.float32:
            lm_logits = lm_logits.float()

        if return_past_key_values:
            return (lm_logits, loss, value, base_model_output.past_key_values)
        else:
            return (lm_logits, loss, value)

    def generate(self, *args, **kwargs):
        r"""
        We call `generate` on the wrapped model.
        """
        return self.pretrained_model.generate(*args, **kwargs)