o
    	TieM                     @   s  d dl Z d dlmZ d dlmZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZ d dlmZ d dlmZmZmZmZ dd	lmZmZ eefZerbd d
lmZ d dlmZ d dlmZ d dlm Z  eG dd dZ!de!iZ"		d4dededee
d  dee# de$eef f
ddZ%	d5dedede&dee# de$eee'e# f f
ddZ(d6ddZ)d7d d!Z*d7d"d#Z+d6d$d%Z,e	&d8ded' d(d)d*e-fd+d,Z.d9d.d/Z/d0d1 Z0G d2d3 d3Z1dS ):    N)contextmanager)deepcopy)	dataclass)TYPE_CHECKINGAnyLiteralOptionalUnion)version)
AddedTokenAutoTokenizerPreTrainedModelPreTrainedTokenizer   )!AutoModelForCausalLMWithValueHead"AutoModelForSeq2SeqLMWithValueHead)Accelerator)DeepSpeedEngine)Module)DistributedDataParallelc                   @   sf   e Zd ZU dZdZeed< dZeed< dZeed< e	dd Z
e	d	d
 Ze	dd Ze	dd ZdS )ChatMlSpecialTokensziDataclass for special tokens used in ChatML, including system, user, assistant, bos, eos, and pad tokens.z<|im_start|>	bos_tokenz
<|im_end|>	eos_token	pad_tokenc                 C      | j  dS )Nsystemr   self r   D/home/ubuntu/.local/lib/python3.10/site-packages/trl/models/utils.pyr   1      zChatMlSpecialTokens.systemc                 C   r   )Nuserr   r   r   r   r    r"   5   r!   zChatMlSpecialTokens.userc                 C   r   )N	assistantr   r   r   r   r    r#   9   r!   zChatMlSpecialTokens.assistantc                 C   s   d| j  d| j d| j dS )Nz {% for message in messages %}{{'z2' + message['role'] + '
' + message['content'] + 'z7' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ 'z
' }}{% endif %})r   r   r#   r   r   r   r    chat_template=   s   z!ChatMlSpecialTokens.chat_templateN)__name__
__module____qualname____doc__r   str__annotations__r   r   propertyr   r"   r#   r$   r   r   r   r    r   )   s   
 


r   chatmlmodel	tokenizerformatresize_to_multiple_ofreturnc                 C   s   |j dur	td|tvrtd| dt  t|  }|j|_|j|_|j|_|d|j|jgi |j |_ | jt	|j
|durE|ndd t| dddur`|j| j_|j| j_|j| j_t| dddurw|j| j_|j| j_|j| j_| |fS )	a  
    Setup chat format by adding special tokens to the tokenizer, setting the correct format, and extending the
    embedding layer of the model based on the new special tokens.

    <Tip warning="true"> We recommend using [`clone_chat_template`] instead of this function.

    </Tip>

    If the model already has a chat template, this will throw an error. If you want to overwrite it, please set
    `tokenizer.chat_template` to `None`.

    Args:
        model (`~transformers.PreTrainedModel`): The model to be modified.
        tokenizer (`~transformers.PreTrainedTokenizer`): The tokenizer to be modified.
        format (`Optional[Literal["chatml"]]`): The format to be set. Defaults to "chatml".
        resize_to_multiple_of (`int` or `None`): Number to resize the embedding layer to. Defaults to None.

    Returns:
        model (`~transformers.PreTrainedModel`):
            The modified model.
        tokenizer (`~transformers.PreTrainedTokenizer`):
            The modified tokenizer.
    NzcChat template is already added to the tokenizer. If you want to overwrite it, please set it to NonezFormat z" not available. Please use one of additional_special_tokensnew_num_tokenspad_to_multiple_ofconfiggeneration_config)r$   
ValueErrorFORMAT_MAPPINGkeysr   r   r   add_special_tokensresize_token_embeddingslenvocabgetattrpad_token_idr6   bos_token_ideos_token_idr7   )r-   r.   r/   r0   chat_formatr   r   r    setup_chat_formatL   s2   







rD   @   source_tokenizer_pathc           	         s  t |}|  _ fdd|j D } | |j _ j| j	_ j| j
_| jt j|dur5|ndd d}| jt jkretd| d} |}|d7 }|dkr]|| | jt jksCt j| jkr|td	t j d
| j ddd |D } |}|  |fS )a  
    Clones a chat template from a source tokenizer to the target tokenizer and updates the model accordingly.

    This function:
    - Copies the chat template from a source tokenizer to the target tokenizer.
    - Adds any new tokens from the source tokenizer to the target tokenizer.
    - Sets and synchronizes the EOS token across the tokenizer and model.
    - Resizes the model's token embeddings to match the new vocabulary size, optionally rounding it up to a multiple of
      a specified value. In such cases, dummy tokens are added to the tokenizer to ensure the vocabulary size matches
      the embedding dimensions.

    Args:
        model (`PreTrainedModel`):
            Model to update.
        tokenizer (`PreTrainedTokenizer`):
            Tokenizer to update.
        source_tokenizer_path (`str`):
            Path or identifier of the pretrained tokenizer to clone from.
        resize_to_multiple_of (`int` or `None`, *optional*, defaults to `64`):
            The embedding layer will be resized to the new vocabulary size. If this is not `None`, it will round up the
            new vocabulary size to the nearest multiple of this value.

    Returns:
        model (`PreTrainedModel`):
            Updated model with resized token embeddings and EOS token configured.
        tokenizer (`~transformers.PreTrainedTokenizer`):
            Updated tokenizer with the chat template and special tokens applied.
        added_tokens (`list[int]`):
            List of tokens that were added to the tokenizer from the source tokenizer.

    Example:
    ```python
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from trl import clone_chat_template

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-1B")
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-1B")
    model, tokenizer, added_tokens = clone_chat_template(model, tokenizer, "Qwen/Qwen3-0.6B")
    ```
    c                    s   g | ]
}|j  jvr|qS r   )contentr>   .0tokenr.   r   r    
<listcomp>   s    z'clone_chat_template.<locals>.<listcomp>Nr3   r   z
<extra_id_>r   zAVocabulary size mismatch after resizing: tokenizer vocab size is z, but model embedding size is zB. This indicates an internal error in the token alignment process.c                 S   s   g | ]}|j qS r   )rG   rH   r   r   r    rL      s    )r   from_pretrainedget_chat_templater$   added_tokens_decodervalues
add_tokensr   rB   r6   r7   r<   r=   r>   
vocab_sizer   appendRuntimeErrorconvert_tokens_to_ids)	r-   r.   rF   r0   tokenizer_sourceadded_tokensidxdummy_tokenis_addedr   rK   r    clone_chat_template   s<   
/









r\   r   c                 C   s   t | dsdS | jdurt | jdr| jj}n| jdur | j}ntdt|jddD ]}|j  q+|jD ]}|	  q6|j
D ]}|	  q@g |_g |_
dS )z:Removes the optimizer hooks from a DeepSpeed ZeRO-3 model.	optimizerNparameter_offload8The model optimizer is None, which is not yet supported.Trecurse)hasattrr]   r^   rU   iter_paramsmoduleds_active_sub_modulesclearforward_hooksremovebackward_hooks)r-   optimizer_offloadparamhookr   r   r    remove_hooks   s   







rm   Fc                 C   s   t | j|d|  S )Nr`   )	itertoolschainnamed_parametersds_external_parameters)
sub_modulera   r   r   r    get_all_parameters  s   rs   c                 C   s   dd t | |D S )Nc                 S   s   g | ]\}}|qS r   r   )rI   _rk   r   r   r    rL     s    ziter_params.<locals>.<listcomp>)rs   )rd   ra   r   r   r    rc     s   rc   c                 C   s   ddl }t| dsdS | jdurt| jdr| jj}n| jdur$| j}ntdt|jtdkr;||j	 dS |
|j	 dS )z7Adds the optimizer hooks from a DeepSpeed ZeRO-3 model.r   Nr]   r^   r_   z0.16.4)	deepspeedrb   r]   r^   rU   r
   parse__version___register_deepspeed_modulerd   _register_hooks_recursively)r-   ru   rj   r   r   r    	add_hooks  s   


rz   T)r   r   acceleratorr   gather_deepspeed3_paramsc                 c   s    | | }|jjdurJ|jjjdkrJ|s| | V  dS ddl}|j|   t|  | | V  t	|  W d   dS 1 sCw   Y  dS |V  dS )a  
    Context manager to unwrap distributed or accelerated models for generation tasks.

    Args:
        model (`Union[DistributedDataParallel, DeepSpeedEngine]`):
            Model to be unwrapped.
        accelerator (`~accelerate.Accelerator`):
            Accelerator instance managing the model.
        gather_deepspeed3_params (`bool`, *optional*, defaults to `True`):
            Whether to gather weights for DeepSpeed ZeRO Stage 3 models. If `False`, skips parameter gathering, which
            can be more memory-efficient but may lead to slower generation times.

    Yields:
        Unwrapped model.

    Example:
    ```python
    with unwrap_model_for_generation(model, accelerator) as unwrapped_model:
        generated_outputs = unwrapped_model.generate(input_ids)
    ```
    N   r   )
unwrap_modelstatedeepspeed_plugin
zero_stageru   zeroGatheredParameters
parametersrm   rz   )r-   r{   r|   unwrapped_modelru   r   r   r    unwrap_model_for_generation"  s   

"
r   r   c                 C   s   ddl }|jj}t|j}|d d }| durDt| jddr$t| jjnt| jdd}|durD|dkrD|	|| d| d	| | d
 |dkrNd|d d< |j
| |d^} }|   | S )a  Prepares the model for DeepSpeed inference or evaluation by initializing it with the appropriate configuration.

    Adapted from accelerate:
    https://github.com/huggingface/accelerate/blob/739b135f8367becb67ffaada12fe76e3aa60fefd/src/accelerate/accelerator.py#L1473
    r   Nzero_optimizationstagehidden_sizeshidden_sizer}   
   g?)z$zero_optimization.reduce_bucket_sizez4zero_optimization.stage3_param_persistence_thresholdz-zero_optimization.stage3_prefetch_bucket_size)r-   r6   )ru   r   r   r   deepspeed_configr?   r6   maxr   update
initializeeval)r-   r{   ru   r   config_kwargsr   r   rt   r   r   r    prepare_deepspeedL  s*   

r   c                 C   s   ddl m} ddlm} t| |sGt| |sG|jj|  |jj}|jp&|j	|j
|j|j|j|j|j|j|j|j|j|jd}|| fi |} |   | S )Nr   )
FSDPModule)FullyShardedDataParallel)sharding_strategycpu_offloadauto_wrap_policymixed_precisionsync_module_statesbackward_prefetchforward_prefetchuse_orig_paramsparam_init_fnignored_moduleslimit_all_gathers	device_id)torch.distributed.fsdpr   2torch.distributed.fsdp.fully_sharded_data_parallelr   
isinstancer   fsdp_pluginset_auto_wrap_policyr   reshard_after_forwardr   r   mixed_precision_policyr   r   r   r   r   r   r   devicer   )r-   r{   r   FSDPr   kwargsr   r   r    prepare_fsdpt  s*   
r   c                
   @   sf   e Zd ZdZdejdejdededef
ddZdejdejd	d
fddZ	dejdejd	d
fddZ
d
S )_ForwardRedirectionaI  Implements the `forward-redirection`.

    Taken from Pytorch-lightning:
    https://github.com/Lightning-AI/pytorch-lightning/blob/02311d03fb982560246eead7c08104481fac9579/src/lightning/pytorch/strategies/strategy.py#L602

    A method call to a wrapped module gets rerouted through the wrapper's `forward` method instead.

    wrapper_moduleoriginal_modulemethodargsr   c                    sL   j dtdtdtf fdd}|_ |i |} |S )a  Reroutes a method call through the `wrapper_module`'s `forward` method.

        Args:
            wrapper_module: The module that has `original_module` wrapped.
            original_module: The module that was wrapped inside `wrapper_module`.
            method_name: The name of the method that should be called on the `original_module` after inputs get
                redirected through the `wrapper_module`'s `forward` method.
            *args: The positional arguments to the method `method_name`. They will get passed to a patched
                `forward` method instead.
            **kwargs: The keyword arguments to the method `method_name`. They will get passed to a patched
                `forward` method instead.

        _args_kwargsr1   c                     s$   _  | i |} |S N)forwardon_after_inner_forward)r   r   outr   original_forwardr   r   r   r   r    wrapped_forward  s   z5_ForwardRedirection.__call__.<locals>.wrapped_forward)r   r   on_after_outer_forward)r   r   r   r   r   r   r   wrapper_outputr   r   r    __call__  s   "
z_ForwardRedirection.__call__r1   Nc                 C      d S r   r   r   r   r   r   r   r    r        z*_ForwardRedirection.on_after_inner_forwardc                 C   r   r   r   r   r   r   r    r     r   z*_ForwardRedirection.on_after_outer_forward)r%   r&   r'   r(   nnr   callabler   r   r   r   r   r   r   r    r     s    	
"r   )r,   N)rE   )r-   r   r1   N)F)T)r-   r   r{   r   )2rn   
contextlibr   copyr   dataclassesr   typingr   r   r   r   r	   torch.nnr   	packagingr
   transformersr   r   r   r   modeling_value_headr   r   SUPPORTED_ARCHITECTURES
accelerater   deepspeed.runtime.enginer   r   torch.nn.parallel.distributedr   r   r9   inttuplerD   r)   listr\   rm   rs   rc   rz   boolr   r   r   r   r   r   r   r    <module>   sz   


K

]



)(