o
    	Ti                     @   sZ   d dl mZmZmZmZ d dlZd dlmZmZm	Z	m
Z
 ddlmZmZ G dd dZdS )    )AnyCallableOptionalUnionN)GenerationConfigPreTrainedTokenizerPreTrainedTokenizerFastset_seed   )SUPPORTED_ARCHITECTURESPreTrainedModelWrapperc                   @   s   e Zd Z				ddedeeef deee	 gee
 f deded	ee d
edee ddfddZ		ddeee ejeej eee  f dedeee	ejf  deee	  fddZdS )BestOfNSampler   N   model	tokenizerqueries_to_scoreslength_samplersample_sizeseedn_candidatesgeneration_configreturnc	           	      C   s   |durt | t|ttfstdt| t|ts)tdt| dt || _|| _|| _	|| _
|| _|| _|| _dS )a  
        Initialize the sampler for best-of-n generation

        Args:
            model (`PreTrainedModelWrapper`):
                The pretrained model to use for generation
            tokenizer (`PreTrainedTokenizer` or `PreTrainedTokenizerFast`):
                Tokenizer associated with the pretrained model
            queries_to_scores (`Callable[[list[str]], list[float]]`):
                Callable that takes a list of generated texts and returns the associated reward scores
            length_sampler (`Any`):
                Sampler used to sample the length of the generated text
            sample_size (`int`):
                Number of samples to generate for each query
            seed (`int`, *optional*):
                Random seed used to control generation
            n_candidates (`int`):
                Number of candidates to return for each query
            generation_config (`GenerationConfig`, *optional*):
                Generation config passed to the underlying model's `generate` method. See `GenerationConfig`
                (https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationConfig)
                for more details
        NzHtokenizer must be a PreTrainedTokenizer or PreTrainedTokenizerFast, got z,model must be a PreTrainedModelWrapper, got z  - supported architectures are: )r	   
isinstancer   r   
ValueErrortyper   r   r   r   r   
gen_configr   r   )	selfr   r   r   r   r   r   r   r    r   P/home/ubuntu/.local/lib/python3.10/site-packages/trl/extras/best_of_n_sampler.py__init__   s"   "

zBestOfNSampler.__init__Ttokenized_queryskip_special_tokensdevicec           
         s  d}t |tjr|jdkr|d}n,t |tr?t|d }|tu r+t|d}n|tju r8dd |D }ndd |D }g }|D ]B}|	| j
df}| jj||f|  | jd|  | jj |d t|  }	 fd	d|	| jjD  |  qC|S )
a  
        Generate the best of n samples for input queries

        Args:
            tokenized_query (`list[int]` or `torch.Tensor` or `list[torch.Tensor]` or `list[int]`):
                represents either a single tokenized query (a single tensor or a list of integers) or a batch of
                tokenized queries (a list of tensors or a list of lists of integers)
            skip_special_tokens (`bool`):
                Whether to remove the special tokens from the output
            device (`str` or `torch.device`, *optional*):
                The device on which the model will be loaded
            **generation_kwargs (`dict`, *optional*):
                Additional keyword arguments passed along to the underlying model's `generate` method. This is used to
                override generation config

        Returns:
            list[list[str]]: A list of lists of generated texts
        Nr   r   c                 S   s   g | ]}| d qS )r   )reshape).0tensorr   r   r   
<listcomp>q   s    z+BestOfNSampler.generate.<locals>.<listcomp>c                 S   s   g | ]
}t |d qS r$   )torchr(   r&   )r'   queryr   r   r   r)   s   s    )max_new_tokensr   )r"   c                    s   g | ]} | qS r   r   )r'   ioutputr   r   r)      s    )r   r*   Tensorndim	unsqueezelistr   intr(   repeatr   r   generatetor   r   squeezer   batch_decoder   topkr   indicesappend)
r   r!   r"   r#   generation_kwargsquerieselement_typeresultr+   scoresr   r.   r   r6   O   s8   

zBestOfNSampler.generate)r   Nr   N)TN)__name__
__module____qualname__r   r   r   r   r   r3   strfloatr   r4   r   r   r    r*   r0   boolr#   r6   r   r   r   r   r      sF    
	

:"
r   )typingr   r   r   r   r*   transformersr   r   r   r	   modelsr   r   r   r   r   r   r   <module>   s
   