import dataclasses
import inspect
from typing import TYPE_CHECKING, Iterator, List, Optional, Tuple, Union

from outlines.generate.api import GenerationParameters, SamplingParameters
from outlines.models.tokenizer import Tokenizer

if TYPE_CHECKING:
    import torch
    from transformers import PreTrainedModel, PreTrainedTokenizer

    from outlines.processors import OutlinesLogitsProcessor

__all__ = ["transformers"]


KVCacheType = Tuple[Tuple["torch.DoubleTensor", "torch.DoubleTensor"], ...]


def get_llama_tokenizer_types():
    """Get all the Llama tokenizer types/classes that need work-arounds.

    When they can't be imported, a dummy class is created.

    """
    try:
        from transformers.models.llama import LlamaTokenizer
    except ImportError:

        class LlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.llama import LlamaTokenizerFast
    except ImportError:

        class LlamaTokenizerFast:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizer
    except ImportError:

        class CodeLlamaTokenizer:  # type: ignore
            pass

    try:
        from transformers.models.code_llama import CodeLlamaTokenizerFast
    except ImportError:

        class CodeLlamaTokenizerFast:  # type: ignore
            pass

    return (
        LlamaTokenizer,
        LlamaTokenizerFast,
        CodeLlamaTokenizer,
        CodeLlamaTokenizerFast,
    )


class TransformerTokenizer(Tokenizer):
    """Represents a tokenizer for models in the `transformers` library."""

    def __init__(self, tokenizer: "PreTrainedTokenizer", **kwargs):
        self.tokenizer = tokenizer
        self.eos_token_id = self.tokenizer.eos_token_id
        self.eos_token = self.tokenizer.eos_token

        if self.tokenizer.pad_token_id is None:
            # Fall back to the EOS token when no padding token is defined.
            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
            self.pad_token_id = self.eos_token_id
        else:
            self.pad_token_id = self.tokenizer.pad_token_id
            self.pad_token = self.tokenizer.pad_token

        self.special_tokens = set(self.tokenizer.all_special_tokens)
        self.vocabulary = self.tokenizer.get_vocab()
        self.is_llama = isinstance(self.tokenizer, get_llama_tokenizer_types())

    def encode(
        self, prompt: Union[str, List[str]], **kwargs
    ) -> Tuple["torch.LongTensor", "torch.LongTensor"]:
        kwargs["padding"] = True
        kwargs["return_tensors"] = "pt"
        output = self.tokenizer(prompt, **kwargs)
        return output["input_ids"], output["attention_mask"]

    def decode(self, token_ids: "torch.LongTensor") -> List[str]:
        text = self.tokenizer.batch_decode(token_ids, skip_special_tokens=True)
        return text

    def convert_token_to_string(self, token: str) -> str:
        from transformers.file_utils import SPIECE_UNDERLINE

        string = self.tokenizer.convert_tokens_to_string([token])

        if self.is_llama:
            # A hack to handle missing spaces in HF's Llama tokenizers.
            if token.startswith(SPIECE_UNDERLINE) or token == "<0x20>":
                return " " + string

        return string

    def __eq__(self, other):
        if isinstance(other, type(self)):
            if hasattr(self, "model_name") and hasattr(self, "kwargs"):
                return (
                    other.model_name == self.model_name
                    and other.kwargs == self.kwargs
                )
            else:
                return other.tokenizer == self.tokenizer
        return NotImplemented

    def __hash__(self):
        from datasets.fingerprint import Hasher

        return hash(Hasher.hash(self.tokenizer))

    def __getstate__(self):
        state = {"tokenizer": self.tokenizer}
        return state

    def __setstate__(self, state):
        self.__init__(state["tokenizer"])


class Transformers:
    """Represents a `transformers` model."""

    def __init__(
        self,
        model: "PreTrainedModel",
        tokenizer: "PreTrainedTokenizer",
    ):
        self.model = model
        self.tokenizer = TransformerTokenizer(tokenizer)

    def forward(
        self,
        input_ids: "torch.LongTensor",
        attention_mask: "torch.LongTensor",
        past_key_values: Optional[Tuple] = None,
    ) -> Tuple["torch.FloatTensor", Optional[KVCacheType]]:
        """Compute a forward pass through the transformer model.

        Parameters
        ----------
        input_ids
            The input token ids.  Must be one or two dimensional.
        attention_mask
            The attention mask.  Must be one or two dimensional.
        past_key_values
            A tuple of tuples containing the cached key and value tensors for each
            attention head.

        Returns
        -------
        The computed logits and the new cached key and value tensors.

        """
        try:
            import torch
        except ImportError:
            raise ImportError(
                "The `torch` library needs to be installed to use `transformers` models."
            )
        assert 0 < input_ids.ndim < 3

        if past_key_values:
            # Only the last token is needed when a KV cache is provided.
            input_ids = input_ids[..., -1].unsqueeze(-1)

        with torch.inference_mode():
            output = self.model(
                input_ids,
                attention_mask=attention_mask,
                return_dict=True,
                output_attentions=False,
                output_hidden_states=False,
                past_key_values=past_key_values,
            )

        return output.logits, output.past_key_values

    def __call__(
        self,
        input_ids: "torch.LongTensor",
        attention_mask: "torch.LongTensor",
        past_key_values: Optional[Tuple] = None,
    ) -> "torch.FloatTensor":
        logits, kv_cache = self.forward(input_ids, attention_mask, past_key_values)
        next_token_logits = logits[..., -1, :]
        return next_token_logits, kv_cache

    def generate(
        self,
        prompts: Union[str, List[str]],
        generation_parameters: GenerationParameters,
        logits_processor: Optional["OutlinesLogitsProcessor"],
        sampling_parameters: SamplingParameters,
    ) -> Union[str, List[str], List[List[str]]]:
        """Generate text using `transformers`.

        Arguments
        ---------
        prompts
            A prompt or list of prompts.
        generation_parameters
            An instance of `GenerationParameters` that contains the prompt,
            the maximum number of tokens, stop sequences and seed. All the
            arguments to `SequenceGeneratorAdapter`'s `__call__` method.
        logits_processor
            The logits processor to use when generating text.
        sampling_parameters
            An instance of `SamplingParameters`, a dataclass that contains
            the name of the sampler to use and related parameters as available
            in Outlines.

        Returns
        -------
        The generated text.
        """
        if isinstance(prompts, str):
            # Convert to a 2D batch of size one.
            input_ids, attention_mask = self.tokenizer.encode([prompts])
        else:
            input_ids, attention_mask = self.tokenizer.encode(prompts)

        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": attention_mask.to(self.model.device),
        }
        # Some models (e.g. Mamba) do not accept an attention mask.
        if (
            "attention_mask"
            not in inspect.signature(self.model.forward).parameters.keys()
        ):
            del inputs["attention_mask"]

        generation_kwargs = self._get_generation_kwargs(
            prompts, generation_parameters, logits_processor, sampling_parameters
        )
        generated_ids = self._generate_output_seq(prompts, inputs, **generation_kwargs)

        # For a single string prompt with a single sample, return a 1D output.
        if isinstance(prompts, str):
            generated_ids = generated_ids.squeeze(0)

        return self._decode_generation(generated_ids)

    def stream(
        self,
        prompts: Union[str, List[str]],
        generation_parameters: GenerationParameters,
        logits_processor: Optional["OutlinesLogitsProcessor"],
        sampling_parameters: SamplingParameters,
    ) -> Iterator[Union[str, List[str]]]:
        """Temporary stream stand-in which implements the stream() signature
        and equivalent behaviour, but doesn't yield until generation completes.

        TODO: implement following completion of https://github.com/huggingface/transformers/issues/30810
        """
        if isinstance(prompts, str):
            # Convert to a 2D batch of size one.
            input_ids, attention_mask = self.tokenizer.encode([prompts])
        else:
            input_ids, attention_mask = self.tokenizer.encode(prompts)

        inputs = {
            "input_ids": input_ids.to(self.model.device),
            "attention_mask": attention_mask.to(self.model.device),
        }
        # Some models (e.g. Mamba) do not accept an attention mask.
        if (
            "attention_mask"
            not in inspect.signature(self.model.forward).parameters.keys()
        ):
            del inputs["attention_mask"]

        generation_kwargs = self._get_generation_kwargs(
            prompts, generation_parameters, logits_processor, sampling_parameters
        )
        generated_ids = self._generate_output_seq(prompts, inputs, **generation_kwargs)

        # For a single string prompt with a single sample, return a 1D output.
        if isinstance(prompts, str):
            generated_ids = generated_ids.squeeze(0)

        for i in range(generated_ids.size(-1)):
            output_group_ids = generated_ids.select(-1, i).unsqueeze(-1)
            yield self._decode_generation(output_group_ids)

    def _get_generation_kwargs(
        self,
        prompts: Union[str, List[str]],
        generation_parameters: GenerationParameters,
        logits_processor: Optional["OutlinesLogitsProcessor"],
        sampling_parameters: SamplingParameters,
    ) -> dict:
        """Convert Outlines generation parameters into `model.generate` kwargs."""
        from transformers import GenerationConfig, LogitsProcessorList, set_seed

        max_new_tokens, stop_at, seed = dataclasses.astuple(generation_parameters)
        sampler, num_samples, top_p, top_k, temperature = dataclasses.astuple(
            sampling_parameters
        )
        if max_new_tokens is None:
            max_new_tokens = int(2**30)

        # Note: this sets the seed globally, which is not ideal.
        if seed is not None:
            set_seed(seed)

        if logits_processor is not None:
            logits_processor_list = LogitsProcessorList([logits_processor])
        else:
            logits_processor_list = None

        generation_config = GenerationConfig(
            max_new_tokens=max_new_tokens,
            stop_strings=stop_at,
            num_return_sequences=(num_samples or 1),
            top_p=top_p,
            top_k=top_k,
            temperature=temperature,
            do_sample=(sampler == "multinomial"),
            num_beams=(num_samples if sampler == "beam_search" else 1),
            eos_token_id=self.tokenizer.eos_token_id,
            pad_token_id=self.tokenizer.pad_token_id,
        )

        return dict(
            logits_processor=logits_processor_list,
            generation_config=generation_config,
            tokenizer=self.tokenizer.tokenizer,
        )

    def _generate_output_seq(
        self, prompts, inputs, generation_config, **generation_kwargs
    ):
        input_ids = inputs["input_ids"]
        output_ids = self.model.generate(
            **inputs, generation_config=generation_config, **generation_kwargs
        )

        # Encoder-decoder models return only the generated ids; decoder-only
        # models return the prompt followed by the generated ids.
        if self.model.config.is_encoder_decoder:
            generated_ids = output_ids
        else:
            generated_ids = output_ids[:, input_ids.shape[1] :]

        # For batched prompts with multiple samples each, reshape to a 3D view.
        num_samples = generation_config.num_return_sequences or 1
        if num_samples > 1 and isinstance(prompts, list):
            batch_size = input_ids.size(0)
            num_return_sequences = generation_config.num_return_sequences or 1
            generated_ids = generated_ids.view(batch_size, num_return_sequences, -1)

        return generated_ids

    def _decode_generation(self, generated_ids: "torch.Tensor"):
        if len(generated_ids.shape) == 1:
            return self.tokenizer.decode([generated_ids])[0]
        elif len(generated_ids.shape) == 2:
            return self.tokenizer.decode(generated_ids)
        elif len(generated_ids.shape) == 3:
            return [
                self.tokenizer.decode(generated_ids[i])
                for i in range(len(generated_ids))
            ]
        else:
            raise TypeError(
                f"Generated outputs aren't 1D, 2D or 3D, but instead are {generated_ids.shape}"
            )


def transformers(
    model_name: str,
    device: Optional[str] = None,
    model_kwargs: dict = {},
    tokenizer_kwargs: dict = {},
    model_class=None,
    tokenizer_class=None,
):
    """Instantiate a model from the `transformers` library and its tokenizer.

    Parameters
    ----------
    model_name
        The name of the model as listed on Hugging Face's model page.
    device
        The device(s) on which the model should be loaded. This overrides
        the `device_map` entry in `model_kwargs` when provided.
    model_kwargs
        A dictionary that contains the keyword arguments to pass to the
        `from_pretrained` method when loading the model.
    tokenizer_kwargs
        A dictionary that contains the keyword arguments to pass to the
        `from_pretrained` method when loading the tokenizer.

    Returns
    -------
    A `Transformers` model instance.

    """
    if model_class is None or tokenizer_class is None:
        try:
            from transformers import AutoModelForCausalLM, AutoTokenizer
        except ImportError:
            raise ImportError(
                "The `transformers` library needs to be installed in order to use `transformers` models."
            )
    if model_class is None:
        model_class = AutoModelForCausalLM
    if tokenizer_class is None:
        tokenizer_class = AutoTokenizer

    if device is not None:
        model_kwargs["device_map"] = device

    model = model_class.from_pretrained(model_name, **model_kwargs)

    tokenizer_kwargs.setdefault("padding_side", "left")
    tokenizer = tokenizer_class.from_pretrained(model_name, **tokenizer_kwargs)

    return Transformers(model, tokenizer)


def mamba(
    model_name: str,
    device: Optional[str] = None,
    model_kwargs: dict = {},
    tokenizer_kwargs: dict = {},
):
    try:
        from transformers import MambaForCausalLM
    except ImportError:
        raise ImportError(
            "The `mamba_ssm`, `torch` and `transformers` libraries need to be installed in order to use Mamba."
        )

    return transformers(
        model_name=model_name,
        device=device,
        model_kwargs=model_kwargs,
        tokenizer_kwargs=tokenizer_kwargs,
        model_class=MambaForCausalLM,
    )