o
    i                     @   sP   d dl Z d dlmZmZ d dlmZmZmZmZ ddlm	Z	 G dd de	Z
dS )    N)AutoModelForCausalLMAutoTokenizer)LogitsProcessorList RepetitionPenaltyLogitsProcessorTemperatureLogitsWarperTopPLogitsWarper   )	BaseModelc                   @   s:   e Zd Z		dddZ			ddd	Z			dd
dZdS )TransformersModelcudaNc                 K   sN   || _ |r|nd}tj||dkrtjntj|d| _t|| _| j	  d S )Nzekwek/Soprano-1.1-80Mr   )dtype
device_map)
devicer   from_pretrainedtorchbfloat16float32modelr   	tokenizereval)selfr   
model_pathkwargsmodel_name_or_path r   Q/home/ubuntu/.local/lib/python3.10/site-packages/soprano/backends/transformers.py__init__   s   zTransformersModel.__init__ffffff?333333?333333?c                 C   s<  |dkrd}| j |ddddd| j}t  | jj|d |d dd|||| j jddd	
}W d    n1 s9w   Y  g }| jjj	}t
t|D ]P}	|j|	 }
g }t|j}t
|D ]"}|
||
d
 |  }||kr||j| d |	dd d f  q]t| }|
d  |krdnd}|||d qK|S )N        MbP?ptT   )return_tensorspadding
truncation
max_length	input_idsattention_mask)
r(   r)   max_new_tokens	do_sampletop_ptemperaturerepetition_penaltypad_token_idreturn_dict_in_generateoutput_hidden_statesr   stoplengthfinish_reasonhidden_state)r   tor   r   no_gradr   generater/   configeos_token_idrangelen	sequenceshidden_statessizeappendstacksqueezeitem)r   promptsr,   r-   r.   inputsoutputsresr<   iseqr@   num_output_tokensjtokenlast_hidden_stater6   r   r   r   infer   sT   



*
zTransformersModel.inferc                 #   s   |dkrd}| j |dd| j}|d }t  |dkr% t|d t |dkr4t|d |dk r@t|d	  fd
d}t	  | j
|ddd}|j}	|jd d dd d f }
|}||
|}d}| j
jj}t|D ]S}tj||gdd}| j
||	ddd}|j}	|jd d d dd d f }d }| |krd}n||d krd}||dV  |r n|jd d dd d f }
||
|}quW d    d S W d    d S 1 sw   Y  d S )Nr    r!   r"   )r$   r(   g      ?)penalty)r-   )r,   c                    s4    || }||}t jjj|dd}t j|ddS )Nr2   dimr   )num_samples)r   nn
functionalsoftmaxmultinomial)logits	input_seqscoresprobslogits_processorlogits_warperr   r   get_next_token^   s   

z6TransformersModel.stream_infer.<locals>.get_next_tokenT)	use_cacher1   r2   r#   rR   )past_key_valuesra   r1   r3   r   r4   r5   )r   r8   r   r   rB   r   r   r   r   r9   r   rb   rY   r;   r<   r=   catr@   rE   )r   promptr,   r-   r.   rG   r(   r`   rH   rb   next_token_logitsgenerated_ids
next_tokenr*   r<   rJ   current_hidden_stater6   r   r]   r   stream_inferF   sh   


1"zTransformersModel.stream_infer)r   N)r   r   r   )__name__
__module____qualname__r   rP   ri   r   r   r   r   r
      s    

/r
   )r   transformersr   r   r   r   r   r   baser	   r
   r   r   r   r   <module>   s
    