o
    i                     @   s<   d dl Z d dlmZmZmZ ddlmZ G dd deZdS )    N)pipelineTurbomindEngineConfigGenerationConfig   )	BaseModelc                   @   s<   e Zd Z			dddZ			dd	d
Z			dddZdS )LMDeployModelcudad   Nc                 K   sP   |dksJ d|d t jdj }t|d}|r|nd}t|d|d| _d S )Nr   zclmdeploy only supports cuda devices, consider changing device or using a different backend instead.i   )cache_max_entry_countzekwek/Soprano-1.1-80MERROR)	log_levelbackend_config)torchr   get_device_propertiestotal_memoryr   r   )selfdevicecache_size_mb
model_pathkwargscache_size_ratior   model_name_or_path r   M/home/ubuntu/.local/lib/python3.10/site-packages/soprano/backends/lmdeploy.py__init__   s   
zLMDeployModel.__init__ffffff?333333?333333?c           	      C   sH   t dd|||dd}| j||d}g }|D ]}||j|jd q|S N
generationTi   )output_last_hidden_state	do_sampletop_ptemperaturerepetition_penaltymax_new_tokens)
gen_config)finish_reasonhidden_state)r   r   appendr'   last_hidden_state)	r   promptsr"   r#   r$   r&   	responsesresresponser   r   r   infer   s   
zLMDeployModel.inferc                 c   sF    t dd|||dd}| jj|g|d}|D ]
}|j|jdV  qd S r   )r   r   stream_inferr'   r*   )r   promptr"   r#   r$   r&   r,   r.   r   r   r   r0   +   s   
zLMDeployModel.stream_infer)r   r	   N)r   r   r   )__name__
__module____qualname__r   r/   r0   r   r   r   r   r      s    

r   )r   lmdeployr   r   r   baser   r   r   r   r   r   <module>   s    