o
    ॵid                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZmZm	Z	m
Z
 d dlZd dlmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZ d d	lmZ d d
lmZ d dl m!Z! G dd dZ"G dd de"Z#G dd de"Z$defddZ%dS )    N)contextmanager)AnyDict	GeneratorListOptionalUnion)version)nn)PreTrainedModel)GreedySearchDecoderOnlyOutput) GreedySearchEncoderDecoderOutputLogitsProcessorListSampleDecoderOnlyOutputSampleEncoderDecoderOutputStoppingCriteriaListvalidate_stopping_criteria)Input)
Frameworks)device_placementc                   @   s   e Zd ZdefddZdS )StreamingOutputMixinreturnc                 O   s   t )z
        Support the input of Model and Pipeline.
        The output is a `Generator` type,
        which conforms to the output standard of modelscope.
        )NotImplementedError)selfargskwargs r   U/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/utils/streaming_output.pystream_generate   s   z$StreamingOutputMixin.stream_generateN)__name__
__module____qualname__r   r   r   r   r   r   r      s    r   c                
   @   s   e Zd Zdeeee f defddZdedee	e
f dee	e
f fddZdee	e
f d	ee	e
f d
ee	e
f defddZdeee	e
f  ded	ee	e
f d
ee	e
f def
ddZdS )PipelineStreamingOutputMixininputr   c                    s   t jts
J djsjrjd rjs  |dd}jdi |\ }}t |t	r] fdd|D }|du rSg }|D ]}	|
|	|| qD|S ||||}|S | }
|
||}|S )a  
        Similar to the `Pipeline.__call__` method.
        it supports the input that the pipeline can accept,
        and also supports batch input.

        self.model must be a subclass of StreamingOutputMixin
        and implement the stream method.
        z,pipeline.model must be StreamingOutputMixin!r   
batch_sizeNc                    s   g | ]} | qS r   )_preprocess_with_check).0ipreprocess_paramsr   r   r   
<listcomp><   s    
z@PipelineStreamingOutputMixin.stream_generate.<locals>.<listcomp>r   )
isinstancemodelr   has_multiple_modelsmodels_model_prepareprepare_modelpop_sanitize_parameterslistappend_stream_single_stream_batchr%   )r   r#   r   r   r$   forward_paramspostprocess_paramsmodel_input_listoutputelemodel_inputr   r(   r   r   '   s@   

z,PipelineStreamingOutputMixin.stream_generater)   c                 C   s   |  | | j|fi |S N)_check_input
preprocess)r   r#   r)   r   r   r   r%   Q   s   
z3PipelineStreamingOutputMixin._preprocess_with_checkr<   r7   r8   c              	   c   s    t | j| jV | jtjkr6t  | jr| |}| jj	|fi |}W d    n1 s0w   Y  n
| jj	|fi |}|D ]}| j
|fi |}| | |V  qBW d    d S 1 saw   Y  d S r=   )r   	frameworkdevice_namer   torchno_grad_auto_collate_collate_fnr,   r   postprocess_check_output)r   r<   r7   r8   streamoutr   r   r   r5   W   s,   


"z+PipelineStreamingOutputMixin._stream_singler9   r$   c              
   #   sN   g }g }t | j| j tdt||D ]Z}t|| t|}|| }	||	 | ||| }
| jtj	krct	
  | jrF| |
}
|| jj|
fi | W d    n1 s]w   Y  q|| jj|
fi | qd gt| }d}|t|k rd}tt||D ]x\}\}}	zdt|}
t|	D ]Y i }|
 D ]8\}}|d urt|ttfrt|d t	jrt| fdd|D ||< q|  ||< q|  d  ||< q| j|fi |}| | ||   }|||< qW q ty   |d7 }Y qw |V  |t|k sW d    |S W d    |S 1 s w   Y  |S )Nr   c                 3   s     | ]}|  d   V  qdS )   Nr   )r&   e	batch_idxr   r   	<genexpr>   s
    
z=PipelineStreamingOutputMixin._stream_batch.<locals>.<genexpr>rJ   )r   r@   rA   rangelenminr4   _batchr   rB   rC   rD   rE   r,   r   	enumeratezipnextitemsr+   tupler3   TensortyperF   rG   StopIteration)r   r9   r$   r7   r8   stream_listreal_batch_sizesr'   endreal_batch_sizebatched_outoutput_liststop_streamsrH   rI   kelementoutput_indexr   rL   r   r6   k   s   





3
33z*PipelineStreamingOutputMixin._stream_batchN)r   r    r!   r   r   r   r   r   r   strr   r%   r5   intr6   r   r   r   r   r"   %   s6    
*







r"   c                   @   sF  e Zd ZdefddZededefddZe										dd	e	j
d
ee dee dee dee deeeee f  dee dee dee dee dedefddZe											dd	e	j
d
ee dee dee dee dee deeeee f  dee dee dee dee dedefddZdS )#PretrainedModelStreamingOutputMixinr   c                 O   sd   t | tr| n| j}t |tsJ d| | |j|i |W  d    S 1 s+w   Y  d S )Nz-self or self.model must be `PretrainedModel`!)r+   r   r,   _replace_generategenerate)r   r   r   r,   r   r   r   r      s   $z3PretrainedModelStreamingOutputMixin.stream_generater,   c                 c   s    t tjt dkrd}d}nd}d}t||}t||}t||t| j| t||t| j	| d V  t||| t||| d S )Nz4.39.0_greedy_search_samplegreedy_searchsample)
r	   parsetransformers__version__getattrsetattrtypes
MethodTypestream_greedy_searchstream_sample)r   r,   greedy_search_namesample_nameorigin_greedy_searchorigin_sampler   r   r   rh      s"   

z5PretrainedModelStreamingOutputMixin._replace_generateNF	input_idslogits_processorstopping_criteria
max_lengthpad_token_ideos_token_idoutput_attentionsoutput_hidden_statesoutput_scoresreturn_dict_in_generatesynced_gpusc              	   k   s   |d ur|nt  }|d ur|nt }|d ur"tdt t||}|d ur(|n| jj}|d ur2|n| jj}t	|t
r>|g}|d urKt||jnd }|	d urS|	n| jj}	|d ur]|n| jj}|d urg|n| jj}|
d urq|
n| jj}
|
r{|	r{dnd }|
r|rdnd }|
r|rdnd }|
r|rdnd }|
r| jjr|r|d dnd }|r|d dnd }tj|jd tj|jd}d}	 |rt|rd
nd|j}tj|tjjd | d
krd S | j|fi |}| di |d	||d}|r|rq|jd d dd d f }|||}|
rG|	r||f7 }|r5|| jjr%|j fn|j!f7 }| jjr5||j"f7 }|rG|| jjrB|j#fn|j$f7 }tj%|dd}|d urf|d u r\t&d|| |d|   }tj'||d d d f gdd}|
r| jjrt(|||||||dV  nt)||||dV  n|V  | j*||| jjd}|d ur|+|,|jd d-|.dj/dd}|0 dkrd	}|||rd	}|r|sd S q)Nz`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList([MaxLengthCriteria(max_length=max_length)])` instead.r   encoder_outputs
attentionshidden_statesr   dtypedeviceFT              ?opreturn_dictr   r   dimGIf `eos_token_id` is defined, make sure that `pad_token_id` is defined.rJ   	sequencesscoresencoder_attentionsencoder_hidden_statesdecoder_attentionscross_attentionsdecoder_hidden_statesr   r   r   r   is_encoder_decoder)1r   r   warningswarnUserWarningr   generation_configr   r   r+   rf   rB   tensortor   r   r   r   r   configr   getonesshapelongdist
all_reduceReduceOpSUMitemprepare_inputs_for_generationlogitsr   r   r   r   r   argmax
ValueErrorcatr   r   #_update_model_kwargs_for_generationmultilene	unsqueezeprodmax)r   r{   r|   r}   r~   r   r   r   r   r   r   r   model_kwargseos_token_id_tensorr   r   r   r   r   r   unfinished_sequencesthis_peer_finishedthis_peer_finished_flagmodel_inputsoutputsnext_token_logitsnext_tokens_scoresnext_tokensr   r   r   ru      s2  














z8PretrainedModelStreamingOutputMixin.stream_greedy_searchlogits_warperc              	   k   s   |d ur|nt  }|d ur|nt }|d ur"tdt t||}|d ur(|nt  }|d ur1|n| jj}|d ur;|n| jj}t	|t
rG|g}|d urTt||jnd }|
d ur\|
n| jj}
|d urf|n| jj}|	d urp|	n| jj}	|d urz|n| jj}|r|
rdnd }|r|rdnd }|r|rdnd }|r|	rdnd }|r| jjr|r|d dnd }|	r|d dnd }tj|jd tj|jd}d}	 |rt|rd
nd|j}tj|tjjd | d
krd S | j|fi |}| di |d	||	d}|r|rq|jd d dd d f }|||}|||}|rW|
r(||f7 }|rE|| jjr5|j fn|j!f7 }| jjrE||j"f7 }|	rW|| jjrR|j#fn|j$f7 }t%j&j'|dd}tj(|dd)d}|d ur|d u rwt*d|| |d|   }tj+||d d d f gdd}|r| jjrt,|||||||dV  nt-||||dV  n|V  | j.||| jjd}|d ur|/|0|jd d1|2dj3dd}|4 dkrd	}|||rd	}|r|sd S q)Nz`max_length` is deprecated in this function, use `stopping_criteria=StoppingCriteriaList(MaxLengthCriteria(max_length=max_length))` instead.r   r   r   r   r   r   FTr   r   r   r   r   r   rJ   )num_samplesr   r   r   r   )5r   r   r   r   r   r   r   r   r   r+   rf   rB   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   
functionalsoftmaxmultinomialsqueezer   r   r   r   r   r   r   r   r   r   r   )r   r{   r|   r}   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   next_token_scoresprobsr   r   r   r   rv   p  s8  















z1PretrainedModelStreamingOutputMixin.stream_sample)
NNNNNNNNNF)NNNNNNNNNNF)r   r    r!   r   r   r   r   rh   staticmethodrB   
LongTensorr   r   r   rf   r   r   boolru   rv   r   r   r   r   rg      s    	
 .	
rg   r,   c                 C   s6   t | }|tf}t |j|i | j}|j| j |S r=   )rY   rg   r   r   __dict__update)r,   pretrained_classparent_classes	new_modelr   r   r   add_stream_generate%  s   r   )&rs   r   
contextlibr   typingr   r   r   r   r   r   rB   torch.distributeddistributedr   ro   	packagingr	   r
   r   transformers.generationr   r   r   r   r   r   r   modelscope.pipelines.baser   modelscope.utils.constantr   modelscope.utils.devicer   r   r"   rg   r   r   r   r   r   <module>   s.         