o
    
۾i+                     @   s  d Z ddlmZ ddlmZmZmZ ddlmZ ddl	m
Z
 ddlZddlmZ ddlmZmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5 ddl6m7Z7 ddl8m9Z9m:Z:m;Z; ddl<m=Z= ee>Z?defddZ@dddedeAfddZBG dd  d e0ZCG d!d" d"e4ZDG d#d$ d$e2eD ZEG d%d& d&e3eD ZFe=d'e"jGeFeDeEd(G d)d* d*ejHe9e;ZIdS )+z"Wrapper around `Terratorch` models    )OrderedDict)IterableMappingSequence)cached_property)AnyN)DummyDataGeneratorInferenceRunnerInputDefinitionInputTypeEnum)BatchFeature)
VllmConfig)BaseDummyOptions)init_logger)IdentityPooler)default_weight_loader)AutoWeightsLoader)MULTIMODAL_REGISTRY)	ImageItemModalityDataMultiModalDataDictMultiModalFieldConfigMultiModalInputsMultiModalKwargsItemsMultiModalUUIDDictPlaceholderRange)DictEmbeddingItemsModalityDataItemsMultiModalDataItemsMultiModalDataParser)BaseDummyInputsBuilderBaseMultiModalProcessorBaseProcessingInfoPromptUpdate)IntermediateTensors   )IsAttentionFreeMultiModalEmbeddingsSupportsMultiModal)	attn_typeinput_definitionc                 C   s   t | j S N)setdatakeys)r*    r/   Y/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/terratorch.py_terratorch_field_namesH   s   r1   T	is_sharedr3   c                   s.   dt ttjf dt ttf f fdd}|S )N	hf_inputsreturnc                    sV   t ttf  } j D ]\}}d}|jtjkr(r!tj|ddnt	|||< q|S )Nimager%   )
batch_size)
dictstrr   r-   itemstyper   tensorsharedbatched)r4   fieldsnameinputmodalityr*   r3   r/   r0   _terratorch_field_configQ   s   z;_terratorch_field_factory.<locals>._terratorch_field_config)r   r9   torchTensorr   )r*   r3   rD   r/   rC   r0   _terratorch_field_factoryL   s   
rG   c                       sl   e Zd Zdef fddZdeeejf e	e
 B deeef dB f fddZd	edef fd
dZ  ZS )TerratorchMultiModalDataParserr*   c                    s   t  j|i | || _d S r+   )super__init__r*   )selfr*   argskwargs	__class__r/   r0   rJ   d   s   
z'TerratorchMultiModalDataParser.__init__r-   r5   Nc                    s2   t |trt|dt| jt| jdS t |S )Nr6   )rB   required_fieldsfields_factory)
isinstancer8   r   r1   r*   rG   rI   _parse_image_data)rK   r-   rN   r/   r0   rS   i   s   
z0TerratorchMultiModalDataParser._parse_image_datamm_datac                    s   d|vrd|i}t  |S Nr6   )rI   parse_mm_data)rK   rT   rN   r/   r0   rV   w   s   z,TerratorchMultiModalDataParser.parse_mm_data)__name__
__module____qualname__r
   rJ   r8   r9   rE   rF   r   r   r   r   rS   r   r   rV   __classcell__r/   r/   rN   r0   rH   c   s    rH   c                   @   s@   e Zd ZedefddZdd Zdeee	dB f fddZ
dS )	TerratorchProcessingInfor5   c                 C   s"   |    d }tdi |d S )Npretrained_cfgrA   r/   )get_hf_configto_dictr
   )rK   r\   r/   r/   r0   r*      s   z)TerratorchProcessingInfo.input_definitionc                 C   s   t | j|  dS )N)expected_hidden_size)rH   r*   _get_expected_hidden_sizerK   r/   r/   r0   get_data_parser   s   z(TerratorchProcessingInfo.get_data_parserNc                 C   s   dd iS rU   r/   ra   r/   r/   r0   get_supported_mm_limits   s   z0TerratorchProcessingInfo.get_supported_mm_limits)rW   rX   rY   r   r
   r*   rb   r   r9   intrc   r/   r/   r/   r0   r[   ~   s
    r[   c                	       sn   e Zd Zdef fddZdeeef defddZ	dd	edeeef d
eee	f dB de
fddZ  ZS )TerratorchInputBuilderinfoc                    s(   t  | t| j  d | _d S Nr\   )rI   rJ   r   rf   r]   r^   dummy_data_generator)rK   rf   rN   r/   r0   rJ      s   
zTerratorchInputBuilder.__init__	mm_countsr5   c                 C   s   dS )N r/   )rK   ri   r/   r/   r0   get_dummy_text   s   z%TerratorchInputBuilder.get_dummy_textNseq_len
mm_optionsc                 C   s   |rt d | j S )NzeConfigurable multimodal profiling options are not supported for Terratorch. They are ignored for now.)loggerwarningrh   get_dummy_mm_data)rK   rl   ri   rm   r/   r/   r0   rp      s
   	
z(TerratorchInputBuilder.get_dummy_mm_datar+   )rW   rX   rY   r[   rJ   r   r9   rd   rk   r   r   rp   rZ   r/   r/   rN   r0   re      s    
re   c                   @   s   e Zd Zdddedeeef dedeeef fddZ	d	e
deeef d
edee fddZ		ddeee B d	e
deeef deeef dB dedB defddZdS )TerratorchMultiModalProcessorTr2   r4   hf_processor_mm_kwargsr3   r5   c                C   s   t | jj|d}||S )Nr2   )rG   rf   r*   )rK   r4   rr   r3   factoryr/   r/   r0   _get_mm_fields_config   s
   z3TerratorchMultiModalProcessor._get_mm_fields_configmm_itemsout_mm_kwargsc                 C   s   g S r+   r/   )rK   ru   rr   rv   r/   r/   r0   _get_prompt_updates   s   z1TerratorchMultiModalProcessor._get_prompt_updatesNprompttokenization_kwargsmm_uuidsc                 C   s   |d u ri }| j ||||d}| |\}}tdd | D dd}	dtdddgi}
t|	| j|	|d	d
}tddg|||
dS )N)rz   c                 S   s"   i | ]\}}|t |d qS )r   )rE   r<   	unsqueeze).0kvr/   r/   r0   
<dictcomp>   s   " z7TerratorchMultiModalProcessor.apply.<locals>.<dictcomp>pt)tensor_typer6   r   )offsetlengthFr2   
multimodalr%   )r;   prompt_token_ids	mm_kwargs	mm_hashesmm_placeholders)	_hash_mm_items_get_hf_mm_datar   r:   r   r   from_hf_inputsrt   r   )rK   rx   ru   rr   ry   rz   r   _passthrough_datamm_processed_datar   r   r/   r/   r0   apply   s4   	z#TerratorchMultiModalProcessor.applyNN)rW   rX   rY   r   r   r9   objectboolr   rt   r   r   r   r#   rw   listrd   r   r   r   r/   r/   r/   r0   rq      sF    






rq   attention_free)rf   dummy_inputsc                       s   e Zd ZdZdZededededB fddZdd	e	d
ef fddZ
	dddddejdedB dejdB dedejf
ddZ		d dejdB dejdedB dejdB def
ddZdeeeejf  dee fddZ  ZS )!
TerratorchTrB   ir5   Nc                 C   s   | drd S td)Nr6   z Only image modality is supported)
startswith
ValueError)clsrB   r   r/   r/   r0   get_placeholder_str   s   
zTerratorch.get_placeholder_strrj   vllm_configprefixc                    s:   t    |jj d }t|| _| jj| _t | _	d S rg   )
rI   rJ   model_config	hf_configr^   r	   inference_runnermodelr   pooler)rK   r   r   configrN   r/   r0   rJ      s
   


zTerratorch.__init__F)is_multimodalhandle_oov_mm_token	input_idsmultimodal_embeddingsr   r   c                C   s   t |jd dfS )Nr   )rE   emptyshape)rK   r   r   r   r   r/   r/   r0   embed_input_ids  s   zTerratorch.embed_input_ids	positionsintermediate_tensorsinputs_embedsrM   c                 K   s   | j jdi |}|jS )Nr/   )r   forwardoutput)rK   r   r   r   r   rM   model_outputr/   r/   r0   r     s   zTerratorch.forwardweightsc                 C   s  g }t |  }g }|D ]k\}}t|t tfrg|dkrf|}| D ]B\}}	d| }d|v r/q!d|v r9|dd}||v r\d|v rG|dd}|| }
t|
dt}||
|	 || q!|||	f q! nqt|t	j
rw|d| |f qt| }||}|t|S )N
state_dictzinference_runner.	pos_embedz_timm_module.rj   weight_loaderzinference_runner.model.)r8   named_buffersrR   r   r:   replacegetattrr   appendrE   rF   r   load_weightsunionr,   )rK   r   params_listmodel_buffersloaded_bufferskeyvalueweights_to_parser@   weightbufferr   loaderautoloaded_weightsr/   r/   r0   r     s>   


zTerratorch.load_weights)rj   r+   r   )rW   rX   rY   "supports_multimodal_raw_input_onlyis_pooling_modelclassmethodr9   rd   r   r   rJ   rE   rF   r'   r   r   r$   r   r   r   tupler,   r   rZ   r/   r/   rN   r0   r      sF    

,r   )J__doc__collectionsr   collections.abcr   r   r   	functoolsr   typingr   rE   torch.nnnnterratorch.vllmr   r	   r
   r   transformersr   vllm.configr   vllm.config.multimodalr   vllm.loggerr   !vllm.model_executor.layers.poolerr   -vllm.model_executor.model_loader.weight_utilsr    vllm.model_executor.models.utilsr   vllm.multimodalr   vllm.multimodal.inputsr   r   r   r   r   r   r   r   vllm.multimodal.parser   r   r   r   vllm.multimodal.processingr    r!   r"   r#   vllm.sequencer$   
interfacesr&   r'   r(   interfaces_baser)   rW   rn   r1   r   rG   rH   r[   re   rq   register_processorModuler   r/   r/   r/   r0   <module>   sR   (

>