o
    }oi\                  
   @   s  d dl Z d dlZd dlmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z& d dl'm(Z( zd dl)Z)W n e*y   e&+d Y nw z
d dl,m-Z- dZ.W n e/e0fy   dZ.Y nw dd Z1d.ddZ2dd Z3dd Z4dd Z5d/ddZ6d.d d!Z7d"ed#ed$efd%d&Z8d"ed#ed$ed'ee
ef fd(d)Z9d*d+ Z:d,d- Z;dS )0    N)AnyCallableTuple)Trainer)TorchElasticEnvironment)
DictConfig	OmegaConf	open_dict)Image)CLIPImageProcessorSiglipImageProcessor)image_transform)process_image)fake_initialize_model_parallel)NLPDDPStrategyNLPFSDPStrategyNLPSaveRestoreConnector)PEFT_CONFIG_MAP)torch_dtype_from_precision)AppStatelogging)inject_model_parallel_rankz;The package `decord` was not installed in this environment.)dist_checkpointingTFc                 C   s6   | j dkr	| d } | d  d} dd | D }|S )zD
    Convert a numpy image or a batch of images to a PIL image.
       )N.   uint8c                 S   s   g | ]}t |qS  )r
   	fromarray).0imager   r   [/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/multimodal/parts/utils.py
<listcomp>9   s    z numpy_to_pil.<locals>.<listcomp>)ndimroundastype)images
pil_imagesr   r   r    numpy_to_pil2   s
   
r'   c                 C   s   t j| j| j| j|dS )N)dtypedevice	generator)torchrandnshaper(   r)   )xr*   r   r   r    
randn_like>   s   r/   c                 C   s$   | j }| j j}t|||fi | _ dS )z/Apply mixins to a class instance after creationN)	__class____name__type)objmixinbase_clsbase_cls_namer   r   r    extend_instanceB   s
   

r7   c                 C   sL   |dkr| S | d}|dk rt| |S tt| |d| ||d d S )zp
    Return nested attribute of obj
    Example: getattr_recursive(obj, 'a.b.c') is equivalent to obj.a.b.c
     .r   N   )findgetattrgetattr_recursive)r3   attir   r   r    r=   K   s   

$r=   c                 C   s@   d|v rt | d|ddd } t| |dd | dS )zx
    Set nested attribute of obj
    Example: setattr_recursive(obj, 'a.b.c', val) is equivalent to obj.a.b.c = val
    r9   N)r=   joinsplitsetattr)r3   r>   valr   r   r    setattr_recursiveY   s   rE   c                 K   sN   || rd S || r|| fi | |   D ]}t||f||d| qd S )N)apply_conditionstopping_condition)childrenapply_with_stopping_condition)moduleapply_fnrF   rG   
other_argschildr   r   r    rI   c   s   rI   c              
   C   sn  t j rt d}nt d}t }t }t }d}t	 }zytj
| r0|j| |d n| }t| |jdurI|jdkrI|||j}ntj
||j}|j||d}	|	du r|durd}t|d	}
tj
||j}tj
|d
 }tj
|sJ d| dtj|
|tjjjd}
|
d }	W t| nt| w W d   |	|fS 1 sw   Y  |	|fS )zE
    Shared method to load model weights from a given nemo_path.
    cudacpuF)	path2file
out_folderNr:   )map_locationT)
state_dictr   z	Expected z to be a directory.)sharded_state_dictcheckpoint_dirstrictrS   )r+   rN   is_availabler)   r   osgetcwdr   tempfileTemporaryDirectorypathisfile_unpack_nemo_filechdirmodel_parallel_size$_inject_model_parallel_rank_for_ckptmodel_weights_ckptrA   _load_state_dict_from_diskdictsplitextisdirr   load
validationStrictHandlingLOG_UNEXPECTED)	nemo_pathrT   rR   save_restore_connectorcwd	app_stateis_dist_ckpttmpdirmodel_weightsrS   
checkpointtmp_model_weights_ckpttmp_model_weights_dirr   r   r    load_nemo_model_weightsn   sJ   





##ru   model_providercfgmodel_cfg_modifierc              	   C   s  g }| dddkr|t  tddd}td||d|j}t }g }|jD ]o}|js/q)|j	drst
j|jr@|j|_| j|j||dd	}	t|	 ||	 W d   n1 s]w   Y  | j|j||	|dd
}
||
 q)|j	drtd | j|j|j d|d}
||
 q)td|j dd }|jjdur|jjj||d |j  dd |D }|D ]	}
|
 d q||fS )a  
    Set up a trainer and NeMo model for inference.

    Args:
        model_provider (Any): An object that provides the NeMo model.
        cfg (DictConfig): The configuration dictionary, containing the
            necessary settings for the trainer and the models.
        model_cfg_modifier (Callable): A function that modifies the model
            configuration for inference.

    Returns:
        Tuple[Trainer, Any]: A tuple containing the trainer and the model.
    cluster_typeNBCPTFno_ddp_communication_hookfind_unused_parameterspluginsstrategy.nemorestore_pathtrainerrl   return_configr   r   override_config_pathrl   rV   .ckptnLoading from .ckpt checkpoint for inference is experimental! It doesn't support models with model parallelism!hparams_filer   r   zUnrecognized checkpoint type: c                   S      d S Nr   r   r   r   r    dummy      z5setup_trainer_and_models_for_inference.<locals>.dummyr   c                 S   s   g | ]}|  qS r   )rN   )r   modelr   r   r    r!      s    z:setup_trainer_and_models_for_inference.<locals>.<listcomp>r   )getappendr   r   r   r   r   modelsrestore_from_pathendswithrX   r\   rf   model_extracted_dirrestore_fromr	   r   warningload_from_checkpointr   
ValueErrorr   launcherlaunchsetup_environmentevalrequires_grad_)rv   rw   rx   r   r   r   rl   r   single_model_cfg	model_cfgr   r   r   r   r    &setup_trainer_and_models_for_inference   sh   




r   returnc           
   
   C   s  g }| t  |jddstd tddd}n)td t|jdd|jdd	|jd
d|jdd|jj	|ddd}t
d||d|j}t }|jjdur|jjdsgtj|jjrtj|jjrt|jj|_| j|jj||dd}t| || W d   n1 sw   Y  | j|jj|||dd}n'|jjdrtd | j|jj|jd|d}ntd | |j|d}dd }	|jjdur|jjj|	|d |j  | }| d ||fS )a  
    Set up a trainer and NeMo model for inference.

    Args:
        model_provider (Any): An object that provides the NeMo model.
        cfg (DictConfig): The configuration dictionary, containing the
            necessary settings for the trainer and the model.
        model_cfg_modifier (Callable): A function that modifies the model
            configuration for inference.

    Returns:
        Tuple[Trainer, Any]: A tuple containing the trainer and the model.
    fsdpFz"FSDP is False, using DDP strategy.Tr{   zUsing FSDP strategy.fsdp_limit_all_gathersfsdp_sharding_strategyfullfsdp_cpu_offloadfsdp_grad_reduce_dtype    fsdp_set_buffer_dtypeN)limit_all_gatherssharding_strategycpu_offloadgrad_reduce_dtype	precisionset_buffer_dtyper~   r   r   r   r   r   r   r   z<Loading a model from scratch for inference. Tread carefully.)rw   r   c                   S   r   r   r   r   r   r   r    r   ]  r   z4setup_trainer_and_model_for_inference.<locals>.dummyr   r   )r   r   r   r   r   infor   r   r   r   r   r   r   r   rX   r\   rf   r   r   r	   r   r   r   r   r   r   rN   r   r   )
rv   rw   rx   r   r   r   rl   r   r   r   r   r   r    %setup_trainer_and_model_for_inference  st   








r   c           
         s  ddl m} g }| dd dkr|t  td|t d| j}| jj| jj	 | j
| j ks4J d| jrt }tj| jrE| j|_|j| j|d|dtd t, d	_d _d _|j_| d
d jj_d	_d	_| j
_
| j_W d    n1 sw   Y  |j| j||d dd urtj j! }|d ur "| j| nU| j#rt$ }| j
dks| jdkr| j
| j |_%| j
|_
| j|_t&|j%|j'| j
| j| j(d\|_)|_*|_+|_%|_,|_(|_-t.tj/| j#| j0}|j1|| j2|d nt3d 4  zd  j5j6j7_W n
 t8y   Y nw z	d  j5j9j6j7_W n
 t8y1   Y nw  fdd} fdd}	 ||	fS )Nr   )MegatronNevaModelry   rz   r~   zZdevices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_sizeT)r   r   r   rl   Fbase_model_file)r   r   r   rl   peftr:   )
world_sizeranktensor_model_parallel_size_pipeline_model_parallel_size_#pipeline_model_parallel_split_rank_r   z+need at least a nemo file or checkpoint dirc                    s   t | trt| d}n| }t jdr jjjn jj}t	||j
j}jdv r3|tj}njdv r?|tj}n|tj}|jddjddjddS )NRGBrJ   )   16z16-mixed)r   32z32-truer   dim)
isinstancestrr
   openconverthasattrr   rJ   image_processorr   dataimage_aspect_ratior   r2   r+   float16float32bfloat16	unsqueeze)maybe_image_pathr   	processormediar   neva_cfgr   r    r     s   


z8create_neva_model_and_processor.<locals>.image_processorc                    s  t | trt| jjdkrtd  	dg}nsjjdkr6tt
d   	dg}n[jjdkrJtd  	dg}nGjjdkrXdd	 D }n9tt
jj}tjdt
d
 |td}fdd	|D }t
|jjk r||d  t
|jjk sn| }tjdrjjjnjjjjdkrt|jt|j}}|| }d\}}tt|| |}	j|ddd|	idd }n'jjdkrdd   fdd	|D }j|ddd }n	j|ddd }|tj}
|
jddjddS )Nfirstr   r   middle   lastr@   c                 S   s    g | ]}t | d qS r   r
   r   asnumpyr   r   framer   r   r    r!     s     zLcreate_neva_model_and_processor.<locals>.video_processor.<locals>.<listcomp>r:   )r(   c                    s$   g | ]}t  |  d qS r   r   )r   r?   )vrr   r    r!     s   $ rJ   keep)i     ptFshortest_edge)return_tensorsdo_center_cropsizepixel_valuespadc                 S   s~   | j \}}||kr| S ||kr't| j||f|}|| d|| d f |S t| j||f|}|| || d df |S )Nr   r   )r   r
   newmodepaste)pil_imgbackground_colorwidthheightresultr   r   r    expand2square  s   
zOcreate_neva_model_and_processor.<locals>.video_processor.<locals>.expand2squarec                    s&   g | ]} |t d d jD qS )c                 s   s    | ]	}t |d  V  qdS )r   N)int)r   r.   r   r   r    	<genexpr>  s    zVcreate_neva_model_and_processor.<locals>.video_processor.<locals>.<listcomp>.<genexpr>)tuple
image_meanr   )r   r   r   r    r!     s   & )r   r   )r   r   decordVideoReaderr   splice_single_framer
   r   r   r   len
num_framesminnplinspacer   r   r   r   rJ   r   r   maxr   
preprocessr2   r   r   r   )maybe_video_pathframesr   indicesmax_hwmin_hwaspect_ratiomax_lenmin_lenr   media_tensorsr   )r   r   r   r    video_processor  sJ   

$z8create_neva_model_and_processor.<locals>.video_processorr   ):Anemo.collections.multimodal.models.multimodal_llm.neva.neva_modelr   r   r   r   r   r   r   devices	num_nodestensor_model_parallel_sizepipeline_model_parallel_sizeneva_model_filer   rX   r\   rf   r   r   r   
set_structr	   sequence_parallel"activations_checkpoint_granularityactivations_checkpoint_methodr   mm_cfgllmfrom_pretrainedapply_rope_fusionfp8r   r   peft_schemeload_adaptersrU   r   r`   r   global_rank"pipeline_model_parallel_split_ranktensor_model_parallel_rankpipeline_model_parallel_rankexpert_model_parallel_rankdata_parallel_size$virtual_pipeline_model_parallel_rankr   rA   checkpoint_namer   r   r   freezer   language_modelencoderAttributeErrorrJ   )
rw   r   r   r   rl   peft_cfg_clsrn   checkpoint_pathr   r  r   r   r    create_neva_model_and_processork  s   


	
9r'  c                 C   s  | j ddryddlm} || j j}|jd dks"|jd dkr-tj| j jtjd}n|jd dks;|jd d	krFt	j| j jtjd}nt
d
| j d}t|drw|d urw||jd |jd fkswJ d| d|jd |jd f |S | dd}t|dd d d}|S )Nfrom_hfFr   )
AutoConfigCLIPVisionModel	CLIPModel)torch_dtypeSiglipVisionModelSiglipModelzSCurrently only support CLIPImageProcessor and SiglipImageProcessor from Huggingface	crop_sizer   r   z
Crop size z7 does not match the HuggingFace CLIP model's crop size )r   r   )is_trainmeanstd)vision_encoderr   transformersr)  r  architecturesr   r+   r   r   r   r   r/  r   )r  r)  configr   r/  r   r   r    create_image_processor  s:   

r7  r   )NN)<rX   rZ   typingr   r   r   numpyr   r+   lightning.pytorchr   &lightning.pytorch.plugins.environmentsr   	omegaconfr   r   r	   PILr
   r4  r   r   Anemo.collections.multimodal.data.clip.augmentations.augmentationsr   2nemo.collections.multimodal.data.neva.neva_datasetr   :nemo.collections.nlp.modules.common.megatron.megatron_initr   (nemo.collections.nlp.parts.nlp_overridesr   r   r   &nemo.collections.nlp.parts.peft_configr   &nemo.collections.nlp.parts.utils_funcsr   
nemo.utilsr   r   nemo.utils.model_utilsr   r   	Exceptionr   megatron.corer   HAVE_MEGATRON_COREImportErrorModuleNotFoundErrorr'   r/   r7   r=   rE   rI   ru   r   r   r'  r7  r   r   r   r    <module>   sn   
	


4
a

h )