o
    ٷiO                     @   s&  d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dl	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZmZmZmZmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* ee+Z,dZ-dZ.G dd dZ/dS )    N)	GeneratorIterable)Path)cast)hf_hub_download)nn)ModelConfig)
LoadConfig)init_logger)QuantizeMethodBase)download_gguf'download_safetensors_index_file_from_hfdownload_weights_from_hf"filter_duplicate_safetensors_files%filter_files_not_needed_for_inferencemaybe_download_from_modelscopesafetensors_weights_iterator)resolve_obj_by_qualname)set_default_torch_dtype)OmniDiffusionConfig)HSDPInferenceConfig)get_gguf_adapter)initialize_modelzmodel_index.jsonz.diffusion_pytorch_model.safetensors.index.jsonc                   @   s@  e Zd ZU dZdZejG dd dZdZe	e
d< dZe	e
d< defd	d
ZdededB dedB dedee dB deeee ef fddZdddeeeejf ddf fddZdejdeeeejf ddf fddZdejded fddZdejdee fddZdeddfd d!Z	"	d=d#e d$ed%ed&edB dejf
d'd(Z!dejd)ej"ddfd*d+Z#dejddfd,d-Z$d#e defd.d/Z%dddefd0d1Z&dejdee fd2d3Z'd4ededB defd5d6Z(dddejd#e deeeejf ddf fd7d8Z)dejd#e dee fd9d:Z*	"	d=d#e d%ed&edB dejfd;d<Z+dS )>DiffusersPipelineLoaderzCModel loader that can load diffusers pipeline components from disk.   c                   @   sh   e Zd ZU dZeed< 	 edB ed< 	 edB ed< 	 dZeed< 	 dZeed	< 	 dZ	e
e dB ed
< dS )z'DiffusersPipelineLoader.ComponentSourcezA source for weights.model_or_pathN	subfolderrevision prefixTfall_back_to_ptallow_patterns_overrides)__name__
__module____qualname____doc__str__annotations__r   r    boolr!   list r*   r*   e/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/model_loader/diffusers_loader.pyComponentSource0   s   
 r,           counter_before_loading_weightscounter_after_loading_weightsload_configc                 C   s
   || _ d S N)r0   )selfr0   r*   r*   r+   __init__I   s   
z DiffusersPipelineLoader.__init__model_name_or_pathr   Nr   r    r!   returnc                    s|  t ||p|}tj|}| jj}d}t}	 r  d|	 n|	}
|dkr&d}|dkr/ddg}ntd| |r=|dg7 }|d	urC|} d	urP fd
d|D }|s`t|| jj	||| jj
d}n|}g }|D ]}|ttj||7 }t|dkrtdd |D } nqf|r|st||
| jj	|  d	urtj| n|}t|||	}nt|}t|dkrtd| d|||fS )zYPrepare weights for the model.

        If the model is not local, it will be downloaded.F/autohfz*.safetensorsz*.binzUnknown load_format: z*.ptNc                    s   g | ]	}  d | qS )r6   r*   ).0patternr   r*   r+   
<listcomp>z       z<DiffusersPipelineLoader._prepare_weights.<locals>.<listcomp>)ignore_patternsr   c                 s   s    | ]}| d V  qdS )z.safetensorsN)endswith)r9   fr*   r*   r+   	<genexpr>       z;DiffusersPipelineLoader._prepare_weights.<locals>.<genexpr>z$Cannot find any model weights with ``)r   ospathisdirr0   load_formatDIFFUSION_MODEL_WEIGHTS_INDEX
ValueErrorr   download_dirr>   globjoinlenanyr   r   r   RuntimeError)r2   r4   r   r   r    r!   is_localrG   use_safetensors
index_fileindex_file_with_subfolderallow_patterns	hf_folderhf_weights_filesr:   filter_folderr*   r;   r+   _prepare_weightsV   sf   


z(DiffusersPipelineLoader._prepare_weightssourcec                    s\   |   j j j j j\}}}t|| jj| jj	}| j
dkr%t | _
 fdd|D S )z?Get an iterator for the model weights based on the load format.r-   c                 3   "    | ]\}} j | |fV  qd S r1   r   r9   nametensorrY   r*   r+   rA           z@DiffusersPipelineLoader._get_weights_iterator.<locals>.<genexpr>)rX   r   r   r   r    r!   r   r0   use_tqdm_on_loadsafetensors_load_strategyr.   timeperf_counter)r2   rY   rU   rV   rQ   weights_iteratorr*   r_   r+   _get_weights_iterator   s   


z-DiffusersPipelineLoader._get_weights_iteratormodelc                 c   s*    |  |}|D ]
}| |E d H  qd S r1   )_get_weight_sourcesrf   )r2   rg   sourcesrY   r*   r*   r+   get_all_weights   s
   
z'DiffusersPipelineLoader.get_all_weights)r,   .c                 C   s   t tttj t|ddS )Nweights_sourcesr*   )tupler   r   r   r,   getattr)r2   rg   r*   r*   r+   rh      s   
z+DiffusersPipelineLoader._get_weight_sourcesc                    sf   dd |  D }| |}|s|S tdd |D r|S tdd |D   s*|S  fdd|D S )zDReturn parameter names that should be covered by strict load checks.c                 S      h | ]\}}|qS r*   r*   r9   r]   _r*   r*   r+   	<setcomp>       zHDiffusersPipelineLoader._get_expected_parameter_names.<locals>.<setcomp>c                 s   s    | ]}|j d kV  qdS )r   Nr[   r9   rY   r*   r*   r+   rA      rB   zHDiffusersPipelineLoader._get_expected_parameter_names.<locals>.<genexpr>c                 s   s    | ]	}|j r|j V  qd S r1   r[   rs   r*   r*   r+   rA      s    c                    s   h | ]	}|  r|qS r*   )
startswithr9   r]   source_prefixesr*   r+   rq      r=   )named_parametersrh   rN   rl   )r2   rg   all_parameter_namesri   r*   rv   r+   _get_expected_parameter_names   s   
z5DiffusersPipelineLoader._get_expected_parameter_namesmodel_configc                 C   s   | j |jd |jdd d d S )NT)r4   r   r   r    r!   )rX   rg   r   )r2   r{   r*   r*   r+   download_model   s   
z&DiffusersPipelineLoader.download_modeldefault	od_configload_devicerG   custom_pipeline_namec              	   C   s   t |}t|j] |jjr| j|||d}n?| |dkr$t|}n|dkr1t|}||d}W d   n1 s;w   Y  t	
d| | |rR| || n| | | || W d   | S 1 sjw   Y  | S )z+Load a model with the given configurations.)rG   r   r}   custom_pipeliner~   NzLoading weights on %s ...)torchdevicer   dtypeparallel_configuse_hsdp_load_model_with_hsdpr   r   loggerdebug_is_gguf_quantization_load_weights_with_ggufload_weights_process_weights_after_loadingeval)r2   r~   r   rG   r   target_devicerg   	model_clsr*   r*   r+   
load_model   s.   





z"DiffusersPipelineLoader.load_modelr   c                 C   sv   |  D ]4\}}t|dd}t|tr8t| d}|dur!|j}||k}|r,|| || |r8|| qdS )zProcess weights after loading for quantization methods.

        This handles vLLM's quantization methods that need to process weights
        after loading (e.g., FP8 online quantization from BF16/FP16 weights).
        quant_methodN)	named_modulesrm   
isinstancer   next
parametersr   toprocess_weights_after_loading)r2   rg   r   rp   moduler   module_deviceneeds_device_mover*   r*   r+   r     s   



z6DiffusersPipelineLoader._process_weights_after_loadingc                 C   sb   |  |}|| |}t | _td| j| j  |d ur-|| }|r/t	d| d S d S )Nz!Loading weights took %.2f seconds8Following weights were not initialized from checkpoint: )
rz   r   rj   rc   rd   r/   r   	info_oncer.   rI   )r2   rg   weights_to_loadloaded_weightsweights_not_loadedr*   r*   r+   r   (  s   


z$DiffusersPipelineLoader.load_weightsc                 C   s   |j }|d u r	dS t|tr+t|dd }|dkrdS |d}|s)tddS t|ds:t|dd }t	|S |
 dk}|sDdS t|dd }|d u rRtddS )	NFmethodr   gguf
gguf_model9GGUF quantization requires quantization_config.gguf_modelTget_name)quantization_configr   dictr&   getlowerrI   hasattrrm   r(   r   )r2   r~   quant_configr   r   is_ggufr*   r*   r+   r   :  s*   


z-DiffusersPipelineLoader._is_gguf_quantizationc                 C   s   |j dkrdS |jdS )NtransformerTztransformer.)r   r   rt   )r2   rY   r*   r*   r+   _is_transformer_sourceU  s   
z.DiffusersPipelineLoader._is_transformer_sourcec                 C   s.   dd |  D }|dd | D  |S )Nc                 S   rn   r*   r*   ro   r*   r*   r+   rq   ]  rr   zDDiffusersPipelineLoader._get_model_loadable_names.<locals>.<setcomp>c                 s   s    | ]\}}|V  qd S r1   r*   ro   r*   r*   r+   rA   ^  s    zDDiffusersPipelineLoader._get_model_loadable_names.<locals>.<genexpr>)rx   updatenamed_buffers)r2   rg   namesr*   r*   r+   _get_model_loadable_namesZ  s   z1DiffusersPipelineLoader._get_model_loadable_namesr   c                 C   s   t j|r|S d|v r#|dr#|dd\}}t|||| jjdS d|v r@d|v r@|dd\}}t||| jj|| jj	dS t
d|d)	Nr6   z.gguf   )repo_idfilenamer   	cache_dir:)r   r   r>   zUnrecognized GGUF reference: zL (expected local file, <repo_id>/<filename>.gguf, or <repo_id>:<quant_type>))rD   rE   isfiler?   rsplitr   r0   rJ   r   r>   rI   )r2   r   r   r   r   
quant_typer*   r*   r+   _resolve_gguf_model_patha  s,   
z0DiffusersPipelineLoader._resolve_gguf_model_pathc           	         sX   |j }t|dd }|d u rtd| ||j}t|| |}| } fdd|D S )Nr   r   c                 3   rZ   r1   r[   r\   r_   r*   r+   rA     r`   zEDiffusersPipelineLoader._get_gguf_weights_iterator.<locals>.<genexpr>)r   rm   rI   r   r   r   re   )	r2   rY   rg   r~   r   r   	gguf_fileadapterweights_iterr*   r_   r+   _get_gguf_weights_iterator|  s   z2DiffusersPipelineLoader._get_gguf_weights_iteratorc              	      s   |  |}t d  |D ]J| rL|| ||O  p%| | tfdd D }|s5q| } fdd|D }||O q|| O q| |}| }|rit	d| S )Nc                 3   s$    | ]}| jo| vV  qd S r1   )rt   r   ru   )loadedrY   r*   r+   rA     s    
zBDiffusersPipelineLoader._load_weights_with_gguf.<locals>.<genexpr>c                 3   s,    | ]\}}| v r|vr||fV  qd S r1   r*   r\   )loadable_namesr   r*   r+   rA     s    "r   )
rh   setr   r   r   r   rN   rf   rz   rI   )r2   rg   r~   ri   has_missing_for_sourcehf_iterr   r   r*   )r   r   rY   r+   r     s.   



z/DiffusersPipelineLoader._load_weights_with_ggufc                 C   s   ddl m} |j}td|j|j|jd}|dkrt|}n|dkr*t|}||d}| 	| g }	t
|dd	}
|
d	u r?td
|	d|
f t
|dd	}|d	urW|	d|f |	D ]\}}td| ||| qY|S )a  Load model with HSDP sharding for inference.

        The pipeline contains multiple components (text_encoder, VAE, transformer).
        Only the transformer is sharded with HSDP. Other components are loaded normally.

        Approach: Load weights first using model's load_weights (handles QKV fusion etc.),
        then apply HSDP sharding to redistribute weights across GPUs.
        r   )apply_hsdp_to_modelT)enabledhsdp_replicate_sizehsdp_shard_sizeparam_dtyper}   r   r   r   Nz+Model has no transformer attribute for HSDPtransformer_2zApplying HSDP to %s)$vllm_omni.diffusion.distributed.hsdpr   r   r   r   r   r   r   r   r   rm   rI   appendr   r   )r2   r~   rG   r   r   r   hsdp_configrg   r   transformers_to_shardr   r   r]   transr*   r*   r+   r     s4   


z-DiffusersPipelineLoader._load_model_with_hsdp)r}   N),r"   r#   r$   r%   DEFAULT_NUM_THREADSdataclasses	dataclassr,   r.   floatr'   r/   r	   r3   r   r&   r(   r)   rl   rX   r   r   Tensorrf   r   Modulerj   rh   r   rz   r   r|   r   r   r   r   r   r   r   r   r   r   r   r   r*   r*   r*   r+   r   *   s   
 

&Z

"
#r   )0r   rK   rD   rc   collections.abcr   r   pathlibr   typingr   r   huggingface_hubr   r   vllm.configr   vllm.config.loadr	   vllm.loggerr
   3vllm.model_executor.layers.quantization.base_configr   -vllm.model_executor.model_loader.weight_utilsr   r   r   r   r   r   r   vllm.utils.import_utilsr   vllm.utils.torch_utilsr   vllm_omni.diffusion.datar   r   r   .vllm_omni.diffusion.model_loader.gguf_adaptersr   vllm_omni.diffusion.registryr   r"   r   MODEL_INDEXrH   r   r*   r*   r*   r+   <module>   s2   $	