o
    ٷi                     @   sT  d Z ddlZddlZddlZddlZddlZddlZddlmZmZ ddl	Z	ddl
mZ ddlZddlmZ ddlmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZ dd
lmZ ddl Z ddl!m"Z" ddlm#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) e"e*Z+de,fddZ-eG dd dZ.eG dd dZ/G dd dZ0dS )zModel Manager for cache-dit serving.

Adapted from SGLang's model management:
https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/managers/tokenizer_manager.py
    N)datetimetimezone)BytesIO)OptionalDictAnyList)	dataclass)DiffusionPipelineFlowMatchEulerDiscreteScheduler)export_to_video)LoraBaseMixin)Image)init_logger)WanImageToVideoPipeline   )current_platform   )prepare_extra_parallel_modules)get_default_params_modifierspipeline_quant_config_pathc              
   C   s   ddl m} td|   zXddl}ddl}|jd| }|du s'|jdu r.t	d|  |j
|}||j|j< |j| t|dsMt	d|  d	| }t||s_t	d
t| td |W S  ty~ } ztd|  d|   d}~ww )z7Load pipeline quantization config from a custom module.r   )PipelineQuantizationConfigz+Loading pipeline quantization config from: Npipeline_quant_configzCannot load module from get_pipeline_quant_configzModule z3 must have a 'get_pipeline_quant_config()' functionzQget_pipeline_quant_config() must return a PipelineQuantizationConfig object, got z:Successfully loaded quantization config from custom modulez(Failed to load quantization config from : )diffusers.quantizersr   loggerinfoimportlib.utilsysutilspec_from_file_locationloader
ValueErrormodule_from_specmodulesnameexec_modulehasattrr   
isinstancetype	Exceptionerror)r   r   	importlibr   specmodulequantization_confige r2   Q/home/ubuntu/.local/lib/python3.10/site-packages/cache_dit/serve/model_manager.pyload_pipeline_quant_config"   s>   



r4   c                   @   s   e Zd ZU dZeed< dZee ed< dZe	ed< dZ
e	ed< dZe	ed	< d
Zeed< dZeee  ed< dZee	 ed< dZe	ed< dZeee  ed< dZee	 ed< dZee	 ed< dZeed< dZeed< dZee ed< dd ZdS )GenerateRequestzImage/Video generation request.prompt negative_prompti   widthheight2   num_inference_stepsg      @guidance_scaleNsigmasseedr   
num_images
image_urls
num_frames   fpsFinclude_statsbase64output_format
output_dirc                 C   sf   d }| j rdd | j D }d| jd d d| j d| j d| j d| j d	| j d
| j d| dS )Nc                 S   s,   g | ]}t |d krdt | dn|qS )d   z<data:z chars>)len).0urlr2   r2   r3   
<listcomp>e   s     z,GenerateRequest.__repr__.<locals>.<listcomp>zGenerateRequest(prompt=r;   z..., width=z	, height=z, num_inference_steps=z, guidance_scale=z, seed=z, num_images=z, image_urls=))rA   r6   r9   r:   r<   r=   r?   r@   )selfimage_urls_reprr2   r2   r3   __repr__b   s*   zGenerateRequest.__repr__)__name__
__module____qualname____doc__str__annotations__r8   r   r9   intr:   r<   r=   floatr>   r   r?   r@   rA   rB   rD   rE   boolrG   rH   rQ   r2   r2   r2   r3   r5   N   s$   
 r5   c                   @   s~   e Zd ZU dZdZeee  ed< dZ	ee ed< dZ
eeeef  ed< dZee ed< dZee ed< dZee ed< dS )	GenerateResponsez Image/Video generation response.Nimagesvideostats	time_costinference_start_timeinference_end_time)rR   rS   rT   rU   r\   r   r   rV   rW   r]   r^   r   r   r_   rY   r`   ra   r2   r2   r2   r3   r[   q   s   
 r[   c                #   @   s  e Zd ZdZdejddddddddddddddfdedee deej de	d	ee
eef  d
e	dee de	dee dee
eef  dee de	dee dee dee dee de	f"ddZdeeeef  defddZdd Zdededefd d!Zd"ee d#eeej  fd$d%Zd&ee d#efd'd(Zd)ejd&ed*ed#efd+d,Zd&ed*ed-ed#efd.d/Zd0ed#efd1d2Zd#e
eef fd3d4ZdS )5ModelManagerz.Manages diffusion model loading and inference.NTF
model_pathdevicetorch_dtypeenable_cachecache_configenable_cpu_offload
device_mapenable_compileparallel_typeparallel_argsattn_backendquantizequantize_typer   	lora_path	lora_name	fuse_lorac              
   C   s   || _ |pt rtjnd| _|| _|| _|pi | _|| _|| _	|| _
|	| _|
p)i | _|| _|| _|| _|| _|| _|| _|| _d | _t | _td| d| j d|	 d|  d S )Ncpuz&Initializing ModelManager: model_path=z	, device=z, parallel_type=z, attn_backend=)rc   r   is_accelerator_availabledevice_typerd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   r   rp   rq   rr   pipesetwarmed_up_shapesr   r   )rO   rc   rd   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   r   rp   rq   rr   r2   r2   r3   __init__   s8   

zModelManager.__init__resolutionsr6   c                 C   s   | j d u r	td|D ]F\}}||f}|| jv rq| jdv r"t  td| d| d | j |||dd}| j| td| d|  | jdv rQt  qd S )	N*Model not loaded. Call load_model() first.tpulyssesringzStartup warming up for shape x...r   r6   r:   r9   r<   zStartup warmup completed for )	rv   RuntimeErrorrx   rk   distbarrierr   r   add)rO   rz   r6   r9   r:   	shape_key_r2   r2   r3   startup_warmup   s*   



zModelManager.startup_warmupc                    s  t d| j  d}t }| jr$| jr$t| j}tt|dg p!g }n| jr,t d | jo@| j	duo@| j	dvo@d|vo@d|v}d| jv rYt d	 t
j| j| j| j|d
| _n[d| jv rtjdd  }|dv rddlm} t d |j| j| j| j|d
| _n.|dv rddlm} t d |j| j| j| j|d
| _ntdtj| j| j| j|d
| _| jdurQ| jdurQt| jtst d nt d| j d| j  | jj| j| jd t d d| j v r d| j v r t d dt dd d!t dd"d#dd d$d d%d d d&}t!"|| j_#t d' d|v p+d|v p+|}| j$o2| }	|	rKt d( | j$  | j%  t d) nt d* n| jdus]| jdurbt d+ d}
| j&rt d, dd-l'm(} |d.d/}
| j)r| j)* D ]\}}t+|
|| qd}| j&r|
durt,| j| j|
d0}d}| j-durt d1| j- d2| j.  dd3l'm/} dd4l0m1} ddl2m3} |4 r|5 nd5}| j-d6kr|j6n|j7}| j.8d7d }| j.8d8d }t9| j||d9}|| j.d:< ||| j-d;kr|nd| j-d<kr|nd| j-d6kr|nd| j.d=}|
dus&|dur0t'j&| j|
||d> | jr| j	dur| j	dv rK|du rJt d? nd|v sUd|v r`t d@| j	 dA ndBh dCt:j;j<dDt=f fdEdF}t>| jdrt| jdd}t|t:j;j<rt dG|j?j@ dH| j	 dI t+| jdt'j|| j	||dJ n|durt dKtA| dL t>| jdrt| jdd}t|t:j;j<rt dM|j?j@ dH| j	 dI t+| jdt'j|| j	||dJ n|durt dNtA| dL | jdu r| jBtCjDkrt dOtCjD  | jE| jB | jFr.tCG d5kr.t dP | jH  | jIdurWt>| jjJdQrNt dR| jI  | jjJK| jI n	t dS| jI  | jLrmt dT t'M  t:N| jjJ| j_Jt dU dS )VzLoad the diffusion model.zLoading model: Ncomponents_to_quantizez?Quantization enabled but no pipeline_quant_config_path provided)bitsandbytes_4bittransformertransformer_2zWan2.2-I2V-A14B-Diffusersz8Detected Wan2.2-I2V model, using WanImageToVideoPipeline)re   ri   r0   zLTX-2CACHE_DIT_LTX2_PIPELINEt2v)r   
text2videotextdefaultr   )LTX2Pipelinez8Detected LTX-2 model, using LTX2Pipeline (text-to-video))i2vimage2videoimage)LTX2ImageToVideoPipelinezEDetected LTX-2 model, using LTX2ImageToVideoPipeline (image-to-video)zAInvalid CACHE_DIT_LTX2_PIPELINE. Please set it to 't2v' or 'i2v'.z6Pipeline does not support LoRA. Skipping LoRA loading.zLoading LoRA weights from: /)weight_namez LoRA weights loaded successfullyqwenlightz9Detected Qwen-Image-Lightning LoRA, updating scheduler...      Fi      g      ?exponentialT)base_image_seq_len
base_shiftinvert_sigmasmax_image_seq_len	max_shiftnum_train_timestepsshiftshift_terminalstochastic_samplingtime_shift_typeuse_beta_sigmasuse_dynamic_shiftinguse_exponential_sigmasuse_karras_sigmasz%Scheduler updated for Lightning modelz'Fusing LoRA weights into transformer...z,LoRA weights fused and unloaded successfullyzHKeeping LoRA weights separate (fusion disabled or transformer quantized)zFBoth --lora-path and --lora-name must be provided to load LoRA weightszEnabling DBCache acceleration)DBCacheConfiggQ?)residual_diff_threshold)rv   rc   cache_config_objzEnabling parallelism: type=z, args=)ParallelismConfig)ParallelismBackendr   r}   parallel_text_encoderparallel_vae)r   r   extra_parallel_modulesr~   r   )backendulysses_size	ring_sizetp_sizeparallel_kwargs)rg   params_modifiersparallelism_configzRequested bitsandbytes_4bit quantization but no --pipeline-quant-config-path provided. Please provide a PipelineQuantizationConfig that sets quant_backend='bitsandbytes_4bit'.ztTransformer is already quantized by diffusers PipelineQuantizationConfig; skipping cache-dit(torchao) quantize_type=.QwenImageTransformer2DModelmreturnc                    s   | j j vS )N)	__class__rR   )r   class_not_supported_per_rowr2   r3   is_per_row_supported  s   z5ModelManager.load_model.<locals>.is_per_row_supportedzQuantizing transformer module: z to z (torchao) ...)
quant_typeper_rowz>Cannot quantize transformer: it is not a torch.nn.Module (got z).z!Quantizing transformer_2 module: z@Cannot quantize transformer_2: it is not a torch.nn.Module (got zMoving pipeline to zEnabling CPU offloadset_attention_backendzSetting attention backend to zDTransformer does not support set_attention_backend, ignoring --attn zEnabling torch.compilezModel loaded successfully)Or   r   rc   rw   rn   r   r4   getattrwarningro   r   from_pretrainedre   ri   rv   osenvirongetstriplower	diffusersr   r   r#   r
   rp   rq   r)   r   r,   load_lora_weightsmathlogr   from_config	schedulerrr   unload_lora_weightsrf   	cache_ditr   rg   itemssetattrr   rk   rl   r   cache_dit.parallelismr   torch.distributeddistributedis_initializedget_world_sizeNATIVE_PYTORCHNATIVE_DIFFUSERpopr   torchnnModulerZ   r(   r   rR   r*   rd   r   ru   torh   device_countenable_model_cpu_offloadrm   r   r   rj   set_compile_configscompile)rO   r0   !components_quantized_by_diffusers!will_torchao_quantize_transformerltx2_pipeliner   r   scheduler_config transformer_quantized_or_will_beshould_fuser   r   keyvaluer   r   r   r   r   
world_sizer   r   r   r   r   r   r   r2   r   r3   
load_model   s  









 








	



	
	



zModelManager.load_modelr9   r:   c              
   C   s   ||f}| j rc|| jvre| jdv rt  td| d| d z| j|||dd}| j| td| d|  W n t	yW } zt
d|  W Y d }~nd }~ww | jdv rgt  d S d S d S d S )	Nr|   zWarming up for shape r   r   r   r   zWarmup completed for zWarmup failed: )rj   rx   rk   r   r   r   r   rv   r   r+   r   )rO   r9   r:   r6   r   r   r1   r2   r2   r3   _warmup_if_needed  s,   

zModelManager._warmup_if_neededrA   r   c                 C   s@  |sdS g }t |D ]\}}z|drBdt| d}td|d  d|  |dd\}}t|}t	t
|d	}	n|d
r~d|dd  t|dkrVdnd }td|d  d|  tj|dd}
|
  t	t
|
jd	}	nQt|dkrdt| d}td|d  d|  ztj|dd}t	t
|d	}	W n" ty    w d| }td|d  d|  t	|d	}	||	 td|d  d|	j  W q
 ty } z-t|dkrdt| d}n|}td|d  d| d|  td|d  d| d}~ww |S )z6Load images from URLs, local paths, or base64 strings.Nzdata:image/zdata URI (length: rN   zLoading image r   z from ,RGB)zhttp://zhttps://zURL: P   r   r7   zDownloading image    )timeoutrI   zraw base64 string (length: T)validatezlocal path: zImage z loaded successfully: z<data of length >zFailed to load image r   )	enumerate
startswithrJ   r   r   splitrF   	b64decoder   openr   convertrequestsr   raise_for_statuscontentr+   appendsizer,   r   )rO   rA   r\   idxrL   log_descheaderbase64_dataimg_datar   responser1   	error_urlr2   r2   r3   _load_images_from_urls  sP   


&

  z#ModelManager._load_images_from_urlsrH   c                 C   s&   |d ur
t j|S t jt  dS )Noutputs)r   pathabspathjoingetcwd)rO   rH   r2   r2   r3   _resolve_output_dir$  s   z ModelManager._resolve_output_dirr   r&   c                 C   s6   t j|dd t j||}|j|dd t j|S )NTexist_okPNGformat)r   makedirsr  r  saver  )rO   r   rH   r&   r  r2   r2   r3   _save_image_to_dir)     zModelManager._save_image_to_dirrD   c                 C   s6   t j|dd t j||}t|||d t j|S )NTr  rD   )r   r  r  r  r   r  )rO   video_framesrH   r&   rD   r  r2   r2   r3   _save_video_to_dir/  r  zModelManager._save_video_to_dirrequestc           /      C   s>	  | j d u r	td|jdvrtd|j d|jd uo"t|jdk}|jd uo,|jdk}|o0|}d }|rN| |j}|rNt	dt| d|rId	nd
  |s\|s\| 
|j|j|j |j}|d u ru| jdv rud}t	| j d|  |rzd	}n|rd}n|rd}nd}t	| d|jd d  d|  d }|d urtjdd|}td| d | jdv rdd lm}	 |	  ttj}
| j }|rzt|j}d|jv }W n ty   d}Y nw |std|j|j|j|j |j!|d}|j"d ur'zt| j j}d|jv r|j"|d< nt#d W n ty&   |j"|d< Y nw |rz|j|d< z1t|j}d|jv rK|j$d urGt%|j$nd |d< d!|jv rUd"|d!< d#|jv r_d|d#< W n tyy   |j$d urst%|j$nd |d< Y nw |j&|d$< |r|r|r|d |d< t	d%|d j'  nt|dkr|d |d< n||d< |j(rzt| j j}d&|jv r|j(|d&< W n ty   |j(|d&< Y nw |dVi |}| jdv rdd lm}	 |	  ttj}|
j)|
j*d' d' d(}|j)|j*d' d' d(}|| + }|j,d)d*)d+d,}|j,d)d*)d+d,}d}| jdv rH|	- rH|	. rHz|	/ dk}W n tyG   d}Y nw | jd ur|dd lm}	 |	/ }|rmt	d-| d.t|j0d  d/ nt	d-| d0t|j1 d1 d }|r|j2r| j3rt45| j }|rd2d3d4 |D i}d }d }|st6d d d |||d5S |r=|j0d }t	d6t| d7|d8d9 |jd:kr| 7|j8}t9|d;d }|d uruzfdd<l:m;} |} | d= < =d>}!t>|!}"|d }#t?|#tj@st>|#}#|#% A }#t9t9|d?d d@d }$t9|$dAdB}$tBjC|ddC tBjDEtBjDF|dDtGH jI dE}%||"t%|j$|#|$|%dF |%}W nT tyt }& z't#dGtJ|&jK dH|& dI | jL||dDtGH jI dE|j$dJ}W Y d }&~&n%d }&~&ww | jL||dDtGH jI dE|j$dJ}ntMjNdEdKdL}'|'jO}(W d    n	1 sw   Y  zt9|d;d }|d urdd<l:m;} |} | d= < =d>}!t>|!}"|d }#t?|#tj@st>|#}#|#% A }#t9t9|d?d d@d }$t9|$dAdB}$||"t%|j$|#|$|(dF ntP||(|j$dM tQ|(dN})|)R }*tST|*U }W d    n	1 sw   Y  W tBjDV|(r.tBW|( nftBjDV|(r<tBW|( w w g }|jd:krl| 7|j8}tX|j1D ]\}+},|Y| jZ|,|dOtGH jI dP|+ dQdR qPn|j1D ]},t[ }-|,j\|-dSdT tST|-] U }.|Y|. qot	dU|d8d9 t6||||||d5S )WNr{   )rF   r  zInvalid output_format: z. Must be 'base64' or 'path'.r   r   zLoaded z input image(s) for r   editingr|   *   z mode: using fixed seed zvideo generationedit
generationz
: prompt='r;   z...', seed=rs   )rd   zCreated generator with seed z on CPUr   TzlCurrent LTX-2 pipeline does not support image2video. Please restart server with CACHE_DIT_LTX2_PIPELINE=i2v.)r6   r9   r:   r<   r=   	generatorr>   z9Pipeline does not support sigmas, ignoring request.sigmasrB   
frame_rateg      8@output_typenpreturn_dictnum_images_per_promptz#Using first image for image2video: r8   r   )microsecondmilliseconds)timespecz+00:00ZzRank z: Generated video with z framesz: Generated z imagescache_statsc                 S   sH   g | ] }|j rt|j nd |jrt|jng |jrt|jnd dqS )N)cache_optionscached_stepsr   )r1  rV   r2  listr   )rK   sr2   r2   r3   rM     s    z)ModelManager.generate.<locals>.<listcomp>)r\   r]   r^   r_   r`   ra   z Video generation completed with z frames in z.2fr4  r  audio)encode_video   uint8vocoderconfigoutput_sampling_ratei]  r  video_z.mp4)rD   r5  audio_sample_rateoutput_pathz!encode_video(with audio) failed (r   z/), falling back to export_to_video(video-only).)r&   rD   F)suffixdeleter  rbimage_r   z.png)r&   r  r  zImage generation completed in r2   )^rv   r   rG   r#   rA   rJ   rB   r  r   r   r   r9   r:   r6   r?   rk   r   	Generatormanual_seeddebugr   r   r   r   nowr   utcinspect	signature__call__
parametersr+   r<   r=   r>   r   rD   rY   r@   r  r8   replacer,  total_seconds	isoformatis_availabler   get_rankframesr\   rE   rf   r   summaryr[   r  rH   r   %diffusers.pipelines.ltx2.export_utilsr6  roundastype
from_numpyr)   Tensorrs   r   r  r  r  r  uuiduuid4hexr*   rR   r   tempfileNamedTemporaryFiler&   r   r   readrF   	b64encodedecodeexistsunlinkr   r  r  r   r  getvalue)/rO   r!  is_edit_modeis_video_modeis_image2video_modeinput_imagesr?   mode_strr&  r   start_dt_rawpipe_to_usesigaccepts_imagepipe_kwargsoutput
end_dt_rawstart_dtend_dtr_   r`   ra   is_primary_rankrankr^   
stats_listimages_payloadvideo_payloadr  out_dirr5  r6  video_npvideo_uint8video_taudio_tsample_rateout_pathr1   tmp_filetmp_pathfvideo_bytesr  r   bufferedimg_strr2   r2   r3   generate5  s   

$
	

"

$	










	zModelManager.generatec                 C   s"   | j | jt| j| j| jdudS )zGet model information.N)rc   rd   re   rf   	is_loaded)rc   rd   rV   re   rf   rv   )rO   r2   r2   r3   get_model_infot  s   zModelManager.get_model_info)rR   rS   rT   rU   r   bfloat16rV   r   dtyperZ   r   r   ry   r   tuplerX   r   r   r   r   r  r  r  r   r5   r[   r  r  r2   r2   r2   r3   rb   }   s    	

/   ,  Arb   )1rU   r   rF   rH  r[  r   rX  r   r   r   r   r   r   r  ior   typingr   r   r   r   dataclassesr	   r   r
   r   diffusers.utilsr   diffusers.loaders.lora_baser   PILr   r   cache_dit.loggerr   r   	platformsr   utilsr   cache_alignmentr   rR   r   rV   r4   r5   r[   rb   r2   r2   r2   r3   <module>   s>    ,"