o
    i                     @   s  d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZmZmZmZmZ d dlmZmZmZ d dl m!Z! d dl"m#Z#m$Z$m%Z% d dl&m'Z' d dl(m)Z) G dd dZ*e+ dddZ,e-dkre,  dS dS )    N)Iterator)GaussianNoiser)LoraPathStrengthAndSDOps)Registry)TilingConfigget_video_chunks_number)QuantizationPolicy)Audio)ImageConditioningInput$default_2_stage_distilled_arg_parserdetect_checkpoint_path)AudioDecoderDiffusionStageImageConditionerPromptEncoderVideoDecoderVideoUpsampler)DISTILLED_SIGMA_VALUESSTAGE_2_DISTILLED_SIGMA_VALUESdetect_params)SimpleDenoiser)assert_resolutioncombined_image_conditionings
get_device)encode_video)ModalitySpecc                   @   s   e Zd ZdZ				ddedededee dejdB d	e	dB d
e
dB defddZ			ddededededededee dedB dededB deeej ef fddZdS )DistilledPipelinez
    Two-stage distilled video generation pipeline.
    Stage 1 generates video at half of the target resolution, then Stage 2 upsamples
    by 2x and refines with additional denoising steps for higher quality output.
    NFdistilled_checkpoint_path
gemma_rootspatial_upsampler_pathlorasdevicequantizationregistrytorch_compilec	           	   	   C   s   |pt  | _tj| _t||| j| j|d| _t|| j| j|d| _t	|| j| jt
||||d| _t||| j| j|d| _t|| j| j|d| _t|| j| j|d| _d S )N)r#   )r    r"   r#   r$   )r   r!   torchbfloat16dtyper   prompt_encoderr   image_conditionerr   tuplestager   	upsamplerr   video_decoderr   audio_decoder)	selfr   r   r   r    r!   r"   r#   r$    r0   H/home/ubuntu/LTX-2/packages/ltx-pipelines/src/ltx_pipelines/distilled.py__init__/   s(   	zDistilledPipeline.__init__promptseedheightwidth
num_frames
frame_rateimagestiling_configenhance_promptstreaming_prefetch_countreturnc                    s  t dd tjjd|}t|d}tj j|g|	tdkr*d d nd |
d\}|j	|j
}}ttj}d d  fdd	}jt||||||t||d
t|d|
d
\}}|jd d }ttj} fdd	}jt||||||t|||d  |dt||d  |jd|
d
\}}|j||}|j}||fS )NT)r5   r6   is_two_stage)r!   )	generatorr   )enhance_first_promptenhance_prompt_imager<      c                    s   t |  jdS N)r9   r5   r6   video_encoderr'   r!   r   r!   enc)r'   r9   r/   	stage_1_h	stage_1_wr0   r1   <lambda>o       z,DistilledPipeline.__call__.<locals>.<lambda>)contextconditionings)rL   )
denoisersigmasnoiserr6   r5   framesfpsvideoaudior<      c                    s   t |  jdS rC   rE   rF   )r'   r5   r9   r/   r6   r0   r1   rJ      rK   )rL   rM   noise_scaleinitial_latent)rL   rV   rW   )r   r%   	Generatorr!   manual_seedr   r&   r(   lenvideo_encodingaudio_encodingTensorr   tor)   r+   r   r   r,   latentr   itemr-   r.   )r/   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r?   rP   ctx_pvideo_contextaudio_contextstage_1_sigmasstage_1_conditioningsvideo_stateaudio_stateupscaled_video_latentstage_2_sigmasstage_2_conditioningsdecoded_videodecoded_audior0   )r'   r5   r9   r/   rH   rI   r6   r1   __call__P   sr   





zDistilledPipeline.__call__)NNNF)NFN)__name__
__module____qualname____doc__strlistr   r%   r!   r   r   boolr2   intfloatr
   r   r*   r   r]   r	   rm   r0   r0   r0   r1   r   (   sb    	
*	
r   r=   c            	      C   s   t  t j tdd} t| }t|d}| }t|j	|j
|j|jr)t|jnd|j|jd}t }t|j|}||j|j|j|j|j|j|j||j|jd
\}}t||j||j|d d S )NT)	distilled)paramsr0   )r   r   r   r    r"   r$   )
r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   )rS   rR   rT   output_pathvideo_chunks_number)logging	getLoggersetLevelINFOr   r   r   
parse_argsr   r   r   r   lorar*   r"   compiler   defaultr   r7   r3   r4   r5   r6   r8   r9   r;   r<   r   ry   )	checkpoint_pathrx   parserargspipeliner:   rz   rS   rT   r0   r0   r1   main   sD   



r   __main__)r=   N).r{   collections.abcr   r%   ltx_core.components.noisersr   ltx_core.loaderr   ltx_core.loader.registryr   ltx_core.model.video_vaer   r   ltx_core.quantizationr   ltx_core.typesr	   ltx_pipelines.utils.argsr
   r   r   ltx_pipelines.utils.blocksr   r   r   r   r   r   ltx_pipelines.utils.constantsr   r   r   ltx_pipelines.utils.denoisersr   ltx_pipelines.utils.helpersr   r   r   ltx_pipelines.utils.media_ior   ltx_pipelines.utils.typesr   r   inference_moder   rn   r0   r0   r0   r1   <module>   s.      	&
