o
    Gi:                     @   s   d dl Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddl
mZ dd	lmZ d
dlmZ d
dlmZ eeZ	dde jde jdB defddZG dd deZdS )    N   )PipelineImageInput)AutoencoderKLLTXVideo)	deprecate
get_logger)randn_tensor)VideoProcessor   )DiffusionPipeline   )LTXLatentUpsamplerModel)LTXPipelineOutputsampleencoder_output	generatorsample_modec                 C   sR   t | dr|dkr| j|S t | dr|dkr| j S t | dr%| jS td)Nlatent_distr   argmaxlatentsz3Could not access latents of provided encoder_output)hasattrr   r   moder   AttributeError)r   r   r    r   h/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/ltx/pipeline_ltx_latent_upsample.pyretrieve_latents   s   

r   c                       s  e Zd ZdZdededdf fddZ						d9d	ejdB d
e	dej
dB dejdB dejdB dejdB dejfddZd:dejdejdefddZdejdedejfddZe	d:dejdejdejdedejf
ddZe	d:dejdejdejdedejf
ddZd d! Zd"d# Zd$d% Zd&d' Zd(d) Ze 		*	+		,		,	,		-	.d;d	ee dB d/e	d0e	dejdB d1eee B d2eee B dB d3ed4edejeej B dB d5edB d6efd7d8Z  ZS )<LTXLatentUpsamplePipeline vaelatent_upsamplerreturnNc                    sf   t    | j||d t| dd d ur| jjnd| _t| dd d ur'| jjnd| _t	| jd| _
d S )N)r   r   r          )vae_scale_factor)super__init__register_modulesgetattrr   spatial_compression_ratiovae_spatial_compression_ratiotemporal_compression_ratiovae_temporal_compression_ratior   video_processor)selfr   r   	__class__r   r   r$   /   s   
z"LTXLatentUpsamplePipeline.__init__r   video
batch_sizedtypedevicer   r   c                    s   |d ur|j ||dS j |jjdt tr:t |kr,tdt  d| d fddt|D }n
 fddD }tj	|dd	 |}
|jjjj}|S )
Nr2   r1   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.c                    s,   g | ]}t j| d  | qS r   r   r   encode	unsqueeze).0ir   r,   r/   r   r   
<listcomp>T   s     z=LTXLatentUpsamplePipeline.prepare_latents.<locals>.<listcomp>c                    s$   g | ]}t j|d  qS r4   r5   )r8   vid)r   r,   r   r   r;   X   s   $ r   dim)tor   r1   
isinstancelistlen
ValueErrorrangetorchcat_normalize_latentslatents_meanlatents_std)r,   r/   r0   r1   r2   r   r   init_latentsr   r:   r   prepare_latents@   s"   	
z)LTXLatentUpsamplePipeline.prepare_latents      ?reference_latentsfactorc                 C   s   |  }t|dD ]8}t|dD ].}tj|||f dd\}}tj|||f dd\}	}
|||f |
 |	 | | |||f< qqt|||}|S )a5  
        Applies Adaptive Instance Normalization (AdaIN) to a latent tensor based on statistics from a reference latent
        tensor.

        Args:
            latent (`torch.Tensor`):
                Input latents to normalize
            reference_latents (`torch.Tensor`):
                The reference latents providing style statistics.
            factor (`float`):
                Blending factor between original and transformed latent. Range: -10.0 to 10.0, Default: 1.0

        Returns:
            torch.Tensor: The transformed latent tensor
        r   r   Nr=   )clonerD   sizerE   std_meanlerp)r,   r   rM   rN   resultr9   cr_sdr_meani_sdi_meanr   r   r   adain_filter_latent^   s   &z-LTXLatentUpsamplePipeline.adain_filter_latentcompressionc                 C   sD   |d }t |}t d| |d  }dd| |  }|| }|S )aL  
        Applies a non-linear tone-mapping function to latent values to reduce their dynamic range in a perceptually
        smooth way using a sigmoid-based compression.

        This is useful for regularizing high-variance latents or for conditioning outputs during generation, especially
        when controlling dynamic behavior with a `compression` factor.

        Args:
            latents : torch.Tensor
                Input latent tensor with arbitrary shape. Expected to be roughly in [-1, 1] or [0, 1] range.
            compression : float
                Compression strength in the range [0, 1].
                - 0.0: No tone-mapping (identity transform)
                - 1.0: Full compression effect

        Returns:
            torch.Tensor
                The tone-mapped latent tensor of the same shape as input.
        g      ?g      @rL   g?)rE   abssigmoid)r,   r   rZ   scale_factorabs_latentssigmoid_termscalesfilteredr   r   r   tone_map_latentsz   s   
z*LTXLatentUpsamplePipeline.tone_map_latentsrH   rI   scaling_factorc                 C   sP   | ddddd| j| j}| ddddd| j| j}| | | | } | S Nr   viewr?   r2   r1   r   rH   rI   rc   r   r   r   rG         z,LTXLatentUpsamplePipeline._normalize_latentsc                 C   sP   | ddddd| j| j}| ddddd| j| j}| | | | } | S rd   rf   rh   r   r   r   _denormalize_latents   ri   z.LTXLatentUpsamplePipeline._denormalize_latentsc                 C   *   d| j j d}tdd| | j  dS )z
        Enable sliced VAE decoding. When this option is enabled, the VAE will split the input tensor in slices to
        compute decoding in several steps. This is useful to save some memory and allow larger batch sizes.
        z%Calling `enable_vae_slicing()` on a `zl` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_slicing()`.enable_vae_slicing0.40.0N)r.   __name__r   r   enable_slicingr,   depr_messager   r   r   rl         z,LTXLatentUpsamplePipeline.enable_vae_slicingc                 C   rk   )z
        Disable sliced VAE decoding. If `enable_vae_slicing` was previously enabled, this method will go back to
        computing decoding in one step.
        z&Calling `disable_vae_slicing()` on a `zm` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_slicing()`.disable_vae_slicingrm   N)r.   rn   r   r   disable_slicingrp   r   r   r   rs      rr   z-LTXLatentUpsamplePipeline.disable_vae_slicingc                 C   rk   )a  
        Enable tiled VAE decoding. When this option is enabled, the VAE will split the input tensor into tiles to
        compute decoding and encoding in several steps. This is useful for saving a large amount of memory and to allow
        processing larger images.
        z$Calling `enable_vae_tiling()` on a `zk` is deprecated and this method will be removed in a future version. Please use `pipe.vae.enable_tiling()`.enable_vae_tilingrm   N)r.   rn   r   r   enable_tilingrp   r   r   r   ru      s   z+LTXLatentUpsamplePipeline.enable_vae_tilingc                 C   rk   )z
        Disable tiled VAE decoding. If `enable_vae_tiling` was previously enabled, this method will go back to
        computing decoding in one step.
        z%Calling `disable_vae_tiling()` on a `zl` is deprecated and this method will be removed in a future version. Please use `pipe.vae.disable_tiling()`.disable_vae_tilingrm   N)r.   rn   r   r   disable_tilingrp   r   r   r   rw      rr   z,LTXLatentUpsamplePipeline.disable_vae_tilingc                 C   s   || j  dks|| j  dkrtd| d| d|d ur%|d ur%td|d u r1|d u r1tdd|  kr>dksCtd tdd S )	Nr   z8`height` and `width` have to be divisible by 32 but are z and .z1Only one of `video` or `latents` can be provided.z/One of `video` or `latents` has to be provided.r   z8`tone_map_compression_ratio` must be in the range [0, 1])r(   rC   )r,   r/   heightwidthr   tone_map_compression_ratior   r   r   check_inputs   s   z&LTXLatentUpsamplePipeline.check_inputs             pilTrz   r{   decode_timestepdecode_noise_scaleadain_factorr|   output_typereturn_dictc              	   C   sH  | j |||||d |d urd}n|jd }| j}|d ur[t|}|| j dkrJ|| j | j d }|d | }td| j dt| d| d | jj|||d}|j	|t
jd	}| j||t
j||	|d
}| || jj| jj| jjj}|	| jj}| |}|dkr| |||}n|}|dkr| ||}|
dkr| || jj| jj| jjj}|}ne| jjjsd }nJt|j|	||jd}t|ts|g| }|d u r|}n
t|ts|g| }t
j|||jd	}t
j|||jd	d d d d d d f }d| | ||  }| jj||ddd }| jj||
d}|   |s|fS t |dS )N)r/   rz   r{   r   r|   r   r   z-Video length expected to be of the form `k * z + 1` but is z. Truncating to z frames.)rz   r{   r3   )r/   r0   r1   r2   r   r   r   latent)r   r2   r1   F)r   )r   )frames)!r}   shape_execution_devicerB   r*   loggerwarningr+   preprocess_videor?   rE   float32rK   rj   r   rH   rI   configrc   r   r1   rY   rb   rG   timestep_conditioningr   r@   rA   tensordecodepostprocess_videomaybe_free_model_hooksr   )r,   r/   rz   r{   r   r   r   r   r|   r   r   r   r0   r2   
num_frameslatents_upsampledtimestepnoiser   r   r   __call__   s   
	






z"LTXLatentUpsamplePipeline.__call__)Nr   NNNN)rL   )Nr~   r   Nr   Nr   r   Nr   T) rn   
__module____qualname__model_cpu_offload_seqr   r   r$   rE   Tensorintr1   r2   	GeneratorrK   floatrY   rb   staticmethodrG   rj   rl   rs   ru   rw   r}   no_gradrA   r   strboolr   __classcell__r   r   r-   r   r   ,   s    
 		

	
r   )Nr   )rE   image_processorr   modelsr   utilsr   r   utils.torch_utilsr   r+   r   pipeline_utilsr
   modeling_latent_upsamplerr   pipeline_outputr   rn   r   r   r   r   r   r   r   r   r   r   <module>   s&   
