o
    ۷i                     @   sb   d dl Z ddlmZmZ ddlmZ G dd de jjZG dd de jjZ	G d	d
 d
eeZ
dS )    N   )ConfigMixinregister_to_config)
ModelMixinc                       sF   e Zd ZddededB def fddZdejd	ejfd
dZ  ZS )ResBlockNr   channelsmid_channelsdimsc                    s   t    |d u r|}|dkrtjjntjj}|||ddd| _tjd|| _|||ddd| _	tjd|| _
tj | _d S )N   r      kernel_sizepadding    )super__init__torchnnConv2dConv3dconv1	GroupNormnorm1conv2norm2SiLU
activation)selfr   r   r	   Conv	__class__ g/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/pipelines/ltx/modeling_latent_upsampler.pyr      s   
zResBlock.__init__hidden_statesreturnc                 C   sH   |}|  |}| |}| |}| |}| |}| || }|S )N)r   r   r   r   r   )r   r#   residualr!   r!   r"   forward#   s   




zResBlock.forward)Nr   )	__name__
__module____qualname__intr   r   Tensorr&   __classcell__r!   r!   r   r"   r      s     r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )PixelShuffleNDr
   r
   r
   c                    s*   t    || _|| _|dvrtdd S )N)r   r
   r   zdims must be 1, 2, or 3)r   r   r	   upscale_factors
ValueError)r   r	   r/   r   r!   r"   r   /   s   
zPixelShuffleND.__init__c              
   C   s   | j dkr*|ddg| jd d R dddddddd	ddd	dddS | j dkrN|ddg| jd d R ddd	dddd	dddS | j dkrn|ddg| jd d R ddddd	dddS d S )
Nr   r   r      r
            )r	   	unflattenr/   permuteflatten)r   xr!   r!   r"   r&   8   s   

<
6zPixelShuffleND.forward)r.   )r'   r(   r)   r   r&   r,   r!   r!   r   r"   r-   .   s    	r-   c                       sb   e Zd ZdZe						dded	ed
edededef fddZdej	dej	fddZ
  ZS )LTXLatentUpsamplerModela  
    Model to spatially upsample VAE latents.

    Args:
        in_channels (`int`, defaults to `128`):
            Number of channels in the input latent
        mid_channels (`int`, defaults to `512`):
            Number of channels in the middle layers
        num_blocks_per_stage (`int`, defaults to `4`):
            Number of ResBlocks to use in each stage (pre/post upsampling)
        dims (`int`, defaults to `3`):
            Number of dimensions for convolutions (2 or 3)
        spatial_upsample (`bool`, defaults to `True`):
            Whether to spatially upsample the latent
        temporal_upsample (`bool`, defaults to `False`):
            Whether to temporally upsample the latent
          r5   r   TFin_channelsr   num_blocks_per_stager	   spatial_upsampletemporal_upsamplec                    sd  t    || _| _|| _ | _|| _|| _ dkrtj	j
ntj	j}||ddd| _tj	d| _tj	 | _tj	 fddt|D | _|rd|rdtj	tj	jd dddtd| _n2|r{tj	tj	j
d	 dddtd| _n|rtj	tj	jd dddtd| _ntd
tj	 fddt|D | _||ddd| _d S )Nr
   r   r   r   r   c                       g | ]}t  d qS )r	   r   .0_r	   r   r!   r"   
<listcomp>x       z4LTXLatentUpsamplerModel.__init__.<locals>.<listcomp>   r5   z9Either spatial_upsample or temporal_upsample must be Truec                    rA   rB   rC   rD   rG   r!   r"   rH      rI   )r   r   r=   r   r>   r	   r?   r@   r   r   r   r   initial_convr   initial_normr   initial_activation
ModuleListrange
res_blocks
Sequentialr-   	upsamplerr0   post_upsample_res_blocks
final_conv)r   r=   r   r>   r	   r?   r@   ConvNdr   rG   r"   r   _   s@   

"z LTXLatentUpsamplerModel.__init__r#   r$   c                 C   s~  |j \}}}}}| jdkrX|ddddddd}| |}| |}| |}| jD ]}||}q,| |}| j	D ]}||}q;| 
|}|d|dfddddd}|S | |}| |}| |}| jD ]}||}qj| jr| |}|d d d d dd d d d d f }n!|ddddddd}| |}|d|dfddddd}| j	D ]}||}q| 
|}|S )Nr
   r   r   r   r5   r1   )shaper	   r7   r8   rK   rL   rM   rP   rR   rS   rT   r6   r@   )r   r#   
batch_sizenum_channels
num_framesheightwidthblockr!   r!   r"   r&      s:   















(



zLTXLatentUpsamplerModel.forward)r;   r<   r5   r   TF)r'   r(   r)   __doc__r   r*   boolr   r   r+   r&   r,   r!   r!   r   r"   r:   L   s.    2r:   )r   configuration_utilsr   r   models.modeling_utilsr   r   Moduler   r-   r:   r!   r!   r!   r"   <module>   s   