o
    ۷i*                     @   s   d dl mZ d dlZd dlmZ d dlmZmZ eG dd dZdedefd	d
ZG dd dej	Z
G dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZdS )    )	dataclassN)	rearrange)Tensornnc                   @   sb   e Zd ZU eed< eed< eed< eed< eed< ee ed< eed< eed< eed	< eed
< dS )AutoEncoderParams
resolutionin_channels
downsamplechout_chch_multnum_res_blocks
z_channelsscale_factorshift_factorN)__name__
__module____qualname__int__annotations__listfloat r   r   b/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/diffusion/models/bagel/autoencoder.pyr      s   
 r   xreturnc                 C   s   | t |  S N)torchsigmoid)r   r   r   r   swish!   s   r   c                       sF   e Zd Zdef fddZdedefddZdedefd	d
Z  ZS )	AttnBlockr   c                    sp   t    || _tjd|ddd| _tj||dd| _tj||dd| _tj||dd| _	tj||dd| _
d S )N    ư>T
num_groupsnum_channelsepsaffine   )kernel_size)super__init__r   r   	GroupNormnormConv2dqkvproj_outselfr   	__class__r   r   r+   &   s   
zAttnBlock.__init__h_r   c           	      C   s   |  |}| |}| |}| |}|j\}}}}t|d }t|d }t|d }tj	|||}t|d||||dS )Nzb c h w -> b 1 (h w) czb 1 (h w) c -> b c h w)hwcb)
r-   r/   r0   r1   shaper   
contiguousr   
functionalscaled_dot_product_attention)	r4   r7   r/   r0   r1   r;   r:   r8   r9   r   r   r   	attention1   s   



zAttnBlock.attentionr   c                 C   s   ||  | | S r   )r2   r@   r4   r   r   r   r   forward?   s   zAttnBlock.forward)	r   r   r   r   r+   r   r@   rB   __classcell__r   r   r5   r   r    %   s    r    c                       s.   e Zd Zdedef fddZdd Z  ZS )ResnetBlockr   out_channelsc                    s   t    || _|d u r|n|}|| _tjd|ddd| _tj||dddd| _tjd|ddd| _	tj||dddd| _
| j| jkrPtj||dddd| _d S d S )	Nr!   r"   Tr#      r(   r)   stridepaddingr   )r*   r+   r   rE   r   r,   norm1r.   conv1norm2conv2nin_shortcut)r4   r   rE   r5   r   r   r+   D   s   
zResnetBlock.__init__c                 C   sZ   |}|  |}t|}| |}| |}t|}| |}| j| jkr)| |}|| S r   )rJ   r   rK   rL   rM   r   rE   rN   )r4   r   r8   r   r   r   rB   Q   s   




zResnetBlock.forward)r   r   r   r   r+   rB   rC   r   r   r5   r   rD   C   s    rD   c                       0   e Zd Zdef fddZdefddZ  ZS )
Downsampler   c                    s$   t    tj||dddd| _d S )NrF      r   rG   r*   r+   r   r.   convr3   r5   r   r   r+   b   s   
zDownsample.__init__r   c                 C   s&   d}t jj||ddd}| |}|S )N)r   r(   r   r(   constantr   )modevalue)r   r>   padrS   )r4   r   rW   r   r   r   rB   g   s   
zDownsample.forwardr   r   r   r   r+   r   rB   rC   r   r   r5   r   rP   a   s    rP   c                       rO   )Upsampler   c                    s$   t    tj||dddd| _d S )NrF   r(   rG   rR   r3   r5   r   r   r+   o   s   
zUpsample.__init__r   c                 C   s    t jj|ddd}| |}|S )Ng       @nearest)r   rU   )r   r>   interpolaterS   rA   r   r   r   rB   s   s   
zUpsample.forwardrX   r   r   r5   r   rY   n   s    rY   c                       sL   e Zd Zdedededee dedef fddZd	ed
efddZ  ZS )Encoderr   r   r
   r   r   r   c                    sp  t    || _t|| _|| _|| _|| _tj	|| jdddd| _
|}dt| }|| _t | _| j}	t| jD ]I}
t }t }|||
  }	|||
  }t| jD ]}|t|	|d |}	qUt }||_||_|
| jd kr}t|	|_|d }| j| q:t | _t|	|	d| j_t|	| j_t|	|	d| j_tjd|	dd	d
| _tj	|	d| dddd| _d S )NrF   r(   rG   )r(   r   rE   rQ   r!   r"   Tr#   )r*   r+   r
   lennum_resolutionsr   r   r   r   r.   conv_intuple
in_ch_mult
ModuleListdownrangeappendrD   ModuleblockattnrP   r	   midblock_1r    attn_1block_2r,   norm_outconv_out)r4   r   r   r
   r   r   r   curr_resrb   block_ini_levelrh   ri   	block_out_rd   r5   r   r   r+   z   sB   
	



zEncoder.__init__r   r   c                 C   s   |  |g}t| jD ]C}t| jD ]'}| j| j| |d }t| j| jdkr4| j| j| |}|| q|| jd krN|| j| 	|d  q|d }| j
|}| j
|}| j
|}| |}t|}| |}|S )Nr   r(   )r`   re   r_   r   rd   rh   r^   ri   rf   r	   rj   rk   rl   rm   rn   r   ro   )r4   r   hsrr   i_blockr8   r   r   r   rB      s$   

zEncoder.forward	r   r   r   r   r   r+   r   rB   rC   r   r   r5   r   r\   y   s    1r\   c                       sP   e Zd Zdededee dedededef fdd	Zd
edefddZ  ZS )Decoderr
   r   r   r   r   r   r   c                    s  t    || _t|| _|| _|| _|| _d| jd  | _||| jd   }|d| jd   }	d||	|	f| _	t
j||dddd| _t
 | _t||d| j_t|| j_t||d| j_t
 | _tt| jD ]C}
t
 }t
 }|||
  }t| jd D ]}|t||d |}qt
 }||_||_|
dkrt||_|	d }	| jd| qit
jd|dd	d
| _t
j||dddd| _ d S )NrQ   r(   rF   rG   r]   r   r!   r"   Tr#   )!r*   r+   r
   r^   r_   r   r   r   ffactorz_shaper   r.   r`   rg   rj   rD   rk   r    rl   rm   rc   upreversedre   rf   rh   ri   rY   upsampleinsertr,   rn   ro   )r4   r
   r   r   r   r   r   r   rq   rp   rr   rh   ri   rs   rt   r|   r5   r   r   r+      s@   





zDecoder.__init__zr   c                 C   s   |  |}| j|}| j|}| j|}tt| jD ]6}t| jd D ] }| j	| j
| |}t| j	| jdkrG| j	| j| |}q'|dkrT| j	| |}q| |}t|}| |}|S )Nr(   r   )r`   rj   rk   rl   rm   r}   re   r_   r   r|   rh   r^   ri   r~   rn   r   ro   )r4   r   r8   rr   rw   r   r   r   rB      s"   


zDecoder.forwardrx   r   r   r5   r   ry      s"    5ry   c                       s:   e Zd Zddedef fddZdedefd	d
Z  ZS )DiagonalGaussianTr(   sample	chunk_dimc                    s   t    || _|| _d S r   )r*   r+   r   r   )r4   r   r   r5   r   r   r+     s   

zDiagonalGaussian.__init__r   r   c                 C   s@   t j|d| jd\}}| jrt d| }||t |  S |S )NrQ   )dimg      ?)r   chunkr   r   exp
randn_like)r4   r   meanlogvarstdr   r   r   rB     s
   zDiagonalGaussian.forward)Tr(   )	r   r   r   boolr   r+   r   rB   rC   r   r   r5   r   r     s    r   c                       sX   e Zd Zdef fddZdedefddZdedefd	d
ZdedefddZ  Z	S )AutoEncoderparamsc              	      sn   t    t|j|j|j|j|j|jd| _	t
|j|j|j|j|j|j|jd| _t | _|j| _|j| _d S )N)r   r   r
   r   r   r   )r   r   r
   r   r   r   r   )r*   r+   r\   r   r   r
   r   r   r   encoderry   r   decoderr   regr   r   )r4   r   r5   r   r   r+   "  s*   
	zAutoEncoder.__init__r   r   c                 C   s$   |  | |}| j|| j  }|S r   )r   r   r   r   )r4   r   r   r   r   r   encode:  s   zAutoEncoder.encoder   c                 C   s   || j  | j }| |S r   )r   r   r   )r4   r   r   r   r   decode?  s   
zAutoEncoder.decodec                 C   s   |  | |S r   )r   r   rA   r   r   r   rB   C  s   zAutoEncoder.forward)
r   r   r   r   r+   r   r   r   rB   rC   r   r   r5   r   r   !  s
    r   )dataclassesr   r   einopsr   r   r   r   r   rg   r    rD   rP   rY   r\   ry   r   r   r   r   r   r   <module>   s   JO