o
    }oiD+                     @   s   d dl mZ d dlZd dlZd dlmZmZ d dlmZm	Z	m
Z
mZmZ eG dd dZdd ZG d	d
 d
ejZG dd dejZG dd dejZG dd dejZdS )    )	dataclassN)Tensornn)
Downsample	NormalizeResnetBlockUpsample	make_attnc                   @   s   e Zd ZU ee ed< ee ed< dZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dS )AutoEncoderConfigch_multattn_resolutions   
resolution   in_channels   chout_ch   num_res_blocks   
z_channelsgxz,C?scale_factorg=U?shift_factorvanilla	attn_typeTdouble_z        dropoutNckpt)__name__
__module____qualname__listint__annotations__r   r   r   r   r   r   r   floatr   r   strr   boolr   r    r)   r)   ^/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/vae/autoencoder.pyr
      s   
 r
   c                 C   s   t jj| S N)torchr   
functionalsilu)xr)   r)   r*   nonlinearity+   s   r0   c                       s\   e Zd Zdddddddededee d	ed
ee dededef fddZdd Z  ZS )Encoderr   TFr   )r   resamp_with_convr   use_linear_attnr   r   r   r   r   r   r   r   r   c             
      s  t    |r	d}|| _d| _t|| _|| _|| _|| _t	j
j|| jdddd| _|}dt| }|| _t
 | _t| jD ]Z}t
 }t
 }|||  }|||  }t| jD ]}|t||| j|	d |}||v rw|t||d qZt
 }||_||_|| jd krt||
|_|d	 }| j| q?t
 | _t||| j|	d| j_t||d| j_t||| j|	d| j_t|| _t	j
j||rd	| n|dddd| _ d S )
Nlinearr   r      kernel_sizestridepaddingr5   r   out_channelstemb_channelsr   r   r   )!super__init__r   temb_chlennum_resolutionsr   r   r   r,   r   Conv2dconv_intuple
in_ch_mult
ModuleListdownrangeappendr   r	   Moduleblockattnr   
downsamplemidblock_1attn_1block_2r   norm_outconv_out)selfr   r   r   r   r   r   r   r   r   r2   r   r3   r   curr_resrG   i_levelrM   rN   block_in	block_outi_blockrI   	__class__r)   r*   r@   1   s`   









zEncoder.__init__c                 C   s   d }|  |g}t| jD ]D}t| jD ](}| j| j| |d |}t| j| jdkr7| j| j| |}|| q|| jd krQ|| j| 	|d  q|d }| j
||}| j
|}| j
||}| |}t|}| |}|S )Nr   r5   )rE   rJ   rC   r   rI   rM   rB   rN   rK   rO   rP   rQ   rR   rS   rT   r0   rU   )rV   r/   tembhsrX   r[   hr)   r)   r*   forwardy   s&   

zEncoder.forwardr    r!   r"   r$   r#   r@   rb   __classcell__r)   r)   r\   r*   r1   0   s0    	
Hr1   c                       s^   e Zd Zddddddddededee d	ed
ee dededef fddZdd Z  ZS )Decoderr   TFr   )r   r2   give_pre_endtanh_outr3   r   r   r   r   r   r   r   r   r   c             
      s  t    |r	d}|| _d| _t|| _|| _|| _|| _|| _	|| _
dt| }||| jd   }|d| jd   }d|||f| _td| jt| j tjj||dddd| _t | _t||| j|	d	| j_t||d
| j_t||| j|	d	| j_t | _tt| jD ]T}t }t }|||  }t| jd D ]}| t||| j|	d	 |}||v r| t||d
 qt }||_!||_"|dkrt#||
|_$|d }| j%d| qt&|| _'tjj||dddd| _(d S )Nr4   r   r:   r5   r   z+Working with z of shape {} = {} dimensions.r   r6   r;   r>   ))r?   r@   r   rA   rB   rC   r   r   r   rf   rg   rF   z_shapeprintformatnpprodr,   r   rD   rE   rL   rP   r   rQ   r	   rR   rS   rH   upreversedrJ   rK   rM   rN   r   upsampleinsertr   rT   rU   )rV   r   r   r   r   r   r   r   r   r   r2   rf   rg   r3   r   ignorekwargsrG   rY   rW   rX   rM   rN   rZ   r[   rm   r\   r)   r*   r@      sb   









zDecoder.__init__c                 C   s   |j | _d }| |}| j||}| j|}| j||}tt| j	D ]7}t| j
d D ]!}| j| j| ||}t| j| jdkrP| j| j| |}q/|dkr]| j| |}q&| jrc|S | |}t|}| |}| jryt|}|S )Nr5   r   )shapelast_z_shaperE   rP   rQ   rR   rS   rn   rJ   rC   r   rm   rM   rB   rN   ro   rf   rT   r0   rU   rg   r,   tanh)rV   zr_   ra   rX   r[   r)   r)   r*   rb      s.   



zDecoder.forwardrc   r)   r)   r\   r*   re      s2    	
Nre   c                       s:   e Zd Zddedef fddZdedefd	d
Z  ZS )DiagonalGaussianTr5   sample	chunk_dimc                    s   t    || _|| _d S r+   )r?   r@   rw   rx   )rV   rw   rx   r\   r)   r*   r@   	  s   

zDiagonalGaussian.__init__ru   returnc                 C   s@   t j|d| jd\}}| jrt d| }||t |  S |S )Nr   )dimg      ?)r,   chunkrx   rw   exp
randn_like)rV   ru   meanlogvarstdr)   r)   r*   rb     s
   zDiagonalGaussian.forward)Tr5   )	r    r!   r"   r(   r$   r@   r   rb   rd   r)   r)   r\   r*   rv     s    rv   c                       s`   e Zd Zdef fddZdedefddZdedefd	d
ZdedefddZdd Z	  Z
S )AutoEncoderparamsc                    s   t    t|j|j|j|j|j|j|j	|j
|j|j|jd| _t|j|j|j|j|j|j|j|j	|j
|j|jd| _t | _|j| _|j| _|| _|jd urW| |j d S d S )N)r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   )r?   r@   r1   r   r   r   r   r   r   r   r   r   r   r   encoderre   decoderrv   regr   r   r   r   load_from_checkpoint)rV   r   r\   r)   r*   r@     sD   

zAutoEncoder.__init__r/   ry   c                 C   s$   |  | |}| j|| j  }|S r+   )r   r   r   r   )rV   r/   ru   r)   r)   r*   encode=  s   zAutoEncoder.encoderu   c                 C   s   || j  | j }| |S r+   )r   r   r   )rV   ru   r)   r)   r*   decodeB  s   
zAutoEncoder.decodec                 C   s   |  | |S r+   )r   r   )rV   r/   r)   r)   r*   rb   F  s   zAutoEncoder.forwardc                 C   sF   ddl m} ||}| |\}}t|dkr!td|  d S d S )Nr   )	load_filez3Following keys are missing from checkpoint loaded: )safetensors.torchr   load_state_dictrB   loggerwarning)rV   	ckpt_pathload_sft
state_dictmissing
unexpectedr)   r)   r*   r   I  s   z AutoEncoder.load_from_checkpoint)r    r!   r"   r
   r@   r   r   r   rb   r   rd   r)   r)   r\   r*   r     s    %r   )dataclassesr   numpyrk   r,   r   r   %nemo.collections.diffusion.vae.blocksr   r   r   r   r	   r
   r0   rL   r1   re   rv   r   r)   r)   r)   r*   <module>   s   es