o
    ߥi.H                     @   s   d dl Zd dlZd dlmZ d dlm  mZ dgZdd Z	dddZ
G dd	 d	eZG d
d dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejjZdS )    NAutoencoderKLc                 C   s   | t |  S N)torchsigmoid)x r   m/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/multi_modal/video_synthesis/autoencoder.pynonlinearity   s   r	       c                 C   s   t jj|| dddS )Ngư>T)
num_groupsnum_channelsepsaffine)r   nn	GroupNorm)in_channelsr   r   r   r   	Normalize   s   r   c                   @   s@   e Zd ZdddZdd ZdddZg d	fd
dZdd ZdS )DiagonalGaussianDistributionFc                 C   s   || _ tj|ddd\| _| _t| jdd| _|| _td| j | _t| j| _	| jr@t
| jj| j jd | _	| _d S d S )N      dimg      >g      4@      ?device)
parametersr   chunkmeanlogvarclampdeterministicexpstdvar
zeros_liketor   )selfr   r    r   r   r   __init__   s   z%DiagonalGaussianDistribution.__init__c                 C   s*   | j | jt| j jj| jjd  }|S )Nr   )r   r"   r   randnshaper%   r   r   r&   r   r   r   r   sample$   s   
z#DiagonalGaussianDistribution.sampleNc                 C   s   | j r	tdgS |d u r%dtjt| jd| j d | j g dd S dtjt| j|j d|j | j|j  d | j |j g dd S )N        r   r         ?r   r      r   )r    r   Tensorsumpowr   r#   r   )r&   otherr   r   r   kl)   s&   
zDiagonalGaussianDistribution.klr.   c                 C   sR   | j r	tdgS tdtj }dtj|| j t|| j	 d| j
  |d S )Nr,          @r   r   r   )r    r   r0   nplogpir1   r   r2   r   r#   )r&   r+   dimslogtwopir   r   r   nll7   s   z DiagonalGaussianDistribution.nllc                 C   s   | j S r   )r   r&   r   r   r   mode@   s   z!DiagonalGaussianDistribution.mode)Fr   )__name__
__module____qualname__r'   r+   r4   r;   r=   r   r   r   r   r      s    

	r   c                       s.   e Zd Zdddd fdd
Zdd Z  ZS )	ResnetBlockNFi   )out_channelsconv_shortcuttemb_channelsc                   s   t    || _|d u r|n|}|| _|| _t|| _tjj	||dddd| _
|dkr3tj||| _t|| _tj|| _tjj	||dddd| _| j| jkrp| jrbtjj	||dddd| _d S tjj	||dddd| _d S d S )Nr/   r   kernel_sizestridepaddingr   )superr'   r   rB   use_conv_shortcutr   norm1r   r   Conv2dconv1Linear	temb_projnorm2Dropoutdropoutconv2rC   nin_shortcut)r&   r   rB   rC   rR   rD   	__class__r   r   r'   F   sB   




zResnetBlock.__init__c                 C   s   |}|  |}t|}| |}|d ur'|| t|d d d d d d f  }| |}t|}| |}| |}| j| jkrQ| j	rL| 
|}|| S | |}|| S r   )rK   r	   rM   rO   rP   rR   rS   r   rB   rJ   rC   rT   )r&   r   tembhr   r   r   forwardl   s    

&




zResnetBlock.forwardr>   r?   r@   r'   rY   __classcell__r   r   rU   r   rA   D   s    &rA   c                       $   e Zd Z fddZdd Z  ZS )	AttnBlockc                    s~   t    || _t|| _tjj||dddd| _tjj||dddd| _	tjj||dddd| _
tjj||dddd| _d S )Nr   r   rE   )rI   r'   r   r   normr   r   rL   qkvproj_out)r&   r   rU   r   r   r'      s   





zAttnBlock.__init__c                 C   s   |}|  |}| |}| |}| |}|j\}}}}	|||||	 }|ddd}|||||	 }t||}
|
t	|d  }
tj
jj|
dd}
|||||	 }|
ddd}
t||
}|||||	}| |}|| S )Nr   r   r   g      r   )r^   r_   r`   ra   r)   reshapepermuter   bmmintr   
functionalsoftmaxrb   )r&   r   h_r_   r`   ra   bcrX   ww_r   r   r   rY      s(   




zAttnBlock.forwardrZ   r   r   rU   r   r]      s    r]   c                       r\   )Upsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )Nr/   r   rE   rI   r'   	with_convr   r   rL   convr&   r   rp   rU   r   r   r'      s   

zUpsample.__init__c                 C   s(   t jjj|ddd}| jr| |}|S )Nr5   nearest)scale_factorr=   )r   r   rg   interpolaterp   rq   r*   r   r   r   rY      s   
zUpsample.forwardrZ   r   r   rU   r   rn      s    rn   c                       r\   )
Downsamplec                    s6   t    || _| jrtjj||dddd| _d S d S )Nr/   r   r   rE   ro   rr   rU   r   r   r'      s   

zDownsample.__init__c                 C   sF   | j rd}tjjj||ddd}| |}|S tjjj|ddd}|S )N)r   r   r   r   constantr   )r=   valuer   )rF   rG   )rp   r   r   rg   padrq   
avg_pool2d)r&   r   ry   r   r   r   rY      s   
zDownsample.forwardrZ   r   r   rU   r   rv      s    rv   c                       s0   e Zd Zddddd fdd
Zdd Z  ZS )	Encoderr   r         r,   T)ch_multrR   resamp_with_convdouble_zc       
      
      s  t    || _d| _t|| _|| _|| _|| _t	j
j|| jdddd| _|}dt| }|| _t
 | _t| jD ]X}t
 }t
 }|||  }|||  }t| jD ]}|t||| j|d |}||v rq|t| qVt
 }||_||_|| jd krt|||_|d }| j| q;t
 | _t||| j|d| j_t|| j_t||| j|d| j_t|| _t	j
j||
rd|	 n|	dddd| _ d S )Nr   r/   r   rE   )r   r   rB   rD   rR   r   )!rI   r'   chtemb_chlennum_resolutionsnum_res_blocks
resolutionr   r   r   rL   conv_intuple
in_ch_mult
ModuleListdownrangeappendrA   r]   Moduleblockattnrv   
downsamplemidblock_1attn_1block_2r   norm_outconv_out)r&   r   r   r   attn_resolutionsrR   r   r   r   
z_channelsr   ignore_kwargscurr_resr   i_levelr   r   block_in	block_outi_blockr   rU   r   r   r'      sz   






zEncoder.__init__c                 C   s   d }|  |g}t| jD ]D}t| jD ](}| j| j| |d |}t| j| jdkr7| j| j| |}|| q|| jd krQ|| j| 	|d  q|d }| j
||}| j
|}| j
||}| |}t|}| |}|S )Nr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   )r&   r   rW   hsr   r   rX   r   r   r   rY   !  s&   

zEncoder.forwardrZ   r   r   rU   r   r{      s    Kr{   c                       s2   e Zd Zdddddd fdd
Zdd	 Z  ZS )
Decoderr|   r,   TF)r   rR   r   give_pre_endtanh_outc             
      s  t    || _d| _t|| _|| _|	| _|| _|| _	|| _
||| jd   }|	d| jd   }d|
||f| _tjj|
|dddd| _t | _t||| j|d| j_t|| j_t||| j|d| j_t | _tt| jD ]R}t }t }|||  }t| jd D ]}|t||| j|d |}||v r|t| qt }||_||_|dkrt|||_|d }| j d| qqt!|| _"tjj||dddd| _#d S )Nr   r   r   r/   rE   r   )$rI   r'   r   r   r   r   r   r   r   r   r   z_shaper   r   rL   r   r   r   rA   r   r]   r   r   r   upreversedr   r   r   r   rn   upsampleinsertr   r   r   )r&   r   out_chr   r   r   rR   r   r   r   r   r   r   ignorekwargsr   r   r   r   r   r   r   r   rU   r   r   r'   ?  st   








zDecoder.__init__c                 C   s   |j | _d }| |}| j||}| j|}| j||}tt| j	D ]7}t| j
d D ]!}| j| j| ||}t| j| jdkrP| j| j| |}q/|dkr]| j| |}q&| jrc|S | |}t|}| |}| jryt|}|S )Nr   r   )r)   last_z_shaper   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r   r   tanh)r&   zrW   rX   r   r   r   r   r   rY     s.   



zDecoder.forwardrZ   r   r   rU   r   r   =  s    Mr   c                       s~   e Zd Z						d fdd	Zdd Zdd	 Zd
d Zdd ZdddZdd Z	dd Z
e dddZdd Z  ZS )r   NimageFc	           	   	      s   t    || _|| _tdi || _tdi || _|d s!J tj	
d|d  d| d| _tj	
||d d| _|| _|d urVt|tksJJ | dtd|dd |d ur]|| _|d u| _|d urm| | d S d S )Nr   r   r   r   colorizer/   r   )rI   r'   learn_logvar	image_keyr{   encoderr   decoderr   r   rL   
quant_convpost_quant_conv	embed_dimtyperf   register_bufferr(   monitoruse_emainit_from_ckpt)	r&   ddconfigr   	ckpt_pathr   colorize_nlabelsr   	ema_decayr   rU   r   r   r'     s0   
	
zAutoencoderKL.__init__c                 C   sr   t j|ddd }t| }dd l}| }|D ]}|ddkr/|dd }|| ||< q| j|dd	 d S )
Ncpu)map_location
state_dictr   first_stage_modelzfirst_stage_model.r   T)strict)	r   loadlistkeyscollectionsOrderedDictfindsplitload_state_dict)r&   pathsdr   r   sd_newr`   k_newr   r   r   r     s   zAutoencoderKL.init_from_ckptc                 O   s   | j r
| |  d S d S r   )r   	model_ema)r&   argskwargsr   r   r   on_train_batch_end  s   z AutoencoderKL.on_train_batch_endc                 C   s    |  |}| |}t|}|S r   )r   r   r   )r&   r   rX   moments	posteriorr   r   r   encode  s   

zAutoencoderKL.encodec                 C   s   |  |}| |}|S r   )r   r   )r&   r   decr   r   r   decode  s   

zAutoencoderKL.decodeTc                 C   s2   |  |}|r| }n| }| |}||fS r   )r   r+   r=   r   )r&   inputsample_posteriorr   r   r   r   r   r   rY     s   


zAutoencoderKL.forwardc                 C   s@   || }t |jdkr|d }|ddddjtjd }|S )Nr/   ).Nr   r   r   )memory_format)r   r)   rd   r%   r   contiguous_formatfloat)r&   batchr`   r   r   r   r   	get_input  s   
zAutoencoderKL.get_inputc                 C   s
   | j jjS r   )r   r   weightr<   r   r   r   get_last_layer  s   
zAutoencoderKL.get_last_layerc                 K   s  t  }| || j}|| j}|s| |\}}|jd dkr2|jd dks(J | |}| |}| t	|
 |d< ||d< |sG| jr|  3 | |\}	}
|jd dkrg|	jd dksbJ | |	}	| t	|

 |d< |	|d< W d    n1 sw   Y  ||d< |S )Nr   r/   samplesreconstructionssamples_emareconstructions_emainputs)dictr   r   r%   r   r)   to_rgbr   r   
randn_liker+   r   	ema_scope)r&   r   only_inputslog_emar   r7   r   xrecr   xrec_emaposterior_emar   r   r   
log_images  s0   





	zAutoencoderKL.log_imagesc              	   C   st   | j dksJ t| ds| dtd|jd dd| tj|| j	d}d||
   | |
   d }|S )Nsegmentationr   r/   r   )r   r5   r-   )r   hasattrr   r   r(   r)   r%   Fconv2dr   minmaxr*   r   r   r   r     s   
$zAutoencoderKL.to_rgb)Nr   NNNF)T)FF)r>   r?   r@   r'   r   r   r   r   rY   r   r   r   no_gradr   r   r[   r   r   rU   r   r     s$     
	c                       sB   e Zd Zdd fdd
Zdd Zdd Zd	d
 Zdd Z  ZS )IdentityFirstStageFvq_interfacec                   s   || _ t   d S r   )r   rI   r'   )r&   r   r   r   rU   r   r   r'   )  s   zIdentityFirstStage.__init__c                 O      |S r   r   r&   r   r   r   r   r   r   r   -     zIdentityFirstStage.encodec                 O   r   r   r   r   r   r   r   r   0  r  zIdentityFirstStage.decodec                 O   s   | j r
|d g dfS |S )N)NNNr   r   r   r   r   quantize3  s   zIdentityFirstStage.quantizec                 O   r   r   r   r   r   r   r   rY   8  r  zIdentityFirstStage.forward)	r>   r?   r@   r'   r   r   r  rY   r[   r   r   rU   r   r   '  s    r   )r
   )numpyr6   r   torch.nnr   torch.nn.functionalrg   r   __all__r	   r   objectr   r   rA   r]   rn   rv   r{   r   r   r   r   r   r   r   <module>   s    
-?,irx