o
    ziX                     @   s  d dl mZmZ d dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ dd	lmZ d d
l mZ d dlmZmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ ddlmZm Z m!Z!m"Z" ddlm#Z# G dd deeZ$dS )    )CallableUnionN)Tensor)Module)
DataLoaderDataset)vjp)ExponentialMovingAverage   )DEVICE)r   )ABCabstractmethod)tqdm)datetime)nullcontext)VESDEVPSDETSVESDESDE)load_architecturec                       s<  e Zd Zddddefdeeef deeef def fddZ	de
fdd	Zede
fd
dZede
fddZdd Zdd Zddddede
fddZe ddddddddedededede
f
ddZe g dd fd!ed"efd#d$Zdd%d&d'dd(ed)ded)dd*ddd+d,ddddd-dfd.efd/d0Z  ZS )1ScoreModelBaseNmodelsdemodel_checkpointc                    s  t    |d u r|d u rtd|d u st|tr-t|||||d\}}| _|| nt|dr8||j	 |d u rgd|
 v rItd d}nd|
 v rVtd d	}nd
|
 v rctd d}ntdt|tr| dvrztd| d| |d< d|
 vrd|d< | d	krt|d |d |d d}nG| dkrd|
 vrd|d< t|d
 |d |d |d d}n&| dkrd|
 vrd|d< t|d |d |d |d |d |d d}|jj|d< || _	|| _|| _| j| || _|| _d S )Nz6Must provide one of 'model' or 'checkpoints_directory')r   devicehyperparametersr   r   t_starz1Using the Truncated Scaled Variance Exploding SDEtsve	sigma_minz Using the Variance Exploding SDEvebeta_minz!Using the Variance Preserving SDEvpz;SDE parameters are missing, please specify which sde to use)r   r!   r   zThe SDE z provided is no supportedr   T      ?	sigma_max)r   r$   r"   epsilongh㈵>beta_max)r    r&   r"   r%   r   beta)r   r$   r   r'   r"   r%   model_architecture)super__init__
ValueError
isinstancestrr   loaded_checkpointupdatehasattrr   keysprintKeyErrorlowerr   r   r   	__class____name__checkpoints_directoryr   tor   r   )selfr   r   r7   r   r   r   hyperparamsr5    E/home/ubuntu/.local/lib/python3.10/site-packages/score_models/base.pyr*      sx   
	

	
zScoreModelBase.__init__returnc                 G   s   | j ||g|R  S N)scorer9   txargsr<   r<   r=   forwarda   s   zScoreModelBase.forwardc                 G      d S r?   r<   rA   r<   r<   r=   r@   d      zScoreModelBase.scorec                 G   rF   r?   r<   )r9   rC   rD   r<   r<   r=   loss_fnh   rG   zScoreModelBase.loss_fnc                 G   sD   | j ||}| j ||}|d|d  | j||g|R    }|S )N      ?   )r   drift	diffusionr@   )r9   rB   rC   rD   fgf_tilder<   r<   r=   	ode_driftl   s   $zScoreModelBase.ode_driftc                 O   s   | j | j||g|R i |S r?   )
divergencedrift_fn)r9   rB   rC   rD   kwargsr<   r<   r=   hessianr   s   zScoreModelBase.hessianr
   
rademachern_cotangent_vectors
noise_typerW   c                   s   |j ^}}t||gdgt| }	t|gt|	}
|dkr'|
 }
 fdd}t||	\}}|
||
d  djdd}|S )Nr
   rU   c                    s   | g R  S r?   r<   rC   rD   rR   rB   r<   r=   <lambda>   s    z+ScoreModelBase.divergence.<locals>.<lambda>r   )dim)	shapetorchtilelen
randn_likesignr   flattensum)r9   rR   rB   rC   rW   rX   rD   BDsamplesvectorsrM   _vjp_funcrQ   r<   rZ   r=   rQ   u   s   

zScoreModelBase.divergencer   Euler)rW   rX   verbosemethodt0t1	ode_stepsrn   ro   c                   sD  ||d|r	dnd}
|j ^}}d}t|gj| }|| | } fdd} fdd}tt||
dD ]Y}|d	krX|||||  }||||| 7 }|| }q;|d
kr| }|||}|||  }|d|||| |  |  }|d|||||| |  | 7 }|| }q;td|j	
||7 }|S )aM  
        A basic implementation of Euler discretisation method of the ODE associated 
        with the marginals of the learned SDE.
        
        ode_steps: Number of steps to perform in the ODE
        hutchinsons_samples: Number of samples to draw to compute the trace of the Jacobian (divergence)
        
        Note that this estimator only compute the likelihood for one trajectory. 
        For more precise log likelihood estimation, tile x along the batch dimension
        and averge the results. You can also increase the number of ode steps and increase
        the number of cotangent vector for the Hutchinson estimator.
        
        Using the instantaneous change of variable formula
        (Chen et al. 2018,https://arxiv.org/abs/1806.07366)
        See also Song et al. 2020, https://arxiv.org/abs/2011.13456)
        rV   FT        c                    s   j | |g R  S r?   )rP   rB   rC   )rD   r9   r<   r=   r[      s    z/ScoreModelBase.log_likelihood.<locals>.<lambda>c                    s   j j| |g R i S r?   )rQ   rP   rr   rD   rS   r9   r<   r=   r[      s    )disablerk   HeunrI   z2Invalid method, please select either Euler or Heun)r]   r^   onesr8   r   r   rangecloneNotImplementedErrorr   priorlog_prob)r9   rC   rp   rW   rX   rl   rm   rn   ro   rD   rt   re   rf   log_prB   dtrM   divri   
previous_xrK   new_xr<   rs   r=   log_likelihood   s.   



$
zScoreModelBase.log_likelihoodr#   	conditionlikelihood_score_fnc                 C   s  t |ttfstdt| |^}}|du rdnd}|du r$dd }| j||g| j	}	| jj
| jj  | }
t|| j	| jj
 }tt| }D ]}}|d| d|d	  d
d| j|d	  dd|	  d ||
7 }|d	 | jjk r |S | j||	}| j||	|d | j||	g|R  ||||	    }t|	|
 d  }|	||
  }|||  }	tt|	rtd  |S qR|S )a  
        An Euler-Maruyama integration of the model SDE
        
        shape: Shape of the tensor to sample (including batch size)
        steps: Number of Euler-Maruyam steps to perform
        likelihood_score_fn: Add an additional drift to the sampling for posterior sampling. Must have the signature f(t, x)
        guidance_factor: Multiplicative factor for the likelihood drift
        z<condition must be a list or tuple or torch.Tensor, received Nrz   	posteriorc                 S   s   dS )Nrq   r<   rr   r<   r<   r=   r[          z'ScoreModelBase.sample.<locals>.<lambda>zSampling from the z | t = r   z.1fz | sigma = .1ez
| scale ~ rJ   rI   z=Diffusion is not stable, NaN were produced. Stopped sampling.)r,   listtupler+   typer   rz   sampler8   r   r"   r%   r^   rv   r   rw   set_descriptionitemsigmastdrL   rK   r@   ra   anyisnanr2   )r9   r]   stepsr   r   guidance_factorre   rf   sampling_fromrC   r}   rB   pbarri   rN   rM   dwx_meanr<   r<   r=   r      s8   4	8zScoreModelBase.sampled   g-C6?gH.?Finfrq   
   rJ   score_modeldatasetc           <      C   s
  t jj| j |d}t| j |d}t|||dd}|du r$t|}|du r+| j}|dur3|j	}nd}dd }|durLt
j|rKt
j|d }n|du r[|d	 t d
 }d}d}|dush|durd}|du rut
j||}t
j|st
| t
j|d}t
j|st|dC}tji d|d|d|d|d|d|d|d|	d|
d|d|d|d|d|d|d|d||d d! W d   n1 sw   Y  t
j|d"}t
j|s
t|d}tj| j|d d! W d   n	1 sw   Y  tt
j|d#} tt
j|d$}!d%d& | D }"d'd& | D }#|"r|durd| |"| }$| jt j|$| jjd( |t j|!||k | jd( td)| d*|  |}n4t|"}%| |% }$|!|% }&| jt j|$| jd( |t j|&| jd( td)|"|%  d*|  |"|% }|durt  | t!d+}'g }(d})t"" }*d}+d},t#|}-t$t%| }.D ]-}/t"" |* |
d, |+ kr nt"" }0d}1d}2t%|D ]}3t"" }4zt&|-}5W n t'y   t#|}-t&|-}5Y nw t(|5t)t*fr	|5^}6}7n|5}6g }7|dur||6}6|+  | j,|6g|7R  }8|8-  |)|k r?|j.D ]}9|t/|)| d- |9d.< q/|dkrPt j0j1j2| j |d/ |3  |4  t"" |4 }:|1|:7 }1|2t!|87 }2|)d07 })q|1t| }1|2t| }2|.5d1|/d0 d2d3|2d4d5 |(6|2 |d6krtd7|/ d8|2d9d:|1d9d; n|d0kr|/d0 | dkrtd7|/ d8|2d4 t7|2rtd<  n"|2d0|	 |' k r|2}'|}n|d08 }t"" |* |
d, krd},|r|/d0 | dks|dks|/|d0 ks|,r|d07 }tt
j|d=d>d?}|8| d@|2 dA W d   n	1 s+w   Y  |9  t :| j; t
j|dB|2dCd	|dDdE W d   n	1 sWw   Y  t :|; t
j|dF|2dCd	|dDdE tt
j|dG} dHd& | D }"dId& | D }#t|"d6| krt<|"};t
=t
j|dB|#|; dCd	|"|; dDdE t
=t
j|dF|#|; dCd	|"|; dDdE |#|;= |"|;= |dkrtdJ  n|,rtdK  n|/dkrt"" |0 }+qtdLt"" |* d, dMdN |>|   |(S )OaO  
        Train the model on the provided dataset.

        Parameters:
            dataset (torch.utils.data.Dataset): The training dataset.
            preprocessing_fn (function, optional): A function to preprocess the input data. Default is None.
            learning_rate (float, optional): The learning rate for optimizer. Default is 1e-4.
            ema_decay (float, optional): The decay rate for Exponential Moving Average. Default is 0.9999.
            batch_size (int, optional): The batch size for training. Default is 1.
            shuffle (bool, optional): Whether to shuffle the dataset during training. Default is False.
            epochs (int, optional): The number of epochs for training. Default is 100.
            patience (float, optional): The patience value for early stopping. Default is infinity.
            tolerance (float, optional): The tolerance value for early stopping. Default is 0.
            max_time (float, optional): The maximum training time in hours. Default is infinity.
            warmup (int, optional): The number of warmup iterations for learning rate. Default is 0.
            clip (float, optional): The gradient clipping value. Default is 0.
            model_checkpoint (float, optional): If checkpoints_directory is provided, this can be used to restart training from checkpoint.
            checkpoints_directory (str, optional): The directory to save model checkpoints. Default is None.
            checkpoints (int, optional): The interval for saving model checkpoints. Default is 10 epochs.
            models_to_keep (int, optional): The number of best models to keep. Default is 3.
            seed (int, optional): The random seed for numpy and torch. Default is None.
            logname (str, optional): The logname for saving checkpoints. Default is None.
            logdir (str, optional): The path to the directory in which to create the new checkpoint_directory with logname.
            logname_prefix (str, optional): The prefix for the logname. Default is "score_model".

        Returns:
            list: List of loss values during training.
        )lr)decayF)
batch_sizeshuffle	drop_lastNc                 S   s   | S r?   r<   rY   r<   r<   r=   r[   A  r   z$ScoreModelBase.fit.<locals>.<lambda>ri   z%y%m%d%H%M%Sr   Tzscript_params.jsonwpreprocessinglearning_rate	ema_decayr   r   epochspatience	tolerancemax_timewarmupclipcheckpoint_directorycheckpointsmodels_to_keepseedlognamelogname_prefix   )indentzmodel_hparams.jsonzcheckpoint*.ptzoptimizer*.ptc              	   S   ,   g | ]}t td tj|d d qS z[0-9]+r   intrefindallospathsplit.0r   r<   r<   r=   
<listcomp>v     , z&ScoreModelBase.fit.<locals>.<listcomp>c              	   S   r   z([0-9]{1}.[0-9]+e[+-][0-9]{2})r   floatr   r   r   r   r   r   r<   r<   r=   r   w  r   )map_locationzLoaded checkpoint z of r   i  r#   r   )max_normr
   zEpoch dz	 | Cost: r   z |rJ   zepoch z | cost z.2ez | time per step z szModel exploded and returns NaNzscore_sheet.txta)mode 
checkpoint_z.4e03dz.pt
optimizer_z*.ptc              	   S   r   r   r   r   r<   r<   r=   r     r   c              	   S   r   r   r   r   r<   r<   r=   r     r   zReached patiencezOut of timezFinished training after z.3fz hours.)?r^   optimAdamr   
parametersr	   r   r`   r7   r6   r   r   isdirr   r   nowstrftimejoinmkdirisfileopenjsondumpr   globindexload_state_dictloadr   r2   npargmaxmanual_seedr   timeiterr   rw   nextStopIterationr,   r   r   	zero_gradrH   backwardparam_groupsminimumnnutilsclip_grad_norm_stepr/   r   appendr   writeaverage_parameterssave
state_dictargminremovecopy_to)<r9   r   preprocessing_fnr   r   r   r   r   r   r   r   r   r   r7   r   r   r   r   r   logdirn_iterations_in_epochr   rl   	optimizerema
dataloaderpreprocessing_namesave_checkpointlatest_checkpointscript_params_pathrM   model_hparams_pathpaths	opt_pathscheckpoint_indicesscorescheckpoint_pathmax_checkpoint_indexopt_path	best_losslossesr   global_startestimated_time_for_epochout_of_time	data_iterr   epochepoch_starttime_per_step_epoch_meancostri   startXrC   rD   lossrN   _timeindex_to_deleter<   r<   r=   fit   sn  5
	









 

"
0
0,
..

zScoreModelBase.fit)r6   
__module____qualname__r   r   r-   r   r   r   r*   r   rE   r   r@   rH   rP   rT   rQ   r^   no_gradr   r   r   r   r   r   r  __classcell__r<   r<   r;   r=   r      s    

I	
P,r   )%typingr   r   r^   r   torch.nnr   torch.utils.datar   r   
torch.funcr   	torch_emar	   r   r   abcr   r   r   r   r   r   r   r   numpyr   r   
contextlibr   r   r   r   r   r   r   r   r<   r<   r<   r=   <module>   s&     