o
    	TiOJ                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ d dlZd dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d
dlmZ ddlmZ ddlmZmZ e rcd dlZee Z!G dd deZ"dS )    N)defaultdict)Path)AnyCallableOptionalUnion)Accelerator)
get_logger)ProjectConfigurationset_seed)PyTorchModelHubMixin)is_wandb_available   )DDPOStableDiffusionPipeline   )AlignPropConfig)generate_model_cardget_comet_experiment_urlc                       s4  e Zd ZdZddgZ	d.dedeeje	e
 e	e gejf deg e	e
ef f ded	eeeeegef  f
d
dZdd ZdedefddZdd ZdejdedejfddZdd Zdd Zdd Zd/d d!Zd.d"ee fd#d$Zd%d& Z fd'd(Z			d0d)ee
 d*ee
 d+ee
ee
 df fd,d-Z  ZS )1AlignPropTrainera  
    The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is
    heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based
    pipelines are supported

    Attributes:
        config (`AlignPropConfig`):
            Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details.
        reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
            Reward function to be used
        prompt_function (`Callable[[], tuple[str, Any]]`):
            Function to generate prompts to guide model
        sd_pipeline (`DDPOStableDiffusionPipeline`):
            Stable Diffusion pipeline to be used for training.
        image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`):
            Hook to be called to log images
    trl	alignpropNconfigreward_functionprompt_functionsd_pipelineimage_samples_hookc              	   C   s  t dt |d u rt d || _|| _|| _|| _td!i | jj}| jj	r}t
jt
j| jj	| j_	dt
j| jj	vr}ttdd t
| jj	}t|dkr]td| jj	 tdd	 |D }t
j| jj	d|d
  | j_	|d
 d |_td!| jj| jj|| jjd| jj| _|jd uo|jdk}	| jjr| jj| jj|	st |! dn|! | jj"d t#$d|  t%| jj&dd || _'| j'j(d| jj) dddd | jjdkrt*j+}
n| jjdkrt*j,}
nt*j-}
| j'j.j/| jj0|
d | j'j1j/| jj0|
d | j'j2j/| jj0|
d | j'3 }| j4| j5 | j6| j7 | jj8r/dt*j9j:j;_8| <t=|ts;|> n|| _?| j'1| j'j@| jjAd u rOdgn| jjAddd| j'j@jBdjC/| jj0d | _D| j'jEpn| jjE| _EtF| j'dr| j'jGr| jH|| j?\}| _?ttdd |> | _In| jH|| j?\| _I| _?|j	rt#$d|j	  | jJ|j	 tK|j	Ld d
 d | _Md S d| _Md S )"NzEAlignPropTrainer is deprecated and will be removed in version 0.23.0.z8No image_samples_hook provided; no images will be loggedcheckpoint_c                 S   s   d| v S )Nr    )xr   r   Q/home/ubuntu/.local/lib/python3.10/site-packages/trl/trainer/alignprop_trainer.py<lambda>Y   s    z+AlignPropTrainer.__init__.<locals>.<lambda>r   zNo checkpoints found in c                 S   s   g | ]}t |d d qS )_)intsplit).0r   r   r   r   
<listcomp>_   s    z-AlignPropTrainer.__init__.<locals>.<listcomp>r"   r   )log_withmixed_precisionproject_configgradient_accumulation_stepstensorboard)alignprop_trainer_config)r   init_kwargs
T)device_specificFTimestep)positiondisableleavedescdynamic_ncolsfp16bf16)dtype pt
max_lengthreturn_tensorspadding
truncationr;   use_lorac                 S   s   | j S N)requires_grad)pr   r   r   r       s    zResuming from r!   r   )NwarningswarnDeprecationWarning	prompt_fn	reward_fnr   image_samples_callbackr
   project_kwargsresume_fromospathnormpath
expanduserbasenamelistfilterlistdirlen
ValueErrorsortedjoin	iterationr   r'   r(   !train_gradient_accumulation_stepsaccelerator_kwargsacceleratoris_main_processinit_trackerstracker_project_namedictto_dicttracker_kwargsloggerinfor   seedr   set_progress_bar_configis_local_main_processtorchfloat16bfloat16float32vaetodevicetext_encoderunetget_trainable_layersregister_save_state_pre_hook_save_model_hookregister_load_state_pre_hook_load_model_hook
allow_tf32backendscudamatmul_setup_optimizer
isinstance
parameters	optimizer	tokenizernegative_promptsmodel_max_length	input_idsneg_prompt_embedautocasthasattrr@   preparetrainable_layers
load_stater#   r$   first_epoch)selfr   r   r   r   r   accelerator_project_configcheckpointscheckpoint_numbersis_using_tensorboardinference_dtyper   ro   r   r   r   __init__=   s   




zAlignPropTrainer.__init__c                 C   s"   |  |d |d |d \}}|S )Nimagespromptsprompt_metadata)rH   )r   prompt_image_pairsrewardreward_metadatar   r   r   compute_rewards   s   z AlignPropTrainer.compute_rewardsepochglobal_stepc           	      C   s<  t t}| jj  t| jjD ]}| j	| jj{ | 
 g t S | j| jjd}| |}||d< | j|   }| |}| j| | jjrf| jt| jts_| j n| j| jj | j  | j  W d   n1 szw   Y  W d   n1 sw   Y  W d   n1 sw   Y  |d |   |d |!  |d |"  q| jjrdd |# D }| jj$|d	d
}|%d|i | jj&||d |d7 }t t}nt'd| j(dur|| jj) dkr| (||| jj*d  |dkr|| jj+ dkr| jj,r| j-  |S )a  
        Perform a single step of training.

        Args:
            epoch (int): The current epoch.
            global_step (int): The current global step.

        Side Effects:
            - Model weights are updated
            - Logs the statistics to the accelerator trackers.
            - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step,
              and the accelerator tracker.

        Returns:
            global_step (int): The updated global step.
        )
batch_sizerewardsNreward_mean
reward_stdlossc                 S   s"   i | ]\}}|t t |qS r   )rg   meantensor)r%   kvr   r   r   
<dictcomp>   s   " z)AlignPropTrainer.step.<locals>.<dictcomp>r   )	reductionr   )stepr   zsOptimization step should have been performed by this point. Please check calculated gradient accumulation settings.r   ).r   rQ   r   ro   trainranger   rY   r[   
accumulater   rg   enable_grad_generate_samplestrain_batch_sizer   gatherdetachcpunumpycalculate_lossbackwardsync_gradientsclip_grad_norm_rz   r   r{   train_max_grad_normr|   r   	zero_gradappendr   stditemitemsreduceupdatelogrU   rI   log_image_freqtrackers	save_freqr\   
save_state)	r   r   r   rc   r!   r   r   rewards_visr   r   r   r   r      sX   &



  
&
zAlignPropTrainer.stepc                 C   s   d|   }|S )a(  
        Calculate the loss for a batch of an unpacked sample

        Args:
            rewards (torch.Tensor):
                Differentiable reward scalars for each generated image, shape: [batch_size]

        Returns:
            loss (torch.Tensor) (all of these are of shape (1,))
        g      $@)r   )r   r   r   r   r   r   r     s   zAlignPropTrainer.calculate_loss
advantages
clip_rangeratioc                 C   s8   | | }| t |d| d|  }t t ||S )Ng      ?)rg   clampr   maximum)r   r   r   r   unclipped_lossclipped_lossr   r   r   r      s   
zAlignPropTrainer.lossc                 C   sL   | j jrdd l}|jj}ntjj}||| j j| j j| j j	f| j j
| j jdS )Nr   )lrbetasweight_decayeps)r   train_use_8bit_adambitsandbytesoptim	AdamW8bitrg   AdamWtrain_learning_ratetrain_adam_beta1train_adam_beta2train_adam_weight_decaytrain_adam_epsilon)r   trainable_layers_parametersr   optimizer_clsr   r   r   ry   .  s   
z!AlignPropTrainer._setup_optimizerc                 C   s   | j ||| |  d S rA   )r   save_checkpointpop)r   modelsweights
output_dirr   r   r   rr   >  s   z!AlignPropTrainer._save_model_hookc                 C   s   | j || |  d S rA   )r   load_checkpointr   )r   r   	input_dirr   r   r   rt   B  s   z!AlignPropTrainer._load_model_hookTc                    s  i } j |dd}|du rt fddt|D  \}}n	dd t|D } jj|ddd jjjd	j j	j
} j|d
 }|ra jj|| jj jj jj jj jj jjdd	}	n j|| jj jj jjdd}	|	j}
|
|d< ||d< ||d< |S )a  
        Generate samples from the model

        Args:
            batch_size (int): Batch size to use for sampling
            with_grad (bool): Whether the generated RGBs should have gradients attached to it.

        Returns:
            prompt_image_pairs (dict[Any])
        r   Nc                    s   g | ]}   qS r   )rG   r%   r!   r   r   r   r&   V  s    z6AlignPropTrainer._generate_samples.<locals>.<listcomp>c                 S   s   g | ]}i qS r   r   r   r   r   r   r&   X  s    r:   r;   Tr<   r   )	prompt_embedsnegative_prompt_embedsnum_inference_stepsguidance_scaleetatruncated_backprop_randtruncated_backprop_timesteptruncated_rand_backprop_minmaxoutput_type)r   r   r   r   r   r   r   r   r   )r   repeatzipr   r   r}   r   r   rl   r[   rm   rn   rgb_with_gradr   sample_num_stepssample_guidance_scale
sample_etar   r   r   r   )r   r   	with_gradr   r   sample_neg_prompt_embedsr   
prompt_idsr   	sd_outputr   r   r   r   r   F  sP    	z"AlignPropTrainer._generate_samplesepochsc                 C   s6   d}|du r
| j j}t| j|D ]}| ||}qdS )z>
        Train the model for a given number of epochs
        r   N)r   
num_epochsr   r   r   )r   r   r   r   r   r   r   r     s   zAlignPropTrainer.trainc                 C   s   | j | |   d S rA   )r   save_pretrainedcreate_model_card)r   save_directoryr   r   r   _save_pretrained  s   z!AlignPropTrainer._save_pretrainedc                    sL   | j jd u rt| j jj}n	| j jdd }| j|d t || d S )N/r"   )
model_name)	argshub_model_idr   r   namer$   r   super_save_checkpoint)r   modeltrialr   	__class__r   r   r     s
   z!AlignPropTrainer._save_checkpointr   dataset_nametagsc                 C   s   |   sdS t| jjdrtj| jjjs| jjj}nd}|du r&t }nt	|t
r/|h}nt|}t| jjdr?|d || j td}t||| j||t r]tjdur]tjjndt d|ddd	}|tj| jjd
 dS )a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        N_name_or_pathunsloth_versionunslothaS          @article{prabhudesai2024aligning,
            title        = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}},
            author       = {Mihir Prabhudesai and Anirudh Goyal and Deepak Pathak and Katerina Fragkiadaki},
            year         = 2024,
            eprint       = {arXiv:2310.03739}
        }	AlignPropzCAligning Text-to-Image Diffusion Models with Reward Backpropagationz
2310.03739)
base_modelr   r   r  r  	wandb_url	comet_urltrainer_nametrainer_citationpaper_titlepaper_idz	README.md)is_world_process_zeror   r   r   rL   rM   isdirr  setrz   straddr   
_tag_namestextwrapdedentr   r   r   wandbrunurlr   saverW   r   r   )r   r   r  r  r
  citation
model_cardr   r   r   r     s8    


z"AlignPropTrainer.create_model_cardrA   )TN)NNN) __name__
__module____qualname____doc__r  r   r   rg   Tensortupler  r   r   r   r   r   r#   r   r   floatr   ry   rr   rt   r   r   r   r   r   rQ   r   __classcell__r   r   r  r   r   (   sV    
 J

<

r   )#rL   r  rD   collectionsr   pathlibr   typingr   r   r   r   rg   
accelerater   accelerate.loggingr	   accelerate.utilsr
   r   huggingface_hubr   transformersr   r   r   alignprop_configr   utilsr   r   r  r  rb   r   r   r   r   r   <module>   s&   