o
    pi+[                    @   sj  d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	Z	ddl
Z
ddlZddlmZ ddlmZmZmZmZmZmZmZmZ dd	lmZmZmZmZmZmZ dd
lmZ e raddl m!Z! e rpddl"m#Z# ddlm$Z$ e%e&Z'i ddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6Z(i dd7d8idd7d9id:d7d;idd7d<idd7d=idd7d>id?d7d@idd7dAidd7dBidCd7dDid*dEdFdGdHdEdIdGd,dJdKdGdLdJdMdGd.d7dNidOd7dPidQd7dRid7dSid7dTid7dUid7dVid7dWid7dXidYZ)dZdZdZdZd[d[d[d[d\d[d]
Z*d^d_d`dadbdcdddedfdgdh
didjdkdldmdidjdkdldndod^d_d`dadbdcdpdqdrdsdt
didjdkdldmdidjdkdldndoi dududvdvdwdwdxdxdydzd{d|d}d}d~d~dddddddddddddddddddddddddddddddd	ddZ+g dZ,ddddddddddddZ-dZ.dZ/dZ0dZ1dZ2ddgZ3dZ4dZZ5g dZ6G dd de7Z8dd Z9dd Z:dd Z;						dddZ<dddZ=dd Z>dd Z?dd Z@dd ZAddĄ ZBddƄ ZCddȄ ZDddʄ ZEdd̄ ZFdd΄ ZGdddЄZHdd҄ ZI	dddԄZJdddքZKd	dd؄ZLdddڄZMdd܄ ZNddބ ZOdd ZPdd ZQdddZRdd ZSdd ZTdddZU	d
ddZV					dddZW	dddZXdddZYdd ZZdd Z[dd Z\dd Z]dd Z^				ddd Z_dd Z`dd ZadS (  z7Conversion script for the Stable Diffusion checkpoints.    N)nullcontext)BytesIO)urlparse   )load_state_dict)DDIMSchedulerDPMSolverMultistepSchedulerEDMDPMSolverMultistepSchedulerEulerAncestralDiscreteSchedulerEulerDiscreteSchedulerHeunDiscreteSchedulerLMSDiscreteSchedulerPNDMScheduler)SAFETENSORS_WEIGHTS_NAMEWEIGHTS_NAME	deprecateis_accelerate_availableis_transformers_availablelogging)_get_model_file)AutoImageProcessor)init_empty_weights)load_model_dict_into_metav2zMmodel.diffusion_model.input_blocks.2.1.transformer_blocks.0.attn2.to_k.weightxl_basezEconditioner.embedders.1.model.transformer.resblocks.9.mlp.c_proj.bias
xl_refinerzEconditioner.embedders.0.model.transformer.resblocks.9.mlp.c_proj.biasupscalez<model.diffusion_model.input_blocks.10.0.skip_connection.bias
controlnetz!control_model.time_embed.0.weightplayground-v2-5edm_mean
inpaintingz-model.diffusion_model.input_blocks.0.0.weightclipzLcond_stage_model.transformer.text_model.embeddings.position_embedding.weight	clip_sdxlzSconditioner.embedders.0.transformer.text_model.embeddings.position_embedding.weightclip_sd3zPtext_encoders.clip_l.transformer.text_model.embeddings.position_embedding.weight	open_clipz-cond_stage_model.model.token_embedding.weightopen_clip_sdxlz2conditioner.embedders.1.model.positional_embeddingopen_clip_sdxl_refinerz-conditioner.embedders.0.model.text_projectionopen_clip_sd3zPtext_encoders.clip_g.transformer.text_model.embeddings.position_embedding.weightstable_cascade_stage_bz$down_blocks.1.0.channelwise.0.weightstable_cascade_stage_cclip_txt_mapper.weightsd3zJmodel.diffusion_model.joint_blocks.0.context_block.adaLN_modulation.1.biaszjdown_blocks.0.motion_modules.0.temporal_transformer.transformer_blocks.0.attention_blocks.0.pos_encoder.pez9mid_block.motion_modules.0.temporal_transformer.norm.biasz=up_blocks.2.motion_modules.0.temporal_transformer.norm.weight(controlnet_cond_embedding.conv_in.weightz controlnet_cond_embedding.weightz,double_blocks.0.img_attn.norm.key_norm.scale)animatediffanimatediff_v2animatediff_sdxl_betaanimatediff_scribbleanimatediff_rgbfluxpretrained_model_name_or_pathz(stabilityai/stable-diffusion-xl-base-1.0z+stabilityai/stable-diffusion-xl-refiner-1.0
xl_inpaintz0diffusers/stable-diffusion-xl-1.0-inpainting-0.1z-playgroundai/playground-v2.5-1024px-aestheticz(stabilityai/stable-diffusion-x4-upscalerz$runwayml/stable-diffusion-inpaintinginpainting_v2z)stabilityai/stable-diffusion-2-inpaintingz"lllyasviel/control_v11p_sd15_cannyz stabilityai/stable-diffusion-2-1v1zrunwayml/stable-diffusion-v1-5zstabilityai/stable-cascadedecoder)r3   	subfolderstable_cascade_stage_b_litedecoder_litez stabilityai/stable-cascade-priorpriorstable_cascade_stage_c_lite
prior_litez/stabilityai/stable-diffusion-3-medium-diffusersanimatediff_v1z&guoyww/animatediff-motion-adapter-v1-5r.   z(guoyww/animatediff-motion-adapter-v1-5-2z(guoyww/animatediff-motion-adapter-v1-5-3z+guoyww/animatediff-motion-adapter-sdxl-betaz&guoyww/animatediff-sparsectrl-scribblez!guoyww/animatediff-sparsectrl-rgbzblack-forest-labs/FLUX.1-devz black-forest-labs/FLUX.1-schnell)animatediff_v3r/   r0   r1   flux-devflux-schnell   i   i   )
r   r   r4   r   r   r    r5   r   r   r6   time_embed.0.weightztime_embed.0.biasztime_embed.2.weightztime_embed.2.biaszinput_blocks.0.0.weightzinput_blocks.0.0.biaszout.0.weightz
out.0.biaszout.2.weightz
out.2.bias)
time_embedding.linear_1.weighttime_embedding.linear_1.biastime_embedding.linear_2.weighttime_embedding.linear_2.biasconv_in.weightconv_in.biaszconv_norm_out.weightzconv_norm_out.biaszconv_out.weightzconv_out.biaszlabel_emb.0.0.weightzlabel_emb.0.0.biaszlabel_emb.0.2.weightzlabel_emb.0.2.bias)zclass_embedding.linear_1.weightzclass_embedding.linear_1.biaszclass_embedding.linear_2.weightzclass_embedding.linear_2.bias)zadd_embedding.linear_1.weightzadd_embedding.linear_1.biaszadd_embedding.linear_2.weightzadd_embedding.linear_2.bias)layersclass_embed_typeaddition_embed_typezinput_hint_block.0.weightzinput_hint_block.0.biaszinput_hint_block.14.weightzinput_hint_block.14.bias)
rD   rE   rF   rG   rH   rI   r,   z&controlnet_cond_embedding.conv_in.biasz)controlnet_cond_embedding.conv_out.weightz'controlnet_cond_embedding.conv_out.biaszencoder.conv_in.weightzencoder.conv_in.biaszencoder.conv_out.weightzencoder.conv_out.biaszencoder.conv_norm_out.weightzencoder.norm_out.weightzencoder.conv_norm_out.biaszencoder.norm_out.biaszdecoder.conv_in.weightzdecoder.conv_in.biaszdecoder.conv_out.weightzdecoder.conv_out.biaszdecoder.conv_norm_out.weightzdecoder.norm_out.weightzdecoder.conv_norm_out.biaszdecoder.norm_out.biaszquant_conv.weightzquant_conv.biaszpost_quant_conv.weightzpost_quant_conv.biaspositional_embeddingztoken_embedding.weightzln_final.weightzln_final.biastext_projection)z/text_model.embeddings.position_embedding.weightz,text_model.embeddings.token_embedding.weightz"text_model.final_layer_norm.weightz text_model.final_layer_norm.biastext_projection.weightz
resblocks.ln_1ln_2z.c_fc.z.c_proj.z.attnz	ln_final.)	ztext_model.encoder.layers.layer_norm1layer_norm2z.fc1.z.fc2.z
.self_attnz(transformer.text_model.final_layer_norm.z8transformer.text_model.embeddings.token_embedding.weightz;transformer.text_model.embeddings.position_embedding.weight)rJ   transformer)unetr   vaeopenclip)zAcond_stage_model.model.transformer.resblocks.23.attn.in_proj_biaszCcond_stage_model.model.transformer.resblocks.23.attn.in_proj_weightzBcond_stage_model.model.transformer.resblocks.23.attn.out_proj.biaszDcond_stage_model.model.transformer.resblocks.23.attn.out_proj.weightz9cond_stage_model.model.transformer.resblocks.23.ln_1.biasz;cond_stage_model.model.transformer.resblocks.23.ln_1.weightz9cond_stage_model.model.transformer.resblocks.23.ln_2.biasz;cond_stage_model.model.transformer.resblocks.23.ln_2.weightz=cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.biasz?cond_stage_model.model.transformer.resblocks.23.mlp.c_fc.weightz?cond_stage_model.model.transformer.resblocks.23.mlp.c_proj.biaszAcond_stage_model.model.transformer.resblocks.23.mlp.c_proj.weightz&cond_stage_model.model.text_projectionscaled_linearg_QK?g~jt?linear  epsilon      ?FT   leading)beta_schedule
beta_startbeta_endinterpolation_typenum_train_timestepsprediction_typesample_max_valueset_alpha_to_oneskip_prk_stepssteps_offsettimestep_spacingzfirst_stage_model.g{P?      ?model.diffusion_model.zcontrol_model.zcond_stage_model.transformer.z$conditioner.embedders.0.transformer.conditioner.embedders.0.model.)zhttps://huggingface.co/zhuggingface.co/zhf.co/zhttps://hf.co/c                       s   e Zd Zd fdd	Z  ZS )SingleFileComponentErrorNc                    s   || _ t | j  d S N)messagesuper__init__)selfro   	__class__ a/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/loaders/single_file_utils.pyrq     s   z!SingleFileComponentError.__init__rn   )__name__
__module____qualname__rq   __classcell__ru   ru   rs   rv   rm     s    rm   c                 C   s   t | }|jr|jrdS dS )NTF)r   schemenetloc)urlresultru   ru   rv   is_valid_url  s   r   c                 C   s   t | stdd}d }d}tD ]}| |d} qt|| }|s*td ||fS |d d|d }|d	}||fS )
NzOInvalid `pretrained_model_name_or_path` provided. Please set it to a valid URL.z#([^/]+)/([^/]+)/(?:blob/main/)?(.+)rn    zFUnable to identify the repo_id and weights_name from the provided URL.r]   /r      )	r   
ValueErrorVALID_URL_PREFIXESreplacerematchloggerwarninggroup)r3   patternweights_namerepo_idprefixr   ru   ru   rv   !_extract_repo_id_and_weights_name"  s   

r   c                 C   s>   t j| |}d}ttfD ]}t jt j||rd}q|S )NFT)ospathjoinr   r   isfile)cached_foldernamer3   weights_existr   ru   ru   rv   "_is_model_weights_in_cached_folder6  s   r   c           
   
   C   sZ   t j| r	| } nt| \}}t||||||||d} t| }	d|	v r+|	d }	d|	v s#|	S )N)r   force_download	cache_dirproxieslocal_files_onlytokenrevision
state_dict)r   r   r   r   r   r   )
pretrained_model_link_or_pathr   r   r   r   r   r   r   r   
checkpointru   ru   rv   load_single_file_checkpointA  s$   	r   c                 C   s|   t j| r t| d}| } W d    n1 sw   Y  nt| r3|r*tdtt	| j
} ntdt| }|S )Nrz|`local_files_only` is set to True, but a URL was provided as `original_config_file`. Please provide a valid local file path.zSInvalid `original_config_file` provided. Please set it to a valid file path or URL.)r   r   r   openreadr   r   r   requestsgetcontentyaml	safe_load)original_config_filer   fporiginal_configru   ru   rv   fetch_original_configc  s   

r   c                 C      t d | v rdS dS )Nr!   TFCHECKPOINT_KEY_NAMESr   ru   ru   rv   is_clip_modely     r   c                 C   r   )Nr"   TFr   r   ru   ru   rv   is_clip_sdxl_model  r   r   c                 C   r   )Nr#   TFr   r   ru   ru   rv   is_clip_sd3_model  r   r   c                 C   r   )Nr$   TFr   r   ru   ru   rv   is_open_clip_model  r   r   c                 C   r   )Nr%   TFr   r   ru   ru   rv   is_open_clip_sdxl_model  r   r   c                 C   r   )Nr'   TFr   r   ru   ru   rv   is_open_clip_sd3_model  r   r   c                 C   r   )Nr&   TFr   r   ru   ru   rv   is_open_clip_sdxl_refiner_model  r   r   c                 C   sL   t t|t|t|t|t|t|g}| jdks | jdkr$|r$dS dS )NCLIPTextModelCLIPTextModelWithProjectionTF)anyr   r   r   r   r   r   rw   )	class_objr   is_clip_in_checkpointru   ru   rv   is_clip_model_in_single_file  s   r   c                 C   s`  t d | v r*| t d  jd dkr*t d | v r&| t d  jd dkr&d}|S d}|S t d | v r?| t d  jd dkr?d}|S t d | v rId}|S t d	 | v rSd	}|S t d
 | v r]d
}|S t d | v rgd}|S t d | v rqd}|S t d | v r| t d  jd dkrd}|S t d | v r| t d  jd dkrd}|S t d | v r| t d  jd dkrd}|S t d | v r| t d  jd dkrd}|S t d | v rd}|S t d | v rt d | v rd}|S t d | v rd}|S t d | v rd}|S | t d  jd dkrd}|S | t d  jd dkrd}|S d}|S t d  | v r,d!| v r(d"}|S d#}|S d$}|S )%Nr    r]   	   r   rB   r5   r   r   r   r   r   r)   r      r<      r(   i@  r9   i  r+   r-   r0   r1   r.   r/   i@     r>   r?   r2   guidance_in.in_layer.biasr@   rA   r6   )r   shape)r   
model_typeru   ru   rv   infer_diffusers_model_type  s   "LJ"GDA>;82,& 

r   c                 C   s   t | }t| }|S rn   )r    DIFFUSERS_DEFAULT_PIPELINE_PATHS)r   r   
model_pathru   ru   rv   fetch_diffusers_config  s   r   c                 C   s   |r|S t | }t| }|S rn   )r   'DIFFUSERS_TO_LDM_DEFAULT_IMAGE_SIZE_MAP)r   
image_sizer   ru   ru   rv   set_image_size  s
   r   c                 C   s   t |  }g d}|D ]B}d|ddd  |v r4| | jdkr3| | d d d d ddf | |< qd|v rN| | jdkrN| | d d d d df | |< qd S )N)zquery.weightz
key.weightzvalue.weight.r   r   zproj_attn.weight)listkeysr   splitndim)r   r   	attn_keyskeyru   ru   rv   conv_attn_to_linear$  s    r   c                    s"  |durd}t dd| t||d}d| d d v r/| d d d dur/| d d d d n
| d d d	 d |durHd
}t dd| |}nd }| d d d d d }fddd D }g }	d}
tt|D ]}|
d v rwdnd}|	| |t|d kr|
d9 }
qmg }tt|D ]}|
d v rdnd}|| |
d }
qd durtd trd ntd }nd}dt|d d  }dv rՈd nd}dv r߈d nd}|r|du rd d    fddtd D }d}d}d}d}d}d  durtd  trd  nd  d! }d"v rBd" d#krB|d$v r5d%}d&}nd'}d(v s>J d( }|| ||	|d) ||||||||d*}|durgd+}t dd| ||d,< d-v rrd- |d.< d"v rtd" trd" |d/< d0 |d0< ||d1< |S )2R
    Creates a config for the diffusers based on the config of the LDM model.
    NzConfiguring UNet2DConditionModel with the `image_size` argument to `from_single_file`is deprecated and will be ignored in future versions.r   1.0.0r   unet_configmodelparamsnetwork_configzConfiguring UNet2DConditionModel with the `num_in_channels` argument to `from_single_file`is deprecated and will be ignored in future versions.in_channelsfirst_stage_configddconfigc                       g | ]} d  | qS )model_channelsru   .0mult)unet_paramsru   rv   
<listcomp>R      z9create_unet_diffusers_config_from_ldm.<locals>.<listcomp>channel_multr]   attention_resolutionsCrossAttnDownBlock2DDownBlock2Dr   CrossAttnUpBlock2D	UpBlock2Dtransformer_depthch_mult	num_headsuse_linear_in_transformerFr   num_head_channelsc                    s   g | ]} | qS ru   ru   )r   c)head_dim_multru   rv   r   u  s    context_dimr   num_classes
sequential)r   i   	text_time   
projectionadm_in_channelsnum_res_blocks)sample_sizer   down_block_typesblock_out_channelslayers_per_blockcross_attention_dimattention_head_dimuse_linear_projectionrK   rL   addition_time_embed_dim%projection_class_embeddings_input_dimtransformer_layers_per_blockzConfiguring UNet2DConditionModel with the `upcast_attention` argument to `from_single_file`is deprecated and will be ignored in future versions.upcast_attentiondisable_self_attentionsonly_cross_attentionnum_class_embedsout_channelsup_block_types)r   r   rangelenappend
isinstanceintr   )r   r   r   r	  num_in_channelsdeprecation_messager   
vae_paramsr  r   
resolutioni
block_typer  r  vae_scale_factorhead_dimr  rK   rL   r  r  r   configru   )r   r   rv   %create_unet_diffusers_config_from_ldm0  s   









r  c                 K   s   |d urd}t dd| t||d}| d d d d }t| |d}|d |d	 |d
 |d |d |d |d |d |d |d |d |d |d d}|S )NzoConfiguring ControlNetModel with the `image_size` argumentis deprecated and will be ignored in future versions.r   r   r   r   r   control_stage_confighint_channelsr   r   r  r  r  r  r  rK   rL   r  r  r  )conditioning_channelsr   r   r  r  r  r  r  rK   rL   r  r  r  )r   r   r  )r   r   r   kwargsr  r   diffusers_unet_configcontrolnet_configru   ru   rv   +create_controlnet_diffusers_config_from_ldm  s,   r$  c              
      s8  |durd}t dd| t||d}d|v r#d|v r#|d }|d }nd}d}| d d	 d
 d	 d  |du rB|durB|durBt}n|du rWd| d d	 v rW| d d	 d }n|du r]t} fdd d D }dgt| }dgt| }	| d  d ||	| d  d |d	}
|dur|dur|
||d |
S )r   NzmConfiguring AutoencoderKL with the `image_size` argumentis deprecated and will be ignored in future versions.r   r   r   r   edm_stdr   r   r   r   scale_factorc                    r   )chru   r   r  ru   rv   r     r   z8create_vae_diffusers_config_from_ldm.<locals>.<listcomp>r   DownEncoderBlock2DUpDecoderBlock2Dr   out_ch
z_channelsr   )	r   r   r  r   r  r  latent_channelsr  scaling_factor)latents_meanlatents_std)r   r   PLAYGROUND_VAE_SCALING_FACTORLDM_VAE_DEFAULT_SCALING_FACTORr  update)r   r   r   r.  r  r/  r0  r  r   r  r  ru   r(  rv   $create_vae_diffusers_config_from_ldm  sB   
r4  c                 C   sh   | D ]/}| dd dd dd dd d	d
 dd}|r*| |d |d }||||< qd S )Nzin_layers.0norm1zin_layers.2conv1zout_layers.0norm2zout_layers.3conv2zemb_layers.1time_emb_projskip_connectionconv_shortcutoldnewr   r   ldm_keysnew_checkpointr   mappingldm_keydiffusers_keyru   ru   rv   #update_unet_resnet_ldm_to_diffusers  s   
rE  c                 C   s0   | D ]}| |d |d }||||< qd S )Nr<  r=  r>  r?  ru   ru   rv   &update_unet_attention_ldm_to_diffusers  s   rF  c                 C   s8   | D ]}| |d |d  dd}||||< qd S )Nr<  r=  nin_shortcutr;  r>  )r   rA  r   rB  rC  rD  ru   ru   rv   "update_vae_resnet_ldm_to_diffusers  s   rH  c                 C   s   | D ]l}| |d |d  dd dd dd d	d
 dd dd dd dd dd dd}||||< || j}t|dkrX|| d d d d df ||< qt|dkrn|| d d d d ddf ||< qd S )Nr<  r=  znorm.weightzgroup_norm.weightz	norm.biaszgroup_norm.biaszq.weightto_q.weightzq.bias	to_q.biaszk.weightto_k.weightzk.bias	to_k.biaszv.weightto_v.weightzv.bias	to_v.biasproj_out.weightto_out.0.weightproj_out.biasto_out.0.biasr   r      )r   r   r   r  )r   rA  r   rB  rC  rD  r   ru   ru   rv   &update_vae_attentions_ldm_to_diffusers!  s*   
  rT  c                 K   s  d| v }|ri }|   D ]}|dr:| | dd}|d ||dd< |d ||dd< |d	 ||dd
< q|drf| | dd}|d ||dd< |d ||dd< |d	 ||dd< q|drx| | }|||dd< q|dr| | }|||dd< q| | ||< q|S i }|   D ]}|dr| | dd}|d ||dd< |d ||dd< |d	 ||dd
< q|dr| | dd}|d ||dd< |d ||dd< |d	 ||dd< q|dr| | }|||dd< q|dr| | }|||dd< q|dr,| | }|||dd< q|dr?| | }|||dd< q| | ||< q|S )Nr*   in_proj_weightr   r   zattn.in_proj_weightrI  r]   rK  r   rM  in_proj_biaszattn.in_proj_biasrJ  rL  rN  zout_proj.weightzattn.out_proj.weightrP  zout_proj.biaszattn.out_proj.biasrR  zclip_mapper.weightzclip_txt_pooled_mapper.weightzclip_mapper.biaszclip_txt_pooled_mapper.bias)r   endswithchunkr   )r   r!  
is_stage_cr   r   weightsru   ru   rv   4convert_stable_cascade_unet_single_file_to_diffusers;  s`   





r[  c              
      s  i t |  }t}tdd |D dkrE|rEtd td |D ] }|drCdd|d	d
d  }| 	||
|d< q#n%tdd |D dkrUtd |D ]}||ri| 	||
|d< qWi }td d }	|	 D ]\}
}|vrqv| ||
< qvd|v r|d dv rtd d }| D ]
\}
}| ||
< qd|v r|d dkrtd d }| D ]
\}
}| ||
< qd|v r|d durdv r؈d |d< tdd D }fddt|D }tdd D }fddt|D }tdd D }fddt|D }td
|D ]  d
 |d d
  } d
 |d d
  } fd d!|  D }t||d"  d#d$| d%| d& d"  d'v ru	d"  d'|d$| d(< 	d"  d)|d$| d*<  fd+d!|  D }|rt||d"  d,d$| d-| d& q| D ]9}t|d
 d.}
|d/ d.krt|| |d0| d1|
 d&d2 qt|| |d0| d3|
 d&d2 qt|D ]  |d d
  } |d d
  } fd4d!|  D }t||d5  d#d6| d%| d&  fd7d!|  D }|r.t||d5  d,d6| d-| d& d5  d8v rSd5  d8 |d6| d9< d5  d: |d6| d;< d5  d<v rxd5  d< |d6| d9< d5  d= |d6| d;< q|S )>zN
    Takes a state dict and a config, and returns a converted checkpoint.
    c                 s       | ]}| d V  qdS 	model_emaN
startswithr   kru   ru   rv   	<genexpr>}      z.convert_ldm_unet_checkpoint.<locals>.<genexpr>d   z,Checkpoint has both EMA and non-EMA weights.zIn this conversion only the EMA weights are extracted. If you want to instead extract the non-EMA weights (useful to continue fine-tuning), please make sure to remove the `--extract_ema` flag.zmodel.diffusion_modelz
model_ema.r   r   r]   Nc                 s   r\  r]  r_  ra  ru   ru   rv   rc    rd  zIn this conversion only the non-EMA weights are extracted. If you want to instead extract the EMA weights (usually better for inference), please make sure to add the `--extract_ema` flag.rU   rJ   rK   )timestepr   rL   r   r  zlabel_emb.weightzclass_embedding.weightc                 S   ,   h | ]}d |v rd |ddd qS input_blocksr   Nr   r   r   r   layerru   ru   rv   	<setcomp>     , z.convert_ldm_unet_checkpoint.<locals>.<setcomp>c                        i | ]   fd dD qS )c                       g | ]}d   |v r|qS input_blocks.ru   r   r   layer_idru   rv   r         :convert_ldm_unet_checkpoint.<locals>.<dictcomp>.<listcomp>ru   r   unet_state_dictrt  rv   
<dictcomp>      z/convert_ldm_unet_checkpoint.<locals>.<dictcomp>c                 S   rg  middle_blockr   Nr   rj  rk  ru   ru   rv   rm    rn  c                    ro  )c                    rp  middle_block.ru   rs  rt  ru   rv   r     rv  rw  ru   rx  ry  rt  rv   r{    r|  c                 S   rg  )output_blocksr   Nr   rj  rk  ru   ru   rv   rm    rn  c                    ro  )c                    rp  )output_blocks.ru   rs  rt  ru   rv   r     rv  rw  ru   rx  ry  rt  rv   r{    r|  r  c                    0   g | ]}d   d|v rd   d|vr|qS rr  .0.0.opru   rs  r  ru   rv   r         *z/convert_ldm_unet_checkpoint.<locals>.<listcomp>rr  r  down_blocks.	.resnets.r<  r=  .0.op.weight.downsamplers.0.conv.weight
.0.op.bias.downsamplers.0.conv.biasc                        g | ]}d   d|v r|qS rr  .1ru   rs  r  ru   rv   r          r  .attentions.r   r   r  mid_block.resnets.rB  mid_block.attentions.c                    r  )r  r  r  ru   rs  r  ru   rv   r     r  r  
up_blocks.c                    r  )r  r  z.1.convru   rs  r  ru   rv   r     r  z.1.conv.weight.upsamplers.0.conv.weightz.1.conv.bias.upsamplers.0.conv.biasz.2.conv.weightz.2.conv.bias)r   r   LDM_UNET_KEYsumr   r   r`  r   r   r   r   DIFFUSERS_TO_LDM_MAPPINGitemsr  r  rE  rF  max)r   r  extract_emar!  r   unet_keyr   flat_ema_keyrA  ldm_unet_keysrD  rC  class_embed_keysaddition_embed_keysnum_input_blocksri  num_middle_blocksmiddle_blocksnum_output_blocksr  block_idlayer_in_block_idresnets
attentionsru   )r  rz  rv   convert_ldm_unet_checkpoints  s  
















r  c              
      s6  d| v r|  ni  t |  }t}|D ]}||r%| | ||d< qi }td d }| D ]\}}	|	 vr;q2 |	 ||< q2tdd  D }
 fddt	|
D }t	d	|
D ]|d	 |d
 d	  }d	 |d
 d	  }fdd| D }t
|| d dd| d| d d d v r d d|d| d<  d d|d| d< fdd| D }|rt|| d dd| d| d q[t	|
D ]  d d|d d<  d d|d d< qtdd  D } fd dt	|D }| D ]9}t|d	 d!}|d" d!kr:t
|| | d#| d$| dd% qt|| | d#| d&| dd% q d'|d(<  d)|d*< d+d  D }t|}t	d	|d	 D ])}|d	 }d"| } d,| d|d-| d<  d,| d|d-| d< qo|S ).NrC   r   r   rJ   c                 S   rg  rh  rj  rk  ru   ru   rv   rm  5  rn  z0convert_controlnet_checkpoint.<locals>.<setcomp>c                    ro  )c                    rp  rq  ru   rs  rt  ru   rv   r   8  rv  <convert_controlnet_checkpoint.<locals>.<dictcomp>.<listcomp>ru   rx  controlnet_state_dictrt  rv   r{  7  r|  z1convert_controlnet_checkpoint.<locals>.<dictcomp>r]   r  c                    r  r  ru   rs  r  ru   rv   r   A  r  z1convert_controlnet_checkpoint.<locals>.<listcomp>rr  r  r  r  r  r  r  r  r  c                    r  r  ru   rs  r  ru   rv   r   S  r  r  r  zzero_convs.z	.0.weightzcontrolnet_down_blocks.z.weightz.0.biasz.biasc                 S   rg  r}  rj  rk  ru   ru   rv   rm  c  rn  c                    ro  )c                    rp  r  ru   rs  rt  ru   rv   r   f  rv  r  ru   rx  r  rt  rv   r{  e  r|  r   r   r  r  r  r  zmiddle_block_out.0.weightzcontrolnet_mid_block.weightzmiddle_block_out.0.biaszcontrolnet_mid_block.biasc                 S   s<   h | ]}d |v rd|vrd|vrd |ddd qS )input_hint_blockzinput_hint_block.0zinput_hint_block.14r   Nr   rj  rk  ru   ru   rv   rm    s
    zinput_hint_block.z!controlnet_cond_embedding.blocks.)r   r   LDM_CONTROLNET_KEYr`  r   r   r  r  r  r  rE  rF  r  )r   r  r!  r   controlnet_keyr   rA  ldm_controlnet_keysrD  rC  r  ri  r  r  r  r  r  r  cond_embedding_blocksnum_cond_embedding_blocksidxdiffusers_idxcond_block_idru   )r  r  rv   convert_controlnet_checkpoint  s   




 




r  c              	      s  i t |  }tdd |D rtnd}|D ]}||r)| |||d< qi }td }| D ]\}}|vr=q4| ||< q4t	|d }	fddt
|	D }
t
|	D ]Ffdd	|
 D }t||d
 dd ddd d dv rd d|d d< d d|d d< qYdd	 D }d}t
d|d D ]fdd	|D }t||d dd  dd qdd	 D }t||dddd t	|d }fd dt
|D }t
|D ]K|d    fd!d	|  D }t||d"  dd# ddd d$  d%v rAd$  d% |d& d'< d$  d( |d& d)< qd*d	 D }d}t
d|d D ]fd+d	|D }t||d dd  dd qRd,d	 D }t||dddd t| |S )-Nc                 s   s    | ]}| tV  qd S rn   )r`  LDM_VAE_KEYra  ru   ru   rv   rc    rd  z-convert_ldm_vae_checkpoint.<locals>.<genexpr>r   rV   r   c                    ro  )c                    rp  )down.ru   rs  rt  ru   rv   r     rv  9convert_ldm_vae_checkpoint.<locals>.<dictcomp>.<listcomp>ru   rx  vae_state_dictrt  rv   r{        z.convert_ldm_vae_checkpoint.<locals>.<dictcomp>c                    .   g | ]}d   |v rd   d|vr|qS )r  z.downsampleru   rs  r  ru   rv   r     s   . z.convert_ldm_vae_checkpoint.<locals>.<listcomp>r  z.blockr  z.resnetsr  r  zencoder.down.z.downsample.conv.weightzencoder.down_blocks.r  z.downsample.conv.biasr  c                 S      g | ]}d |v r|qS )zencoder.mid.blockru   rs  ru   ru   rv   r     r   r   r]   c                    rp  )zencoder.mid.block_ru   rs  r  ru   rv   r     rv  z
mid.block_r  c                 S   r  )zencoder.mid.attnru   rs  ru   ru   rv   r     r   z
mid.attn_1zmid_block.attentions.0r  c                    ro  )c                    rp  )up.ru   rs  rt  ru   rv   r     rv  r  ru   rx  r  rt  rv   r{    r  c                    r  )r  z	.upsampleru   rs  )r  ru   rv   r     s    (r  r  zdecoder.up.z.upsample.conv.weightzdecoder.up_blocks.r  z.upsample.conv.biasr  c                 S   r  )zdecoder.mid.blockru   rs  ru   ru   rv   r     r   c                    rp  )zdecoder.mid.block_ru   rs  r  ru   rv   r     rv  c                 S   r  )zdecoder.mid.attnru   rs  ru   ru   rv   r     r   )r   r   r   r  r`  r   r   r  r  r  r  rH  rT  r   )r   r  r   vae_keyr   rA  vae_diffusers_ldm_maprD  rC  num_down_blocksdown_blocksr  mid_resnetsnum_mid_res_blocksmid_attentionsnum_up_blocks	up_blocksru   )r  r  r  rv   convert_ldm_vae_checkpoint  s   








r  c                 C   sh   t |  }i }g }|t |r|| |D ]}|D ]}||r0||d}| |||< qq|S )Nr   )r   r   extendLDM_CLIP_PREFIX_TO_REMOVEr  r`  r   r   )r   remove_prefixr   text_model_dictremove_prefixesr   r   rD  ru   ru   rv   convert_ldm_clip_checkpoint  s   


r  cond_stage_model.model.c                 C   s4  i }|d }||v rt || jd }nt| jdr| jj}nt}t| }t}t	d d }|
 D ]'\}	}
||
 }
|
|vr@q3|
|v rEq3|
drT||
 j ||	< q3||
 ||	< q3|D ]}||v rdq]||d slq]||d d}	t	d d }|
 D ]\}}|	||d	dd
d}	q~|d	r||}|d |d d f   ||	d < |||d d d f   ||	d < ||d d d d f   ||	d < q]|d
r||}|d |   ||	d < |||d    ||	d < ||d d    ||	d < q]||||	< q]|S )NrN   r   projection_dimrW   rJ   ztransformer.r   rT   z.in_proj_weightz.in_proj_biasz.q_proj.weightr   z.k_proj.weightz.v_proj.weightz.q_proj.biasz.k_proj.biasz.v_proj.bias)r  r   hasattrr  r  !LDM_OPEN_CLIP_TEXT_PROJECTION_DIMr   r    SD_2_TEXT_ENCODER_KEYS_TO_IGNOREr  r  rW  T
contiguousr`  r   r   clonedetach)
text_modelr   r   r  text_proj_keytext_proj_dimr   keys_to_ignoreopenclip_diffusers_ldm_maprD  rC  r    transformer_diffusers_to_ldm_mapnew_keyold_keyweight_valueru   ru   rv   convert_open_clip_checkpoint  sV   



$
*

"r  r   c                    s  |rd|i}nt |}|r8td t|st|r#d}||d< d}nt|r0d}||d< d}nd}||d< d}| jjdi |||d}t rKt	nt
}	|	  | |}
W d    n1 s_w   Y  |
jjjjjd	 }t|rvt|}n~t|r|td
  jd	 |krt|}njt|r|td  jd	 |krt|d}t||d< nNt|rd}t|
||d}n@t|r|td  jd	 |krd}t|
||d}n't|rd}t|
||d}nt|r|td  jd	 |krt|d}ntdt rt|
||d}n	|
j|dd\}}|
jd ur|
jD ]  fdd|D }qt|dkr5td| j dd|g  |d ur?|
 | |
!  |
S ) Nr3   zDetected legacy CLIP loading behavior. Please run `from_single_file` with `local_files_only=False once to update the local cache directory with the necessary CLIP model config files. Attempting to load CLIP model from legacy cache directory.openai/clip-vit-large-patch14r   stabilityai/stable-diffusion-2text_encoder(laion/CLIP-ViT-bigG-14-laion2B-39B-b160kr8   r   r   r"   r#   z!text_encoders.clip_l.transformer.rO   r  )r   r%   zconditioner.embedders.1.model.rl   r'   z!text_encoders.clip_g.transformer.zDThe provided checkpoint does not seem to contain a valid CLIP model.dtypeF)strictc                        g | ]}t  |d u r|qS rn   r   searchra  patru   rv   r     r  z8create_diffusers_clip_model_from_ldm.<locals>.<listcomp>r   ESome weights of the model checkpoint were not used when initializing : 
 , ru   )"r   r   r   r   r   r   config_classfrom_pretrainedr   r   r   r  
embeddingsposition_embeddingweightr   r  r   r   torcheyer  r   r   r   r   r   r   "_keys_to_ignore_on_load_unexpectedr  rw   r   toeval)clsr   r8   r  torch_dtyper   is_legacy_loadingclip_configmodel_configctxr   position_embedding_dimdiffusers_format_checkpointr   unexpected_keys_ru   r  rv   $create_diffusers_clip_model_from_ldmN  s   	







r  c                 K   sb  | dd }| dd }|d urd}tdd| |d ur$d}tdd| t}t|d}	d|v r3|d nd }
|rBt|d d	 d
d}nd}||d< |	dkrY|d u rX|
dkrVdnd}n|p\d}||d< |	dv rhd}n4|	dkrod}n-|r|d d	  d}|d d	  d}nd}d}||d< ||d< d|d< d|d< d|d< |d kr| ddd!d"ddd d#d$S |d u r| |S |d%krd"|d&< t|}|S |d'krt|}|S |d(krt|}|S |dkrt	|}|S |d)krt
|}|S |d*krt|}|S |d+kr	t|}|S |dkr)d,d-dd.d"ddd/d0d1d2d3d4d5dd6}td9i |}|S td7| d8):Nscheduler_typerd   ziPlease pass an instance of a Scheduler object directly to the `scheduler` argument in `from_single_file`.r   zPlease configure an instance of a Scheduler with the appropriate `prediction_type` and pass the object directly to the `scheduler` argument in `from_single_file`.r   global_stepr   r   	timestepsrZ   rc   r   iY r[   v_prediction)r   r   euler
playgroundedm_dpm_solver_multisteplinear_start
linear_endg{Gz?g(\µ?r`   ra   rX   r_   Fclip_samplerf   low_res_schedulerg-C6?Tfixed_small)ra   r_   r`   r  rc   rd   trained_betasvariance_typepndmrg   lmsheunzeuler-ancestraldpmddimzdpmsolver++gףp=
?zerog      @r\   rj   g      T@gMb`?r   midpoint)algorithm_typedynamic_thresholding_ratioeuler_at_finalfinal_sigmas_typelower_order_finalrc   rd   rhore   
sigma_data	sigma_max	sigma_minsolver_ordersolver_typethresholdingzScheduler of type z doesn't exist!ru   )r   r   SCHEDULER_DEFAULT_CONFIGr   getattrfrom_configr   r   r   r   r
   r   r   r	   r   )r  r   component_namer   r!  r  rd   r  scheduler_configr   r  rc   r`   ra   	schedulerru   ru   rv   _legacy_load_scheduler  s   


+
(
%
"




r4  c                 C   s   |rd|i}nt |}t|st|rd}||d< d}nt|r)d}||d< d}nd}||d< d}| jdi |||d}|S )	Nr3   r  r   r  	tokenizerr  r  ru   )r   r   r   r   r  )r  r   r  r   r  r8   r5  ru   ru   rv   _legacy_load_clip_tokenizer>  s    
r6  c                 C   s6   ddl m} tjd| |d}|jd| |d}||dS )Nr   )StableDiffusionSafetyCheckerz'CompVis/stable-diffusion-safety-checker)r   r  )safety_checkerfeature_extractor))pipelines.stable_diffusion.safety_checkerr7  r   r  )r   r  r7  r9  r8  ru   ru   rv   _legacy_load_safety_checkerX  s   
r;  c                 C   s(   | j ddd\}}tj||gdd}|S Nr   r   dimrX  r  cat)r  r>  shiftscale
new_weightru   ru   rv   swap_scale_shiftj     rD  c                 K   st  i }t |  }|D ]}d|v r| || |dd< q
t tdd | D d d }d}| d|d	< | d
|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< t|D ]}tj| d | d!d"d#d$\}}	}
tj| d | d%d"d#d$\}}}tj| d | d&d"d#d$\}}}tj| d | d'd"d#d$\}}}t|g|d(| d)< t|g|d(| d*< t|	g|d(| d+< t|g|d(| d,< t|
g|d(| d-< t|g|d(| d.< t|g|d(| d/< t|g|d(| d0< t|g|d(| d1< t|g|d(| d2< t|g|d(| d3< t|g|d(| d4< | d | d5|d(| d6< | d | d7|d(| d8< ||d ks| d | d9|d(| d:< | d | d;|d(| d<< | d | d=|d(| d>< | d | d?|d(| d@< ||d ks| d | dA|d(| dB< | d | dC|d(| dD< n&t	| d | dA|d$|d(| dB< t	| d | dC|d$|d(| dD< | d | dE|d(| dF< | d | dG|d(| dH< | d | dI|d(| dJ< | d | dK|d(| dL< ||d ks| d | dM|d(| dN< | d | dO|d(| dP< | d | dQ|d(| dR< | d | dS|d(| dT< q| dU|dV< | dW|dX< t	| dY|d$|dZ< t	| d[|d$|d\< |S )]Nrk   r   c                 s   ,    | ]}d |v rt |ddd V  qdS )joint_blocksr   r   r]   Nr  r   ra  ru   ru   rv   rc  w     * zBconvert_sd3_transformer_checkpoint_to_diffusers.<locals>.<genexpr>r   r]   r   	pos_embedzpos_embed.pos_embedzx_embedder.proj.weightzpos_embed.proj.weightzx_embedder.proj.biaszpos_embed.proj.biaszt_embedder.mlp.0.weight1time_text_embed.timestep_embedder.linear_1.weightzt_embedder.mlp.0.bias/time_text_embed.timestep_embedder.linear_1.biaszt_embedder.mlp.2.weight1time_text_embed.timestep_embedder.linear_2.weightzt_embedder.mlp.2.bias/time_text_embed.timestep_embedder.linear_2.biascontext_embedder.weightcontext_embedder.biaszy_embedder.mlp.0.weight-time_text_embed.text_embedder.linear_1.weightzy_embedder.mlp.0.bias+time_text_embed.text_embedder.linear_1.biaszy_embedder.mlp.2.weight-time_text_embed.text_embedder.linear_2.weightzy_embedder.mlp.2.bias+time_text_embed.text_embedder.linear_2.biaszjoint_blocks.z.x_block.attn.qkv.weightr   r   r=  z.context_block.attn.qkv.weightz.x_block.attn.qkv.biasz.context_block.attn.qkv.biastransformer_blocks.z.attn.to_q.weightz.attn.to_q.biasz.attn.to_k.weightz.attn.to_k.biasz.attn.to_v.weightz.attn.to_v.biasz.attn.add_q_proj.weightz.attn.add_q_proj.biasz.attn.add_k_proj.weightz.attn.add_k_proj.biasz.attn.add_v_proj.weightz.attn.add_v_proj.biasz.x_block.attn.proj.weightz.attn.to_out.0.weightz.x_block.attn.proj.biasz.attn.to_out.0.biasz.context_block.attn.proj.weightz.attn.to_add_out.weightz.context_block.attn.proj.biasz.attn.to_add_out.biasz".x_block.adaLN_modulation.1.weightz.norm1.linear.weightz .x_block.adaLN_modulation.1.biasz.norm1.linear.biasz(.context_block.adaLN_modulation.1.weightz.norm1_context.linear.weightz&.context_block.adaLN_modulation.1.biasz.norm1_context.linear.biasz.x_block.mlp.fc1.weightz.ff.net.0.proj.weightz.x_block.mlp.fc1.biasz.ff.net.0.proj.biasz.x_block.mlp.fc2.weightz.ff.net.2.weightz.x_block.mlp.fc2.biasz.ff.net.2.biasz.context_block.mlp.fc1.weightz.ff_context.net.0.proj.weightz.context_block.mlp.fc1.biasz.ff_context.net.0.proj.biasz.context_block.mlp.fc2.weightz.ff_context.net.2.weightz.context_block.mlp.fc2.biasz.ff_context.net.2.biasfinal_layer.linear.weightrO  final_layer.linear.biasrQ  %final_layer.adaLN_modulation.1.weightnorm_out.linear.weight#final_layer.adaLN_modulation.1.biasnorm_out.linear.bias)
r   r   popr   setr  r  rX  r@  rD  )r   r!  converted_state_dictr   rb  
num_layerscaption_projection_dimr  sample_qsample_ksample_v	context_q	context_k	context_vsample_q_biassample_k_biassample_v_biascontext_q_biascontext_k_biascontext_v_biasru   ru   rv   /convert_sd3_transformer_checkpoint_to_diffusersp  s   



















rm  c                 C   s   d| v rdS dS )Nz-text_encoders.t5xxl.transformer.shared.weightTFru   r   ru   ru   rv   is_t5_in_single_file   s   rn  c                 C   sR   t |  }i }dg}|D ]}|D ]}||r%||d}| |||< qq|S )Nz text_encoders.t5xxl.transformer.r   )r   r   r`  r   r   )r   r   r  r  r   r   rD  ru   ru   rv   &convert_sd3_t5_checkpoint_to_diffusers  s   
ro  c                    sR  |rd|i}nt |}| jjdi |||d}t rtnt}|  | |}W d    n1 s2w   Y  t|}	t rpt||	|d}
|jd urY|jD ]fdd|
D }
qMt	|
dkrot
d| j dd	|
g  n||	 | jd uo~|tjk}|r|j}ng }|d ur| D ]\ }t fd
d|D r|jtj|_q|S )Nr3   r  r  c                    r  rn   r  ra  r  ru   rv   r   .  r  z=create_diffusers_t5_model_from_checkpoint.<locals>.<listcomp>r   r  r  r  c                 3   s    | ]
}|  d v V  qdS )r   N)r   )r   module_to_keep_in_fp32)r   ru   rv   rc  @  s    z<create_diffusers_t5_model_from_checkpoint.<locals>.<genexpr>ru   )r   r  r  r   r   r   ro  r   r  r  r   r   rw   r   r   _keep_in_fp32_modulesr  float16named_parametersr   datar   float32)r  r   r8   r  r  r   r  r  r   r	  r
  use_keep_in_fp32_moduleskeep_in_fp32_modulesparamru   )r   r  rv   )create_diffusers_t5_model_from_checkpoint  s<   




ry  c                 K   s\   i }|   D ]%\}}d|v rq|||dddddddd	d
ddd< q|S )Npos_encoderz.norms.0z.norm1z.norms.1z.norm2z.ff_normz.norm3z.attention_blocks.0z.attn1z.attention_blocks.1z.attn2z.temporal_transformerr   )r  r   )r   r!  r^  rb  vru   ru   rv   +convert_animatediff_checkpoint_to_diffusersG  s   

	r|  c           !      K   sF  i }t tdd | D d d }t tdd | D d d }d}d}dd	 }| d
|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< | d|d< tdd | D }|r| d|d< | d|d< | d|d < | d!|d"< | d#|d$< | d%|d&< | d'|d(< | d)|d*< t|D ]}	d+|	 d,}
| d-|	 d.||
 d/< | d-|	 d0||
 d1< | d-|	 d2||
 d3< | d-|	 d4||
 d5< tj| d-|	 d6d7d8d9\}}}tj| d-|	 d:d7d8d9\}}}tj| d-|	 d;d7d8d9\}}}tj| d-|	 d<d7d8d9\}}}t|g||
 d=< t|g||
 d>< t|g||
 d?< t|g||
 d@< t|g||
 dA< t|g||
 dB< t|g||
 dC< t|g||
 dD< t|g||
 dE< t|g||
 dF< t|g||
 dG< t|g||
 dH< | d-|	 dI||
 dJ< | d-|	 dK||
 dL< | d-|	 dM||
 dN< | d-|	 dO||
 dP< | d-|	 dQ||
 dR< | d-|	 dS||
 dT< | d-|	 dU||
 dV< | d-|	 dW||
 dX< | d-|	 dY||
 dZ< | d-|	 d[||
 d\< | d-|	 d]||
 d^< | d-|	 d_||
 d`< | d-|	 da||
 db< | d-|	 dc||
 dd< | d-|	 de||
 df< | d-|	 dg||
 dh< qt|D ]}	di|	 d,}
| dj|	 dk||
 dl< | dj|	 dm||
 dn< t|| }||||f}tj	| dj|	 do|d8d9\}}}}tj	| dj|	 dp|d8d9\}}}} t|g||
 d=< t|g||
 d>< t|g||
 d?< t|g||
 d@< t|g||
 dA< t|g||
 dB< t|g||
 dq< t| g||
 dr< | dj|	 ds||
 dJ< | dj|	 dt||
 dL< | dj|	 du||
 dv< | dj|	 dw||
 dx< q| dy|dv< | dz|dx< || d{|d|< || d}|d~< |S )Nc                 s   rF  )double_blocks.r   r   r]   NrH  ra  ru   ru   rv   rc  ]  rI  zCconvert_flux_transformer_checkpoint_to_diffusers.<locals>.<genexpr>r   r]   c                 s   rF  )single_blocks.r   r   r]   NrH  ra  ru   ru   rv   rc  ^  rI  g      @i   c                 S   s(   | j ddd\}}tj||gdd}|S r<  r?  )r  rA  rB  rC  ru   ru   rv   rD  d  rE  zJconvert_flux_transformer_checkpoint_to_diffusers.<locals>.swap_scale_shiftztime_in.in_layer.weightrK  ztime_in.in_layer.biasrL  ztime_in.out_layer.weightrM  ztime_in.out_layer.biasrN  zvector_in.in_layer.weightrQ  zvector_in.in_layer.biasrR  zvector_in.out_layer.weightrS  zvector_in.out_layer.biasrT  c                 s   s    | ]}d |v V  qdS )guidanceNru   ra  ru   ru   rv   rc  |  s    zguidance_in.in_layer.weightz1time_text_embed.guidance_embedder.linear_1.weightr   z/time_text_embed.guidance_embedder.linear_1.biaszguidance_in.out_layer.weightz1time_text_embed.guidance_embedder.linear_2.weightzguidance_in.out_layer.biasz/time_text_embed.guidance_embedder.linear_2.biasztxt_in.weightrO  ztxt_in.biasrP  zimg_in.weightzx_embedder.weightzimg_in.biaszx_embedder.biasrU  r   r}  z.img_mod.lin.weightznorm1.linear.weightz.img_mod.lin.biasznorm1.linear.biasz.txt_mod.lin.weightznorm1_context.linear.weightz.txt_mod.lin.biasznorm1_context.linear.biasz.img_attn.qkv.weightr   r   r=  z.txt_attn.qkv.weightz.img_attn.qkv.biasz.txt_attn.qkv.biaszattn.to_q.weightzattn.to_q.biaszattn.to_k.weightzattn.to_k.biaszattn.to_v.weightzattn.to_v.biaszattn.add_q_proj.weightzattn.add_q_proj.biaszattn.add_k_proj.weightzattn.add_k_proj.biaszattn.add_v_proj.weightzattn.add_v_proj.biasz.img_attn.norm.query_norm.scalezattn.norm_q.weightz.img_attn.norm.key_norm.scalezattn.norm_k.weightz.txt_attn.norm.query_norm.scalezattn.norm_added_q.weightz.txt_attn.norm.key_norm.scalezattn.norm_added_k.weightz.img_mlp.0.weightzff.net.0.proj.weightz.img_mlp.0.biaszff.net.0.proj.biasz.img_mlp.2.weightzff.net.2.weightz.img_mlp.2.biaszff.net.2.biasz.txt_mlp.0.weightzff_context.net.0.proj.weightz.txt_mlp.0.biaszff_context.net.0.proj.biasz.txt_mlp.2.weightzff_context.net.2.weightz.txt_mlp.2.biaszff_context.net.2.biasz.img_attn.proj.weightzattn.to_out.0.weightz.img_attn.proj.biaszattn.to_out.0.biasz.txt_attn.proj.weightzattn.to_add_out.weightz.txt_attn.proj.biaszattn.to_add_out.biaszsingle_transformer_blocks.r~  z.modulation.lin.weightznorm.linear.weightz.modulation.lin.biasznorm.linear.biasz.linear1.weightz.linear1.biaszproj_mlp.weightzproj_mlp.biasz.norm.query_norm.scalez.norm.key_norm.scalez.linear2.weightrO  z.linear2.biasrQ  rV  rW  rX  rY  rZ  r[  )
r   r]  r\  r   r  r  rX  r@  r  r   )!r   r!  r^  r_  num_single_layers	mlp_ratio	inner_dimrD  has_guidancer  block_prefixra  rb  rc  rd  re  rf  rg  rh  ri  rj  rk  rl  mlp_hidden_dim
split_sizeqrb  r{  mlpq_biask_biasv_biasmlp_biasru   ru   rv   0convert_flux_transformer_checkpoint_to_diffusersZ  s6  



$














&

 r  )FNNNNN)Frn   )NNN)NN)r  )r   NNNF)NF)r   NNN)b__doc__r   r   
contextlibr   ior   urllib.parser   r   r  r   models.modeling_utilsr   
schedulersr   r   r	   r
   r   r   r   r   utilsr   r   r   r   r   r   utils.hub_utilsr   transformersr   
accelerater   r   
get_loggerrw   r   r   r   r   r  r  r.  r  r2  r1  r  r  r  OPEN_CLIP_PREFIXr  r   	Exceptionrm   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r$  r4  rE  rF  rH  rT  r[  r  r  r  r  r  r  r4  r6  r;  rD  rm  rn  ro  ry  r|  r  ru   ru   ru   rv   <module>   sh  ( 

	
	

'	
]"U  38 (|dEr  1