o
    }oi{,                  	   @   s8  d dl Z d dlZd dlmZ dd Zdd Zi di dd	d
dddddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.d	dd!d#ddd/d0d1d2d3d4d4d5d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSZdVdTdUZdS )W    N)	load_filec                 C   s   | j }| j}|| }| j}| j }|| }||f}	||f}
|j|	 }|j|
 }|j|
 }g }t|D ]3}|||| |d | d d f  ||||d d d f  ||||d d d f  q1t|}|||d|   g}|S )N      )	num_attention_headsnum_query_groupshidden_sizeviewrangeappendtorchcatreshape)transformer_configqbkbvbhead_numr   heads_per_groupr   	head_sizenew_q_bias_tensor_shapenew_kv_bias_tensor_shape
qkv_bias_liqkv_bias r   h/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/utils/flux_ckpt_converter.py_import_qkv_bias   s&   


& 
r   c                 C   s  | j }| j}|| }| j}| j }|| }| }	||f|	dd   }
||f|	dd   }|j|
 }|j| }|j| }g }t|D ]<}|||| |d | d d d d f  ||||d d d d d f  ||||d d d d d f  qAt|}|j	dksJ |j
|j
d |d | ksJ |j
|j
d |ksJ |j
|j
d |	d ksJ |j
|||d|   |g}|S )Nr      r   r   )r   r   r   sizer   r	   r
   r   r   ndimshaper   )r   qkvr   r   r   r   r   old_tensor_shapenew_q_tensor_shapenew_kv_tensor_shapeqkv_weights_lr   qkv_weightsr   r   r   _import_qkv0   s0   


,$&
 r)   double_blocksznorm1.linear.weightzadaln.adaLN_modulation.1.weightznorm1.linear.biaszadaln.adaLN_modulation.1.biasznorm1_context.linear.weightz'adaln_context.adaLN_modulation.1.weightznorm1_context.linear.biasz%adaln_context.adaLN_modulation.1.biasattn.norm_q.weightz!self_attention.q_layernorm.weightattn.norm_k.weightz!self_attention.k_layernorm.weightzattn.norm_added_q.weightz'self_attention.added_q_layernorm.weightzattn.norm_added_k.weightz'self_attention.added_k_layernorm.weightzattn.to_out.0.weightz!self_attention.linear_proj.weightzattn.to_out.0.biaszself_attention.linear_proj.biaszattn.to_add_out.weightz'self_attention.added_linear_proj.weightzattn.to_add_out.biasz%self_attention.added_linear_proj.biaszff.net.0.proj.weightzmlp.linear_fc1.weightzff.net.0.proj.biaszmlp.linear_fc1.biaszff.net.2.weightzmlp.linear_fc2.weightzff.net.2.biaszmlp.linear_fc2.biaszff_context.net.0.proj.weightzcontext_mlp.linear_fc1.weightzcontext_mlp.linear_fc1.biaszcontext_mlp.linear_fc2.weightzcontext_mlp.linear_fc2.bias)zff_context.net.0.proj.biaszff_context.net.2.weightzff_context.net.2.biassingle_blocks)znorm.linear.weightznorm.linear.biaszproj_mlp.weightzproj_mlp.biasr+   r,   znorm_out.linear.biasz norm_out.adaLN_modulation.1.biasznorm_out.linear.weightz"norm_out.adaLN_modulation.1.weightzproj_out.biaszproj_out.weightz/time_text_embed.guidance_embedder.linear_1.biasz guidance_embedding.in_layer.biasz1time_text_embed.guidance_embedder.linear_1.weightz"guidance_embedding.in_layer.weightz/time_text_embed.guidance_embedder.linear_2.biasz!guidance_embedding.out_layer.biasz1time_text_embed.guidance_embedder.linear_2.weightz#guidance_embedding.out_layer.weightzx_embedder.biaszimg_embed.biaszx_embedder.weightzimg_embed.weightz/time_text_embed.timestep_embedder.linear_1.biasz.timestep_embedding.time_embedder.in_layer.biasz1time_text_embed.timestep_embedder.linear_1.weightz0timestep_embedding.time_embedder.in_layer.weightz/time_text_embed.timestep_embedder.linear_2.biasz/timestep_embedding.time_embedder.out_layer.biasz1time_text_embed.timestep_embedder.linear_2.weightz1timestep_embedding.time_embedder.out_layer.weightzcontext_embedder.biasztxt_embed.biasztxt_embed.weightzvector_embedding.in_layer.biasz vector_embedding.in_layer.weightzvector_embedding.out_layer.biasz!vector_embedding.out_layer.weightcontrolnet_x_embedder.weightcontrolnet_x_embedder.bias)zcontext_embedder.weightz+time_text_embed.text_embedder.linear_1.biasz-time_text_embed.text_embedder.linear_1.weightz+time_text_embed.text_embedder.linear_2.biasz-time_text_embed.text_embedder.linear_2.weightr.   r/   c                    sP  i }t j| r&t | }|D ]}|dr$tt j| |}|| qnt j| r1t| }nt	di }d}d}|
 D ]\}	}
d|	v sOd|	v sOd|	v rPq?d|	v s\d|	v s\d	|	v r]q?|	d
r|	d}|d d|dd  }}tt||}dd|td | g}nI|	drd|	v rq?|	d}|d d|dd  }}tt||}dd|td | g}n|	drdd|	ddd   }nt|	 }|
||< q?t|d D ] dt  d} fdddD \}}}t||| || || ||< dt  d} fdddD \}}}t||| || || ||< dt  d} fdddD \}}}t||| || || ||< dt  d} fdddD \}}}t||| || || ||< qt|d D ] dt  d} fd ddD \}}}t||| || || ||< dt  d} fd!ddD \}}}t||| || || ||< |d"t  d#  d d d$d f  |d"t  d#  d d d d$f  	|dt  d%< |dt  d&< |d"t  d'   |dt  d(< |d"t  d'   |dt  d)< qr|S )*Nz.safetensorsz!Please provide a valid ckpt path.z	attn.to_qz	attn.to_kz	attn.to_vzattn.add_q_projzattn.add_k_projzattn.add_v_projtransformer_blocks.r   r   r*   single_transformer_blocksproj_outr-   controlnet_blockszcontrolnet_double_blocks.zdouble_blocks.z!.self_attention.linear_qkv.weightc                    "   g | ]}d t   d| dqS )transformer_blocks.	.attn.to_.weightstr.0nr   r   r   
<listcomp>      " z.flux_transformer_converter.<locals>.<listcomp>)r!   r"   r#   z.self_attention.linear_qkv.biasc                    r6   )r7   r8   .biasr:   r<   r?   r   r   r@      rA   z'.self_attention.added_linear_qkv.weightc                    r6   )r7   
.attn.add_z_proj.weightr:   r<   r?   r   r   r@      rA   z%.self_attention.added_linear_qkv.biasc                    r6   )r7   rC   z
_proj.biasr:   r<   r?   r   r   r@      rA   zsingle_blocks.c                    r6   )single_transformer_blocks.r8   r9   r:   r<   r?   r   r   r@      rA   c                    r6   )rD   r8   rB   r:   r<   r?   r   r   r@      rA   rD   z.proj_out.weighti   z.mlp.linear_fc2.weightz".self_attention.linear_proj.weightz.proj_out.biasz.mlp.linear_fc2.biasz .self_attention.linear_proj.bias)ospathisdirlistdirendswithload_safetensorsjoinupdateisfileFileNotFoundErroritems
startswithsplitmaxintflux_key_mappingr	   r;   r)   r   detachclone)	ckpt_pathr   diffuser_state_dictfilesfileloaded_dictnew_state_dictnum_single_blocksnum_double_blockskeyvaluetempidxr"   new_keyqkkkvkr   r?   r   flux_transformer_converter   s   










**rg   )NN)	rE   r   safetensors.torchr   rJ   r   r)   rT   rg   r   r   r   r   <module>   s   !	
!"#$%&'()*+,-./0: