o
    پiV                     @   s^   d dl mZmZ d dlmZmZ dedefddZeG dd deZ	eG d	d
 d
eZ
dS )    )	dataclassfield)DiTArchConfig	DiTConfignreturnc                 C   s   d| v ot | dd S )Nblocks.)strisdigitsplit)r   m r   f/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/configs/models/dits/wanvideo.py	is_blocks	   s   r   c                       s  e Zd ZU edd dZeed< edd dZeed< edd dZ	eed< ed	d dZ
eed
< dZeeeef ed< dZdZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zeed!< d"Zed"B ed#< d"Zed"B ed$< d%Zeed&< d"Z ed"B ed'< ed(d dZ!ee ed)< d"Z"ed"B ed*< d+Z#eed,< d-Z$eed.< d/Z%eed0< d1Z&eed2< d3Z'eed4< d5Z(eed6<  fd7d8Z)  Z*S )9WanVideoArchConfigc                   C   s   t gS N)r   r   r   r   r   <lambda>       zWanVideoArchConfig.<lambda>default_factory_fsdp_shard_conditionsc                   C   st   i ddddddddd	d
dddddddddddddddddddddd d!d"d#d$d%S )&Nz^patch_embedding\.(.*)$zpatch_embedding.proj.\1z3^condition_embedder\.text_embedder\.linear_1\.(.*)$z)condition_embedder.text_embedder.fc_in.\1z3^condition_embedder\.text_embedder\.linear_2\.(.*)$z*condition_embedder.text_embedder.fc_out.\1z3^condition_embedder\.time_embedder\.linear_1\.(.*)$z-condition_embedder.time_embedder.mlp.fc_in.\1z3^condition_embedder\.time_embedder\.linear_2\.(.*)$z.condition_embedder.time_embedder.mlp.fc_out.\1z%^condition_embedder\.time_proj\.(.*)$z,condition_embedder.time_modulation.linear.\1z<^condition_embedder\.image_embedder\.ff\.net\.0\.proj\.(.*)$z-condition_embedder.image_embedder.ff.fc_in.\1z6^condition_embedder\.image_embedder\.ff\.net\.2\.(.*)$z.condition_embedder.image_embedder.ff.fc_out.\1z"^blocks\.(\d+)\.attn1\.to_q\.(.*)$zblocks.\1.to_q.\2z"^blocks\.(\d+)\.attn1\.to_k\.(.*)$zblocks.\1.to_k.\2z"^blocks\.(\d+)\.attn1\.to_v\.(.*)$zblocks.\1.to_v.\2z'^blocks\.(\d+)\.attn1\.to_out\.0\.(.*)$zblocks.\1.to_out.\2z$^blocks\.(\d+)\.attn1\.norm_q\.(.*)$zblocks.\1.norm_q.\2z$^blocks\.(\d+)\.attn1\.norm_k\.(.*)$zblocks.\1.norm_k.\2z9^blocks\.(\d+)\.attn1\.attn_op\.local_attn\.proj_l\.(.*)$z$blocks.\1.attn1.local_attn.proj_l.\2z'^blocks\.(\d+)\.attn2\.to_out\.0\.(.*)$zblocks.\1.attn2.to_out.\2z(^blocks\.(\d+)\.ffn\.net\.0\.proj\.(.*)$blocks.\1.ffn.fc_in.\2blocks.\1.ffn.fc_out.\2z)blocks.\1.self_attn_residual_norm.norm.\2)z"^blocks\.(\d+)\.ffn\.net\.2\.(.*)$z^blocks\.(\d+)\.norm2\.(.*)$r   r   r   r   r   r      sL    	
param_names_mappingc                   C   s   i S r   r   r   r   r   r   r   )   s    reverse_param_names_mappingc                   C   s   ddddddddd	d
d
S )Nzblocks.\1.attn1.to_q.\2zblocks.\1.attn1.to_k.\2zblocks.\1.attn1.to_v.\2zblocks.\1.attn1.to_out.0.\2zblocks.\1.attn2.to_q.\2zblocks.\1.attn2.to_k.\2zblocks.\1.attn2.to_v.\2zblocks.\1.attn2.to_out.0.\2r   r   )
z#^blocks\.(\d+)\.self_attn\.q\.(.*)$z#^blocks\.(\d+)\.self_attn\.k\.(.*)$z#^blocks\.(\d+)\.self_attn\.v\.(.*)$z#^blocks\.(\d+)\.self_attn\.o\.(.*)$z$^blocks\.(\d+)\.cross_attn\.q\.(.*)$z$^blocks\.(\d+)\.cross_attn\.k\.(.*)$z$^blocks\.(\d+)\.cross_attn\.v\.(.*)$z$^blocks\.(\d+)\.cross_attn\.o\.(.*)$z^blocks\.(\d+)\.ffn\.0\.(.*)$z^blocks\.(\d+)\.ffn\.2\.(.*)$r   r   r   r   r   r   .   s   lora_param_names_mapping)      r   
patch_sizei   (   num_attention_heads   attention_head_dim   in_channelsout_channelsi   text_dim   freq_dimi 6  ffn_dim
num_layersTcross_attn_normrms_norm_across_headsqk_normgư>epsN	image_dimadded_kv_proj_dimi   rope_max_seq_lenpos_embed_seq_lenc                   C   s   dgS )Nembedderr   r   r   r   r   r   M   r   exclude_lora_layersboundary_ratior
   local_attn_sizer   	sink_size   num_frames_per_block   sliding_window_num_framesoriginalattention_typeg?sla_topkc                    s2   t    | jp
| j| _| j| j | _| j| _d S r   )super__post_init__r'   r&   r"   r$   hidden_sizenum_channels_latents)self	__class__r   r   rB   ^   s   
z WanVideoArchConfig.__post_init__)+__name__
__module____qualname__r   r   list__annotations__r   dictr   r   r    tupleinttext_lenr"   r$   r&   r'   r(   r*   r+   r,   r-   boolr/   r   r0   floatr1   r2   r3   r4   r6   r7   r8   r9   r;   r=   r?   r@   rB   __classcell__r   r   rF   r   r      sJ   
 

r   c                   @   s,   e Zd ZU eedZeed< dZe	ed< dS )WanVideoConfigr   arch_configWanprefixN)
rH   rI   rJ   r   r   rU   r   rL   rW   r   r   r   r   r   rT   e   s   
 rT   N)dataclassesr   r   .sglang.multimodal_gen.configs.models.dits.baser   r   r   rQ   r   r   rT   r   r   r   r   <module>   s   W