from __future__ import annotations
import torch
class GraphModule(torch.nn.Module):
    def forward(self, s59: "Sym(s18)", L_inputs_embeds_: "bf16[s18, 2048]", L_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", s18: "Sym(s18)", s7: "Sym(s7)", L_positions_: "i64[3, s18]", L_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", L_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_: "bf16[2048]", 
L_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_: 
"bf16[2048, 2048]", L_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", 
L_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_: 
"bf16[128]", L_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", 
L_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", 
L_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", 
L_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", 
L_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", 
L_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", 
L_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_: "bf16[2048]", 
L_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", L_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", L_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", L_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", L_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", L_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", L_self_modules_norm_parameters_weight_: "bf16[2048]"):
        l_inputs_embeds_ = L_inputs_embeds_
        l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_
        l_positions_ = L_positions_
        l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = L_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_
        l_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_ = L_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_
        l_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_ = L_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_
        l_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_ = L_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_
        l_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_ = L_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_
        l_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_ = L_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_
        l_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_ = L_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_
        l_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_ = L_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_
        l_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_ = L_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_
        l_self_modules_norm_parameters_weight_ = L_self_modules_norm_parameters_weight_
        
        # No stacktrace found for following nodes
        submod_0 = self.submod_0(l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_, l_inputs_embeds_, s59, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem = submod_0[0]
        getitem_1 = submod_0[1]
        getitem_2 = submod_0[2]
        getitem_3 = submod_0[3];  submod_0 = None
        submod_1 = self.submod_1(getitem, s59, getitem_1, getitem_2, getitem_3);  getitem = getitem_1 = getitem_2 = submod_1 = None
        submod_2 = self.submod_2(getitem_3, s59, l_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_, l_inputs_embeds_, l_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_3 = l_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_ = l_inputs_embeds_ = l_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_4 = submod_2[0]
        getitem_5 = submod_2[1]
        getitem_6 = submod_2[2]
        getitem_7 = submod_2[3]
        getitem_8 = submod_2[4];  submod_2 = None
        submod_3 = self.submod_3(getitem_4, s59, getitem_5, getitem_6, getitem_7);  getitem_4 = getitem_5 = getitem_6 = submod_3 = None
        submod_4 = self.submod_4(getitem_7, s59, l_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_, getitem_8, l_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_7 = l_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_ = getitem_8 = l_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_9 = submod_4[0]
        getitem_10 = submod_4[1]
        getitem_11 = submod_4[2]
        getitem_12 = submod_4[3]
        getitem_13 = submod_4[4];  submod_4 = None
        submod_5 = self.submod_5(getitem_9, s59, getitem_10, getitem_11, getitem_12);  getitem_9 = getitem_10 = getitem_11 = submod_5 = None
        submod_6 = self.submod_6(getitem_12, s59, l_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_, getitem_13, l_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_12 = l_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_ = getitem_13 = l_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_14 = submod_6[0]
        getitem_15 = submod_6[1]
        getitem_16 = submod_6[2]
        getitem_17 = submod_6[3]
        getitem_18 = submod_6[4];  submod_6 = None
        submod_7 = self.submod_7(getitem_14, s59, getitem_15, getitem_16, getitem_17);  getitem_14 = getitem_15 = getitem_16 = submod_7 = None
        submod_8 = self.submod_8(getitem_17, s59, l_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_, getitem_18, l_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_17 = l_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_ = getitem_18 = l_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_19 = submod_8[0]
        getitem_20 = submod_8[1]
        getitem_21 = submod_8[2]
        getitem_22 = submod_8[3]
        getitem_23 = submod_8[4];  submod_8 = None
        submod_9 = self.submod_9(getitem_19, s59, getitem_20, getitem_21, getitem_22);  getitem_19 = getitem_20 = getitem_21 = submod_9 = None
        submod_10 = self.submod_10(getitem_22, s59, l_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_, getitem_23, l_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_22 = l_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_ = getitem_23 = l_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_24 = submod_10[0]
        getitem_25 = submod_10[1]
        getitem_26 = submod_10[2]
        getitem_27 = submod_10[3]
        getitem_28 = submod_10[4];  submod_10 = None
        submod_11 = self.submod_11(getitem_24, s59, getitem_25, getitem_26, getitem_27);  getitem_24 = getitem_25 = getitem_26 = submod_11 = None
        submod_12 = self.submod_12(getitem_27, s59, l_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_, getitem_28, l_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_27 = l_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_ = getitem_28 = l_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_29 = submod_12[0]
        getitem_30 = submod_12[1]
        getitem_31 = submod_12[2]
        getitem_32 = submod_12[3]
        getitem_33 = submod_12[4];  submod_12 = None
        submod_13 = self.submod_13(getitem_29, s59, getitem_30, getitem_31, getitem_32);  getitem_29 = getitem_30 = getitem_31 = submod_13 = None
        submod_14 = self.submod_14(getitem_32, s59, l_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_, getitem_33, l_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_32 = l_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_ = getitem_33 = l_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_34 = submod_14[0]
        getitem_35 = submod_14[1]
        getitem_36 = submod_14[2]
        getitem_37 = submod_14[3]
        getitem_38 = submod_14[4];  submod_14 = None
        submod_15 = self.submod_15(getitem_34, s59, getitem_35, getitem_36, getitem_37);  getitem_34 = getitem_35 = getitem_36 = submod_15 = None
        submod_16 = self.submod_16(getitem_37, s59, l_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_, getitem_38, l_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_37 = l_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_ = getitem_38 = l_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_39 = submod_16[0]
        getitem_40 = submod_16[1]
        getitem_41 = submod_16[2]
        getitem_42 = submod_16[3]
        getitem_43 = submod_16[4];  submod_16 = None
        submod_17 = self.submod_17(getitem_39, s59, getitem_40, getitem_41, getitem_42);  getitem_39 = getitem_40 = getitem_41 = submod_17 = None
        submod_18 = self.submod_18(getitem_42, s59, l_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_, getitem_43, l_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_42 = l_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_ = getitem_43 = l_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_44 = submod_18[0]
        getitem_45 = submod_18[1]
        getitem_46 = submod_18[2]
        getitem_47 = submod_18[3]
        getitem_48 = submod_18[4];  submod_18 = None
        submod_19 = self.submod_19(getitem_44, s59, getitem_45, getitem_46, getitem_47);  getitem_44 = getitem_45 = getitem_46 = submod_19 = None
        submod_20 = self.submod_20(getitem_47, s59, l_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_, getitem_48, l_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_47 = l_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_ = getitem_48 = l_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_49 = submod_20[0]
        getitem_50 = submod_20[1]
        getitem_51 = submod_20[2]
        getitem_52 = submod_20[3]
        getitem_53 = submod_20[4];  submod_20 = None
        submod_21 = self.submod_21(getitem_49, s59, getitem_50, getitem_51, getitem_52);  getitem_49 = getitem_50 = getitem_51 = submod_21 = None
        submod_22 = self.submod_22(getitem_52, s59, l_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_, getitem_53, l_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_52 = l_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_ = getitem_53 = l_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_54 = submod_22[0]
        getitem_55 = submod_22[1]
        getitem_56 = submod_22[2]
        getitem_57 = submod_22[3]
        getitem_58 = submod_22[4];  submod_22 = None
        submod_23 = self.submod_23(getitem_54, s59, getitem_55, getitem_56, getitem_57);  getitem_54 = getitem_55 = getitem_56 = submod_23 = None
        submod_24 = self.submod_24(getitem_57, s59, l_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_, getitem_58, l_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_57 = l_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_ = getitem_58 = l_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_59 = submod_24[0]
        getitem_60 = submod_24[1]
        getitem_61 = submod_24[2]
        getitem_62 = submod_24[3]
        getitem_63 = submod_24[4];  submod_24 = None
        submod_25 = self.submod_25(getitem_59, s59, getitem_60, getitem_61, getitem_62);  getitem_59 = getitem_60 = getitem_61 = submod_25 = None
        submod_26 = self.submod_26(getitem_62, s59, l_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_, getitem_63, l_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_62 = l_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_ = getitem_63 = l_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_64 = submod_26[0]
        getitem_65 = submod_26[1]
        getitem_66 = submod_26[2]
        getitem_67 = submod_26[3]
        getitem_68 = submod_26[4];  submod_26 = None
        submod_27 = self.submod_27(getitem_64, s59, getitem_65, getitem_66, getitem_67);  getitem_64 = getitem_65 = getitem_66 = submod_27 = None
        submod_28 = self.submod_28(getitem_67, s59, l_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_, getitem_68, l_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_67 = l_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_ = getitem_68 = l_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_69 = submod_28[0]
        getitem_70 = submod_28[1]
        getitem_71 = submod_28[2]
        getitem_72 = submod_28[3]
        getitem_73 = submod_28[4];  submod_28 = None
        submod_29 = self.submod_29(getitem_69, s59, getitem_70, getitem_71, getitem_72);  getitem_69 = getitem_70 = getitem_71 = submod_29 = None
        submod_30 = self.submod_30(getitem_72, s59, l_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_, getitem_73, l_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_72 = l_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_ = getitem_73 = l_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_74 = submod_30[0]
        getitem_75 = submod_30[1]
        getitem_76 = submod_30[2]
        getitem_77 = submod_30[3]
        getitem_78 = submod_30[4];  submod_30 = None
        submod_31 = self.submod_31(getitem_74, s59, getitem_75, getitem_76, getitem_77);  getitem_74 = getitem_75 = getitem_76 = submod_31 = None
        submod_32 = self.submod_32(getitem_77, s59, l_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_, getitem_78, l_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_77 = l_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_ = getitem_78 = l_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_79 = submod_32[0]
        getitem_80 = submod_32[1]
        getitem_81 = submod_32[2]
        getitem_82 = submod_32[3]
        getitem_83 = submod_32[4];  submod_32 = None
        submod_33 = self.submod_33(getitem_79, s59, getitem_80, getitem_81, getitem_82);  getitem_79 = getitem_80 = getitem_81 = submod_33 = None
        submod_34 = self.submod_34(getitem_82, s59, l_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_, getitem_83, l_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_82 = l_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_ = getitem_83 = l_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_84 = submod_34[0]
        getitem_85 = submod_34[1]
        getitem_86 = submod_34[2]
        getitem_87 = submod_34[3]
        getitem_88 = submod_34[4];  submod_34 = None
        submod_35 = self.submod_35(getitem_84, s59, getitem_85, getitem_86, getitem_87);  getitem_84 = getitem_85 = getitem_86 = submod_35 = None
        submod_36 = self.submod_36(getitem_87, s59, l_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_, getitem_88, l_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_87 = l_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_ = getitem_88 = l_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_89 = submod_36[0]
        getitem_90 = submod_36[1]
        getitem_91 = submod_36[2]
        getitem_92 = submod_36[3]
        getitem_93 = submod_36[4];  submod_36 = None
        submod_37 = self.submod_37(getitem_89, s59, getitem_90, getitem_91, getitem_92);  getitem_89 = getitem_90 = getitem_91 = submod_37 = None
        submod_38 = self.submod_38(getitem_92, s59, l_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_, getitem_93, l_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_92 = l_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_ = getitem_93 = l_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_94 = submod_38[0]
        getitem_95 = submod_38[1]
        getitem_96 = submod_38[2]
        getitem_97 = submod_38[3]
        getitem_98 = submod_38[4];  submod_38 = None
        submod_39 = self.submod_39(getitem_94, s59, getitem_95, getitem_96, getitem_97);  getitem_94 = getitem_95 = getitem_96 = submod_39 = None
        submod_40 = self.submod_40(getitem_97, s59, l_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_, getitem_98, l_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_97 = l_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_ = getitem_98 = l_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_99 = submod_40[0]
        getitem_100 = submod_40[1]
        getitem_101 = submod_40[2]
        getitem_102 = submod_40[3]
        getitem_103 = submod_40[4];  submod_40 = None
        submod_41 = self.submod_41(getitem_99, s59, getitem_100, getitem_101, getitem_102);  getitem_99 = getitem_100 = getitem_101 = submod_41 = None
        submod_42 = self.submod_42(getitem_102, s59, l_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_, getitem_103, l_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_102 = l_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_ = getitem_103 = l_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_104 = submod_42[0]
        getitem_105 = submod_42[1]
        getitem_106 = submod_42[2]
        getitem_107 = submod_42[3]
        getitem_108 = submod_42[4];  submod_42 = None
        submod_43 = self.submod_43(getitem_104, s59, getitem_105, getitem_106, getitem_107);  getitem_104 = getitem_105 = getitem_106 = submod_43 = None
        submod_44 = self.submod_44(getitem_107, s59, l_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_, getitem_108, l_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_107 = l_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_ = getitem_108 = l_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_109 = submod_44[0]
        getitem_110 = submod_44[1]
        getitem_111 = submod_44[2]
        getitem_112 = submod_44[3]
        getitem_113 = submod_44[4];  submod_44 = None
        submod_45 = self.submod_45(getitem_109, s59, getitem_110, getitem_111, getitem_112);  getitem_109 = getitem_110 = getitem_111 = submod_45 = None
        submod_46 = self.submod_46(getitem_112, s59, l_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_, getitem_113, l_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_112 = l_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_ = getitem_113 = l_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_114 = submod_46[0]
        getitem_115 = submod_46[1]
        getitem_116 = submod_46[2]
        getitem_117 = submod_46[3]
        getitem_118 = submod_46[4];  submod_46 = None
        submod_47 = self.submod_47(getitem_114, s59, getitem_115, getitem_116, getitem_117);  getitem_114 = getitem_115 = getitem_116 = submod_47 = None
        submod_48 = self.submod_48(getitem_117, s59, l_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_, getitem_118, l_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_117 = l_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_ = getitem_118 = l_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_119 = submod_48[0]
        getitem_120 = submod_48[1]
        getitem_121 = submod_48[2]
        getitem_122 = submod_48[3]
        getitem_123 = submod_48[4];  submod_48 = None
        submod_49 = self.submod_49(getitem_119, s59, getitem_120, getitem_121, getitem_122);  getitem_119 = getitem_120 = getitem_121 = submod_49 = None
        submod_50 = self.submod_50(getitem_122, s59, l_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_, getitem_123, l_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_122 = l_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_ = getitem_123 = l_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_124 = submod_50[0]
        getitem_125 = submod_50[1]
        getitem_126 = submod_50[2]
        getitem_127 = submod_50[3]
        getitem_128 = submod_50[4];  submod_50 = None
        submod_51 = self.submod_51(getitem_124, s59, getitem_125, getitem_126, getitem_127);  getitem_124 = getitem_125 = getitem_126 = submod_51 = None
        submod_52 = self.submod_52(getitem_127, s59, l_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_, getitem_128, l_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_127 = l_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_ = getitem_128 = l_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_ = l_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_ = None
        getitem_129 = submod_52[0]
        getitem_130 = submod_52[1]
        getitem_131 = submod_52[2]
        getitem_132 = submod_52[3]
        getitem_133 = submod_52[4];  submod_52 = None
        submod_53 = self.submod_53(getitem_129, s59, getitem_130, getitem_131, getitem_132);  getitem_129 = getitem_130 = getitem_131 = submod_53 = None
        submod_54 = self.submod_54(getitem_132, s59, l_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_, getitem_133, l_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_, l_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_, s18, l_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_, l_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_, l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_, l_positions_, s7);  getitem_132 = l_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_ = getitem_133 = l_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_ = l_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_ = s18 = l_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_ = l_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_ = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = s7 = None
        getitem_134 = submod_54[0]
        getitem_135 = submod_54[1]
        getitem_136 = submod_54[2]
        getitem_137 = submod_54[3]
        getitem_138 = submod_54[4];  submod_54 = None
        submod_55 = self.submod_55(getitem_134, s59, getitem_135, getitem_136, getitem_137);  getitem_134 = getitem_135 = getitem_136 = submod_55 = None
        submod_56 = self.submod_56(getitem_137, s59, l_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_, l_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_, getitem_138, l_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_, l_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_, l_self_modules_norm_parameters_weight_);  getitem_137 = s59 = l_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_ = l_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_ = getitem_138 = l_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_ = l_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_ = l_self_modules_norm_parameters_weight_ = None
        return (submod_56,)
        
    class submod_0(torch.nn.Module):
        def forward(self, l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_inputs_embeds_: "bf16[s18, 2048]", s59: "Sym(s18)", l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_0_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = l_inputs_embeds_.to(torch.float32);  l_inputs_embeds_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = to.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add);  add = None
            mul: "f32[s18, 2048]" = to * rsqrt;  to = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_1 * _get_data_attr;  to_1 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 4096]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_0_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear.split([2048, 1024, 1024], dim = -1);  linear = None
            getitem: "bf16[s18, 2048]" = split[0]
            getitem_1: "bf16[s18, 1024]" = split[1]
            getitem_2: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view: "bf16[s18, 16, 128]" = getitem.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_0_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_2: "f32[s18, 16, 128]" = view.to(torch.float32);  view = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 16, 128]" = to_2.pow(2)
            mean_1: "f32[s18, 16, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 16, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 16, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul_2: "f32[s18, 16, 128]" = to_2 * rsqrt_1;  to_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_3: "bf16[s18, 16, 128]" = mul_2.to(torch.bfloat16);  mul_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_3: "bf16[s18, 16, 128]" = to_3 * _get_data_attr_1;  to_3 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem.size();  getitem = None
            view_1: "bf16[s18, 2048]" = mul_3.view(size);  mul_3 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_2: "bf16[s18, 8, 128]" = getitem_1.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_0_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_4: "f32[s18, 8, 128]" = view_2.to(torch.float32);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 8, 128]" = to_4.pow(2)
            mean_2: "f32[s18, 8, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_2: "f32[s18, 8, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 8, 1]" = torch.rsqrt(add_2);  add_2 = None
            mul_4: "f32[s18, 8, 128]" = to_4 * rsqrt_2;  to_4 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 8, 128]" = mul_4.to(torch.bfloat16);  mul_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_5: "bf16[s18, 8, 128]" = to_5 * _get_data_attr_2;  to_5 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_1.size();  getitem_1 = None
            view_3: "bf16[s18, 1024]" = mul_5.view(size_1);  mul_5 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_3: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_3.chunk(2, dim = -1);  getitem_3 = None
            getitem_4: "bf16[3, s18, 64]" = chunk[0]
            getitem_5: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_6: "bf16[s18, 64]" = getitem_4[0]
            clone: "bf16[s18, 64]" = getitem_6.clone();  getitem_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_7: "bf16[s18, 20]" = getitem_4[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_7;  setitem = clone;  getitem_7 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_8: "bf16[s18, 20]" = getitem_4[(2, Ellipsis, slice(2, 60, 3))];  getitem_4 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_8;  setitem_1 = clone;  getitem_8 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_9: "bf16[s18, 64]" = getitem_5[0]
            clone_1: "bf16[s18, 64]" = getitem_9.clone();  getitem_9 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_5[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_10;  setitem_2 = clone_1;  getitem_10 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_11: "bf16[s18, 20]" = getitem_5[(2, Ellipsis, slice(2, 60, 3))];  getitem_5 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_11;  setitem_3 = clone_1;  getitem_11 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_1.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_4: "bf16[s18, 16, 128]" = view_1.view(s18, -1, 128);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_12: "bf16[s18, 16, 128]" = view_4[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_13: "bf16[s18, 16, 0]" = view_4[(Ellipsis, slice(128, None, None))];  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_6: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_7: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_12, 2, dim = -1);  getitem_12 = None
            getitem_14: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_15: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_6: "bf16[s18, 16, 64]" = getitem_14 * to_6
            mul_7: "bf16[s18, 16, 64]" = getitem_15 * to_7
            sub: "bf16[s18, 16, 64]" = mul_6 - mul_7;  mul_6 = mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_8: "bf16[s18, 16, 64]" = getitem_15 * to_6;  getitem_15 = to_6 = None
            mul_9: "bf16[s18, 16, 64]" = getitem_14 * to_7;  getitem_14 = to_7 = None
            add_3: "bf16[s18, 16, 64]" = mul_8 + mul_9;  mul_8 = mul_9 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_3), dim = -1);  sub = add_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_13), dim = -1);  cat = getitem_13 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_3.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 8, 128]" = view_3.view(s18, -1, 128);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_16: "bf16[s18, 8, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_17: "bf16[s18, 8, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_8: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_9: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_16, 2, dim = -1);  getitem_16 = None
            getitem_18: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_19: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_10: "bf16[s18, 8, 64]" = getitem_18 * to_8
            mul_11: "bf16[s18, 8, 64]" = getitem_19 * to_9
            sub_1: "bf16[s18, 8, 64]" = mul_10 - mul_11;  mul_10 = mul_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_12: "bf16[s18, 8, 64]" = getitem_19 * to_8;  getitem_19 = to_8 = None
            mul_13: "bf16[s18, 8, 64]" = getitem_18 * to_9;  getitem_18 = to_9 = None
            add_4: "bf16[s18, 8, 64]" = mul_12 + mul_13;  mul_12 = mul_13 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_4), dim = -1);  sub_1 = add_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_17), dim = -1);  cat_2 = getitem_17 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_6: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_7: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_8: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_9: "bf16[s18, 8, 128]" = getitem_2.view(-1, 8, 128);  getitem_2 = None
            return (view_8, view_9, view_6, view_7)
            
    class submod_1(torch.nn.Module):
        """Attention piece of the split graph for decoder layer 0.

        Writes the rotated key/value tensors into the paged KV cache, then
        runs the unified attention kernel, which fills the preallocated
        ``output_3`` buffer in place.  Nothing is returned to the caller.
        """

        def forward(self, key_2: "bf16[s18, 8, 128]", s59: "Sym(s18)", value: "bf16[s18, 8, 128]", query_2: "bf16[s18, 16, 128]", output_3: "bf16[s18, 16, 128]"):
            # Custom vLLM op: append key/value for this layer into the KV
            # cache.  The zero-sized result acts purely as a data dependency
            # so the attention call below is ordered after the cache update.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_2,
                value,
                'language_model.model.layers.0.self_attn.attn',
            )
            # Attention result is written into output_3 in place; the op's
            # return value is unused.
            torch.ops.vllm.unified_attention_with_output(
                query_2,
                key_2,
                value,
                output_3,
                'language_model.model.layers.0.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )
            return ()
            
    class submod_2(torch.nn.Module):
        def forward(self, output_3: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", l_inputs_embeds_: "bf16[s18, 2048]", l_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_3.view(-1, 2048);  output_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_0_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_0_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + l_inputs_embeds_;  to = l_inputs_embeds_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_0_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_0_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_1_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_1_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_1_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_1_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_3(torch.nn.Module):
        """Graph partition covering the attention core of decoder layer 1.

        Auto-generated (torch.fx / Dynamo) capture of two vLLM custom ops:
        a KV-cache update followed by the fused attention kernel. The ops
        run purely for their side effects — attention results are written
        in place into ``output_7`` — so this submodule returns nothing.
        """

        def forward(self, key_5: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_1: "bf16[s18, 8, 128]", query_5: "bf16[s18, 16, 128]", output_7: "bf16[s18, 16, 128]"):
            # Append the new key/value tokens to this layer's KV cache
            # (presumably vLLM's paged cache — confirm against the runtime).
            # The op returns a zero-length dummy tensor whose only purpose is
            # to encode an ordering dependency for the attention op below.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_5,
                value_1,
                'language_model.model.layers.1.self_attn.attn',
            )
            # Fused attention: reads query/key/value and writes the result
            # into output_7 in place. Passing cache_dep as kv_cache_dummy_dep
            # forces the cache update to be scheduled before this kernel.
            torch.ops.vllm.unified_attention_with_output(
                query_5,
                key_5,
                value_1,
                output_7,
                'language_model.model.layers.1.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )
            return ()
            
    class submod_4(torch.nn.Module):
        def forward(self, output_7: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_1: "bf16[s18, 2048]", l_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_7.view(-1, 2048);  output_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_1_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_1_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_1;  to = residual_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_1_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_1_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_2_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_2_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_2_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_2_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_5(torch.nn.Module):
        def forward(self, key_8: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_2: "bf16[s18, 8, 128]", query_8: "bf16[s18, 16, 128]", output_11: "bf16[s18, 16, 128]"):
            """KV-cache update + attention for decoder layer 2.

            First writes (key_8, value_2) into vLLM's KV cache for
            'language_model.model.layers.2.self_attn.attn', then runs the
            attention op, which fills output_11 (out-parameter style).
            Returns an empty tuple; callers read the result from output_11.
            """
            # The zero-sized tensor returned by the cache update is threaded
            # into the attention call as a dummy dependency so the graph
            # keeps the update ordered before the attention read.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_8,
                value_2,
                'language_model.model.layers.2.self_attn.attn',
            )
            torch.ops.vllm.unified_attention_with_output(
                query_8,
                key_8,
                value_2,
                output_11,
                'language_model.model.layers.2.self_attn.attn',
                kv_cache_dummy_dep = cache_dep,
            )
            return ()
            
    class submod_6(torch.nn.Module):
        """Dynamo-captured graph segment spanning an attention boundary.

        Covers the tail of decoder layer 2 (o_proj, residual add,
        post-attention RMSNorm, SiLU-gated MLP) and the head of decoder
        layer 3 (input RMSNorm, fused QKV projection, per-head Q/K norm,
        interleaved M-RoPE) up to the tensors handed to the next attention
        custom op.  All code below is machine-generated straight-line graph
        code; the `# File:` comments record the original source location of
        each captured statement.  Names assigned `None` after last use are
        the capture's explicit lifetime management — do not remove them.
        """
        def forward(self, output_11: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_3: "bf16[s18, 2048]", l_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            """Return (key, value, query, empty attention output, residual).

            output_11 is layer 2's attention result; residual_3 the running
            residual stream.  s18 is the dynamic token count.  The returned
            views feed the layer-3 attention op (see the sibling submodule
            that calls torch.ops.vllm.unified_attention_with_output);
            to_4 is the updated bf16 residual carried to the next segment.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_11.view(-1, 2048);  output_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_2_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_2_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_3;  to = residual_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_2_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_2_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_3_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_3_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_3_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_3_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_7(torch.nn.Module):
        def forward(self, key_11: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_3: "bf16[s18, 8, 128]", query_11: "bf16[s18, 16, 128]", output_15: "bf16[s18, 16, 128]"):
            """KV-cache update + attention for decoder layer 3.

            First writes (key_11, value_3) into vLLM's KV cache for
            'language_model.model.layers.3.self_attn.attn', then runs the
            attention op, which fills output_15 (out-parameter style).
            Returns an empty tuple; callers read the result from output_15.
            """
            # The zero-sized tensor returned by the cache update is threaded
            # into the attention call as a dummy dependency so the graph
            # keeps the update ordered before the attention read.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_11,
                value_3,
                'language_model.model.layers.3.self_attn.attn',
            )
            torch.ops.vllm.unified_attention_with_output(
                query_11,
                key_11,
                value_3,
                output_15,
                'language_model.model.layers.3.self_attn.attn',
                kv_cache_dummy_dep = cache_dep,
            )
            return ()
            
    class submod_8(torch.nn.Module):
        """Dynamo-captured subgraph spanning the end of decoder layer 3 and the
        start of decoder layer 4 (Qwen3-style model, per the `# File:` provenance
        comments and parameter names below).

        Computation performed, in order:
          1. Layer 3: attention output projection (`o_proj`), post-attention
             RMSNorm with residual add, SwiGLU MLP (`gate_up_proj` -> silu-gate ->
             `down_proj`).
          2. Layer 4: input RMSNorm with residual add, fused QKV projection,
             per-head RMSNorm on q and k, interleaved M-RoPE rotary embedding
             applied to q and k, and allocation of the attention output buffer.

        NOTE(review): this is machine-generated FX code — the trailing
        `x = None` assignments are the graph's explicit deallocations and the
        statement order is load-bearing; do not hand-edit the logic.
        """
        def forward(self, output_15: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_5: "bf16[s18, 2048]", l_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            """Run layer-3 tail + layer-4 attention prologue.

            Args (shapes from the FX annotations; s18 is the dynamic token count):
                output_15: layer-3 attention output, [s18, 16, 128].
                residual_5: residual stream entering layer-3 post-attn norm.
                l_*_weight_: flattened parameter tensors for layers 3 and 4.
                l_positions_: [3, s18] multimodal rope position ids (3 sections).
                s59/s18/s7: symbolic shape inputs.

            Returns:
                (key, value, query, output_buffer, residual) for the layer-4
                attention custom op: key/value [s18, 8, 128], query/output
                [s18, 16, 128], residual `to_4` [s18, 2048] in bf16.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_15.view(-1, 2048);  output_15 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_3_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_3_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_5;  to = residual_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_3_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
            # SwiGLU: first 6144 columns are the gate, last 6144 the up-projection.
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_3_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_4_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
            # to_4 is the new residual stream, carried out of this subgraph (returned last).
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_4_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
            # Fused QKV split: q=2048 (16 heads x 128), k=v=1024 (8 KV heads x 128).
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_4_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_4_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
            # M-RoPE: look up cos/sin for all 3 position sections, then interleave
            # sections 1 and 2 into section 0 via strided in-place writes below.
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
            # rotary_dim == head_size (128), so query_pass/key_pass are empty slices.
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
            # Preallocate the attention output buffer the custom op will write into.
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_9(torch.nn.Module):
        """Dynamo-captured subgraph for layer 4's attention custom ops.

        Pure side-effect subgraph: it appends key/value to the KV cache and runs
        the attention kernel, which writes its result into the caller-provided
        `output_19` buffer in place. Returns an empty tuple — callers read the
        mutated `output_19` instead of a return value.
        """
        def forward(self, key_14: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_4: "bf16[s18, 8, 128]", query_14: "bf16[s18, 16, 128]", output_19: "bf16[s18, 16, 128]"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Returns a dummy bf16[0] tensor used only to order the two custom ops.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_14, value_4, 'language_model.model.layers.4.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # kv_cache_dummy_dep forces the cache update to complete before attention runs.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_14, key_14, value_4, output_19, 'language_model.model.layers.4.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_14 = key_14 = value_4 = output_19 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_10(torch.nn.Module):
        def forward(self, output_19: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_7: "bf16[s18, 2048]", l_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_19.view(-1, 2048);  output_19 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_4_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_4_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_7;  to = residual_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_4_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_4_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_5_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_5_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_5_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_5_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_11(torch.nn.Module):
        """Captured attention submodule for decoder layer 5.

        Pushes the freshly projected key/value tokens into the KV cache, then
        runs the fused attention op, which receives a preallocated output
        buffer (presumably filled in place — confirm against the op's
        registration). Returns an empty tuple; all results flow through the
        cache and the output buffer.
        """

        def forward(self, key_17: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_5: "bf16[s18, 8, 128]", query_17: "bf16[s18, 16, 128]", output_23: "bf16[s18, 16, 128]"):
            # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453
            # Write key/value into the cache for this layer's attention site.
            # The zero-size bf16 result serves only as an ordering dependency.
            kv_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_17, value_5, 'language_model.model.layers.5.self_attn.attn'
            )

            # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456
            # kv_cache_dummy_dep threads the cache-update result into the
            # attention call so the two ops cannot be reordered by the compiler.
            torch.ops.vllm.unified_attention_with_output(
                query_17,
                key_17,
                value_5,
                output_23,
                'language_model.model.layers.5.self_attn.attn',
                kv_cache_dummy_dep=kv_dep,
            )

            # Mirror the captured graph's eager reference drops so tensor
            # lifetimes match the original trace.
            del query_17, key_17, value_5, output_23, kv_dep
            return ()
            
    class submod_12(torch.nn.Module):
        def forward(self, output_23: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_9: "bf16[s18, 2048]", l_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_23.view(-1, 2048);  output_23 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_5_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_5_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_9;  to = residual_9 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_5_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_5_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_6_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_6_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_6_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_6_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_13(torch.nn.Module):
        """Generated submodule: KV-cache write plus attention for layer 6's self-attn.

        This is a TorchDynamo-captured fragment; it only invokes two vLLM custom
        ops and produces no tensor outputs of its own (attention writes in place).
        """

        def forward(self, key_20: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_6: "bf16[s18, 8, 128]", query_20: "bf16[s18, 16, 128]", output_27: "bf16[s18, 16, 128]"):
            # vLLM custom op: store this step's key/value tensors into the KV cache
            # slot named 'language_model.model.layers.6.self_attn.attn'. The op
            # returns an empty bf16 tensor that carries no data — presumably it
            # exists only as a graph-level dependency token (confirm in
            # attention.py:453).
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_20, value_6, 'language_model.model.layers.6.self_attn.attn'
            )

            # vLLM custom op: run attention, writing its result in place into
            # output_27. Threading cache_dep through kv_cache_dummy_dep keeps the
            # cache update ordered before this kernel in the captured graph.
            torch.ops.vllm.unified_attention_with_output(
                query_20, key_20, value_6, output_27,
                'language_model.model.layers.6.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )

            # Drop local references early, mirroring the generated graph's
            # explicit "= None" frees.
            key_20 = value_6 = query_20 = output_27 = cache_dep = None
            return ()
            
    class submod_14(torch.nn.Module):
        def forward(self, output_27: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_11: "bf16[s18, 2048]", l_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_27.view(-1, 2048);  output_27 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_6_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_6_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_11;  to = residual_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_6_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_6_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_7_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_7_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_7_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_7_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_15(torch.nn.Module):
        """Auto-generated (torch.compile/FX) sub-graph isolating the attention
        custom ops for decoder layer 7 ('language_model.model.layers.7.self_attn.attn').

        NOTE(review): this file is compiler output — do not hand-edit the op
        calls; regenerate the graph instead.
        """
        def forward(self, key_23: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_7: "bf16[s18, 8, 128]", query_23: "bf16[s18, 16, 128]", output_31: "bf16[s18, 16, 128]"):
            """Run KV-cache update then attention for layer 7.

            Args (shapes as annotated by the tracer; s18 = number of tokens):
                key_23:    bf16[s18, 8, 128]  — post-RoPE keys (8 KV heads).
                s59:       symbolic alias of s18 (unused in this sub-graph body).
                value_7:   bf16[s18, 8, 128]  — values.
                query_23:  bf16[s18, 16, 128] — post-RoPE queries (16 heads).
                output_31: bf16[s18, 16, 128] — pre-allocated output buffer,
                    written in place by the attention op.

            Returns:
                () — results are delivered via the in-place write to output_31.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Opaque vLLM custom op; presumably appends key/value into this
            # layer's paged KV cache. Returns a bf16[0] dummy tensor used only
            # as a dependency token (see below) — TODO confirm against the op's
            # registration in vllm.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_23, value_7, 'language_model.model.layers.7.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Attention op writing into output_31 in place. Passing the dummy
            # tensor as kv_cache_dummy_dep looks like it exists to force the
            # cache update to be ordered before attention under graph
            # reordering — NOTE(review): verify against the op schema. The
            # trailing `= None` rebinds are tracer-emitted frees that release
            # tensor references as early as possible.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_23, key_23, value_7, output_31, 'language_model.model.layers.7.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_23 = key_23 = value_7 = output_31 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_16(torch.nn.Module):
        def forward(self, output_31: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_13: "bf16[s18, 2048]", l_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_31.view(-1, 2048);  output_31 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_7_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_7_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_13;  to = residual_13 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_7_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_7_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_8_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_8_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_8_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_8_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_17(torch.nn.Module):
        """Auto-generated FX graph partition: the attention kernel call for one
        decoder layer (layer name suffix '...layers.8.self_attn.attn').

        This partition is intentionally opaque to the compiler: it wraps two
        custom vLLM ops that have side effects (KV-cache mutation and an
        in-place write into a preallocated output buffer) and therefore cannot
        be traced/fused like the surrounding pure-tensor code.
        """
        def forward(self, key_26: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_8: "bf16[s18, 8, 128]", query_26: "bf16[s18, 16, 128]", output_35: "bf16[s18, 16, 128]"):
            """Update the KV cache with (key_26, value_8), then run attention.

            Args:
                key_26:    per-token keys, 8 KV heads x 128 head dim.
                s59:       symbolic token count (same symbol as s18); not read
                           in this body — presumably kept so the partition's
                           signature carries the dynamic shape.
                value_8:   per-token values, 8 KV heads x 128 head dim.
                query_26:  per-token queries, 16 heads x 128 head dim.
                output_35: preallocated attention output buffer; the op
                           presumably fills it in place (nothing is returned).

            Returns:
                An empty tuple — all results flow through side effects.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Write this step's K/V into the paged KV cache. The op returns a
            # zero-sized dummy tensor used purely as a dataflow token below.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_26, value_8, 'language_model.model.layers.8.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Run attention into output_35. Passing the dummy token as
            # kv_cache_dummy_dep makes the cache update an explicit dependency,
            # so no graph pass can reorder attention before the cache write.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_26, key_26, value_8, output_35, 'language_model.model.layers.8.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_26 = key_26 = value_8 = output_35 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_18(torch.nn.Module):
        def forward(self, output_35: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_15: "bf16[s18, 2048]", l_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_35.view(-1, 2048);  output_35 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_8_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_8_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_15;  to = residual_15 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_8_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_8_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_9_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_9_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_9_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_9_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_19(torch.nn.Module):
        # Graph-partition submodule for layer 9's attention kernel calls.
        # Both ops below are vLLM custom ops registered under torch.ops.vllm;
        # they operate on the paged KV cache identified by the layer-name string.
        def forward(self, key_29: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_9: "bf16[s18, 8, 128]", query_29: "bf16[s18, 16, 128]", output_39: "bf16[s18, 16, 128]"):
            # Write this step's K/V into the KV cache. The op returns an empty
            # dummy tensor whose only purpose is to create a data dependency so
            # the attention op cannot be reordered before the cache update.
            kv_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_29, value_9, 'language_model.model.layers.9.self_attn.attn')

            # Attention writes its result in-place into output_39; the call's
            # return value carries no data, so nothing is kept.
            torch.ops.vllm.unified_attention_with_output(query_29, key_29, value_9, output_39, 'language_model.model.layers.9.self_attn.attn', kv_cache_dummy_dep = kv_dep)
            return ()
            
    class submod_20(torch.nn.Module):
        def forward(self, output_39: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_17: "bf16[s18, 2048]", l_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_39.view(-1, 2048);  output_39 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_9_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_9_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_17;  to = residual_17 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_9_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_9_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_10_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_10_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_10_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_10_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_21(torch.nn.Module):
        """Attention-core stage for decoder layer 10: KV-cache write + attention.

        NOTE(review): generated by torch.compile / torch.fx — part of a larger
        split graph; `s59` is an unused symbolic-size argument kept for the
        calling convention.
        """

        def forward(self, key_32: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_10: "bf16[s18, 8, 128]", query_32: "bf16[s18, 16, 128]", output_43: "bf16[s18, 16, 128]"):
            # Append this step's key/value tensors to layer 10's KV cache.  The
            # op returns a zero-element tensor used only as an ordering token.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_32, value_10, 'language_model.model.layers.10.self_attn.attn')

            # Run attention, writing the result into output_43 in place; passing
            # the token as kv_cache_dummy_dep orders this after the cache write.
            torch.ops.vllm.unified_attention_with_output(query_32, key_32, value_10, output_43, 'language_model.model.layers.10.self_attn.attn', kv_cache_dummy_dep=cache_dep)
            # Drop references so the intermediates can be freed immediately.
            del query_32, key_32, value_10, output_43, cache_dep
            return ()
            
    class submod_22(torch.nn.Module):
        """Generated FX submodule: layer 10's post-attention path and layer 11's
        pre-attention path.

        Covers, in order: layer-10 o_proj, residual add + post-attention RMSNorm,
        gated SiLU MLP, residual add + layer-11 input RMSNorm, layer-11 QKV
        projection, per-head Q/K RMSNorm, and interleaved mRoPE, ending with the
        tensors handed to the layer-11 attention kernel.

        NOTE(review): auto-generated by torch.compile / torch.fx — do not edit by
        hand.  The `;  x = None` assignments deliberately free intermediates
        early; `s59` and `s7` are symbolic-size arguments that are unused here
        but kept for the split-graph calling convention.
        """

        def forward(self, output_43: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_19: "bf16[s18, 2048]", l_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            """Return (key, value, query, attn_output_buffer, residual) for layer 11.

            All shapes are annotated inline: hidden=2048, head_dim=128, 16 query
            heads, 8 KV heads.  `to_4` is the updated bf16 residual stream that
            downstream submodules consume.
            """
            # --- layer 10: attention output projection -----------------------
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_43.view(-1, 2048);  output_43 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_10_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
            # --- layer 10: residual add + post-attention RMSNorm (fp32 math) --
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_10_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_19;  to = residual_19 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
            # --- layer 10: gated MLP: gate_up_proj -> SiLU-gate -> down_proj --
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_10_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_10_modules_mlp_modules_down_proj_parameters_weight_ = None
            
            # --- layer 11: residual add + input RMSNorm (to_4 is the new residual)
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_11_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
            # --- layer 11: fused QKV projection, split into q (2048) / k / v (1024 each)
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_11_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
            # --- layer 11: per-head RMSNorm on q ------------------------------
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_11_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
            # --- layer 11: per-head RMSNorm on k ------------------------------
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_11_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
            # --- mRoPE: gather cos/sin, interleave the 3 position streams -----
            # (clone/clone_1 start as stream 0 and are overwritten in-place at
            # strided offsets with streams 1 and 2; mrope_section presumably
            # [*, 20, 20] given the 1:60:3 / 2:60:3 slices — TODO confirm)
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
            # --- mRoPE: rotate query (rotary_dim == head_dim, so the "pass"
            # slice getitem_15 is empty) --------------------------------------
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
            # --- mRoPE: rotate key (same cos/sin, 8 KV heads) -----------------
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
            # --- layer 11: allocate attention output buffer, reshape q/k/v to
            # (tokens, heads, head_dim) for the attention kernel ---------------
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_23(torch.nn.Module):
        """Attention-core stage for decoder layer 11: KV-cache write + attention.

        NOTE(review): generated by torch.compile / torch.fx — part of a larger
        split graph; `s59` is an unused symbolic-size argument kept for the
        calling convention.
        """

        def forward(self, key_35: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_11: "bf16[s18, 8, 128]", query_35: "bf16[s18, 16, 128]", output_47: "bf16[s18, 16, 128]"):
            # Append this step's key/value tensors to layer 11's KV cache.  The
            # op returns a zero-element tensor used only as an ordering token.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_35, value_11, 'language_model.model.layers.11.self_attn.attn')

            # Run attention, writing the result into output_47 in place; passing
            # the token as kv_cache_dummy_dep orders this after the cache write.
            torch.ops.vllm.unified_attention_with_output(query_35, key_35, value_11, output_47, 'language_model.model.layers.11.self_attn.attn', kv_cache_dummy_dep=cache_dep)
            # Drop references so the intermediates can be freed immediately.
            del query_35, key_35, value_11, output_47, cache_dep
            return ()
            
    class submod_24(torch.nn.Module):
        """Captured subgraph: tail of decoder layer 11 + head of layer 12.

        Machine-generated (Dynamo-traced) code — do not hand-edit the traced
        ops; statement order and the ``; x = None`` drops encode graph
        liveness.  Computes, in order:

          1. layer-11 attention output projection (o_proj),
          2. fused add-residual + RMSNorm in fp32 (post_attention_layernorm,
             eps=1e-06), producing the new bf16 residual ``to_1``,
          3. layer-11 SwiGLU MLP: gate_up_proj -> SiLU(gate) * up -> down_proj,
          4. fused add-residual + RMSNorm (layer-12 input_layernorm),
             producing residual ``to_4`` that is returned,
          5. layer-12 QKV projection (split 2048/1024/1024) with per-head
             RMSNorm on Q (16 heads x 128) and K (8 heads x 128),
          6. interleaved multi-axis RoPE (M-RoPE) applied to Q and K using the
             shared layer-0 cos/sin cache indexed by the [3, s18] positions,
          7. allocation of an empty bf16 attention output buffer on cuda:0.

        Returns (key, value, query, output_buffer, residual) consumed by the
        attention-core submodule (submod_25) and the following layer tail.
        """

        def forward(self, output_47: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_21: "bf16[s18, 2048]", l_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_47.view(-1, 2048);  output_47 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_11_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_11_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            # RMSNorm is computed in fp32 for numerical stability, then cast back.
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_21;  to = residual_21 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            # to_1 is the updated residual stream (bf16), consumed by the next add below.
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_11_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            # SwiGLU: first 6144 columns are the gate, last 6144 the up-projection.
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_11_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_12_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            # to_4 is the layer-12 residual; it is part of this submodule's return value.
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_12_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            # Fused QKV output: q=2048 (16 heads), k=1024 and v=1024 (8 KV heads each).
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_12_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            # Per-head RMSNorm on Q (normalizes over the 128-dim head axis).
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_12_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            # Per-head RMSNorm on K, mirroring the Q normalization above.
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            # M-RoPE: positions are [3, s18] (one row per rotary axis); the cos/sin
            # cache is shared across layers (owned by layer 0's rotary_emb).
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            # Interleave the three rotary axes into one 64-dim table: start from
            # axis 0, then overwrite strided slices (step 3) with axes 1 and 2.
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            # In-place slice assignment traced as setitem; result names dropped at once.
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            # Same interleaving for the sin table.
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            # rotary_dim == head_size (128) here, so query_pass is empty ([..., 16, 0]).
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            # Half-split (non-interleaved / NeoX-style) rotation on Q.
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            # Same rotation applied to K; cos/sin tables are consumed for the last time.
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            # Uninitialized output buffer; filled in-place by the attention op in the
            # next submodule (submod_25).
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_25(torch.nn.Module):
        """Attention-core subgraph for decoder layer 12 (side effects only).

        Machine-generated (Dynamo-traced) code — do not hand-edit the ops.
        Writes key/value into the KV cache for
        'language_model.model.layers.12.self_attn.attn', then runs unified
        attention, storing the result in-place into ``output_51``.  Returns
        an empty tuple: all results flow through the mutated buffers.
        """

        def forward(self, key_38: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_12: "bf16[s18, 8, 128]", query_38: "bf16[s18, 16, 128]", output_51: "bf16[s18, 16, 128]"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # The zero-sized "bf16[0]" return is a dummy dependency token, threaded
            # into the attention op below to pin the cache-update-before-attention order.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_38, value_12, 'language_model.model.layers.12.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Attention result is written in-place into output_51; all names are
            # dropped (= None) immediately since nothing is returned by value.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_38, key_38, value_12, output_51, 'language_model.model.layers.12.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_38 = key_38 = value_12 = output_51 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_26(torch.nn.Module):
        def forward(self, output_51: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_23: "bf16[s18, 2048]", l_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_51.view(-1, 2048);  output_51 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_12_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_12_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_23;  to = residual_23 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_12_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_12_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_13_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_13_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_13_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_13_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_27(torch.nn.Module):
        def forward(self, key_41: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_13: "bf16[s18, 8, 128]", query_41: "bf16[s18, 16, 128]", output_55: "bf16[s18, 16, 128]"):
            """Layer-13 attention submodule (generated graph partition).

            Writes this step's key/value tensors into the paged KV cache, then
            runs unified attention which fills ``output_55`` in place. Returns
            an empty tuple: all results flow out through the in-place write.
            """
            # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Custom vLLM op: append K/V for layer 13 to the KV cache. The
            # zero-sized return tensor exists only as a scheduling dependency.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_41, value_13, 'language_model.model.layers.13.self_attn.attn'
            )

            # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Custom vLLM op: attention with in-place output. Passing the dummy
            # dep orders the cache update before the attention read.
            torch.ops.vllm.unified_attention_with_output(
                query_41, key_41, value_13, output_55,
                'language_model.model.layers.13.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )

            # Release local references eagerly, matching the generated graph's
            # aggressive memory-freeing convention.
            query_41 = key_41 = value_13 = output_55 = cache_dep = None
            return ()
            
    class submod_28(torch.nn.Module):
        def forward(self, output_55: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_25: "bf16[s18, 2048]", l_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_55.view(-1, 2048);  output_55 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_13_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_13_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_25;  to = residual_25 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_13_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_13_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_14_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_14_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_14_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_14_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_29(torch.nn.Module):
        """Attention side-effect submodule for layer 14 (generated FX graph).

        Splits out the two vLLM custom ops that mutate external state: the KV
        cache update and the attention kernel, which writes its result into the
        pre-allocated ``output_59`` buffer in place. Returns an empty tuple —
        callers read the attention result from ``output_59``, not from here.
        """
        def forward(self, key_44: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_14: "bf16[s18, 8, 128]", query_44: "bf16[s18, 16, 128]", output_59: "bf16[s18, 16, 128]"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Store this layer's new key/value tensors into the KV cache.
            # The bf16[0] result is a dummy dependency token, threaded into the
            # attention call below so the graph keeps the update ordered first.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_44, value_14, 'language_model.model.layers.14.self_attn.attn')

             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Run attention; the kernel fills output_59 in place, so the call's
            # return value is discarded.
            torch.ops.vllm.unified_attention_with_output(query_44, key_44, value_14, output_59, 'language_model.model.layers.14.self_attn.attn', kv_cache_dummy_dep = cache_dep)
            # Drop local references eagerly (mirrors the generated graph's
            # `= None` clearing) so tensor memory can be reclaimed sooner.
            del query_44, key_44, value_14, output_59, cache_dep
            return ()
            
    class submod_30(torch.nn.Module):
        def forward(self, output_59: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_27: "bf16[s18, 2048]", l_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_59.view(-1, 2048);  output_59 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_14_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_14_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_27;  to = residual_27 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_14_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_14_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_15_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_15_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_15_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_15_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_31(torch.nn.Module):
        """FX-captured attention step for layer 15 (Dynamo auto-generated).

        Invokes two vLLM custom ops whose effects are purely side effects:

        1. ``unified_kv_cache_update`` — presumably stages ``key_47`` /
           ``value_15`` into the layer's KV cache (returns a zero-length
           ``bf16`` dependency tensor) — TODO confirm against the vLLM op
           registration.
        2. ``unified_attention_with_output`` — computes attention and writes
           its result into the preallocated ``output_63`` buffer in place.

        Returns an empty tuple; callers read the result from ``output_63``.
        """

        def forward(self, key_47: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_15: "bf16[s18, 8, 128]", query_47: "bf16[s18, 16, 128]", output_63: "bf16[s18, 16, 128]"):
            # Stage the new K/V first. The returned bf16[0] tensor is threaded
            # into the attention op as an explicit dependency so the two calls
            # cannot be reordered by graph transformations.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_47,
                value_15,
                'language_model.model.layers.15.self_attn.attn',
            )

            # Attention kernel fills `output_63` in place; its return value is
            # discarded (the generated code never used it either).
            torch.ops.vllm.unified_attention_with_output(
                query_47,
                key_47,
                value_15,
                output_63,
                'language_model.model.layers.15.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )

            # Clear local references eagerly, mirroring the generated graph's
            # early-release convention.
            key_47 = value_15 = query_47 = output_63 = cache_dep = None
            return ()
            
    class submod_32(torch.nn.Module):
        def forward(self, output_63: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_29: "bf16[s18, 2048]", l_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_63.view(-1, 2048);  output_63 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_15_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_15_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_29;  to = residual_29 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_15_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_15_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_16_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_16_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_16_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_16_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_33(torch.nn.Module):
        """Auto-generated (torch.compile/Dynamo) graph partition for the attention
        custom-op region of decoder layer 16.

        This submodule contains only the two opaque vLLM custom ops that must not
        be traced through: the KV-cache write and the fused attention kernel. Both
        operate via side effects (the attention result is written in place into
        ``output_67``), so this forward returns an empty tuple.
        """
        def forward(self, key_50: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_16: "bf16[s18, 8, 128]", query_50: "bf16[s18, 16, 128]", output_67: "bf16[s18, 16, 128]"):
            """Update the KV cache with (key_50, value_16), then run unified
            attention for layer 16, writing the result into ``output_67`` in place.

            ``s59`` is an unused symbolic size argument carried by the partitioner
            (same symbol as s18). The ``; x = None`` assignments are tracer-emitted
            and release references as early as possible — keep them intact.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Side-effecting cache write; returns a 0-length dummy tensor used only
            # to express a data dependency so the attention op is ordered after it.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_50, value_16, 'language_model.model.layers.16.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Writes attention output into output_67 in place (no return value);
            # kv_cache_dummy_dep forces ordering after the cache update above.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_50, key_50, value_16, output_67, 'language_model.model.layers.16.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_50 = key_50 = value_16 = output_67 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_34(torch.nn.Module):
        def forward(self, output_67: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_31: "bf16[s18, 2048]", l_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_67.view(-1, 2048);  output_67 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_16_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_16_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_31;  to = residual_31 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_16_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_16_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_17_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_17_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_17_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_17_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_35(torch.nn.Module):
        """Graph partition for layer 17's attention: write K/V into the paged
        KV cache, then run fused attention with the output written in place."""

        def forward(self, key_53: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_17: "bf16[s18, 8, 128]", query_53: "bf16[s18, 16, 128]", output_71: "bf16[s18, 16, 128]"):
            # Persist this step's key/value tokens into the KV cache for
            # 'language_model.model.layers.17.self_attn.attn'. The returned
            # zero-length tensor exists only to express an ordering
            # dependency between the cache write and the attention kernel.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_53,
                value_17,
                'language_model.model.layers.17.self_attn.attn',
            )

            # Attention writes its result directly into output_71 (mutating
            # op), so nothing needs to be returned from this partition. The
            # kv_cache_dummy_dep kwarg forces the cache update to happen
            # before the attention kernel reads the cache.
            torch.ops.vllm.unified_attention_with_output(
                query_53,
                key_53,
                value_17,
                output_71,
                'language_model.model.layers.17.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )
            return ()
            
    class submod_36(torch.nn.Module):
        def forward(self, output_71: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_33: "bf16[s18, 2048]", l_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_71.view(-1, 2048);  output_71 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_17_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_17_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_33;  to = residual_33 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_17_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_17_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_18_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_18_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_18_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_18_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_37(torch.nn.Module):
        """Captured attention submodule for decoder layer 18.

        Persists the rotated key/value tensors into the paged KV cache,
        then runs unified attention, writing the result in place into the
        pre-allocated ``output_75`` buffer. Returns an empty tuple — the
        attention result is communicated solely through ``output_75``.
        """

        def forward(self, key_56: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_18: "bf16[s18, 8, 128]", query_56: "bf16[s18, 16, 128]", output_75: "bf16[s18, 16, 128]"):
            # Store K/V for layer 18 into the cache. The zero-sized result
            # exists only to order the cache update before the attention op.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_56, value_18, 'language_model.model.layers.18.self_attn.attn')
            # Attention writes its result into output_75 in place; the
            # dummy dependency pins the cache update ahead of this call.
            torch.ops.vllm.unified_attention_with_output(
                query_56, key_56, value_18, output_75,
                'language_model.model.layers.18.self_attn.attn',
                kv_cache_dummy_dep=cache_dep)
            return ()
            
    class submod_38(torch.nn.Module):
        """Auto-generated FX graph segment (do not hand-edit the op sequence).

        Covers the compute between the layer-18 and layer-19 attention calls:
        layer-18 attention output projection, residual-add RMSNorm, the
        layer-18 SwiGLU MLP, then layer-19 input RMSNorm, fused QKV
        projection, per-head q/k RMSNorm, and interleaved M-RoPE. Emits the
        reshaped (key, value, query) tensors, an empty attention-output
        buffer, and the updated bf16 residual for the next segment.
        Statement order and the ``;  x = None`` rebindings are significant
        (they release intermediates early); comments record the original
        vLLM source line each op was traced from.
        """

        def forward(self, output_75: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_35: "bf16[s18, 2048]", l_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            """Return (key, value, query, output buffer, residual) feeding layer-19 attention."""
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_75.view(-1, 2048);  output_75 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_18_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_18_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_35;  to = residual_35 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_18_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_18_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_19_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_19_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_19_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_19_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_39(torch.nn.Module):
        def forward(self, key_59: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_19: "bf16[s18, 8, 128]", query_59: "bf16[s18, 16, 128]", output_79: "bf16[s18, 16, 128]"):
            """Attention side-effect submodule for decoder layer 19.

            Invokes the vLLM custom ops that (1) push this step's key/value
            tensors into the KV cache and (2) run attention, writing the
            result in-place into ``output_79``. Returns an empty tuple; all
            results flow through the mutated ``output_79`` buffer.
            """
            # The cache-update op returns a zero-sized dummy tensor that is
            # threaded into the attention op as an explicit ordering
            # dependency (cache write must happen before attention reads it).
            kv_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_59, value_19, 'language_model.model.layers.19.self_attn.attn'
            )
            torch.ops.vllm.unified_attention_with_output(
                query_59, key_59, value_19, output_79,
                'language_model.model.layers.19.self_attn.attn',
                kv_cache_dummy_dep=kv_dep,
            )
            return ()
            
    class submod_40(torch.nn.Module):
        def forward(self, output_79: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_37: "bf16[s18, 2048]", l_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            """Traced span covering layer 19's o_proj + RMSNorm + SwiGLU MLP and
            layer 20's input RMSNorm, QKV projection, per-head q/k RMSNorm and
            interleaved M-RoPE, ending at the attention-input views.

            Returns (key, value, query, output-buffer, residual) for the next
            attention submodule. Statement order and the explicit ``x = None``
            releases are part of the traced schedule — do not reorder.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_79.view(-1, 2048);  output_79 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_19_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_19_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_37;  to = residual_37 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_19_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_19_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_20_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_20_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_20_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_20_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            # FIX: the dump referenced a bare `device(...)`, which is a NameError
            # here since only `torch` is imported; qualify it as `torch.device`.
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = torch.device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_41(torch.nn.Module):
        def forward(self, key_62: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_20: "bf16[s18, 8, 128]", query_62: "bf16[s18, 16, 128]", output_83: "bf16[s18, 16, 128]"):
            """Attention side-effect submodule for decoder layer 20.

            Invokes the vLLM custom ops that (1) push this step's key/value
            tensors into the KV cache and (2) run attention, writing the
            result in-place into ``output_83``. Returns an empty tuple; all
            results flow through the mutated ``output_83`` buffer.
            """
            # The cache-update op returns a zero-sized dummy tensor that is
            # threaded into the attention op as an explicit ordering
            # dependency (cache write must happen before attention reads it).
            kv_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_62, value_20, 'language_model.model.layers.20.self_attn.attn'
            )
            torch.ops.vllm.unified_attention_with_output(
                query_62, key_62, value_20, output_83,
                'language_model.model.layers.20.self_attn.attn',
                kv_cache_dummy_dep=kv_dep,
            )
            return ()
            
    class submod_42(torch.nn.Module):
        def forward(self, output_83: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_39: "bf16[s18, 2048]", l_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            """Dynamo-captured graph segment between two attention kernels.

            Takes layer 20's attention output (``output_83``, per-head layout)
            and finishes layer 20: o_proj, fused add+RMSNorm with
            ``residual_39`` (computed in fp32, stored back as bf16), and the
            SwiGLU MLP. It then begins layer 21: input RMSNorm, fused QKV
            projection split into q/k/v, per-head Q/K RMSNorm, interleaved
            multimodal RoPE driven by the 3-row ``l_positions_`` index into the
            shared cos/sin cache, and allocation of the attention output
            buffer.

            Returns ``(key, value, query, output, residual)`` — the first four
            reshaped to ``[-1, num_heads, head_size]`` for the next attention
            kernel, plus the bf16 residual carried to the following segment.

            NOTE(review): this is machine-generated code (TorchDynamo
            ``print_readable`` dump); the trailing ``;  x = None`` assignments
            are the generator's explicit deallocations and must be preserved
            if this dump is re-executed.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_83.view(-1, 2048);  output_83 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_20_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_20_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_39;  to = residual_39 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_20_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_20_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_21_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_21_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_21_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_21_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
             # FIX(review): the dump emitted the bare name `device(...)` (the repr of
             # torch.device); with only `import torch` in scope that is a NameError,
             # so it is qualified as torch.device here. Value is unchanged.
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = torch.device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_43(torch.nn.Module):
        """Attention dispatch for decoder layer 21.

        Stages the new key/value tokens via the vLLM KV-cache update op, then
        invokes unified attention, which receives ``output_87`` as its output
        buffer (nothing is returned from this submodule).
        """

        def forward(self, key_65: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_21: "bf16[s18, 8, 128]", query_65: "bf16[s18, 16, 128]", output_87: "bf16[s18, 16, 128]"):
            # Write k/v into the cache for layer 21's attention. The op returns
            # a zero-size bf16 tensor that only serves as an ordering token.
            kv_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_65,
                value_21,
                'language_model.model.layers.21.self_attn.attn',
            )
            # Run attention; kv_cache_dummy_dep sequences this call after the
            # cache update above. Its return value is intentionally discarded.
            torch.ops.vllm.unified_attention_with_output(
                query_65,
                key_65,
                value_21,
                output_87,
                'language_model.model.layers.21.self_attn.attn',
                kv_cache_dummy_dep=kv_dep,
            )
            return ()
            
    class submod_44(torch.nn.Module):
        def forward(self, output_87: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_41: "bf16[s18, 2048]", l_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_87.view(-1, 2048);  output_87 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_21_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_21_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_41;  to = residual_41 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_21_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_21_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_22_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_22_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_22_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_22_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_45(torch.nn.Module):
        def forward(self, key_68: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_22: "bf16[s18, 8, 128]", query_68: "bf16[s18, 16, 128]", output_91: "bf16[s18, 16, 128]"):
            """Attention submodule for transformer layer 22.

            Updates the KV cache with this step's keys/values, then runs the
            unified attention custom op, which presumably writes its result
            into ``output_91`` in place (nothing is returned to the caller).
            """
            # Write K/V for layer 22 into the cache. The zero-sized bf16
            # tensor returned here exists only to encode a data dependency
            # so the attention op below is ordered after the cache update.
            # (Originally: vllm attention.py:453, unified_kv_cache_update.)
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_68,
                value_22,
                'language_model.model.layers.22.self_attn.attn',
            )
            # Run attention; output_91 is the pre-allocated destination
            # buffer. (Originally: vllm attention.py:456,
            # unified_attention_with_output.)
            torch.ops.vllm.unified_attention_with_output(
                query_68,
                key_68,
                value_22,
                output_91,
                'language_model.model.layers.22.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )
            return ()
            
    class submod_46(torch.nn.Module):
        def forward(self, output_91: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_43: "bf16[s18, 2048]", l_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_91.view(-1, 2048);  output_91 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_22_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_22_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_43;  to = residual_43 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_22_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_22_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_23_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_23_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_23_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_23_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_47(torch.nn.Module):
        """Attention side-effect partition for layer 23 (compiler-generated).

        Publishes this step's K/V into the paged KV cache, then runs the
        fused attention kernel, which fills ``output_95`` in place. Returns
        an empty tuple: every result is delivered through side effects.
        """

        def forward(self, key_71: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_23: "bf16[s18, 8, 128]", query_71: "bf16[s18, 16, 128]", output_95: "bf16[s18, 16, 128]"):
            # vllm attention.py:453 — write key/value for this token batch into
            # the KV cache of 'language_model.model.layers.23.self_attn.attn'.
            # The op returns a zero-sized dummy tensor used purely as a
            # data dependency to order the two custom ops.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_71, value_23, 'language_model.model.layers.23.self_attn.attn'
            )

            # vllm attention.py:456 — attention kernel; result is written into
            # output_95 in place. Passing cache_dep as kv_cache_dummy_dep
            # guarantees the cache update is scheduled first.
            attn_call = torch.ops.vllm.unified_attention_with_output(
                query_71, key_71, value_23, output_95,
                'language_model.model.layers.23.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )

            # Drop all references eagerly, mirroring the FX graph's memory plan.
            query_71 = key_71 = value_23 = output_95 = cache_dep = attn_call = None
            return ()
            
    class submod_48(torch.nn.Module):
        def forward(self, output_95: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_45: "bf16[s18, 2048]", l_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_95.view(-1, 2048);  output_95 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_23_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_23_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_45;  to = residual_45 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_23_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_23_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_24_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_24_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_24_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_24_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_49(torch.nn.Module):
        # Compiler-emitted (torch.compile / FX) attention submodule for decoder
        # layer 24 of the traced Qwen3 model: it wraps exactly the two vLLM
        # custom ops that (1) write this step's K/V into the KV cache and
        # (2) run attention into a pre-allocated output buffer.
        def forward(self, key_74: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_24: "bf16[s18, 8, 128]", query_74: "bf16[s18, 16, 128]", output_99: "bf16[s18, 16, 128]"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # Store key/value for layer 24 into the KV cache (identified by the
            # layer-name string). The bf16[0] return value carries no data — it
            # is a dependency token consumed below to order the two ops.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_74, value_24, 'language_model.model.layers.24.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # Attention op takes the output buffer (output_99) as an argument and
            # returns nothing useful here, so it presumably writes its result
            # in place — TODO confirm against vllm's op schema. The
            # kv_cache_dummy_dep kwarg threads the token from the cache-update op
            # so the compiler cannot reorder attention before the cache write.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_74, key_74, value_24, output_99, 'language_model.model.layers.24.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_74 = key_74 = value_24 = output_99 = unified_kv_cache_update = unified_attention_with_output = None
            # No tensor outputs: the result lives in the caller-provided
            # output_99 buffer (`= None` drops local refs, matching FX codegen).
            return ()
            
    class submod_50(torch.nn.Module):
        def forward(self, output_99: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_47: "bf16[s18, 2048]", l_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_99.view(-1, 2048);  output_99 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_24_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_24_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_47;  to = residual_47 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_24_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_24_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_25_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_25_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_25_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_25_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_51(torch.nn.Module):
        """Attention dispatch step for layer 25 (captured subgraph).

        Writes this step's key/value tensors into the paged KV cache, then runs
        the fused attention kernel, which fills ``output_103`` in place. Returns
        an empty tuple: all results flow out through the mutated output buffer.
        """

        def forward(self, key_77: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_25: "bf16[s18, 8, 128]", query_77: "bf16[s18, 16, 128]", output_103: "bf16[s18, 16, 128]"):
            # Originally: vllm/model_executor/layers/attention/attention.py:453
            # (kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(...)).
            # The op returns a zero-length tensor used purely as a scheduling
            # token so the attention call below cannot be reordered before the
            # cache write.
            cache_dep: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(
                key_77,
                value_25,
                'language_model.model.layers.25.self_attn.attn',
            )

            # Originally: vllm/model_executor/layers/attention/attention.py:456
            # (torch.ops.vllm.unified_attention_with_output(...)).
            # Mutating op: attention results are written into output_103;
            # the kv_cache_dummy_dep kwarg carries the ordering dependency.
            torch.ops.vllm.unified_attention_with_output(
                query_77,
                key_77,
                value_25,
                output_103,
                'language_model.model.layers.25.self_attn.attn',
                kv_cache_dummy_dep=cache_dep,
            )
            return ()
            
    class submod_52(torch.nn.Module):
        def forward(self, output_103: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_49: "bf16[s18, 2048]", l_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_103.view(-1, 2048);  output_103 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_25_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_25_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_49;  to = residual_49 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_25_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_25_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_26_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_26_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_26_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_26_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_53(torch.nn.Module):
        """Auto-generated torch.fx subgraph (Dynamo capture) — do not hand-edit.

        Runs the paged-KV-cache update followed by the fused attention call for
        decoder layer 26 of the language model, via vLLM custom ops.
        """

        def forward(self, key_80: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_26: "bf16[s18, 8, 128]", query_80: "bf16[s18, 16, 128]", output_107: "bf16[s18, 16, 128]"):
            """Update the KV cache with (key_80, value_26), then run attention.

            The attention op appears to write its result in-place into
            ``output_107`` (this subgraph returns ``()`` and ``output_107`` is a
            pre-allocated buffer passed in by the caller) — NOTE(review):
            confirm against the vLLM ``unified_attention_with_output`` op
            definition.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            # The zero-size return tensor is threaded into the attention op as
            # kv_cache_dummy_dep, forcing the cache update to be ordered before
            # the attention kernel in the compiled graph.
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_80, value_26, 'language_model.model.layers.26.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_80, key_80, value_26, output_107, 'language_model.model.layers.26.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_80 = key_80 = value_26 = output_107 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_54(torch.nn.Module):
        """Auto-generated torch.fx subgraph (Dynamo capture) — do not hand-edit.

        Covers the tail of decoder layer 26 (attention output projection,
        post-attention RMSNorm with residual add, SiLU-gated MLP) and the
        prologue of decoder layer 27 (input RMSNorm, fused QKV projection,
        per-head Q/K RMSNorm, interleaved multi-axis RoPE, and the reshapes
        feeding the attention op).

        Returns ``(key, value, query, output_buffer, residual)`` where
        ``output_buffer`` is a freshly allocated (uninitialized) tensor for the
        attention result and ``residual`` is the bf16 residual stream after
        layer 26's MLP add.
        """

        def forward(self, output_107: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_51: "bf16[s18, 2048]", l_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_: "bf16[2048]", l_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_: "bf16[4096, 2048]", s18: "Sym(s18)", l_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_: "bf16[128]", l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_: "bf16[262144, 128]", l_positions_: "i64[3, s18]", s7: "Sym(s7)"):
            # ---- Layer 26: attention output projection ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_107.view(-1, 2048);  output_107 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_26_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
            # ---- Layer 26: post-attention RMSNorm (fp32) with residual add ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_26_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_51;  to = residual_51 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
            # ---- Layer 26: SiLU-gated MLP (gate_up_proj -> silu*gate -> down_proj) ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_26_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_26_modules_mlp_modules_down_proj_parameters_weight_ = None
            
            # ---- Layer 27: input RMSNorm with residual add; to_4 is the new residual ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_);  l_self_modules_layers_modules_27_modules_input_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            
            # ---- Layer 27: fused QKV projection, split into q (16 heads) / k,v (8 KV heads) ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_3: "bf16[s18, 4096]" = torch._C._nn.linear(mul_4, l_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_, None);  mul_4 = l_self_modules_layers_modules_27_modules_self_attn_modules_qkv_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:151 in forward, code: q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
            split = linear_3.split([2048, 1024, 1024], dim = -1);  linear_3 = None
            getitem_2: "bf16[s18, 2048]" = split[0]
            getitem_3: "bf16[s18, 1024]" = split[1]
            getitem_4: "bf16[s18, 1024]" = split[2];  split = None
            
            # ---- Layer 27: per-head RMSNorm on Q ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:153 in forward, code: q_by_head = q.view(*q.shape[:-1], q.shape[-1] // self.head_dim, self.head_dim)
            view_1: "bf16[s18, 16, 128]" = getitem_2.view(s18, 16, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_2: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_);  l_self_modules_layers_modules_27_modules_self_attn_modules_q_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_6: "f32[s18, 16, 128]" = view_1.to(torch.float32);  view_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_3: "f32[s18, 16, 128]" = to_6.pow(2)
            mean_2: "f32[s18, 16, 1]" = pow_3.mean(dim = -1, keepdim = True);  pow_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_4: "f32[s18, 16, 1]" = mean_2 + 1e-06;  mean_2 = None
            rsqrt_2: "f32[s18, 16, 1]" = torch.rsqrt(add_4);  add_4 = None
            mul_5: "f32[s18, 16, 128]" = to_6 * rsqrt_2;  to_6 = rsqrt_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_7: "bf16[s18, 16, 128]" = mul_5.to(torch.bfloat16);  mul_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_6: "bf16[s18, 16, 128]" = to_7 * _get_data_attr_2;  to_7 = _get_data_attr_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:155 in forward, code: q = q_by_head.view(q.shape)
            size = getitem_2.size();  getitem_2 = None
            view_2: "bf16[s18, 2048]" = mul_6.view(size);  mul_6 = size = None
            
            # ---- Layer 27: per-head RMSNorm on K ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:156 in forward, code: k_by_head = k.view(*k.shape[:-1], k.shape[-1] // self.head_dim, self.head_dim)
            view_3: "bf16[s18, 8, 128]" = getitem_3.view(s18, 8, 128)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_3: "bf16[128]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_);  l_self_modules_layers_modules_27_modules_self_attn_modules_k_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_8: "f32[s18, 8, 128]" = view_3.to(torch.float32);  view_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_4: "f32[s18, 8, 128]" = to_8.pow(2)
            mean_3: "f32[s18, 8, 1]" = pow_4.mean(dim = -1, keepdim = True);  pow_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_5: "f32[s18, 8, 1]" = mean_3 + 1e-06;  mean_3 = None
            rsqrt_3: "f32[s18, 8, 1]" = torch.rsqrt(add_5);  add_5 = None
            mul_7: "f32[s18, 8, 128]" = to_8 * rsqrt_3;  to_8 = rsqrt_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_9: "bf16[s18, 8, 128]" = mul_7.to(torch.bfloat16);  mul_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_8: "bf16[s18, 8, 128]" = to_9 * _get_data_attr_3;  to_9 = _get_data_attr_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/qwen3.py:158 in forward, code: k = k_by_head.view(k.shape)
            size_1 = getitem_3.size();  getitem_3 = None
            view_4: "bf16[s18, 1024]" = mul_8.view(size_1);  mul_8 = size_1 = None
            
            # ---- Layer 27: M-RoPE — gather cos/sin from the shared (layer-0) cache,
            # then interleave the three position axes into one cos and one sin table.
            # Slices use mrope_section baked in as constants (stop=60, step=3). ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:282 in forward_native, code: cos_sin = cos_sin_cache[positions]
            getitem_5: "bf16[3, s18, 128]" = l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_[l_positions_];  l_self_modules_layers_modules_0_modules_self_attn_modules_rotary_emb_buffers_cos_sin_cache_ = l_positions_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:283 in forward_native, code: cos, sin = cos_sin.chunk(2, dim=-1)
            chunk = getitem_5.chunk(2, dim = -1);  getitem_5 = None
            getitem_6: "bf16[3, s18, 64]" = chunk[0]
            getitem_7: "bf16[3, s18, 64]" = chunk[1];  chunk = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_8: "bf16[s18, 64]" = getitem_6[0]
            clone: "bf16[s18, 64]" = getitem_8.clone();  getitem_8 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_9: "bf16[s18, 20]" = getitem_6[(1, Ellipsis, slice(1, 60, 3))]
            clone[(Ellipsis, slice(1, 60, 3))] = getitem_9;  setitem = clone;  getitem_9 = setitem = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_10: "bf16[s18, 20]" = getitem_6[(2, Ellipsis, slice(2, 60, 3))];  getitem_6 = None
            clone[(Ellipsis, slice(2, 60, 3))] = getitem_10;  setitem_1 = clone;  getitem_10 = setitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:195 in apply_interleaved_rope, code: x_t = x[0].clone()
            getitem_11: "bf16[s18, 64]" = getitem_7[0]
            clone_1: "bf16[s18, 64]" = getitem_11.clone();  getitem_11 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:196 in apply_interleaved_rope, code: x_t[..., 1 : mrope_section[1] * 3 : 3] = x[1, ..., 1 : mrope_section[1] * 3 : 3]
            getitem_12: "bf16[s18, 20]" = getitem_7[(1, Ellipsis, slice(1, 60, 3))]
            clone_1[(Ellipsis, slice(1, 60, 3))] = getitem_12;  setitem_2 = clone_1;  getitem_12 = setitem_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:197 in apply_interleaved_rope, code: x_t[..., 2 : mrope_section[2] * 3 : 3] = x[2, ..., 2 : mrope_section[2] * 3 : 3]
            getitem_13: "bf16[s18, 20]" = getitem_7[(2, Ellipsis, slice(2, 60, 3))];  getitem_7 = None
            clone_1[(Ellipsis, slice(2, 60, 3))] = getitem_13;  setitem_3 = clone_1;  getitem_13 = setitem_3 = None
            
            # ---- Apply rotary embedding to the query (rotary_dim == head_size == 128,
            # so the "pass" slice is empty) ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:299 in forward_native, code: query_shape = query.shape
            size_2 = view_2.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:300 in forward_native, code: query = query.view(num_tokens, -1, self.head_size)
            view_5: "bf16[s18, 16, 128]" = view_2.view(s18, -1, 128);  view_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:301 in forward_native, code: query_rot = query[..., : self.rotary_dim]
            getitem_14: "bf16[s18, 16, 128]" = view_5[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:302 in forward_native, code: query_pass = query[..., self.rotary_dim :]
            getitem_15: "bf16[s18, 16, 0]" = view_5[(Ellipsis, slice(128, None, None))];  view_5 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze: "bf16[s18, 1, 64]" = clone.unsqueeze(-2)
            to_10: "bf16[s18, 1, 64]" = unsqueeze.to(torch.bfloat16);  unsqueeze = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_1: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2)
            to_11: "bf16[s18, 1, 64]" = unsqueeze_1.to(torch.bfloat16);  unsqueeze_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_1 = torch.chunk(getitem_14, 2, dim = -1);  getitem_14 = None
            getitem_16: "bf16[s18, 16, 64]" = chunk_1[0]
            getitem_17: "bf16[s18, 16, 64]" = chunk_1[1];  chunk_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_9: "bf16[s18, 16, 64]" = getitem_16 * to_10
            mul_10: "bf16[s18, 16, 64]" = getitem_17 * to_11
            sub: "bf16[s18, 16, 64]" = mul_9 - mul_10;  mul_9 = mul_10 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_11: "bf16[s18, 16, 64]" = getitem_17 * to_10;  getitem_17 = to_10 = None
            mul_12: "bf16[s18, 16, 64]" = getitem_16 * to_11;  getitem_16 = to_11 = None
            add_6: "bf16[s18, 16, 64]" = mul_11 + mul_12;  mul_11 = mul_12 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat: "bf16[s18, 16, 128]" = torch.cat((sub, add_6), dim = -1);  sub = add_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:308 in forward_native, code: query = torch.cat((query_rot, query_pass), dim=-1).reshape(query_shape)
            cat_1: "bf16[s18, 16, 128]" = torch.cat((cat, getitem_15), dim = -1);  cat = getitem_15 = None
            reshape: "bf16[s18, 2048]" = cat_1.reshape(size_2);  cat_1 = size_2 = None
            
            # ---- Apply rotary embedding to the key (same cos/sin tables) ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:310 in forward_native, code: key_shape = key.shape
            size_3 = view_4.size()
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:311 in forward_native, code: key = key.view(num_tokens, -1, self.head_size)
            view_6: "bf16[s18, 8, 128]" = view_4.view(s18, -1, 128);  view_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:312 in forward_native, code: key_rot = key[..., : self.rotary_dim]
            getitem_18: "bf16[s18, 8, 128]" = view_6[(Ellipsis, slice(None, 128, None))]
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:313 in forward_native, code: key_pass = key[..., self.rotary_dim :]
            getitem_19: "bf16[s18, 8, 0]" = view_6[(Ellipsis, slice(128, None, None))];  view_6 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:163 in forward_static, code: cos = cos.unsqueeze(-2).to(x.dtype)
            unsqueeze_2: "bf16[s18, 1, 64]" = clone.unsqueeze(-2);  clone = None
            to_12: "bf16[s18, 1, 64]" = unsqueeze_2.to(torch.bfloat16);  unsqueeze_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:164 in forward_static, code: sin = sin.unsqueeze(-2).to(x.dtype)
            unsqueeze_3: "bf16[s18, 1, 64]" = clone_1.unsqueeze(-2);  clone_1 = None
            to_13: "bf16[s18, 1, 64]" = unsqueeze_3.to(torch.bfloat16);  unsqueeze_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:167 in forward_static, code: x1, x2 = torch.chunk(x, 2, dim=-1)
            chunk_2 = torch.chunk(getitem_18, 2, dim = -1);  getitem_18 = None
            getitem_20: "bf16[s18, 8, 64]" = chunk_2[0]
            getitem_21: "bf16[s18, 8, 64]" = chunk_2[1];  chunk_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:172 in forward_static, code: o1 = x1 * cos - x2 * sin
            mul_13: "bf16[s18, 8, 64]" = getitem_20 * to_12
            mul_14: "bf16[s18, 8, 64]" = getitem_21 * to_13
            sub_1: "bf16[s18, 8, 64]" = mul_13 - mul_14;  mul_13 = mul_14 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:173 in forward_static, code: o2 = x2 * cos + x1 * sin
            mul_15: "bf16[s18, 8, 64]" = getitem_21 * to_12;  getitem_21 = to_12 = None
            mul_16: "bf16[s18, 8, 64]" = getitem_20 * to_13;  getitem_20 = to_13 = None
            add_7: "bf16[s18, 8, 64]" = mul_15 + mul_16;  mul_15 = mul_16 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/common.py:176 in forward_static, code: output = torch.cat((o1, o2), dim=-1)
            cat_2: "bf16[s18, 8, 128]" = torch.cat((sub_1, add_7), dim = -1);  sub_1 = add_7 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/rotary_embedding/mrope.py:319 in forward_native, code: key = torch.cat((key_rot, key_pass), dim=-1).reshape(key_shape)
            cat_3: "bf16[s18, 8, 128]" = torch.cat((cat_2, getitem_19), dim = -1);  cat_2 = getitem_19 = None
            reshape_1: "bf16[s18, 1024]" = cat_3.reshape(size_3);  cat_3 = size_3 = None
            
            # ---- Allocate the attention output buffer and reshape q/k/v to
            # (tokens, heads, head_size) for the attention op ----
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:414 in forward, code: output = torch.empty(output_shape, dtype=output_dtype, device=query.device)
            size_4 = torch.Size([s18, 2048]);  s18 = None
            empty: "bf16[s18, 2048]" = torch.empty(size_4, dtype = torch.bfloat16, device = device(type='cuda', index=0));  size_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:419 in forward, code: query = query.view(-1, self.num_heads, self.head_size)
            view_7: "bf16[s18, 16, 128]" = reshape.view(-1, 16, 128);  reshape = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:420 in forward, code: output = output.view(-1, self.num_heads, self.head_size_v)
            view_8: "bf16[s18, 16, 128]" = empty.view(-1, 16, 128);  empty = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:422 in forward, code: key = key.view(-1, self.num_kv_heads, self.head_size)
            view_9: "bf16[s18, 8, 128]" = reshape_1.view(-1, 8, 128);  reshape_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:424 in forward, code: value = value.view(-1, self.num_kv_heads, self.head_size_v)
            view_10: "bf16[s18, 8, 128]" = getitem_4.view(-1, 8, 128);  getitem_4 = None
            return (view_9, view_10, view_7, view_8, to_4)
            
    class submod_55(torch.nn.Module):
        """Dynamo-captured subgraph wrapping vLLM's custom attention ops for
        decoder layer 27 (``language_model.model.layers.27.self_attn.attn``).

        This region is partitioned into its own submodule because both ops act
        through side effects (KV-cache mutation and writes into a
        caller-provided output buffer) rather than through return values.
        """

        def forward(self, key_83: "bf16[s18, 8, 128]", s59: "Sym(s18)", value_27: "bf16[s18, 8, 128]", query_83: "bf16[s18, 16, 128]", output_111: "bf16[s18, 16, 128]"):
            """Write this step's K/V into the KV cache, then run attention.

            Args:
                key_83: new keys for the s18 tokens (8 KV heads, head dim 128).
                s59: symbolic token count; unused in the body, kept so the
                    submodule signature matches the partitioned parent graph.
                value_27: new values, same layout as ``key_83``.
                query_83: queries (16 heads, head dim 128).
                output_111: preallocated attention-output buffer; presumably
                    filled in place by ``unified_attention_with_output`` (it is
                    passed in but never returned) — confirm against the vLLM op
                    implementation.

            Returns:
                An empty tuple; all results are delivered via side effects.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:453 in forward, code: kv_cache_dummy_dep = torch.ops.vllm.unified_kv_cache_update(
            unified_kv_cache_update: "bf16[0]" = torch.ops.vllm.unified_kv_cache_update(key_83, value_27, 'language_model.model.layers.27.self_attn.attn')
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:456 in forward, code: torch.ops.vllm.unified_attention_with_output(
            # NOTE(review): the zero-sized ``kv_cache_dummy_dep`` tensor appears to
            # thread a data dependency so the cache update cannot be reordered
            # after the attention call — do not drop it.
            unified_attention_with_output = torch.ops.vllm.unified_attention_with_output(query_83, key_83, value_27, output_111, 'language_model.model.layers.27.self_attn.attn', kv_cache_dummy_dep = unified_kv_cache_update);  query_83 = key_83 = value_27 = output_111 = unified_kv_cache_update = unified_attention_with_output = None
            return ()
            
    class submod_56(torch.nn.Module):
        """Dynamo-captured tail of decoder layer 27 plus the model's final norm.

        Computes, in order: attention output projection (o_proj), fused
        residual-add + post-attention RMSNorm, SwiGLU MLP (gate_up_proj ->
        silu(gate) * up -> down_proj), a second residual-add, and the final
        ``model.norm`` RMSNorm.  Both norms accumulate in float32 and cast the
        result back to bfloat16, mirroring the eager ``forward_static`` in
        vLLM's ``layernorm.py`` (see the inline provenance comments).
        """

        def forward(self, output_111: "bf16[s18, 16, 128]", s59: "Sym(s18)", l_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_: "bf16[2048, 2048]", l_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_: "bf16[2048]", residual_53: "bf16[s18, 2048]", l_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_: "bf16[12288, 2048]", l_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_: "bf16[2048, 6144]", l_self_modules_norm_parameters_weight_: "bf16[2048]"):
            """Project the attention output and finish layer 27 + final norm.

            Args:
                output_111: attention result (16 heads x head dim 128) for the
                    s18 tokens, flattened to [s18, 2048] below.
                s59: symbolic token count; unused in the body, kept so the
                    submodule signature matches the partitioned parent graph.
                l_self_modules_*: parameters lifted to inputs by Dynamo —
                    o_proj / post-attention-norm / MLP weights of layer 27 and
                    the final ``model.norm`` weight.
                residual_53: bfloat16 residual stream carried from earlier in
                    the layer.

            Returns:
                Final normalized hidden states, ``bf16[s18, 2048]``.
            """
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/attention/attention.py:464 in forward, code: return output.view(-1, hidden_size)
            view: "bf16[s18, 2048]" = output_111.view(-1, 2048);  output_111 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear: "bf16[s18, 2048]" = torch._C._nn.linear(view, l_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_, None);  view = l_self_modules_layers_modules_27_modules_self_attn_modules_o_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            # The norm weight is unwrapped via ``.data`` (the source line reads
            # ``self.weight.data``), yielding a plain tensor for the scale.
            _get_data_attr: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_);  l_self_modules_layers_modules_27_modules_post_attention_layernorm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to: "f32[s18, 2048]" = linear.to(torch.float32);  linear = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add: "f32[s18, 2048]" = to + residual_53;  to = residual_53 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            # ``to_1`` is the updated bf16 residual, consumed by the second
            # residual-add (``add_2``) further down.
            to_1: "bf16[s18, 2048]" = add.to(torch.bfloat16)
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_1: "f32[s18, 2048]" = add.pow(2)
            mean: "f32[s18, 1]" = pow_1.mean(dim = -1, keepdim = True);  pow_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_1: "f32[s18, 1]" = mean + 1e-06;  mean = None
            rsqrt: "f32[s18, 1]" = torch.rsqrt(add_1);  add_1 = None
            mul: "f32[s18, 2048]" = add * rsqrt;  add = rsqrt = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_2: "bf16[s18, 2048]" = mul.to(torch.bfloat16);  mul = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_1: "bf16[s18, 2048]" = to_2 * _get_data_attr;  to_2 = _get_data_attr = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_1: "bf16[s18, 12288]" = torch._C._nn.linear(mul_1, l_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_, None);  mul_1 = l_self_modules_layers_modules_27_modules_mlp_modules_gate_up_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/activation.py:141 in forward_native, code: return F.silu(x[..., :d]) * x[..., d:]
            # SwiGLU: the fused gate_up projection is split in half — silu on
            # the first 6144 columns gates the second 6144.
            getitem: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(None, 6144, None))]
            silu: "bf16[s18, 6144]" = torch.nn.functional.silu(getitem);  getitem = None
            getitem_1: "bf16[s18, 6144]" = linear_1[(Ellipsis, slice(6144, None, None))];  linear_1 = None
            mul_2: "bf16[s18, 6144]" = silu * getitem_1;  silu = getitem_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/parameter.py:126 in __torch_function__, code: return super().__torch_function__(func, types, args, kwargs)
            linear_2: "bf16[s18, 2048]" = torch._C._nn.linear(mul_2, l_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_, None);  mul_2 = l_self_modules_layers_modules_27_modules_mlp_modules_down_proj_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:192 in forward_native, code: self.weight.data if self.has_weight else None,
            _get_data_attr_1: "bf16[2048]" = torch._C._autograd._get_data_attr(l_self_modules_norm_parameters_weight_);  l_self_modules_norm_parameters_weight_ = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:145 in forward_static, code: x = x.to(torch.float32)
            to_3: "f32[s18, 2048]" = linear_2.to(torch.float32);  linear_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:150 in forward_static, code: x = x + residual
            add_2: "f32[s18, 2048]" = to_3 + to_1;  to_3 = to_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:151 in forward_static, code: residual = x.to(orig_dtype)
            # NOTE(review): ``to_4`` is computed and immediately released
            # (``to_4 = None``) — the fused add+norm's residual output is unused
            # after the final norm, so this cast is dead work kept only to match
            # the traced graph.
            to_4: "bf16[s18, 2048]" = add_2.to(torch.bfloat16);  to_4 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:169 in forward_static, code: variance = x_var.pow(2).mean(dim=-1, keepdim=True)
            pow_2: "f32[s18, 2048]" = add_2.pow(2)
            mean_1: "f32[s18, 1]" = pow_2.mean(dim = -1, keepdim = True);  pow_2 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:171 in forward_static, code: x = x * torch.rsqrt(variance + variance_epsilon)
            add_3: "f32[s18, 1]" = mean_1 + 1e-06;  mean_1 = None
            rsqrt_1: "f32[s18, 1]" = torch.rsqrt(add_3);  add_3 = None
            mul_3: "f32[s18, 2048]" = add_2 * rsqrt_1;  add_2 = rsqrt_1 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:172 in forward_static, code: x = x.to(orig_dtype)
            to_5: "bf16[s18, 2048]" = mul_3.to(torch.bfloat16);  mul_3 = None
            
             # File: /home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/layers/layernorm.py:174 in forward_static, code: x = x * weight
            mul_4: "bf16[s18, 2048]" = to_5 * _get_data_attr_1;  to_5 = _get_data_attr_1 = None
            return mul_4
            