diff --git "a/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil" "b/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil"
--- "a/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil"
+++ "b/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil"
@@ -154,8 +154,8 @@ program(1.0)
             tensor<int32, []> inputs_embeds_batch_dims_0 = const()[name = tensor<string, []>("inputs_embeds_batch_dims_0"), val = tensor<int32, []>(0)];
             tensor<bool, []> inputs_embeds_validate_indices_0 = const()[name = tensor<string, []>("inputs_embeds_validate_indices_0"), val = tensor<bool, []>(false)];
             tensor<string, []> input_ids_to_int16_dtype_0 = const()[name = tensor<string, []>("input_ids_to_int16_dtype_0"), val = tensor<string, []>("int16")];
-            tensor<int16, [1, 512]> cast_41 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = tensor<string, []>("cast_41")];
-            tensor<fp16, [1, 512, 384]> inputs_embeds_cast_uint16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = cast_41, validate_indices = inputs_embeds_validate_indices_0, x = embeddings_word_embeddings_weight)[name = tensor<string, []>("inputs_embeds_cast_uint16")];
+            tensor<int16, [1, 512]> cast_5 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = tensor<string, []>("cast_5")];
+            tensor<fp16, [1, 512, 384]> inputs_embeds_cast_uint16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = cast_5, validate_indices = inputs_embeds_validate_indices_0, x = embeddings_word_embeddings_weight)[name = tensor<string, []>("inputs_embeds_cast_uint16")];
             tensor<fp16, [1, 512, 384]> var_45 = add(x = inputs_embeds_cast_uint16, y = embeddings_token_type_embeddings_weight)[name = tensor<string, []>("op_45")];
             tensor<fp16, [1, 512, 384]> embeddings_1 = add(x = var_45, y = embeddings_position_embeddings_weight)[name = tensor<string, []>("embeddings_1")];
             tensor<int32, [3]> var_47_perm_0 = const()[name = tensor<string, []>("op_47_perm_0"), val = tensor<int32, [3]>([0, 2, 1])];
@@ -183,2340 +183,2268 @@ program(1.0)
             tensor<fp16, [384]> var_63_beta_0_to_fp16 = const()[name = tensor<string, []>("op_63_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66789568)))];
             tensor<fp16, []> var_63_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_63_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
             tensor<fp16, [1, 384, 1, 512]> var_63_cast_fp16 = batch_norm(beta = var_63_beta_0_to_fp16, epsilon = var_63_epsilon_0_to_fp16, gamma = var_63_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_61_cast_fp16)[name = tensor<string, []>("op_63_cast_fp16")];
-            tensor<fp16, []> var_66_promoted_to_fp16 = const()[name = tensor<string, []>("op_66_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
-            tensor<fp16, [1, 512]> var_67_cast_fp16 = sub(x = mask, y = var_66_promoted_to_fp16)[name = tensor<string, []>("op_67_cast_fp16")];
-            tensor<fp16, []> var_68_to_fp16 = const()[name = tensor<string, []>("op_68_to_fp16"), val = tensor<fp16, []>(0x1.388p+13)];
-            tensor<fp16, [1, 512]> var_69_cast_fp16 = mul(x = var_67_cast_fp16, y = var_68_to_fp16)[name = tensor<string, []>("op_69_cast_fp16")];
-            tensor<int32, [1]> var_71_axes_0 = const()[name = tensor<string, []>("op_71_axes_0"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 512]> var_71_cast_fp16 = expand_dims(axes = var_71_axes_0, x = var_69_cast_fp16)[name = tensor<string, []>("op_71_cast_fp16")];
-            tensor<int32, [1]> var_73_axes_0 = const()[name = tensor<string, []>("op_73_axes_0"), val = tensor<int32, [1]>([2])];
-            tensor<fp16, [1, 1, 1, 512]> var_73_cast_fp16 = expand_dims(axes = var_73_axes_0, x = var_71_cast_fp16)[name = tensor<string, []>("op_73_cast_fp16")];
-            tensor<int32, []> var_77 = const()[name = tensor<string, []>("op_77"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_78 = const()[name = tensor<string, []>("op_78"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_79 = const()[name = tensor<string, []>("op_79"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_104 = const()[name = tensor<string, []>("op_104"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_106 = const()[name = tensor<string, []>("op_106"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_108_pad_type_0 = const()[name = tensor<string, []>("op_108_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_108_pad_0 = const()[name = tensor<string, []>("op_108_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_108 = conv(bias = layers_0_attention_q_proj_bias, dilations = var_106, groups = var_77, pad = var_108_pad_0, pad_type = var_108_pad_type_0, strides = var_104, weight = layers_0_attention_q_proj_weight, x = var_63_cast_fp16)[name = tensor<string, []>("op_108")];
-            tensor<int32, [4]> var_109 = const()[name = tensor<string, []>("op_109"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_110 = reshape(shape = var_109, x = var_108)[name = tensor<string, []>("op_110")];
-            tensor<int32, [2]> var_113 = const()[name = tensor<string, []>("op_113"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_115 = const()[name = tensor<string, []>("op_115"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_117_pad_type_0 = const()[name = tensor<string, []>("op_117_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_117_pad_0 = const()[name = tensor<string, []>("op_117_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_117 = conv(bias = layers_0_attention_k_proj_bias, dilations = var_115, groups = var_77, pad = var_117_pad_0, pad_type = var_117_pad_type_0, strides = var_113, weight = layers_0_attention_k_proj_weight, x = var_63_cast_fp16)[name = tensor<string, []>("op_117")];
-            tensor<int32, [4]> var_118 = const()[name = tensor<string, []>("op_118"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_1 = reshape(shape = var_118, x = var_117)[name = tensor<string, []>("ks_1")];
-            tensor<int32, [2]> var_122 = const()[name = tensor<string, []>("op_122"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_124 = const()[name = tensor<string, []>("op_124"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_126_pad_type_0 = const()[name = tensor<string, []>("op_126_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_126_pad_0 = const()[name = tensor<string, []>("op_126_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_126 = conv(bias = layers_0_attention_v_proj_bias, dilations = var_124, groups = var_77, pad = var_126_pad_0, pad_type = var_126_pad_type_0, strides = var_122, weight = layers_0_attention_v_proj_weight, x = var_63_cast_fp16)[name = tensor<string, []>("op_126")];
-            tensor<int32, [4]> var_127 = const()[name = tensor<string, []>("op_127"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_128 = reshape(shape = var_127, x = var_126)[name = tensor<string, []>("op_128")];
-            tensor<int32, [12]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_129_axis_0 = const()[name = tensor<string, []>("op_129_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_129_0, tensor<fp16, [1, 32, 1, 512]> var_129_1, tensor<fp16, [1, 32, 1, 512]> var_129_2, tensor<fp16, [1, 32, 1, 512]> var_129_3, tensor<fp16, [1, 32, 1, 512]> var_129_4, tensor<fp16, [1, 32, 1, 512]> var_129_5, tensor<fp16, [1, 32, 1, 512]> var_129_6, tensor<fp16, [1, 32, 1, 512]> var_129_7, tensor<fp16, [1, 32, 1, 512]> var_129_8, tensor<fp16, [1, 32, 1, 512]> var_129_9, tensor<fp16, [1, 32, 1, 512]> var_129_10, tensor<fp16, [1, 32, 1, 512]> var_129_11 = split(axis = var_129_axis_0, split_sizes = tile_2, x = var_110)[name = tensor<string, []>("op_129")];
-            tensor<int32, [4]> var_142_perm_0 = const()[name = tensor<string, []>("op_142_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_143_axis_0 = const()[name = tensor<string, []>("op_143_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_11 = transpose(perm = var_142_perm_0, x = ks_1)[name = tensor<string, []>("transpose_11")];
-            tensor<fp16, [1, 512, 1, 32]> var_143_0, tensor<fp16, [1, 512, 1, 32]> var_143_1, tensor<fp16, [1, 512, 1, 32]> var_143_2, tensor<fp16, [1, 512, 1, 32]> var_143_3, tensor<fp16, [1, 512, 1, 32]> var_143_4, tensor<fp16, [1, 512, 1, 32]> var_143_5, tensor<fp16, [1, 512, 1, 32]> var_143_6, tensor<fp16, [1, 512, 1, 32]> var_143_7, tensor<fp16, [1, 512, 1, 32]> var_143_8, tensor<fp16, [1, 512, 1, 32]> var_143_9, tensor<fp16, [1, 512, 1, 32]> var_143_10, tensor<fp16, [1, 512, 1, 32]> var_143_11 = split(axis = var_143_axis_0, split_sizes = tile_3, x = transpose_11)[name = tensor<string, []>("op_143")];
-            tensor<int32, [12]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_156_axis_0 = const()[name = tensor<string, []>("op_156_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_156_0, tensor<fp16, [1, 32, 1, 512]> var_156_1, tensor<fp16, [1, 32, 1, 512]> var_156_2, tensor<fp16, [1, 32, 1, 512]> var_156_3, tensor<fp16, [1, 32, 1, 512]> var_156_4, tensor<fp16, [1, 32, 1, 512]> var_156_5, tensor<fp16, [1, 32, 1, 512]> var_156_6, tensor<fp16, [1, 32, 1, 512]> var_156_7, tensor<fp16, [1, 32, 1, 512]> var_156_8, tensor<fp16, [1, 32, 1, 512]> var_156_9, tensor<fp16, [1, 32, 1, 512]> var_156_10, tensor<fp16, [1, 32, 1, 512]> var_156_11 = split(axis = var_156_axis_0, split_sizes = tile_4, x = var_128)[name = tensor<string, []>("op_156")];
-            tensor<string, []> var_170_equation_0 = const()[name = tensor<string, []>("op_170_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_170 = einsum(equation = var_170_equation_0, values = (var_143_0, var_129_0))[name = tensor<string, []>("op_170")];
-            tensor<fp16, []> var_171_to_fp16 = const()[name = tensor<string, []>("op_171_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_1_cast_fp16 = mul(x = var_170, y = var_171_to_fp16)[name = tensor<string, []>("w_1_cast_fp16")];
-            tensor<string, []> var_174_equation_0 = const()[name = tensor<string, []>("op_174_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_174 = einsum(equation = var_174_equation_0, values = (var_143_1, var_129_1))[name = tensor<string, []>("op_174")];
-            tensor<fp16, []> var_175_to_fp16 = const()[name = tensor<string, []>("op_175_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_3_cast_fp16 = mul(x = var_174, y = var_175_to_fp16)[name = tensor<string, []>("w_3_cast_fp16")];
-            tensor<string, []> var_178_equation_0 = const()[name = tensor<string, []>("op_178_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_178 = einsum(equation = var_178_equation_0, values = (var_143_2, var_129_2))[name = tensor<string, []>("op_178")];
-            tensor<fp16, []> var_179_to_fp16 = const()[name = tensor<string, []>("op_179_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_5_cast_fp16 = mul(x = var_178, y = var_179_to_fp16)[name = tensor<string, []>("w_5_cast_fp16")];
-            tensor<string, []> var_182_equation_0 = const()[name = tensor<string, []>("op_182_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_182 = einsum(equation = var_182_equation_0, values = (var_143_3, var_129_3))[name = tensor<string, []>("op_182")];
-            tensor<fp16, []> var_183_to_fp16 = const()[name = tensor<string, []>("op_183_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_7_cast_fp16 = mul(x = var_182, y = var_183_to_fp16)[name = tensor<string, []>("w_7_cast_fp16")];
-            tensor<string, []> var_186_equation_0 = const()[name = tensor<string, []>("op_186_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_186 = einsum(equation = var_186_equation_0, values = (var_143_4, var_129_4))[name = tensor<string, []>("op_186")];
-            tensor<fp16, []> var_187_to_fp16 = const()[name = tensor<string, []>("op_187_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_9_cast_fp16 = mul(x = var_186, y = var_187_to_fp16)[name = tensor<string, []>("w_9_cast_fp16")];
-            tensor<string, []> var_190_equation_0 = const()[name = tensor<string, []>("op_190_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_190 = einsum(equation = var_190_equation_0, values = (var_143_5, var_129_5))[name = tensor<string, []>("op_190")];
-            tensor<fp16, []> var_191_to_fp16 = const()[name = tensor<string, []>("op_191_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_11_cast_fp16 = mul(x = var_190, y = var_191_to_fp16)[name = tensor<string, []>("w_11_cast_fp16")];
-            tensor<string, []> var_194_equation_0 = const()[name = tensor<string, []>("op_194_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_194 = einsum(equation = var_194_equation_0, values = (var_143_6, var_129_6))[name = tensor<string, []>("op_194")];
-            tensor<fp16, []> var_195_to_fp16 = const()[name = tensor<string, []>("op_195_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_13_cast_fp16 = mul(x = var_194, y = var_195_to_fp16)[name = tensor<string, []>("w_13_cast_fp16")];
-            tensor<string, []> var_198_equation_0 = const()[name = tensor<string, []>("op_198_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_198 = einsum(equation = var_198_equation_0, values = (var_143_7, var_129_7))[name = tensor<string, []>("op_198")];
-            tensor<fp16, []> var_199_to_fp16 = const()[name = tensor<string, []>("op_199_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_15_cast_fp16 = mul(x = var_198, y = var_199_to_fp16)[name = tensor<string, []>("w_15_cast_fp16")];
-            tensor<string, []> var_202_equation_0 = const()[name = tensor<string, []>("op_202_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_202 = einsum(equation = var_202_equation_0, values = (var_143_8, var_129_8))[name = tensor<string, []>("op_202")];
-            tensor<fp16, []> var_203_to_fp16 = const()[name = tensor<string, []>("op_203_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_17_cast_fp16 = mul(x = var_202, y = var_203_to_fp16)[name = tensor<string, []>("w_17_cast_fp16")];
-            tensor<string, []> var_206_equation_0 = const()[name = tensor<string, []>("op_206_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_206 = einsum(equation = var_206_equation_0, values = (var_143_9, var_129_9))[name = tensor<string, []>("op_206")];
-            tensor<fp16, []> var_207_to_fp16 = const()[name = tensor<string, []>("op_207_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_19_cast_fp16 = mul(x = var_206, y = var_207_to_fp16)[name = tensor<string, []>("w_19_cast_fp16")];
-            tensor<string, []> var_210_equation_0 = const()[name = tensor<string, []>("op_210_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_210 = einsum(equation = var_210_equation_0, values = (var_143_10, var_129_10))[name = tensor<string, []>("op_210")];
-            tensor<fp16, []> var_211_to_fp16 = const()[name = tensor<string, []>("op_211_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_21_cast_fp16 = mul(x = var_210, y = var_211_to_fp16)[name = tensor<string, []>("w_21_cast_fp16")];
-            tensor<string, []> var_214_equation_0 = const()[name = tensor<string, []>("op_214_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_214 = einsum(equation = var_214_equation_0, values = (var_143_11, var_129_11))[name = tensor<string, []>("op_214")];
-            tensor<fp16, []> var_215_to_fp16 = const()[name = tensor<string, []>("op_215_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_23_cast_fp16 = mul(x = var_214, y = var_215_to_fp16)[name = tensor<string, []>("w_23_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_217_cast_fp16 = add(x = w_1_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_217_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_218_cast_fp16 = softmax(axis = var_77, x = var_217_cast_fp16)[name = tensor<string, []>("op_218_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_219_cast_fp16 = add(x = w_3_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_219_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_220_cast_fp16 = softmax(axis = var_77, x = var_219_cast_fp16)[name = tensor<string, []>("op_220_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_221_cast_fp16 = add(x = w_5_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_221_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_222_cast_fp16 = softmax(axis = var_77, x = var_221_cast_fp16)[name = tensor<string, []>("op_222_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_223_cast_fp16 = add(x = w_7_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_223_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_224_cast_fp16 = softmax(axis = var_77, x = var_223_cast_fp16)[name = tensor<string, []>("op_224_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_225_cast_fp16 = add(x = w_9_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_225_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_226_cast_fp16 = softmax(axis = var_77, x = var_225_cast_fp16)[name = tensor<string, []>("op_226_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_227_cast_fp16 = add(x = w_11_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_227_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_228_cast_fp16 = softmax(axis = var_77, x = var_227_cast_fp16)[name = tensor<string, []>("op_228_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_229_cast_fp16 = add(x = w_13_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_229_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_230_cast_fp16 = softmax(axis = var_77, x = var_229_cast_fp16)[name = tensor<string, []>("op_230_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_231_cast_fp16 = add(x = w_15_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_231_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_232_cast_fp16 = softmax(axis = var_77, x = var_231_cast_fp16)[name = tensor<string, []>("op_232_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_233_cast_fp16 = add(x = w_17_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_233_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_234_cast_fp16 = softmax(axis = var_77, x = var_233_cast_fp16)[name = tensor<string, []>("op_234_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_235_cast_fp16 = add(x = w_19_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_235_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_236_cast_fp16 = softmax(axis = var_77, x = var_235_cast_fp16)[name = tensor<string, []>("op_236_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_237_cast_fp16 = add(x = w_21_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_237_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_238_cast_fp16 = softmax(axis = var_77, x = var_237_cast_fp16)[name = tensor<string, []>("op_238_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_239_cast_fp16 = add(x = w_23_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_239_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_240_cast_fp16 = softmax(axis = var_77, x = var_239_cast_fp16)[name = tensor<string, []>("op_240_cast_fp16")];
-            tensor<string, []> var_242_equation_0 = const()[name = tensor<string, []>("op_242_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_242_cast_fp16 = einsum(equation = var_242_equation_0, values = (var_156_0, var_218_cast_fp16))[name = tensor<string, []>("op_242_cast_fp16")];
+            tensor<int32, [1]> var_76_axes_0 = const()[name = tensor<string, []>("op_76_axes_0"), val = tensor<int32, [1]>([2])];
+            tensor<fp16, [1, 512, 1]> var_76_cast_fp16 = expand_dims(axes = var_76_axes_0, x = mask)[name = tensor<string, []>("op_76_cast_fp16")];
+            tensor<int32, [1]> var_78_axes_0 = const()[name = tensor<string, []>("op_78_axes_0"), val = tensor<int32, [1]>([3])];
+            tensor<fp16, [1, 512, 1, 1]> var_78_cast_fp16 = expand_dims(axes = var_78_axes_0, x = var_76_cast_fp16)[name = tensor<string, []>("op_78_cast_fp16")];
+            tensor<fp16, []> var_80_to_fp16 = const()[name = tensor<string, []>("op_80_to_fp16"), val = tensor<fp16, []>(0x1p+0)];
+            tensor<fp16, [1, 512, 1, 1]> var_81_cast_fp16 = sub(x = var_78_cast_fp16, y = var_80_to_fp16)[name = tensor<string, []>("op_81_cast_fp16")];
+            tensor<fp16, []> var_82_to_fp16 = const()[name = tensor<string, []>("op_82_to_fp16"), val = tensor<fp16, []>(0x1.388p+13)];
+            tensor<fp16, [1, 512, 1, 1]> var_83_cast_fp16 = mul(x = var_81_cast_fp16, y = var_82_to_fp16)[name = tensor<string, []>("op_83_cast_fp16")];
+            tensor<int32, []> var_88 = const()[name = tensor<string, []>("op_88"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_89 = const()[name = tensor<string, []>("op_89"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_90 = const()[name = tensor<string, []>("op_90"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_112 = const()[name = tensor<string, []>("op_112"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_114 = const()[name = tensor<string, []>("op_114"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_116_pad_type_0 = const()[name = tensor<string, []>("op_116_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_116_pad_0 = const()[name = tensor<string, []>("op_116_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_116 = conv(bias = layers_0_attention_q_proj_bias, dilations = var_114, groups = var_88, pad = var_116_pad_0, pad_type = var_116_pad_type_0, strides = var_112, weight = layers_0_attention_q_proj_weight, x = var_63_cast_fp16)[name = tensor<string, []>("op_116")];
+            tensor<int32, [2]> var_119 = const()[name = tensor<string, []>("op_119"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_121 = const()[name = tensor<string, []>("op_121"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_1_pad_type_0 = const()[name = tensor<string, []>("ks_1_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_1_pad_0 = const()[name = tensor<string, []>("ks_1_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_1 = conv(bias = layers_0_attention_k_proj_bias, dilations = var_121, groups = var_88, pad = ks_1_pad_0, pad_type = ks_1_pad_type_0, strides = var_119, weight = layers_0_attention_k_proj_weight, x = var_63_cast_fp16)[name = tensor<string, []>("ks_1")];
+            tensor<int32, [2]> var_126 = const()[name = tensor<string, []>("op_126"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_128 = const()[name = tensor<string, []>("op_128"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_130_pad_type_0 = const()[name = tensor<string, []>("op_130_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_130_pad_0 = const()[name = tensor<string, []>("op_130_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_130 = conv(bias = layers_0_attention_v_proj_bias, dilations = var_128, groups = var_88, pad = var_130_pad_0, pad_type = var_130_pad_type_0, strides = var_126, weight = layers_0_attention_v_proj_weight, x = var_63_cast_fp16)[name = tensor<string, []>("op_130")];
+            tensor<int32, [12]> tile_2 = const()[name = tensor<string, []>("tile_2"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_131_axis_0 = const()[name = tensor<string, []>("op_131_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_131_0, tensor<fp16, [1, 32, 1, 512]> var_131_1, tensor<fp16, [1, 32, 1, 512]> var_131_2, tensor<fp16, [1, 32, 1, 512]> var_131_3, tensor<fp16, [1, 32, 1, 512]> var_131_4, tensor<fp16, [1, 32, 1, 512]> var_131_5, tensor<fp16, [1, 32, 1, 512]> var_131_6, tensor<fp16, [1, 32, 1, 512]> var_131_7, tensor<fp16, [1, 32, 1, 512]> var_131_8, tensor<fp16, [1, 32, 1, 512]> var_131_9, tensor<fp16, [1, 32, 1, 512]> var_131_10, tensor<fp16, [1, 32, 1, 512]> var_131_11 = split(axis = var_131_axis_0, split_sizes = tile_2, x = var_116)[name = tensor<string, []>("op_131")];
+            tensor<int32, [4]> var_144_perm_0 = const()[name = tensor<string, []>("op_144_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_3 = const()[name = tensor<string, []>("tile_3"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_145_axis_0 = const()[name = tensor<string, []>("op_145_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_11 = transpose(perm = var_144_perm_0, x = ks_1)[name = tensor<string, []>("transpose_11")];
+            tensor<fp16, [1, 512, 1, 32]> var_145_0, tensor<fp16, [1, 512, 1, 32]> var_145_1, tensor<fp16, [1, 512, 1, 32]> var_145_2, tensor<fp16, [1, 512, 1, 32]> var_145_3, tensor<fp16, [1, 512, 1, 32]> var_145_4, tensor<fp16, [1, 512, 1, 32]> var_145_5, tensor<fp16, [1, 512, 1, 32]> var_145_6, tensor<fp16, [1, 512, 1, 32]> var_145_7, tensor<fp16, [1, 512, 1, 32]> var_145_8, tensor<fp16, [1, 512, 1, 32]> var_145_9, tensor<fp16, [1, 512, 1, 32]> var_145_10, tensor<fp16, [1, 512, 1, 32]> var_145_11 = split(axis = var_145_axis_0, split_sizes = tile_3, x = transpose_11)[name = tensor<string, []>("op_145")];
+            tensor<int32, [12]> tile_4 = const()[name = tensor<string, []>("tile_4"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_158_axis_0 = const()[name = tensor<string, []>("op_158_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_158_0, tensor<fp16, [1, 32, 1, 512]> var_158_1, tensor<fp16, [1, 32, 1, 512]> var_158_2, tensor<fp16, [1, 32, 1, 512]> var_158_3, tensor<fp16, [1, 32, 1, 512]> var_158_4, tensor<fp16, [1, 32, 1, 512]> var_158_5, tensor<fp16, [1, 32, 1, 512]> var_158_6, tensor<fp16, [1, 32, 1, 512]> var_158_7, tensor<fp16, [1, 32, 1, 512]> var_158_8, tensor<fp16, [1, 32, 1, 512]> var_158_9, tensor<fp16, [1, 32, 1, 512]> var_158_10, tensor<fp16, [1, 32, 1, 512]> var_158_11 = split(axis = var_158_axis_0, split_sizes = tile_4, x = var_130)[name = tensor<string, []>("op_158")];
+            tensor<string, []> var_172_equation_0 = const()[name = tensor<string, []>("op_172_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_172 = einsum(equation = var_172_equation_0, values = (var_145_0, var_131_0))[name = tensor<string, []>("op_172")];
+            tensor<fp16, []> var_173_to_fp16 = const()[name = tensor<string, []>("op_173_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_1_cast_fp16 = mul(x = var_172, y = var_173_to_fp16)[name = tensor<string, []>("w_1_cast_fp16")];
+            tensor<string, []> var_176_equation_0 = const()[name = tensor<string, []>("op_176_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_176 = einsum(equation = var_176_equation_0, values = (var_145_1, var_131_1))[name = tensor<string, []>("op_176")];
+            tensor<fp16, []> var_177_to_fp16 = const()[name = tensor<string, []>("op_177_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_3_cast_fp16 = mul(x = var_176, y = var_177_to_fp16)[name = tensor<string, []>("w_3_cast_fp16")];
+            tensor<string, []> var_180_equation_0 = const()[name = tensor<string, []>("op_180_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_180 = einsum(equation = var_180_equation_0, values = (var_145_2, var_131_2))[name = tensor<string, []>("op_180")];
+            tensor<fp16, []> var_181_to_fp16 = const()[name = tensor<string, []>("op_181_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_5_cast_fp16 = mul(x = var_180, y = var_181_to_fp16)[name = tensor<string, []>("w_5_cast_fp16")];
+            tensor<string, []> var_184_equation_0 = const()[name = tensor<string, []>("op_184_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_184 = einsum(equation = var_184_equation_0, values = (var_145_3, var_131_3))[name = tensor<string, []>("op_184")];
+            tensor<fp16, []> var_185_to_fp16 = const()[name = tensor<string, []>("op_185_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_7_cast_fp16 = mul(x = var_184, y = var_185_to_fp16)[name = tensor<string, []>("w_7_cast_fp16")];
+            tensor<string, []> var_188_equation_0 = const()[name = tensor<string, []>("op_188_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_188 = einsum(equation = var_188_equation_0, values = (var_145_4, var_131_4))[name = tensor<string, []>("op_188")];
+            tensor<fp16, []> var_189_to_fp16 = const()[name = tensor<string, []>("op_189_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_9_cast_fp16 = mul(x = var_188, y = var_189_to_fp16)[name = tensor<string, []>("w_9_cast_fp16")];
+            tensor<string, []> var_192_equation_0 = const()[name = tensor<string, []>("op_192_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_192 = einsum(equation = var_192_equation_0, values = (var_145_5, var_131_5))[name = tensor<string, []>("op_192")];
+            tensor<fp16, []> var_193_to_fp16 = const()[name = tensor<string, []>("op_193_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_11_cast_fp16 = mul(x = var_192, y = var_193_to_fp16)[name = tensor<string, []>("w_11_cast_fp16")];
+            tensor<string, []> var_196_equation_0 = const()[name = tensor<string, []>("op_196_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_196 = einsum(equation = var_196_equation_0, values = (var_145_6, var_131_6))[name = tensor<string, []>("op_196")];
+            tensor<fp16, []> var_197_to_fp16 = const()[name = tensor<string, []>("op_197_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_13_cast_fp16 = mul(x = var_196, y = var_197_to_fp16)[name = tensor<string, []>("w_13_cast_fp16")];
+            tensor<string, []> var_200_equation_0 = const()[name = tensor<string, []>("op_200_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_200 = einsum(equation = var_200_equation_0, values = (var_145_7, var_131_7))[name = tensor<string, []>("op_200")];
+            tensor<fp16, []> var_201_to_fp16 = const()[name = tensor<string, []>("op_201_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_15_cast_fp16 = mul(x = var_200, y = var_201_to_fp16)[name = tensor<string, []>("w_15_cast_fp16")];
+            tensor<string, []> var_204_equation_0 = const()[name = tensor<string, []>("op_204_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_204 = einsum(equation = var_204_equation_0, values = (var_145_8, var_131_8))[name = tensor<string, []>("op_204")];
+            tensor<fp16, []> var_205_to_fp16 = const()[name = tensor<string, []>("op_205_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_17_cast_fp16 = mul(x = var_204, y = var_205_to_fp16)[name = tensor<string, []>("w_17_cast_fp16")];
+            tensor<string, []> var_208_equation_0 = const()[name = tensor<string, []>("op_208_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_208 = einsum(equation = var_208_equation_0, values = (var_145_9, var_131_9))[name = tensor<string, []>("op_208")];
+            tensor<fp16, []> var_209_to_fp16 = const()[name = tensor<string, []>("op_209_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_19_cast_fp16 = mul(x = var_208, y = var_209_to_fp16)[name = tensor<string, []>("w_19_cast_fp16")];
+            tensor<string, []> var_212_equation_0 = const()[name = tensor<string, []>("op_212_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_212 = einsum(equation = var_212_equation_0, values = (var_145_10, var_131_10))[name = tensor<string, []>("op_212")];
+            tensor<fp16, []> var_213_to_fp16 = const()[name = tensor<string, []>("op_213_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_21_cast_fp16 = mul(x = var_212, y = var_213_to_fp16)[name = tensor<string, []>("w_21_cast_fp16")];
+            tensor<string, []> var_216_equation_0 = const()[name = tensor<string, []>("op_216_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_216 = einsum(equation = var_216_equation_0, values = (var_145_11, var_131_11))[name = tensor<string, []>("op_216")];
+            tensor<fp16, []> var_217_to_fp16 = const()[name = tensor<string, []>("op_217_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_23_cast_fp16 = mul(x = var_216, y = var_217_to_fp16)[name = tensor<string, []>("w_23_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_3_cast_fp16 = add(x = w_1_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_3_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_220_cast_fp16 = softmax(axis = var_88, x = input_3_cast_fp16)[name = tensor<string, []>("op_220_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_5_cast_fp16 = add(x = w_3_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_5_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_222_cast_fp16 = softmax(axis = var_88, x = input_5_cast_fp16)[name = tensor<string, []>("op_222_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_7_cast_fp16 = add(x = w_5_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_7_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_224_cast_fp16 = softmax(axis = var_88, x = input_7_cast_fp16)[name = tensor<string, []>("op_224_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_9_cast_fp16 = add(x = w_7_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_9_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_226_cast_fp16 = softmax(axis = var_88, x = input_9_cast_fp16)[name = tensor<string, []>("op_226_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_11_cast_fp16 = add(x = w_9_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_11_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_228_cast_fp16 = softmax(axis = var_88, x = input_11_cast_fp16)[name = tensor<string, []>("op_228_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_13_cast_fp16 = add(x = w_11_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_13_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_230_cast_fp16 = softmax(axis = var_88, x = input_13_cast_fp16)[name = tensor<string, []>("op_230_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_15_cast_fp16 = add(x = w_13_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_15_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_232_cast_fp16 = softmax(axis = var_88, x = input_15_cast_fp16)[name = tensor<string, []>("op_232_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_17_cast_fp16 = add(x = w_15_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_17_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_234_cast_fp16 = softmax(axis = var_88, x = input_17_cast_fp16)[name = tensor<string, []>("op_234_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_19_cast_fp16 = add(x = w_17_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_19_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_236_cast_fp16 = softmax(axis = var_88, x = input_19_cast_fp16)[name = tensor<string, []>("op_236_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_21_cast_fp16 = add(x = w_19_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_238_cast_fp16 = softmax(axis = var_88, x = input_21_cast_fp16)[name = tensor<string, []>("op_238_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_23_cast_fp16 = add(x = w_21_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_240_cast_fp16 = softmax(axis = var_88, x = input_23_cast_fp16)[name = tensor<string, []>("op_240_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_25_cast_fp16 = add(x = w_23_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_242_cast_fp16 = softmax(axis = var_88, x = input_25_cast_fp16)[name = tensor<string, []>("op_242_cast_fp16")];
             tensor<string, []> var_244_equation_0 = const()[name = tensor<string, []>("op_244_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_244_cast_fp16 = einsum(equation = var_244_equation_0, values = (var_156_1, var_220_cast_fp16))[name = tensor<string, []>("op_244_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_244_cast_fp16 = einsum(equation = var_244_equation_0, values = (var_158_0, var_220_cast_fp16))[name = tensor<string, []>("op_244_cast_fp16")];
             tensor<string, []> var_246_equation_0 = const()[name = tensor<string, []>("op_246_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_246_cast_fp16 = einsum(equation = var_246_equation_0, values = (var_156_2, var_222_cast_fp16))[name = tensor<string, []>("op_246_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_246_cast_fp16 = einsum(equation = var_246_equation_0, values = (var_158_1, var_222_cast_fp16))[name = tensor<string, []>("op_246_cast_fp16")];
             tensor<string, []> var_248_equation_0 = const()[name = tensor<string, []>("op_248_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_248_cast_fp16 = einsum(equation = var_248_equation_0, values = (var_156_3, var_224_cast_fp16))[name = tensor<string, []>("op_248_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_248_cast_fp16 = einsum(equation = var_248_equation_0, values = (var_158_2, var_224_cast_fp16))[name = tensor<string, []>("op_248_cast_fp16")];
             tensor<string, []> var_250_equation_0 = const()[name = tensor<string, []>("op_250_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_250_cast_fp16 = einsum(equation = var_250_equation_0, values = (var_156_4, var_226_cast_fp16))[name = tensor<string, []>("op_250_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_250_cast_fp16 = einsum(equation = var_250_equation_0, values = (var_158_3, var_226_cast_fp16))[name = tensor<string, []>("op_250_cast_fp16")];
             tensor<string, []> var_252_equation_0 = const()[name = tensor<string, []>("op_252_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_252_cast_fp16 = einsum(equation = var_252_equation_0, values = (var_156_5, var_228_cast_fp16))[name = tensor<string, []>("op_252_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_252_cast_fp16 = einsum(equation = var_252_equation_0, values = (var_158_4, var_228_cast_fp16))[name = tensor<string, []>("op_252_cast_fp16")];
             tensor<string, []> var_254_equation_0 = const()[name = tensor<string, []>("op_254_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_254_cast_fp16 = einsum(equation = var_254_equation_0, values = (var_156_6, var_230_cast_fp16))[name = tensor<string, []>("op_254_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_254_cast_fp16 = einsum(equation = var_254_equation_0, values = (var_158_5, var_230_cast_fp16))[name = tensor<string, []>("op_254_cast_fp16")];
             tensor<string, []> var_256_equation_0 = const()[name = tensor<string, []>("op_256_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_256_cast_fp16 = einsum(equation = var_256_equation_0, values = (var_156_7, var_232_cast_fp16))[name = tensor<string, []>("op_256_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_256_cast_fp16 = einsum(equation = var_256_equation_0, values = (var_158_6, var_232_cast_fp16))[name = tensor<string, []>("op_256_cast_fp16")];
             tensor<string, []> var_258_equation_0 = const()[name = tensor<string, []>("op_258_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_258_cast_fp16 = einsum(equation = var_258_equation_0, values = (var_156_8, var_234_cast_fp16))[name = tensor<string, []>("op_258_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_258_cast_fp16 = einsum(equation = var_258_equation_0, values = (var_158_7, var_234_cast_fp16))[name = tensor<string, []>("op_258_cast_fp16")];
             tensor<string, []> var_260_equation_0 = const()[name = tensor<string, []>("op_260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_260_cast_fp16 = einsum(equation = var_260_equation_0, values = (var_156_9, var_236_cast_fp16))[name = tensor<string, []>("op_260_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_260_cast_fp16 = einsum(equation = var_260_equation_0, values = (var_158_8, var_236_cast_fp16))[name = tensor<string, []>("op_260_cast_fp16")];
             tensor<string, []> var_262_equation_0 = const()[name = tensor<string, []>("op_262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_262_cast_fp16 = einsum(equation = var_262_equation_0, values = (var_156_10, var_238_cast_fp16))[name = tensor<string, []>("op_262_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_262_cast_fp16 = einsum(equation = var_262_equation_0, values = (var_158_9, var_238_cast_fp16))[name = tensor<string, []>("op_262_cast_fp16")];
             tensor<string, []> var_264_equation_0 = const()[name = tensor<string, []>("op_264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_264_cast_fp16 = einsum(equation = var_264_equation_0, values = (var_156_11, var_240_cast_fp16))[name = tensor<string, []>("op_264_cast_fp16")];
-            tensor<bool, []> var_266_interleave_0 = const()[name = tensor<string, []>("op_266_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_266_cast_fp16 = concat(axis = var_77, interleave = var_266_interleave_0, values = (var_242_cast_fp16, var_244_cast_fp16, var_246_cast_fp16, var_248_cast_fp16, var_250_cast_fp16, var_252_cast_fp16, var_254_cast_fp16, var_256_cast_fp16, var_258_cast_fp16, var_260_cast_fp16, var_262_cast_fp16, var_264_cast_fp16))[name = tensor<string, []>("op_266_cast_fp16")];
-            tensor<int32, [2]> var_270 = const()[name = tensor<string, []>("op_270"), val = tensor<int32, [2]>([1, 1])];
+            tensor<fp16, [1, 32, 1, 512]> var_264_cast_fp16 = einsum(equation = var_264_equation_0, values = (var_158_10, var_240_cast_fp16))[name = tensor<string, []>("op_264_cast_fp16")];
+            tensor<string, []> var_266_equation_0 = const()[name = tensor<string, []>("op_266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_266_cast_fp16 = einsum(equation = var_266_equation_0, values = (var_158_11, var_242_cast_fp16))[name = tensor<string, []>("op_266_cast_fp16")];
+            tensor<bool, []> var_268_interleave_0 = const()[name = tensor<string, []>("op_268_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_268_cast_fp16 = concat(axis = var_88, interleave = var_268_interleave_0, values = (var_244_cast_fp16, var_246_cast_fp16, var_248_cast_fp16, var_250_cast_fp16, var_252_cast_fp16, var_254_cast_fp16, var_256_cast_fp16, var_258_cast_fp16, var_260_cast_fp16, var_262_cast_fp16, var_264_cast_fp16, var_266_cast_fp16))[name = tensor<string, []>("op_268_cast_fp16")];
             tensor<int32, [2]> var_272 = const()[name = tensor<string, []>("op_272"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_274_pad_type_0 = const()[name = tensor<string, []>("op_274_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_274_pad_0 = const()[name = tensor<string, []>("op_274_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_274 = conv(bias = layers_0_attention_o_proj_bias, dilations = var_272, groups = var_77, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_270, weight = layers_0_attention_o_proj_weight, x = var_266_cast_fp16)[name = tensor<string, []>("op_274")];
-            tensor<bool, []> var_276_interleave_0 = const()[name = tensor<string, []>("op_276_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_276 = concat(axis = var_78, interleave = var_276_interleave_0, values = var_274)[name = tensor<string, []>("op_276")];
-            tensor<fp16, [1, 384, 1, 512]> x_5 = add(x = var_63_cast_fp16, y = var_276)[name = tensor<string, []>("x_5")];
-            tensor<fp16, []> var_75_promoted = const()[name = tensor<string, []>("op_75_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_76_promoted = const()[name = tensor<string, []>("op_76_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_7 = clip(alpha = var_75_promoted, beta = var_76_promoted, x = x_5)[name = tensor<string, []>("x_7")];
-            tensor<int32, [1]> var_281 = const()[name = tensor<string, []>("op_281"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_3 = reduce_mean(axes = var_281, keep_dims = var_79, x = x_7)[name = tensor<string, []>("mean_3")];
+            tensor<int32, [2]> var_274 = const()[name = tensor<string, []>("op_274"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_276_pad_type_0 = const()[name = tensor<string, []>("op_276_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_276_pad_0 = const()[name = tensor<string, []>("op_276_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_276 = conv(bias = layers_0_attention_o_proj_bias, dilations = var_274, groups = var_88, pad = var_276_pad_0, pad_type = var_276_pad_type_0, strides = var_272, weight = layers_0_attention_o_proj_weight, x = var_268_cast_fp16)[name = tensor<string, []>("op_276")];
+            tensor<bool, []> var_278_interleave_0 = const()[name = tensor<string, []>("op_278_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_278 = concat(axis = var_89, interleave = var_278_interleave_0, values = var_276)[name = tensor<string, []>("op_278")];
+            tensor<fp16, [1, 384, 1, 512]> x_5 = add(x = var_63_cast_fp16, y = var_278)[name = tensor<string, []>("x_5")];
+            tensor<fp16, []> var_85_promoted = const()[name = tensor<string, []>("op_85_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_86_promoted = const()[name = tensor<string, []>("op_86_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_7 = clip(alpha = var_85_promoted, beta = var_86_promoted, x = x_5)[name = tensor<string, []>("x_7")];
+            tensor<int32, [1]> var_283 = const()[name = tensor<string, []>("op_283"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_3 = reduce_mean(axes = var_283, keep_dims = var_90, x = x_7)[name = tensor<string, []>("mean_3")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_3 = sub(x = x_7, y = mean_3)[name = tensor<string, []>("zero_mean_3")];
-            tensor<fp16, []> var_84_promoted = const()[name = tensor<string, []>("op_84_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_284 = pow(x = zero_mean_3, y = var_84_promoted)[name = tensor<string, []>("op_284")];
-            tensor<int32, [1]> var_285 = const()[name = tensor<string, []>("op_285"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_286 = reduce_mean(axes = var_285, keep_dims = var_79, x = var_284)[name = tensor<string, []>("op_286")];
-            tensor<fp16, []> var_287_to_fp16 = const()[name = tensor<string, []>("op_287_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_288_cast_fp16 = add(x = var_286, y = var_287_to_fp16)[name = tensor<string, []>("op_288_cast_fp16")];
+            tensor<fp16, []> var_87_promoted = const()[name = tensor<string, []>("op_87_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_286 = pow(x = zero_mean_3, y = var_87_promoted)[name = tensor<string, []>("op_286")];
+            tensor<int32, [1]> var_287 = const()[name = tensor<string, []>("op_287"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_288 = reduce_mean(axes = var_287, keep_dims = var_90, x = var_286)[name = tensor<string, []>("op_288")];
+            tensor<fp16, []> var_289_to_fp16 = const()[name = tensor<string, []>("op_289_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_290_cast_fp16 = add(x = var_288, y = var_289_to_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
             tensor<fp32, []> denom_3_epsilon_0 = const()[name = tensor<string, []>("denom_3_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0, x = var_288_cast_fp16)[name = tensor<string, []>("denom_3_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_290_cast_fp16 = mul(x = zero_mean_3, y = denom_3_cast_fp16)[name = tensor<string, []>("op_290_cast_fp16")];
-            tensor<fp16, [384]> var_292_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_292_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66790400)))];
-            tensor<fp16, [384]> var_292_beta_0_to_fp16 = const()[name = tensor<string, []>("op_292_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66791232)))];
-            tensor<fp16, []> var_292_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_292_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_292_cast_fp16 = batch_norm(beta = var_292_beta_0_to_fp16, epsilon = var_292_epsilon_0_to_fp16, gamma = var_292_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_290_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
-            tensor<int32, [2]> var_298 = const()[name = tensor<string, []>("op_298"), val = tensor<int32, [2]>([1, 1])];
+            tensor<fp16, [1, 1, 1, 512]> denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0, x = var_290_cast_fp16)[name = tensor<string, []>("denom_3_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_292_cast_fp16 = mul(x = zero_mean_3, y = denom_3_cast_fp16)[name = tensor<string, []>("op_292_cast_fp16")];
+            tensor<fp16, [384]> var_294_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_294_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66790400)))];
+            tensor<fp16, [384]> var_294_beta_0_to_fp16 = const()[name = tensor<string, []>("op_294_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66791232)))];
+            tensor<fp16, []> var_294_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_294_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_294_cast_fp16 = batch_norm(beta = var_294_beta_0_to_fp16, epsilon = var_294_epsilon_0_to_fp16, gamma = var_294_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_292_cast_fp16)[name = tensor<string, []>("op_294_cast_fp16")];
             tensor<int32, [2]> var_300 = const()[name = tensor<string, []>("op_300"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_302_pad_type_0 = const()[name = tensor<string, []>("op_302_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_302_pad_0 = const()[name = tensor<string, []>("op_302_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_302 = conv(bias = layers_0_mlp_fc1_bias, dilations = var_300, groups = var_77, pad = var_302_pad_0, pad_type = var_302_pad_type_0, strides = var_298, weight = layers_0_mlp_fc1_weight, x = var_292_cast_fp16)[name = tensor<string, []>("op_302")];
-            tensor<string, []> input_7_mode_0 = const()[name = tensor<string, []>("input_7_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_7 = gelu(mode = input_7_mode_0, x = var_302)[name = tensor<string, []>("input_7")];
-            tensor<int32, [2]> var_306 = const()[name = tensor<string, []>("op_306"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_302 = const()[name = tensor<string, []>("op_302"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_304_pad_type_0 = const()[name = tensor<string, []>("op_304_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_304_pad_0 = const()[name = tensor<string, []>("op_304_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_304 = conv(bias = layers_0_mlp_fc1_bias, dilations = var_302, groups = var_88, pad = var_304_pad_0, pad_type = var_304_pad_type_0, strides = var_300, weight = layers_0_mlp_fc1_weight, x = var_294_cast_fp16)[name = tensor<string, []>("op_304")];
+            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_31 = gelu(mode = input_31_mode_0, x = var_304)[name = tensor<string, []>("input_31")];
             tensor<int32, [2]> var_308 = const()[name = tensor<string, []>("op_308"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_310_pad_type_0 = const()[name = tensor<string, []>("op_310_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_310_pad_0 = const()[name = tensor<string, []>("op_310_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_310 = conv(bias = layers_0_mlp_fc2_bias, dilations = var_308, groups = var_77, pad = var_310_pad_0, pad_type = var_310_pad_type_0, strides = var_306, weight = layers_0_mlp_fc2_weight, x = input_7)[name = tensor<string, []>("op_310")];
-            tensor<fp16, [1, 384, 1, 512]> x_9 = add(x = var_292_cast_fp16, y = var_310)[name = tensor<string, []>("x_9")];
-            tensor<fp16, []> var_75_promoted_1 = const()[name = tensor<string, []>("op_75_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_76_promoted_1 = const()[name = tensor<string, []>("op_76_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_11 = clip(alpha = var_75_promoted_1, beta = var_76_promoted_1, x = x_9)[name = tensor<string, []>("x_11")];
-            tensor<int32, [1]> var_315 = const()[name = tensor<string, []>("op_315"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_5 = reduce_mean(axes = var_315, keep_dims = var_79, x = x_11)[name = tensor<string, []>("mean_5")];
+            tensor<int32, [2]> var_310 = const()[name = tensor<string, []>("op_310"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_312_pad_type_0 = const()[name = tensor<string, []>("op_312_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_312_pad_0 = const()[name = tensor<string, []>("op_312_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_312 = conv(bias = layers_0_mlp_fc2_bias, dilations = var_310, groups = var_88, pad = var_312_pad_0, pad_type = var_312_pad_type_0, strides = var_308, weight = layers_0_mlp_fc2_weight, x = input_31)[name = tensor<string, []>("op_312")];
+            tensor<fp16, [1, 384, 1, 512]> x_9 = add(x = var_294_cast_fp16, y = var_312)[name = tensor<string, []>("x_9")];
+            tensor<fp16, []> var_85_promoted_1 = const()[name = tensor<string, []>("op_85_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_86_promoted_1 = const()[name = tensor<string, []>("op_86_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_11 = clip(alpha = var_85_promoted_1, beta = var_86_promoted_1, x = x_9)[name = tensor<string, []>("x_11")];
+            tensor<int32, [1]> var_317 = const()[name = tensor<string, []>("op_317"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_5 = reduce_mean(axes = var_317, keep_dims = var_90, x = x_11)[name = tensor<string, []>("mean_5")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_5 = sub(x = x_11, y = mean_5)[name = tensor<string, []>("zero_mean_5")];
-            tensor<fp16, []> var_84_promoted_1 = const()[name = tensor<string, []>("op_84_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_318 = pow(x = zero_mean_5, y = var_84_promoted_1)[name = tensor<string, []>("op_318")];
-            tensor<int32, [1]> var_319 = const()[name = tensor<string, []>("op_319"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_320 = reduce_mean(axes = var_319, keep_dims = var_79, x = var_318)[name = tensor<string, []>("op_320")];
-            tensor<fp16, []> var_321_to_fp16 = const()[name = tensor<string, []>("op_321_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_322_cast_fp16 = add(x = var_320, y = var_321_to_fp16)[name = tensor<string, []>("op_322_cast_fp16")];
+            tensor<fp16, []> var_87_promoted_1 = const()[name = tensor<string, []>("op_87_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_320 = pow(x = zero_mean_5, y = var_87_promoted_1)[name = tensor<string, []>("op_320")];
+            tensor<int32, [1]> var_321 = const()[name = tensor<string, []>("op_321"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_322 = reduce_mean(axes = var_321, keep_dims = var_90, x = var_320)[name = tensor<string, []>("op_322")];
+            tensor<fp16, []> var_323_to_fp16 = const()[name = tensor<string, []>("op_323_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_324_cast_fp16 = add(x = var_322, y = var_323_to_fp16)[name = tensor<string, []>("op_324_cast_fp16")];
             tensor<fp32, []> denom_5_epsilon_0 = const()[name = tensor<string, []>("denom_5_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0, x = var_322_cast_fp16)[name = tensor<string, []>("denom_5_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_324_cast_fp16 = mul(x = zero_mean_5, y = denom_5_cast_fp16)[name = tensor<string, []>("op_324_cast_fp16")];
-            tensor<fp16, [384]> var_326_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_326_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66792064)))];
-            tensor<fp16, [384]> var_326_beta_0_to_fp16 = const()[name = tensor<string, []>("op_326_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66792896)))];
-            tensor<fp16, []> var_326_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_326_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_326_cast_fp16 = batch_norm(beta = var_326_beta_0_to_fp16, epsilon = var_326_epsilon_0_to_fp16, gamma = var_326_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_324_cast_fp16)[name = tensor<string, []>("op_326_cast_fp16")];
-            tensor<int32, []> var_331 = const()[name = tensor<string, []>("op_331"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_332 = const()[name = tensor<string, []>("op_332"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_333 = const()[name = tensor<string, []>("op_333"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 1, 1, 512]> denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0, x = var_324_cast_fp16)[name = tensor<string, []>("denom_5_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_326_cast_fp16 = mul(x = zero_mean_5, y = denom_5_cast_fp16)[name = tensor<string, []>("op_326_cast_fp16")];
+            tensor<fp16, [384]> var_328_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_328_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66792064)))];
+            tensor<fp16, [384]> var_328_beta_0_to_fp16 = const()[name = tensor<string, []>("op_328_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66792896)))];
+            tensor<fp16, []> var_328_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_328_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_328_cast_fp16 = batch_norm(beta = var_328_beta_0_to_fp16, epsilon = var_328_epsilon_0_to_fp16, gamma = var_328_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_326_cast_fp16)[name = tensor<string, []>("op_328_cast_fp16")];
+            tensor<int32, []> var_334 = const()[name = tensor<string, []>("op_334"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_335 = const()[name = tensor<string, []>("op_335"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_336 = const()[name = tensor<string, []>("op_336"), val = tensor<bool, []>(true)];
             tensor<int32, [2]> var_358 = const()[name = tensor<string, []>("op_358"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_360 = const()[name = tensor<string, []>("op_360"), val = tensor<int32, [2]>([1, 1])];
             tensor<string, []> var_362_pad_type_0 = const()[name = tensor<string, []>("op_362_pad_type_0"), val = tensor<string, []>("custom")];
             tensor<int32, [4]> var_362_pad_0 = const()[name = tensor<string, []>("op_362_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_362 = conv(bias = layers_1_attention_q_proj_bias, dilations = var_360, groups = var_331, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_358, weight = layers_1_attention_q_proj_weight, x = var_326_cast_fp16)[name = tensor<string, []>("op_362")];
-            tensor<int32, [4]> var_363 = const()[name = tensor<string, []>("op_363"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_364 = reshape(shape = var_363, x = var_362)[name = tensor<string, []>("op_364")];
+            tensor<fp16, [1, 384, 1, 512]> var_362 = conv(bias = layers_1_attention_q_proj_bias, dilations = var_360, groups = var_334, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_358, weight = layers_1_attention_q_proj_weight, x = var_328_cast_fp16)[name = tensor<string, []>("op_362")];
+            tensor<int32, [2]> var_365 = const()[name = tensor<string, []>("op_365"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_367 = const()[name = tensor<string, []>("op_367"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_369 = const()[name = tensor<string, []>("op_369"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_371_pad_type_0 = const()[name = tensor<string, []>("op_371_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_371_pad_0 = const()[name = tensor<string, []>("op_371_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_371 = conv(bias = layers_1_attention_k_proj_bias, dilations = var_369, groups = var_331, pad = var_371_pad_0, pad_type = var_371_pad_type_0, strides = var_367, weight = layers_1_attention_k_proj_weight, x = var_326_cast_fp16)[name = tensor<string, []>("op_371")];
-            tensor<int32, [4]> var_372 = const()[name = tensor<string, []>("op_372"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_3 = reshape(shape = var_372, x = var_371)[name = tensor<string, []>("ks_3")];
-            tensor<int32, [2]> var_376 = const()[name = tensor<string, []>("op_376"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_378 = const()[name = tensor<string, []>("op_378"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_380_pad_type_0 = const()[name = tensor<string, []>("op_380_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_380_pad_0 = const()[name = tensor<string, []>("op_380_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_380 = conv(bias = layers_1_attention_v_proj_bias, dilations = var_378, groups = var_331, pad = var_380_pad_0, pad_type = var_380_pad_type_0, strides = var_376, weight = layers_1_attention_v_proj_weight, x = var_326_cast_fp16)[name = tensor<string, []>("op_380")];
-            tensor<int32, [4]> var_381 = const()[name = tensor<string, []>("op_381"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_382 = reshape(shape = var_381, x = var_380)[name = tensor<string, []>("op_382")];
-            tensor<int32, [12]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_383_axis_0 = const()[name = tensor<string, []>("op_383_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_383_0, tensor<fp16, [1, 32, 1, 512]> var_383_1, tensor<fp16, [1, 32, 1, 512]> var_383_2, tensor<fp16, [1, 32, 1, 512]> var_383_3, tensor<fp16, [1, 32, 1, 512]> var_383_4, tensor<fp16, [1, 32, 1, 512]> var_383_5, tensor<fp16, [1, 32, 1, 512]> var_383_6, tensor<fp16, [1, 32, 1, 512]> var_383_7, tensor<fp16, [1, 32, 1, 512]> var_383_8, tensor<fp16, [1, 32, 1, 512]> var_383_9, tensor<fp16, [1, 32, 1, 512]> var_383_10, tensor<fp16, [1, 32, 1, 512]> var_383_11 = split(axis = var_383_axis_0, split_sizes = tile_7, x = var_364)[name = tensor<string, []>("op_383")];
-            tensor<int32, [4]> var_396_perm_0 = const()[name = tensor<string, []>("op_396_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_397_axis_0 = const()[name = tensor<string, []>("op_397_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_10 = transpose(perm = var_396_perm_0, x = ks_3)[name = tensor<string, []>("transpose_10")];
-            tensor<fp16, [1, 512, 1, 32]> var_397_0, tensor<fp16, [1, 512, 1, 32]> var_397_1, tensor<fp16, [1, 512, 1, 32]> var_397_2, tensor<fp16, [1, 512, 1, 32]> var_397_3, tensor<fp16, [1, 512, 1, 32]> var_397_4, tensor<fp16, [1, 512, 1, 32]> var_397_5, tensor<fp16, [1, 512, 1, 32]> var_397_6, tensor<fp16, [1, 512, 1, 32]> var_397_7, tensor<fp16, [1, 512, 1, 32]> var_397_8, tensor<fp16, [1, 512, 1, 32]> var_397_9, tensor<fp16, [1, 512, 1, 32]> var_397_10, tensor<fp16, [1, 512, 1, 32]> var_397_11 = split(axis = var_397_axis_0, split_sizes = tile_8, x = transpose_10)[name = tensor<string, []>("op_397")];
-            tensor<int32, [12]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_410_axis_0 = const()[name = tensor<string, []>("op_410_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_410_0, tensor<fp16, [1, 32, 1, 512]> var_410_1, tensor<fp16, [1, 32, 1, 512]> var_410_2, tensor<fp16, [1, 32, 1, 512]> var_410_3, tensor<fp16, [1, 32, 1, 512]> var_410_4, tensor<fp16, [1, 32, 1, 512]> var_410_5, tensor<fp16, [1, 32, 1, 512]> var_410_6, tensor<fp16, [1, 32, 1, 512]> var_410_7, tensor<fp16, [1, 32, 1, 512]> var_410_8, tensor<fp16, [1, 32, 1, 512]> var_410_9, tensor<fp16, [1, 32, 1, 512]> var_410_10, tensor<fp16, [1, 32, 1, 512]> var_410_11 = split(axis = var_410_axis_0, split_sizes = tile_9, x = var_382)[name = tensor<string, []>("op_410")];
-            tensor<string, []> var_424_equation_0 = const()[name = tensor<string, []>("op_424_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_424 = einsum(equation = var_424_equation_0, values = (var_397_0, var_383_0))[name = tensor<string, []>("op_424")];
-            tensor<fp16, []> var_425_to_fp16 = const()[name = tensor<string, []>("op_425_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_25_cast_fp16 = mul(x = var_424, y = var_425_to_fp16)[name = tensor<string, []>("w_25_cast_fp16")];
-            tensor<string, []> var_428_equation_0 = const()[name = tensor<string, []>("op_428_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_428 = einsum(equation = var_428_equation_0, values = (var_397_1, var_383_1))[name = tensor<string, []>("op_428")];
-            tensor<fp16, []> var_429_to_fp16 = const()[name = tensor<string, []>("op_429_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_27_cast_fp16 = mul(x = var_428, y = var_429_to_fp16)[name = tensor<string, []>("w_27_cast_fp16")];
-            tensor<string, []> var_432_equation_0 = const()[name = tensor<string, []>("op_432_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_432 = einsum(equation = var_432_equation_0, values = (var_397_2, var_383_2))[name = tensor<string, []>("op_432")];
-            tensor<fp16, []> var_433_to_fp16 = const()[name = tensor<string, []>("op_433_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_29_cast_fp16 = mul(x = var_432, y = var_433_to_fp16)[name = tensor<string, []>("w_29_cast_fp16")];
-            tensor<string, []> var_436_equation_0 = const()[name = tensor<string, []>("op_436_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_436 = einsum(equation = var_436_equation_0, values = (var_397_3, var_383_3))[name = tensor<string, []>("op_436")];
-            tensor<fp16, []> var_437_to_fp16 = const()[name = tensor<string, []>("op_437_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_31_cast_fp16 = mul(x = var_436, y = var_437_to_fp16)[name = tensor<string, []>("w_31_cast_fp16")];
-            tensor<string, []> var_440_equation_0 = const()[name = tensor<string, []>("op_440_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_440 = einsum(equation = var_440_equation_0, values = (var_397_4, var_383_4))[name = tensor<string, []>("op_440")];
-            tensor<fp16, []> var_441_to_fp16 = const()[name = tensor<string, []>("op_441_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_33_cast_fp16 = mul(x = var_440, y = var_441_to_fp16)[name = tensor<string, []>("w_33_cast_fp16")];
-            tensor<string, []> var_444_equation_0 = const()[name = tensor<string, []>("op_444_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_444 = einsum(equation = var_444_equation_0, values = (var_397_5, var_383_5))[name = tensor<string, []>("op_444")];
-            tensor<fp16, []> var_445_to_fp16 = const()[name = tensor<string, []>("op_445_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_35_cast_fp16 = mul(x = var_444, y = var_445_to_fp16)[name = tensor<string, []>("w_35_cast_fp16")];
-            tensor<string, []> var_448_equation_0 = const()[name = tensor<string, []>("op_448_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_448 = einsum(equation = var_448_equation_0, values = (var_397_6, var_383_6))[name = tensor<string, []>("op_448")];
-            tensor<fp16, []> var_449_to_fp16 = const()[name = tensor<string, []>("op_449_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_37_cast_fp16 = mul(x = var_448, y = var_449_to_fp16)[name = tensor<string, []>("w_37_cast_fp16")];
-            tensor<string, []> var_452_equation_0 = const()[name = tensor<string, []>("op_452_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_452 = einsum(equation = var_452_equation_0, values = (var_397_7, var_383_7))[name = tensor<string, []>("op_452")];
-            tensor<fp16, []> var_453_to_fp16 = const()[name = tensor<string, []>("op_453_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_39_cast_fp16 = mul(x = var_452, y = var_453_to_fp16)[name = tensor<string, []>("w_39_cast_fp16")];
-            tensor<string, []> var_456_equation_0 = const()[name = tensor<string, []>("op_456_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_456 = einsum(equation = var_456_equation_0, values = (var_397_8, var_383_8))[name = tensor<string, []>("op_456")];
-            tensor<fp16, []> var_457_to_fp16 = const()[name = tensor<string, []>("op_457_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_41_cast_fp16 = mul(x = var_456, y = var_457_to_fp16)[name = tensor<string, []>("w_41_cast_fp16")];
-            tensor<string, []> var_460_equation_0 = const()[name = tensor<string, []>("op_460_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_460 = einsum(equation = var_460_equation_0, values = (var_397_9, var_383_9))[name = tensor<string, []>("op_460")];
-            tensor<fp16, []> var_461_to_fp16 = const()[name = tensor<string, []>("op_461_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_43_cast_fp16 = mul(x = var_460, y = var_461_to_fp16)[name = tensor<string, []>("w_43_cast_fp16")];
-            tensor<string, []> var_464_equation_0 = const()[name = tensor<string, []>("op_464_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_464 = einsum(equation = var_464_equation_0, values = (var_397_10, var_383_10))[name = tensor<string, []>("op_464")];
-            tensor<fp16, []> var_465_to_fp16 = const()[name = tensor<string, []>("op_465_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_45_cast_fp16 = mul(x = var_464, y = var_465_to_fp16)[name = tensor<string, []>("w_45_cast_fp16")];
-            tensor<string, []> var_468_equation_0 = const()[name = tensor<string, []>("op_468_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_468 = einsum(equation = var_468_equation_0, values = (var_397_11, var_383_11))[name = tensor<string, []>("op_468")];
-            tensor<fp16, []> var_469_to_fp16 = const()[name = tensor<string, []>("op_469_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_47_cast_fp16 = mul(x = var_468, y = var_469_to_fp16)[name = tensor<string, []>("w_47_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_471_cast_fp16 = add(x = w_25_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_471_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_472_cast_fp16 = softmax(axis = var_331, x = var_471_cast_fp16)[name = tensor<string, []>("op_472_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_473_cast_fp16 = add(x = w_27_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_473_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_474_cast_fp16 = softmax(axis = var_331, x = var_473_cast_fp16)[name = tensor<string, []>("op_474_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_475_cast_fp16 = add(x = w_29_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_475_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_476_cast_fp16 = softmax(axis = var_331, x = var_475_cast_fp16)[name = tensor<string, []>("op_476_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_477_cast_fp16 = add(x = w_31_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_477_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_478_cast_fp16 = softmax(axis = var_331, x = var_477_cast_fp16)[name = tensor<string, []>("op_478_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_479_cast_fp16 = add(x = w_33_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_479_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_480_cast_fp16 = softmax(axis = var_331, x = var_479_cast_fp16)[name = tensor<string, []>("op_480_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_481_cast_fp16 = add(x = w_35_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_481_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_482_cast_fp16 = softmax(axis = var_331, x = var_481_cast_fp16)[name = tensor<string, []>("op_482_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_483_cast_fp16 = add(x = w_37_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_483_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_484_cast_fp16 = softmax(axis = var_331, x = var_483_cast_fp16)[name = tensor<string, []>("op_484_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_485_cast_fp16 = add(x = w_39_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_485_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_486_cast_fp16 = softmax(axis = var_331, x = var_485_cast_fp16)[name = tensor<string, []>("op_486_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_487_cast_fp16 = add(x = w_41_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_487_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_488_cast_fp16 = softmax(axis = var_331, x = var_487_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_489_cast_fp16 = add(x = w_43_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_489_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_490_cast_fp16 = softmax(axis = var_331, x = var_489_cast_fp16)[name = tensor<string, []>("op_490_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_491_cast_fp16 = add(x = w_45_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_491_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_492_cast_fp16 = softmax(axis = var_331, x = var_491_cast_fp16)[name = tensor<string, []>("op_492_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_493_cast_fp16 = add(x = w_47_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_493_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_494_cast_fp16 = softmax(axis = var_331, x = var_493_cast_fp16)[name = tensor<string, []>("op_494_cast_fp16")];
+            tensor<string, []> ks_3_pad_type_0 = const()[name = tensor<string, []>("ks_3_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_3_pad_0 = const()[name = tensor<string, []>("ks_3_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_3 = conv(bias = layers_1_attention_k_proj_bias, dilations = var_367, groups = var_334, pad = ks_3_pad_0, pad_type = ks_3_pad_type_0, strides = var_365, weight = layers_1_attention_k_proj_weight, x = var_328_cast_fp16)[name = tensor<string, []>("ks_3")];
+            tensor<int32, [2]> var_372 = const()[name = tensor<string, []>("op_372"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_374 = const()[name = tensor<string, []>("op_374"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_376_pad_type_0 = const()[name = tensor<string, []>("op_376_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_376_pad_0 = const()[name = tensor<string, []>("op_376_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_376 = conv(bias = layers_1_attention_v_proj_bias, dilations = var_374, groups = var_334, pad = var_376_pad_0, pad_type = var_376_pad_type_0, strides = var_372, weight = layers_1_attention_v_proj_weight, x = var_328_cast_fp16)[name = tensor<string, []>("op_376")];
+            tensor<int32, [12]> tile_7 = const()[name = tensor<string, []>("tile_7"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_377_axis_0 = const()[name = tensor<string, []>("op_377_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_377_0, tensor<fp16, [1, 32, 1, 512]> var_377_1, tensor<fp16, [1, 32, 1, 512]> var_377_2, tensor<fp16, [1, 32, 1, 512]> var_377_3, tensor<fp16, [1, 32, 1, 512]> var_377_4, tensor<fp16, [1, 32, 1, 512]> var_377_5, tensor<fp16, [1, 32, 1, 512]> var_377_6, tensor<fp16, [1, 32, 1, 512]> var_377_7, tensor<fp16, [1, 32, 1, 512]> var_377_8, tensor<fp16, [1, 32, 1, 512]> var_377_9, tensor<fp16, [1, 32, 1, 512]> var_377_10, tensor<fp16, [1, 32, 1, 512]> var_377_11 = split(axis = var_377_axis_0, split_sizes = tile_7, x = var_362)[name = tensor<string, []>("op_377")];
+            tensor<int32, [4]> var_390_perm_0 = const()[name = tensor<string, []>("op_390_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_8 = const()[name = tensor<string, []>("tile_8"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_391_axis_0 = const()[name = tensor<string, []>("op_391_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_10 = transpose(perm = var_390_perm_0, x = ks_3)[name = tensor<string, []>("transpose_10")];
+            tensor<fp16, [1, 512, 1, 32]> var_391_0, tensor<fp16, [1, 512, 1, 32]> var_391_1, tensor<fp16, [1, 512, 1, 32]> var_391_2, tensor<fp16, [1, 512, 1, 32]> var_391_3, tensor<fp16, [1, 512, 1, 32]> var_391_4, tensor<fp16, [1, 512, 1, 32]> var_391_5, tensor<fp16, [1, 512, 1, 32]> var_391_6, tensor<fp16, [1, 512, 1, 32]> var_391_7, tensor<fp16, [1, 512, 1, 32]> var_391_8, tensor<fp16, [1, 512, 1, 32]> var_391_9, tensor<fp16, [1, 512, 1, 32]> var_391_10, tensor<fp16, [1, 512, 1, 32]> var_391_11 = split(axis = var_391_axis_0, split_sizes = tile_8, x = transpose_10)[name = tensor<string, []>("op_391")];
+            tensor<int32, [12]> tile_9 = const()[name = tensor<string, []>("tile_9"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_404_axis_0 = const()[name = tensor<string, []>("op_404_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_404_0, tensor<fp16, [1, 32, 1, 512]> var_404_1, tensor<fp16, [1, 32, 1, 512]> var_404_2, tensor<fp16, [1, 32, 1, 512]> var_404_3, tensor<fp16, [1, 32, 1, 512]> var_404_4, tensor<fp16, [1, 32, 1, 512]> var_404_5, tensor<fp16, [1, 32, 1, 512]> var_404_6, tensor<fp16, [1, 32, 1, 512]> var_404_7, tensor<fp16, [1, 32, 1, 512]> var_404_8, tensor<fp16, [1, 32, 1, 512]> var_404_9, tensor<fp16, [1, 32, 1, 512]> var_404_10, tensor<fp16, [1, 32, 1, 512]> var_404_11 = split(axis = var_404_axis_0, split_sizes = tile_9, x = var_376)[name = tensor<string, []>("op_404")];
+            tensor<string, []> var_418_equation_0 = const()[name = tensor<string, []>("op_418_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_418 = einsum(equation = var_418_equation_0, values = (var_391_0, var_377_0))[name = tensor<string, []>("op_418")];
+            tensor<fp16, []> var_419_to_fp16 = const()[name = tensor<string, []>("op_419_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_25_cast_fp16 = mul(x = var_418, y = var_419_to_fp16)[name = tensor<string, []>("w_25_cast_fp16")];
+            tensor<string, []> var_422_equation_0 = const()[name = tensor<string, []>("op_422_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_422 = einsum(equation = var_422_equation_0, values = (var_391_1, var_377_1))[name = tensor<string, []>("op_422")];
+            tensor<fp16, []> var_423_to_fp16 = const()[name = tensor<string, []>("op_423_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_27_cast_fp16 = mul(x = var_422, y = var_423_to_fp16)[name = tensor<string, []>("w_27_cast_fp16")];
+            tensor<string, []> var_426_equation_0 = const()[name = tensor<string, []>("op_426_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_426 = einsum(equation = var_426_equation_0, values = (var_391_2, var_377_2))[name = tensor<string, []>("op_426")];
+            tensor<fp16, []> var_427_to_fp16 = const()[name = tensor<string, []>("op_427_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_29_cast_fp16 = mul(x = var_426, y = var_427_to_fp16)[name = tensor<string, []>("w_29_cast_fp16")];
+            tensor<string, []> var_430_equation_0 = const()[name = tensor<string, []>("op_430_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_430 = einsum(equation = var_430_equation_0, values = (var_391_3, var_377_3))[name = tensor<string, []>("op_430")];
+            tensor<fp16, []> var_431_to_fp16 = const()[name = tensor<string, []>("op_431_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_31_cast_fp16 = mul(x = var_430, y = var_431_to_fp16)[name = tensor<string, []>("w_31_cast_fp16")];
+            tensor<string, []> var_434_equation_0 = const()[name = tensor<string, []>("op_434_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_434 = einsum(equation = var_434_equation_0, values = (var_391_4, var_377_4))[name = tensor<string, []>("op_434")];
+            tensor<fp16, []> var_435_to_fp16 = const()[name = tensor<string, []>("op_435_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_33_cast_fp16 = mul(x = var_434, y = var_435_to_fp16)[name = tensor<string, []>("w_33_cast_fp16")];
+            tensor<string, []> var_438_equation_0 = const()[name = tensor<string, []>("op_438_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_438 = einsum(equation = var_438_equation_0, values = (var_391_5, var_377_5))[name = tensor<string, []>("op_438")];
+            tensor<fp16, []> var_439_to_fp16 = const()[name = tensor<string, []>("op_439_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_35_cast_fp16 = mul(x = var_438, y = var_439_to_fp16)[name = tensor<string, []>("w_35_cast_fp16")];
+            tensor<string, []> var_442_equation_0 = const()[name = tensor<string, []>("op_442_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_442 = einsum(equation = var_442_equation_0, values = (var_391_6, var_377_6))[name = tensor<string, []>("op_442")];
+            tensor<fp16, []> var_443_to_fp16 = const()[name = tensor<string, []>("op_443_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_37_cast_fp16 = mul(x = var_442, y = var_443_to_fp16)[name = tensor<string, []>("w_37_cast_fp16")];
+            tensor<string, []> var_446_equation_0 = const()[name = tensor<string, []>("op_446_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_446 = einsum(equation = var_446_equation_0, values = (var_391_7, var_377_7))[name = tensor<string, []>("op_446")];
+            tensor<fp16, []> var_447_to_fp16 = const()[name = tensor<string, []>("op_447_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_39_cast_fp16 = mul(x = var_446, y = var_447_to_fp16)[name = tensor<string, []>("w_39_cast_fp16")];
+            tensor<string, []> var_450_equation_0 = const()[name = tensor<string, []>("op_450_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_450 = einsum(equation = var_450_equation_0, values = (var_391_8, var_377_8))[name = tensor<string, []>("op_450")];
+            tensor<fp16, []> var_451_to_fp16 = const()[name = tensor<string, []>("op_451_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_41_cast_fp16 = mul(x = var_450, y = var_451_to_fp16)[name = tensor<string, []>("w_41_cast_fp16")];
+            tensor<string, []> var_454_equation_0 = const()[name = tensor<string, []>("op_454_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_454 = einsum(equation = var_454_equation_0, values = (var_391_9, var_377_9))[name = tensor<string, []>("op_454")];
+            tensor<fp16, []> var_455_to_fp16 = const()[name = tensor<string, []>("op_455_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_43_cast_fp16 = mul(x = var_454, y = var_455_to_fp16)[name = tensor<string, []>("w_43_cast_fp16")];
+            tensor<string, []> var_458_equation_0 = const()[name = tensor<string, []>("op_458_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_458 = einsum(equation = var_458_equation_0, values = (var_391_10, var_377_10))[name = tensor<string, []>("op_458")];
+            tensor<fp16, []> var_459_to_fp16 = const()[name = tensor<string, []>("op_459_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_45_cast_fp16 = mul(x = var_458, y = var_459_to_fp16)[name = tensor<string, []>("w_45_cast_fp16")];
+            tensor<string, []> var_462_equation_0 = const()[name = tensor<string, []>("op_462_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_462 = einsum(equation = var_462_equation_0, values = (var_391_11, var_377_11))[name = tensor<string, []>("op_462")];
+            tensor<fp16, []> var_463_to_fp16 = const()[name = tensor<string, []>("op_463_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_47_cast_fp16 = mul(x = var_462, y = var_463_to_fp16)[name = tensor<string, []>("w_47_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_35_cast_fp16 = add(x = w_25_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_466_cast_fp16 = softmax(axis = var_334, x = input_35_cast_fp16)[name = tensor<string, []>("op_466_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_37_cast_fp16 = add(x = w_27_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_468_cast_fp16 = softmax(axis = var_334, x = input_37_cast_fp16)[name = tensor<string, []>("op_468_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_39_cast_fp16 = add(x = w_29_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_470_cast_fp16 = softmax(axis = var_334, x = input_39_cast_fp16)[name = tensor<string, []>("op_470_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_41_cast_fp16 = add(x = w_31_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_472_cast_fp16 = softmax(axis = var_334, x = input_41_cast_fp16)[name = tensor<string, []>("op_472_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_43_cast_fp16 = add(x = w_33_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_43_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_474_cast_fp16 = softmax(axis = var_334, x = input_43_cast_fp16)[name = tensor<string, []>("op_474_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_45_cast_fp16 = add(x = w_35_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_476_cast_fp16 = softmax(axis = var_334, x = input_45_cast_fp16)[name = tensor<string, []>("op_476_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_47_cast_fp16 = add(x = w_37_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_478_cast_fp16 = softmax(axis = var_334, x = input_47_cast_fp16)[name = tensor<string, []>("op_478_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_49_cast_fp16 = add(x = w_39_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_480_cast_fp16 = softmax(axis = var_334, x = input_49_cast_fp16)[name = tensor<string, []>("op_480_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_51_cast_fp16 = add(x = w_41_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_482_cast_fp16 = softmax(axis = var_334, x = input_51_cast_fp16)[name = tensor<string, []>("op_482_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_53_cast_fp16 = add(x = w_43_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_484_cast_fp16 = softmax(axis = var_334, x = input_53_cast_fp16)[name = tensor<string, []>("op_484_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_55_cast_fp16 = add(x = w_45_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_55_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_486_cast_fp16 = softmax(axis = var_334, x = input_55_cast_fp16)[name = tensor<string, []>("op_486_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_57_cast_fp16 = add(x = w_47_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_488_cast_fp16 = softmax(axis = var_334, x = input_57_cast_fp16)[name = tensor<string, []>("op_488_cast_fp16")];
+            tensor<string, []> var_490_equation_0 = const()[name = tensor<string, []>("op_490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_490_cast_fp16 = einsum(equation = var_490_equation_0, values = (var_404_0, var_466_cast_fp16))[name = tensor<string, []>("op_490_cast_fp16")];
+            tensor<string, []> var_492_equation_0 = const()[name = tensor<string, []>("op_492_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_492_cast_fp16 = einsum(equation = var_492_equation_0, values = (var_404_1, var_468_cast_fp16))[name = tensor<string, []>("op_492_cast_fp16")];
+            tensor<string, []> var_494_equation_0 = const()[name = tensor<string, []>("op_494_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_494_cast_fp16 = einsum(equation = var_494_equation_0, values = (var_404_2, var_470_cast_fp16))[name = tensor<string, []>("op_494_cast_fp16")];
             tensor<string, []> var_496_equation_0 = const()[name = tensor<string, []>("op_496_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_410_0, var_472_cast_fp16))[name = tensor<string, []>("op_496_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_404_3, var_472_cast_fp16))[name = tensor<string, []>("op_496_cast_fp16")];
             tensor<string, []> var_498_equation_0 = const()[name = tensor<string, []>("op_498_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_410_1, var_474_cast_fp16))[name = tensor<string, []>("op_498_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_404_4, var_474_cast_fp16))[name = tensor<string, []>("op_498_cast_fp16")];
             tensor<string, []> var_500_equation_0 = const()[name = tensor<string, []>("op_500_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_410_2, var_476_cast_fp16))[name = tensor<string, []>("op_500_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_404_5, var_476_cast_fp16))[name = tensor<string, []>("op_500_cast_fp16")];
             tensor<string, []> var_502_equation_0 = const()[name = tensor<string, []>("op_502_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_410_3, var_478_cast_fp16))[name = tensor<string, []>("op_502_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_404_6, var_478_cast_fp16))[name = tensor<string, []>("op_502_cast_fp16")];
             tensor<string, []> var_504_equation_0 = const()[name = tensor<string, []>("op_504_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_410_4, var_480_cast_fp16))[name = tensor<string, []>("op_504_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_404_7, var_480_cast_fp16))[name = tensor<string, []>("op_504_cast_fp16")];
             tensor<string, []> var_506_equation_0 = const()[name = tensor<string, []>("op_506_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_410_5, var_482_cast_fp16))[name = tensor<string, []>("op_506_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_404_8, var_482_cast_fp16))[name = tensor<string, []>("op_506_cast_fp16")];
             tensor<string, []> var_508_equation_0 = const()[name = tensor<string, []>("op_508_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_410_6, var_484_cast_fp16))[name = tensor<string, []>("op_508_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_404_9, var_484_cast_fp16))[name = tensor<string, []>("op_508_cast_fp16")];
             tensor<string, []> var_510_equation_0 = const()[name = tensor<string, []>("op_510_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_410_7, var_486_cast_fp16))[name = tensor<string, []>("op_510_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_404_10, var_486_cast_fp16))[name = tensor<string, []>("op_510_cast_fp16")];
             tensor<string, []> var_512_equation_0 = const()[name = tensor<string, []>("op_512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_410_8, var_488_cast_fp16))[name = tensor<string, []>("op_512_cast_fp16")];
-            tensor<string, []> var_514_equation_0 = const()[name = tensor<string, []>("op_514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_514_cast_fp16 = einsum(equation = var_514_equation_0, values = (var_410_9, var_490_cast_fp16))[name = tensor<string, []>("op_514_cast_fp16")];
-            tensor<string, []> var_516_equation_0 = const()[name = tensor<string, []>("op_516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_516_cast_fp16 = einsum(equation = var_516_equation_0, values = (var_410_10, var_492_cast_fp16))[name = tensor<string, []>("op_516_cast_fp16")];
-            tensor<string, []> var_518_equation_0 = const()[name = tensor<string, []>("op_518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_518_cast_fp16 = einsum(equation = var_518_equation_0, values = (var_410_11, var_494_cast_fp16))[name = tensor<string, []>("op_518_cast_fp16")];
-            tensor<bool, []> var_520_interleave_0 = const()[name = tensor<string, []>("op_520_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_520_cast_fp16 = concat(axis = var_331, interleave = var_520_interleave_0, values = (var_496_cast_fp16, var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16, var_514_cast_fp16, var_516_cast_fp16, var_518_cast_fp16))[name = tensor<string, []>("op_520_cast_fp16")];
-            tensor<int32, [2]> var_524 = const()[name = tensor<string, []>("op_524"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_526 = const()[name = tensor<string, []>("op_526"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_528_pad_type_0 = const()[name = tensor<string, []>("op_528_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_528_pad_0 = const()[name = tensor<string, []>("op_528_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_528 = conv(bias = layers_1_attention_o_proj_bias, dilations = var_526, groups = var_331, pad = var_528_pad_0, pad_type = var_528_pad_type_0, strides = var_524, weight = layers_1_attention_o_proj_weight, x = var_520_cast_fp16)[name = tensor<string, []>("op_528")];
-            tensor<bool, []> var_530_interleave_0 = const()[name = tensor<string, []>("op_530_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_530 = concat(axis = var_332, interleave = var_530_interleave_0, values = var_528)[name = tensor<string, []>("op_530")];
-            tensor<fp16, [1, 384, 1, 512]> x_13 = add(x = var_326_cast_fp16, y = var_530)[name = tensor<string, []>("x_13")];
-            tensor<fp16, []> var_329_promoted = const()[name = tensor<string, []>("op_329_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_330_promoted = const()[name = tensor<string, []>("op_330_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_15 = clip(alpha = var_329_promoted, beta = var_330_promoted, x = x_13)[name = tensor<string, []>("x_15")];
-            tensor<int32, [1]> var_535 = const()[name = tensor<string, []>("op_535"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_7 = reduce_mean(axes = var_535, keep_dims = var_333, x = x_15)[name = tensor<string, []>("mean_7")];
+            tensor<fp16, [1, 32, 1, 512]> var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_404_11, var_488_cast_fp16))[name = tensor<string, []>("op_512_cast_fp16")];
+            tensor<bool, []> var_514_interleave_0 = const()[name = tensor<string, []>("op_514_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_514_cast_fp16 = concat(axis = var_334, interleave = var_514_interleave_0, values = (var_490_cast_fp16, var_492_cast_fp16, var_494_cast_fp16, var_496_cast_fp16, var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16))[name = tensor<string, []>("op_514_cast_fp16")];
+            tensor<int32, [2]> var_518 = const()[name = tensor<string, []>("op_518"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_520 = const()[name = tensor<string, []>("op_520"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_522_pad_type_0 = const()[name = tensor<string, []>("op_522_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_522_pad_0 = const()[name = tensor<string, []>("op_522_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_522 = conv(bias = layers_1_attention_o_proj_bias, dilations = var_520, groups = var_334, pad = var_522_pad_0, pad_type = var_522_pad_type_0, strides = var_518, weight = layers_1_attention_o_proj_weight, x = var_514_cast_fp16)[name = tensor<string, []>("op_522")];
+            tensor<bool, []> var_524_interleave_0 = const()[name = tensor<string, []>("op_524_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_524 = concat(axis = var_335, interleave = var_524_interleave_0, values = var_522)[name = tensor<string, []>("op_524")];
+            tensor<fp16, [1, 384, 1, 512]> x_13 = add(x = var_328_cast_fp16, y = var_524)[name = tensor<string, []>("x_13")];
+            tensor<fp16, []> var_331_promoted = const()[name = tensor<string, []>("op_331_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_332_promoted = const()[name = tensor<string, []>("op_332_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_15 = clip(alpha = var_331_promoted, beta = var_332_promoted, x = x_13)[name = tensor<string, []>("x_15")];
+            tensor<int32, [1]> var_529 = const()[name = tensor<string, []>("op_529"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_7 = reduce_mean(axes = var_529, keep_dims = var_336, x = x_15)[name = tensor<string, []>("mean_7")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_7 = sub(x = x_15, y = mean_7)[name = tensor<string, []>("zero_mean_7")];
-            tensor<fp16, []> var_338_promoted = const()[name = tensor<string, []>("op_338_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_538 = pow(x = zero_mean_7, y = var_338_promoted)[name = tensor<string, []>("op_538")];
-            tensor<int32, [1]> var_539 = const()[name = tensor<string, []>("op_539"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_540 = reduce_mean(axes = var_539, keep_dims = var_333, x = var_538)[name = tensor<string, []>("op_540")];
-            tensor<fp16, []> var_541_to_fp16 = const()[name = tensor<string, []>("op_541_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_542_cast_fp16 = add(x = var_540, y = var_541_to_fp16)[name = tensor<string, []>("op_542_cast_fp16")];
+            tensor<fp16, []> var_333_promoted = const()[name = tensor<string, []>("op_333_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_532 = pow(x = zero_mean_7, y = var_333_promoted)[name = tensor<string, []>("op_532")];
+            tensor<int32, [1]> var_533 = const()[name = tensor<string, []>("op_533"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_534 = reduce_mean(axes = var_533, keep_dims = var_336, x = var_532)[name = tensor<string, []>("op_534")];
+            tensor<fp16, []> var_535_to_fp16 = const()[name = tensor<string, []>("op_535_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_536_cast_fp16 = add(x = var_534, y = var_535_to_fp16)[name = tensor<string, []>("op_536_cast_fp16")];
             tensor<fp32, []> denom_7_epsilon_0 = const()[name = tensor<string, []>("denom_7_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0, x = var_542_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_544_cast_fp16 = mul(x = zero_mean_7, y = denom_7_cast_fp16)[name = tensor<string, []>("op_544_cast_fp16")];
-            tensor<fp16, [384]> var_546_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_546_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66793728)))];
-            tensor<fp16, [384]> var_546_beta_0_to_fp16 = const()[name = tensor<string, []>("op_546_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66794560)))];
-            tensor<fp16, []> var_546_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_546_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_546_cast_fp16 = batch_norm(beta = var_546_beta_0_to_fp16, epsilon = var_546_epsilon_0_to_fp16, gamma = var_546_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_544_cast_fp16)[name = tensor<string, []>("op_546_cast_fp16")];
-            tensor<int32, [2]> var_552 = const()[name = tensor<string, []>("op_552"), val = tensor<int32, [2]>([1, 1])];
+            tensor<fp16, [1, 1, 1, 512]> denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0, x = var_536_cast_fp16)[name = tensor<string, []>("denom_7_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_538_cast_fp16 = mul(x = zero_mean_7, y = denom_7_cast_fp16)[name = tensor<string, []>("op_538_cast_fp16")];
+            tensor<fp16, [384]> var_540_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_540_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66793728)))];
+            tensor<fp16, [384]> var_540_beta_0_to_fp16 = const()[name = tensor<string, []>("op_540_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66794560)))];
+            tensor<fp16, []> var_540_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_540_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_540_cast_fp16 = batch_norm(beta = var_540_beta_0_to_fp16, epsilon = var_540_epsilon_0_to_fp16, gamma = var_540_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_538_cast_fp16)[name = tensor<string, []>("op_540_cast_fp16")];
+            tensor<int32, [2]> var_546 = const()[name = tensor<string, []>("op_546"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_548 = const()[name = tensor<string, []>("op_548"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_550_pad_type_0 = const()[name = tensor<string, []>("op_550_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_550_pad_0 = const()[name = tensor<string, []>("op_550_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_550 = conv(bias = layers_1_mlp_fc1_bias, dilations = var_548, groups = var_334, pad = var_550_pad_0, pad_type = var_550_pad_type_0, strides = var_546, weight = layers_1_mlp_fc1_weight, x = var_540_cast_fp16)[name = tensor<string, []>("op_550")];
+            tensor<string, []> input_63_mode_0 = const()[name = tensor<string, []>("input_63_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_63 = gelu(mode = input_63_mode_0, x = var_550)[name = tensor<string, []>("input_63")];
             tensor<int32, [2]> var_554 = const()[name = tensor<string, []>("op_554"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_556_pad_type_0 = const()[name = tensor<string, []>("op_556_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_556_pad_0 = const()[name = tensor<string, []>("op_556_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_556 = conv(bias = layers_1_mlp_fc1_bias, dilations = var_554, groups = var_331, pad = var_556_pad_0, pad_type = var_556_pad_type_0, strides = var_552, weight = layers_1_mlp_fc1_weight, x = var_546_cast_fp16)[name = tensor<string, []>("op_556")];
-            tensor<string, []> input_15_mode_0 = const()[name = tensor<string, []>("input_15_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_15 = gelu(mode = input_15_mode_0, x = var_556)[name = tensor<string, []>("input_15")];
-            tensor<int32, [2]> var_560 = const()[name = tensor<string, []>("op_560"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_562 = const()[name = tensor<string, []>("op_562"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_564_pad_type_0 = const()[name = tensor<string, []>("op_564_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_564_pad_0 = const()[name = tensor<string, []>("op_564_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_564 = conv(bias = layers_1_mlp_fc2_bias, dilations = var_562, groups = var_331, pad = var_564_pad_0, pad_type = var_564_pad_type_0, strides = var_560, weight = layers_1_mlp_fc2_weight, x = input_15)[name = tensor<string, []>("op_564")];
-            tensor<fp16, [1, 384, 1, 512]> x_17 = add(x = var_546_cast_fp16, y = var_564)[name = tensor<string, []>("x_17")];
-            tensor<fp16, []> var_329_promoted_1 = const()[name = tensor<string, []>("op_329_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_330_promoted_1 = const()[name = tensor<string, []>("op_330_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_19 = clip(alpha = var_329_promoted_1, beta = var_330_promoted_1, x = x_17)[name = tensor<string, []>("x_19")];
-            tensor<int32, [1]> var_569 = const()[name = tensor<string, []>("op_569"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_9 = reduce_mean(axes = var_569, keep_dims = var_333, x = x_19)[name = tensor<string, []>("mean_9")];
+            tensor<int32, [2]> var_556 = const()[name = tensor<string, []>("op_556"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_558_pad_type_0 = const()[name = tensor<string, []>("op_558_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_558_pad_0 = const()[name = tensor<string, []>("op_558_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_558 = conv(bias = layers_1_mlp_fc2_bias, dilations = var_556, groups = var_334, pad = var_558_pad_0, pad_type = var_558_pad_type_0, strides = var_554, weight = layers_1_mlp_fc2_weight, x = input_63)[name = tensor<string, []>("op_558")];
+            tensor<fp16, [1, 384, 1, 512]> x_17 = add(x = var_540_cast_fp16, y = var_558)[name = tensor<string, []>("x_17")];
+            tensor<fp16, []> var_331_promoted_1 = const()[name = tensor<string, []>("op_331_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_332_promoted_1 = const()[name = tensor<string, []>("op_332_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_19 = clip(alpha = var_331_promoted_1, beta = var_332_promoted_1, x = x_17)[name = tensor<string, []>("x_19")];
+            tensor<int32, [1]> var_563 = const()[name = tensor<string, []>("op_563"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_9 = reduce_mean(axes = var_563, keep_dims = var_336, x = x_19)[name = tensor<string, []>("mean_9")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_9 = sub(x = x_19, y = mean_9)[name = tensor<string, []>("zero_mean_9")];
-            tensor<fp16, []> var_338_promoted_1 = const()[name = tensor<string, []>("op_338_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_572 = pow(x = zero_mean_9, y = var_338_promoted_1)[name = tensor<string, []>("op_572")];
-            tensor<int32, [1]> var_573 = const()[name = tensor<string, []>("op_573"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_574 = reduce_mean(axes = var_573, keep_dims = var_333, x = var_572)[name = tensor<string, []>("op_574")];
-            tensor<fp16, []> var_575_to_fp16 = const()[name = tensor<string, []>("op_575_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_576_cast_fp16 = add(x = var_574, y = var_575_to_fp16)[name = tensor<string, []>("op_576_cast_fp16")];
+            tensor<fp16, []> var_333_promoted_1 = const()[name = tensor<string, []>("op_333_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_566 = pow(x = zero_mean_9, y = var_333_promoted_1)[name = tensor<string, []>("op_566")];
+            tensor<int32, [1]> var_567 = const()[name = tensor<string, []>("op_567"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_568 = reduce_mean(axes = var_567, keep_dims = var_336, x = var_566)[name = tensor<string, []>("op_568")];
+            tensor<fp16, []> var_569_to_fp16 = const()[name = tensor<string, []>("op_569_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_570_cast_fp16 = add(x = var_568, y = var_569_to_fp16)[name = tensor<string, []>("op_570_cast_fp16")];
             tensor<fp32, []> denom_9_epsilon_0 = const()[name = tensor<string, []>("denom_9_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0, x = var_576_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_578_cast_fp16 = mul(x = zero_mean_9, y = denom_9_cast_fp16)[name = tensor<string, []>("op_578_cast_fp16")];
-            tensor<fp16, [384]> var_580_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_580_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66795392)))];
-            tensor<fp16, [384]> var_580_beta_0_to_fp16 = const()[name = tensor<string, []>("op_580_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66796224)))];
-            tensor<fp16, []> var_580_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_580_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_580_cast_fp16 = batch_norm(beta = var_580_beta_0_to_fp16, epsilon = var_580_epsilon_0_to_fp16, gamma = var_580_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_578_cast_fp16)[name = tensor<string, []>("op_580_cast_fp16")];
-            tensor<int32, []> var_585 = const()[name = tensor<string, []>("op_585"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_586 = const()[name = tensor<string, []>("op_586"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_587 = const()[name = tensor<string, []>("op_587"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_612 = const()[name = tensor<string, []>("op_612"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_614 = const()[name = tensor<string, []>("op_614"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_616_pad_type_0 = const()[name = tensor<string, []>("op_616_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_616_pad_0 = const()[name = tensor<string, []>("op_616_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_616 = conv(bias = layers_2_attention_q_proj_bias, dilations = var_614, groups = var_585, pad = var_616_pad_0, pad_type = var_616_pad_type_0, strides = var_612, weight = layers_2_attention_q_proj_weight, x = var_580_cast_fp16)[name = tensor<string, []>("op_616")];
-            tensor<int32, [4]> var_617 = const()[name = tensor<string, []>("op_617"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_618 = reshape(shape = var_617, x = var_616)[name = tensor<string, []>("op_618")];
-            tensor<int32, [2]> var_621 = const()[name = tensor<string, []>("op_621"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_623 = const()[name = tensor<string, []>("op_623"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_625_pad_type_0 = const()[name = tensor<string, []>("op_625_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_625_pad_0 = const()[name = tensor<string, []>("op_625_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_625 = conv(bias = layers_2_attention_k_proj_bias, dilations = var_623, groups = var_585, pad = var_625_pad_0, pad_type = var_625_pad_type_0, strides = var_621, weight = layers_2_attention_k_proj_weight, x = var_580_cast_fp16)[name = tensor<string, []>("op_625")];
-            tensor<int32, [4]> var_626 = const()[name = tensor<string, []>("op_626"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_5 = reshape(shape = var_626, x = var_625)[name = tensor<string, []>("ks_5")];
-            tensor<int32, [2]> var_630 = const()[name = tensor<string, []>("op_630"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_632 = const()[name = tensor<string, []>("op_632"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_634_pad_type_0 = const()[name = tensor<string, []>("op_634_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_634_pad_0 = const()[name = tensor<string, []>("op_634_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_634 = conv(bias = layers_2_attention_v_proj_bias, dilations = var_632, groups = var_585, pad = var_634_pad_0, pad_type = var_634_pad_type_0, strides = var_630, weight = layers_2_attention_v_proj_weight, x = var_580_cast_fp16)[name = tensor<string, []>("op_634")];
-            tensor<int32, [4]> var_635 = const()[name = tensor<string, []>("op_635"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_636 = reshape(shape = var_635, x = var_634)[name = tensor<string, []>("op_636")];
-            tensor<int32, [12]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_637_axis_0 = const()[name = tensor<string, []>("op_637_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_637_0, tensor<fp16, [1, 32, 1, 512]> var_637_1, tensor<fp16, [1, 32, 1, 512]> var_637_2, tensor<fp16, [1, 32, 1, 512]> var_637_3, tensor<fp16, [1, 32, 1, 512]> var_637_4, tensor<fp16, [1, 32, 1, 512]> var_637_5, tensor<fp16, [1, 32, 1, 512]> var_637_6, tensor<fp16, [1, 32, 1, 512]> var_637_7, tensor<fp16, [1, 32, 1, 512]> var_637_8, tensor<fp16, [1, 32, 1, 512]> var_637_9, tensor<fp16, [1, 32, 1, 512]> var_637_10, tensor<fp16, [1, 32, 1, 512]> var_637_11 = split(axis = var_637_axis_0, split_sizes = tile_12, x = var_618)[name = tensor<string, []>("op_637")];
-            tensor<int32, [4]> var_650_perm_0 = const()[name = tensor<string, []>("op_650_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_651_axis_0 = const()[name = tensor<string, []>("op_651_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_9 = transpose(perm = var_650_perm_0, x = ks_5)[name = tensor<string, []>("transpose_9")];
-            tensor<fp16, [1, 512, 1, 32]> var_651_0, tensor<fp16, [1, 512, 1, 32]> var_651_1, tensor<fp16, [1, 512, 1, 32]> var_651_2, tensor<fp16, [1, 512, 1, 32]> var_651_3, tensor<fp16, [1, 512, 1, 32]> var_651_4, tensor<fp16, [1, 512, 1, 32]> var_651_5, tensor<fp16, [1, 512, 1, 32]> var_651_6, tensor<fp16, [1, 512, 1, 32]> var_651_7, tensor<fp16, [1, 512, 1, 32]> var_651_8, tensor<fp16, [1, 512, 1, 32]> var_651_9, tensor<fp16, [1, 512, 1, 32]> var_651_10, tensor<fp16, [1, 512, 1, 32]> var_651_11 = split(axis = var_651_axis_0, split_sizes = tile_13, x = transpose_9)[name = tensor<string, []>("op_651")];
-            tensor<int32, [12]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_664_axis_0 = const()[name = tensor<string, []>("op_664_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_664_0, tensor<fp16, [1, 32, 1, 512]> var_664_1, tensor<fp16, [1, 32, 1, 512]> var_664_2, tensor<fp16, [1, 32, 1, 512]> var_664_3, tensor<fp16, [1, 32, 1, 512]> var_664_4, tensor<fp16, [1, 32, 1, 512]> var_664_5, tensor<fp16, [1, 32, 1, 512]> var_664_6, tensor<fp16, [1, 32, 1, 512]> var_664_7, tensor<fp16, [1, 32, 1, 512]> var_664_8, tensor<fp16, [1, 32, 1, 512]> var_664_9, tensor<fp16, [1, 32, 1, 512]> var_664_10, tensor<fp16, [1, 32, 1, 512]> var_664_11 = split(axis = var_664_axis_0, split_sizes = tile_14, x = var_636)[name = tensor<string, []>("op_664")];
-            tensor<string, []> var_678_equation_0 = const()[name = tensor<string, []>("op_678_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_678 = einsum(equation = var_678_equation_0, values = (var_651_0, var_637_0))[name = tensor<string, []>("op_678")];
-            tensor<fp16, []> var_679_to_fp16 = const()[name = tensor<string, []>("op_679_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_49_cast_fp16 = mul(x = var_678, y = var_679_to_fp16)[name = tensor<string, []>("w_49_cast_fp16")];
-            tensor<string, []> var_682_equation_0 = const()[name = tensor<string, []>("op_682_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_682 = einsum(equation = var_682_equation_0, values = (var_651_1, var_637_1))[name = tensor<string, []>("op_682")];
-            tensor<fp16, []> var_683_to_fp16 = const()[name = tensor<string, []>("op_683_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_51_cast_fp16 = mul(x = var_682, y = var_683_to_fp16)[name = tensor<string, []>("w_51_cast_fp16")];
-            tensor<string, []> var_686_equation_0 = const()[name = tensor<string, []>("op_686_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_686 = einsum(equation = var_686_equation_0, values = (var_651_2, var_637_2))[name = tensor<string, []>("op_686")];
-            tensor<fp16, []> var_687_to_fp16 = const()[name = tensor<string, []>("op_687_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_53_cast_fp16 = mul(x = var_686, y = var_687_to_fp16)[name = tensor<string, []>("w_53_cast_fp16")];
-            tensor<string, []> var_690_equation_0 = const()[name = tensor<string, []>("op_690_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_690 = einsum(equation = var_690_equation_0, values = (var_651_3, var_637_3))[name = tensor<string, []>("op_690")];
-            tensor<fp16, []> var_691_to_fp16 = const()[name = tensor<string, []>("op_691_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_55_cast_fp16 = mul(x = var_690, y = var_691_to_fp16)[name = tensor<string, []>("w_55_cast_fp16")];
-            tensor<string, []> var_694_equation_0 = const()[name = tensor<string, []>("op_694_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_694 = einsum(equation = var_694_equation_0, values = (var_651_4, var_637_4))[name = tensor<string, []>("op_694")];
-            tensor<fp16, []> var_695_to_fp16 = const()[name = tensor<string, []>("op_695_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_57_cast_fp16 = mul(x = var_694, y = var_695_to_fp16)[name = tensor<string, []>("w_57_cast_fp16")];
-            tensor<string, []> var_698_equation_0 = const()[name = tensor<string, []>("op_698_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_698 = einsum(equation = var_698_equation_0, values = (var_651_5, var_637_5))[name = tensor<string, []>("op_698")];
-            tensor<fp16, []> var_699_to_fp16 = const()[name = tensor<string, []>("op_699_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_59_cast_fp16 = mul(x = var_698, y = var_699_to_fp16)[name = tensor<string, []>("w_59_cast_fp16")];
-            tensor<string, []> var_702_equation_0 = const()[name = tensor<string, []>("op_702_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_702 = einsum(equation = var_702_equation_0, values = (var_651_6, var_637_6))[name = tensor<string, []>("op_702")];
-            tensor<fp16, []> var_703_to_fp16 = const()[name = tensor<string, []>("op_703_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_61_cast_fp16 = mul(x = var_702, y = var_703_to_fp16)[name = tensor<string, []>("w_61_cast_fp16")];
-            tensor<string, []> var_706_equation_0 = const()[name = tensor<string, []>("op_706_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_706 = einsum(equation = var_706_equation_0, values = (var_651_7, var_637_7))[name = tensor<string, []>("op_706")];
-            tensor<fp16, []> var_707_to_fp16 = const()[name = tensor<string, []>("op_707_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_63_cast_fp16 = mul(x = var_706, y = var_707_to_fp16)[name = tensor<string, []>("w_63_cast_fp16")];
-            tensor<string, []> var_710_equation_0 = const()[name = tensor<string, []>("op_710_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_710 = einsum(equation = var_710_equation_0, values = (var_651_8, var_637_8))[name = tensor<string, []>("op_710")];
-            tensor<fp16, []> var_711_to_fp16 = const()[name = tensor<string, []>("op_711_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_65_cast_fp16 = mul(x = var_710, y = var_711_to_fp16)[name = tensor<string, []>("w_65_cast_fp16")];
-            tensor<string, []> var_714_equation_0 = const()[name = tensor<string, []>("op_714_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_714 = einsum(equation = var_714_equation_0, values = (var_651_9, var_637_9))[name = tensor<string, []>("op_714")];
-            tensor<fp16, []> var_715_to_fp16 = const()[name = tensor<string, []>("op_715_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_67_cast_fp16 = mul(x = var_714, y = var_715_to_fp16)[name = tensor<string, []>("w_67_cast_fp16")];
-            tensor<string, []> var_718_equation_0 = const()[name = tensor<string, []>("op_718_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_718 = einsum(equation = var_718_equation_0, values = (var_651_10, var_637_10))[name = tensor<string, []>("op_718")];
-            tensor<fp16, []> var_719_to_fp16 = const()[name = tensor<string, []>("op_719_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_69_cast_fp16 = mul(x = var_718, y = var_719_to_fp16)[name = tensor<string, []>("w_69_cast_fp16")];
-            tensor<string, []> var_722_equation_0 = const()[name = tensor<string, []>("op_722_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_722 = einsum(equation = var_722_equation_0, values = (var_651_11, var_637_11))[name = tensor<string, []>("op_722")];
-            tensor<fp16, []> var_723_to_fp16 = const()[name = tensor<string, []>("op_723_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_71_cast_fp16 = mul(x = var_722, y = var_723_to_fp16)[name = tensor<string, []>("w_71_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_725_cast_fp16 = add(x = w_49_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_725_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_726_cast_fp16 = softmax(axis = var_585, x = var_725_cast_fp16)[name = tensor<string, []>("op_726_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_727_cast_fp16 = add(x = w_51_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_727_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_728_cast_fp16 = softmax(axis = var_585, x = var_727_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_729_cast_fp16 = add(x = w_53_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_729_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_730_cast_fp16 = softmax(axis = var_585, x = var_729_cast_fp16)[name = tensor<string, []>("op_730_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_731_cast_fp16 = add(x = w_55_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_731_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_732_cast_fp16 = softmax(axis = var_585, x = var_731_cast_fp16)[name = tensor<string, []>("op_732_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_733_cast_fp16 = add(x = w_57_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_733_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_734_cast_fp16 = softmax(axis = var_585, x = var_733_cast_fp16)[name = tensor<string, []>("op_734_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_735_cast_fp16 = add(x = w_59_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_735_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_736_cast_fp16 = softmax(axis = var_585, x = var_735_cast_fp16)[name = tensor<string, []>("op_736_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_737_cast_fp16 = add(x = w_61_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_737_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_738_cast_fp16 = softmax(axis = var_585, x = var_737_cast_fp16)[name = tensor<string, []>("op_738_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_739_cast_fp16 = add(x = w_63_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_739_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_740_cast_fp16 = softmax(axis = var_585, x = var_739_cast_fp16)[name = tensor<string, []>("op_740_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_741_cast_fp16 = add(x = w_65_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_741_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_742_cast_fp16 = softmax(axis = var_585, x = var_741_cast_fp16)[name = tensor<string, []>("op_742_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_743_cast_fp16 = add(x = w_67_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_743_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_744_cast_fp16 = softmax(axis = var_585, x = var_743_cast_fp16)[name = tensor<string, []>("op_744_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_745_cast_fp16 = add(x = w_69_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_745_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_746_cast_fp16 = softmax(axis = var_585, x = var_745_cast_fp16)[name = tensor<string, []>("op_746_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_747_cast_fp16 = add(x = w_71_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_747_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_748_cast_fp16 = softmax(axis = var_585, x = var_747_cast_fp16)[name = tensor<string, []>("op_748_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 512]> denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0, x = var_570_cast_fp16)[name = tensor<string, []>("denom_9_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_572_cast_fp16 = mul(x = zero_mean_9, y = denom_9_cast_fp16)[name = tensor<string, []>("op_572_cast_fp16")];
+            tensor<fp16, [384]> var_574_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_574_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66795392)))];
+            tensor<fp16, [384]> var_574_beta_0_to_fp16 = const()[name = tensor<string, []>("op_574_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66796224)))];
+            tensor<fp16, []> var_574_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_574_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_574_cast_fp16 = batch_norm(beta = var_574_beta_0_to_fp16, epsilon = var_574_epsilon_0_to_fp16, gamma = var_574_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_572_cast_fp16)[name = tensor<string, []>("op_574_cast_fp16")];
+            tensor<int32, []> var_580 = const()[name = tensor<string, []>("op_580"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_581 = const()[name = tensor<string, []>("op_581"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_582 = const()[name = tensor<string, []>("op_582"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_604 = const()[name = tensor<string, []>("op_604"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_606 = const()[name = tensor<string, []>("op_606"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_608_pad_type_0 = const()[name = tensor<string, []>("op_608_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_608_pad_0 = const()[name = tensor<string, []>("op_608_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_608 = conv(bias = layers_2_attention_q_proj_bias, dilations = var_606, groups = var_580, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = layers_2_attention_q_proj_weight, x = var_574_cast_fp16)[name = tensor<string, []>("op_608")];
+            tensor<int32, [2]> var_611 = const()[name = tensor<string, []>("op_611"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_613 = const()[name = tensor<string, []>("op_613"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_5_pad_type_0 = const()[name = tensor<string, []>("ks_5_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_5_pad_0 = const()[name = tensor<string, []>("ks_5_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_5 = conv(bias = layers_2_attention_k_proj_bias, dilations = var_613, groups = var_580, pad = ks_5_pad_0, pad_type = ks_5_pad_type_0, strides = var_611, weight = layers_2_attention_k_proj_weight, x = var_574_cast_fp16)[name = tensor<string, []>("ks_5")];
+            tensor<int32, [2]> var_618 = const()[name = tensor<string, []>("op_618"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_620 = const()[name = tensor<string, []>("op_620"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_622_pad_type_0 = const()[name = tensor<string, []>("op_622_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_622_pad_0 = const()[name = tensor<string, []>("op_622_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_622 = conv(bias = layers_2_attention_v_proj_bias, dilations = var_620, groups = var_580, pad = var_622_pad_0, pad_type = var_622_pad_type_0, strides = var_618, weight = layers_2_attention_v_proj_weight, x = var_574_cast_fp16)[name = tensor<string, []>("op_622")];
+            tensor<int32, [12]> tile_12 = const()[name = tensor<string, []>("tile_12"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_623_axis_0 = const()[name = tensor<string, []>("op_623_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_623_0, tensor<fp16, [1, 32, 1, 512]> var_623_1, tensor<fp16, [1, 32, 1, 512]> var_623_2, tensor<fp16, [1, 32, 1, 512]> var_623_3, tensor<fp16, [1, 32, 1, 512]> var_623_4, tensor<fp16, [1, 32, 1, 512]> var_623_5, tensor<fp16, [1, 32, 1, 512]> var_623_6, tensor<fp16, [1, 32, 1, 512]> var_623_7, tensor<fp16, [1, 32, 1, 512]> var_623_8, tensor<fp16, [1, 32, 1, 512]> var_623_9, tensor<fp16, [1, 32, 1, 512]> var_623_10, tensor<fp16, [1, 32, 1, 512]> var_623_11 = split(axis = var_623_axis_0, split_sizes = tile_12, x = var_608)[name = tensor<string, []>("op_623")];
+            tensor<int32, [4]> var_636_perm_0 = const()[name = tensor<string, []>("op_636_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_13 = const()[name = tensor<string, []>("tile_13"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_637_axis_0 = const()[name = tensor<string, []>("op_637_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_9 = transpose(perm = var_636_perm_0, x = ks_5)[name = tensor<string, []>("transpose_9")];
+            tensor<fp16, [1, 512, 1, 32]> var_637_0, tensor<fp16, [1, 512, 1, 32]> var_637_1, tensor<fp16, [1, 512, 1, 32]> var_637_2, tensor<fp16, [1, 512, 1, 32]> var_637_3, tensor<fp16, [1, 512, 1, 32]> var_637_4, tensor<fp16, [1, 512, 1, 32]> var_637_5, tensor<fp16, [1, 512, 1, 32]> var_637_6, tensor<fp16, [1, 512, 1, 32]> var_637_7, tensor<fp16, [1, 512, 1, 32]> var_637_8, tensor<fp16, [1, 512, 1, 32]> var_637_9, tensor<fp16, [1, 512, 1, 32]> var_637_10, tensor<fp16, [1, 512, 1, 32]> var_637_11 = split(axis = var_637_axis_0, split_sizes = tile_13, x = transpose_9)[name = tensor<string, []>("op_637")];
+            tensor<int32, [12]> tile_14 = const()[name = tensor<string, []>("tile_14"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_650_axis_0 = const()[name = tensor<string, []>("op_650_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_650_0, tensor<fp16, [1, 32, 1, 512]> var_650_1, tensor<fp16, [1, 32, 1, 512]> var_650_2, tensor<fp16, [1, 32, 1, 512]> var_650_3, tensor<fp16, [1, 32, 1, 512]> var_650_4, tensor<fp16, [1, 32, 1, 512]> var_650_5, tensor<fp16, [1, 32, 1, 512]> var_650_6, tensor<fp16, [1, 32, 1, 512]> var_650_7, tensor<fp16, [1, 32, 1, 512]> var_650_8, tensor<fp16, [1, 32, 1, 512]> var_650_9, tensor<fp16, [1, 32, 1, 512]> var_650_10, tensor<fp16, [1, 32, 1, 512]> var_650_11 = split(axis = var_650_axis_0, split_sizes = tile_14, x = var_622)[name = tensor<string, []>("op_650")];
+            tensor<string, []> var_664_equation_0 = const()[name = tensor<string, []>("op_664_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_664 = einsum(equation = var_664_equation_0, values = (var_637_0, var_623_0))[name = tensor<string, []>("op_664")];
+            tensor<fp16, []> var_665_to_fp16 = const()[name = tensor<string, []>("op_665_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_49_cast_fp16 = mul(x = var_664, y = var_665_to_fp16)[name = tensor<string, []>("w_49_cast_fp16")];
+            tensor<string, []> var_668_equation_0 = const()[name = tensor<string, []>("op_668_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_668 = einsum(equation = var_668_equation_0, values = (var_637_1, var_623_1))[name = tensor<string, []>("op_668")];
+            tensor<fp16, []> var_669_to_fp16 = const()[name = tensor<string, []>("op_669_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_51_cast_fp16 = mul(x = var_668, y = var_669_to_fp16)[name = tensor<string, []>("w_51_cast_fp16")];
+            tensor<string, []> var_672_equation_0 = const()[name = tensor<string, []>("op_672_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_672 = einsum(equation = var_672_equation_0, values = (var_637_2, var_623_2))[name = tensor<string, []>("op_672")];
+            tensor<fp16, []> var_673_to_fp16 = const()[name = tensor<string, []>("op_673_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_53_cast_fp16 = mul(x = var_672, y = var_673_to_fp16)[name = tensor<string, []>("w_53_cast_fp16")];
+            tensor<string, []> var_676_equation_0 = const()[name = tensor<string, []>("op_676_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_676 = einsum(equation = var_676_equation_0, values = (var_637_3, var_623_3))[name = tensor<string, []>("op_676")];
+            tensor<fp16, []> var_677_to_fp16 = const()[name = tensor<string, []>("op_677_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_55_cast_fp16 = mul(x = var_676, y = var_677_to_fp16)[name = tensor<string, []>("w_55_cast_fp16")];
+            tensor<string, []> var_680_equation_0 = const()[name = tensor<string, []>("op_680_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_680 = einsum(equation = var_680_equation_0, values = (var_637_4, var_623_4))[name = tensor<string, []>("op_680")];
+            tensor<fp16, []> var_681_to_fp16 = const()[name = tensor<string, []>("op_681_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_57_cast_fp16 = mul(x = var_680, y = var_681_to_fp16)[name = tensor<string, []>("w_57_cast_fp16")];
+            tensor<string, []> var_684_equation_0 = const()[name = tensor<string, []>("op_684_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_684 = einsum(equation = var_684_equation_0, values = (var_637_5, var_623_5))[name = tensor<string, []>("op_684")];
+            tensor<fp16, []> var_685_to_fp16 = const()[name = tensor<string, []>("op_685_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_59_cast_fp16 = mul(x = var_684, y = var_685_to_fp16)[name = tensor<string, []>("w_59_cast_fp16")];
+            tensor<string, []> var_688_equation_0 = const()[name = tensor<string, []>("op_688_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_688 = einsum(equation = var_688_equation_0, values = (var_637_6, var_623_6))[name = tensor<string, []>("op_688")];
+            tensor<fp16, []> var_689_to_fp16 = const()[name = tensor<string, []>("op_689_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_61_cast_fp16 = mul(x = var_688, y = var_689_to_fp16)[name = tensor<string, []>("w_61_cast_fp16")];
+            tensor<string, []> var_692_equation_0 = const()[name = tensor<string, []>("op_692_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_692 = einsum(equation = var_692_equation_0, values = (var_637_7, var_623_7))[name = tensor<string, []>("op_692")];
+            tensor<fp16, []> var_693_to_fp16 = const()[name = tensor<string, []>("op_693_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_63_cast_fp16 = mul(x = var_692, y = var_693_to_fp16)[name = tensor<string, []>("w_63_cast_fp16")];
+            tensor<string, []> var_696_equation_0 = const()[name = tensor<string, []>("op_696_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_696 = einsum(equation = var_696_equation_0, values = (var_637_8, var_623_8))[name = tensor<string, []>("op_696")];
+            tensor<fp16, []> var_697_to_fp16 = const()[name = tensor<string, []>("op_697_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_65_cast_fp16 = mul(x = var_696, y = var_697_to_fp16)[name = tensor<string, []>("w_65_cast_fp16")];
+            tensor<string, []> var_700_equation_0 = const()[name = tensor<string, []>("op_700_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_700 = einsum(equation = var_700_equation_0, values = (var_637_9, var_623_9))[name = tensor<string, []>("op_700")];
+            tensor<fp16, []> var_701_to_fp16 = const()[name = tensor<string, []>("op_701_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_67_cast_fp16 = mul(x = var_700, y = var_701_to_fp16)[name = tensor<string, []>("w_67_cast_fp16")];
+            tensor<string, []> var_704_equation_0 = const()[name = tensor<string, []>("op_704_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_704 = einsum(equation = var_704_equation_0, values = (var_637_10, var_623_10))[name = tensor<string, []>("op_704")];
+            tensor<fp16, []> var_705_to_fp16 = const()[name = tensor<string, []>("op_705_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_69_cast_fp16 = mul(x = var_704, y = var_705_to_fp16)[name = tensor<string, []>("w_69_cast_fp16")];
+            tensor<string, []> var_708_equation_0 = const()[name = tensor<string, []>("op_708_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_708 = einsum(equation = var_708_equation_0, values = (var_637_11, var_623_11))[name = tensor<string, []>("op_708")];
+            tensor<fp16, []> var_709_to_fp16 = const()[name = tensor<string, []>("op_709_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_71_cast_fp16 = mul(x = var_708, y = var_709_to_fp16)[name = tensor<string, []>("w_71_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_67_cast_fp16 = add(x = w_49_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_67_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_712_cast_fp16 = softmax(axis = var_580, x = input_67_cast_fp16)[name = tensor<string, []>("op_712_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_69_cast_fp16 = add(x = w_51_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_714_cast_fp16 = softmax(axis = var_580, x = input_69_cast_fp16)[name = tensor<string, []>("op_714_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_71_cast_fp16 = add(x = w_53_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_716_cast_fp16 = softmax(axis = var_580, x = input_71_cast_fp16)[name = tensor<string, []>("op_716_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_73_cast_fp16 = add(x = w_55_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_718_cast_fp16 = softmax(axis = var_580, x = input_73_cast_fp16)[name = tensor<string, []>("op_718_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_75_cast_fp16 = add(x = w_57_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_720_cast_fp16 = softmax(axis = var_580, x = input_75_cast_fp16)[name = tensor<string, []>("op_720_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_77_cast_fp16 = add(x = w_59_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_722_cast_fp16 = softmax(axis = var_580, x = input_77_cast_fp16)[name = tensor<string, []>("op_722_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_79_cast_fp16 = add(x = w_61_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_79_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_724_cast_fp16 = softmax(axis = var_580, x = input_79_cast_fp16)[name = tensor<string, []>("op_724_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_81_cast_fp16 = add(x = w_63_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_726_cast_fp16 = softmax(axis = var_580, x = input_81_cast_fp16)[name = tensor<string, []>("op_726_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_83_cast_fp16 = add(x = w_65_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_728_cast_fp16 = softmax(axis = var_580, x = input_83_cast_fp16)[name = tensor<string, []>("op_728_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_85_cast_fp16 = add(x = w_67_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_730_cast_fp16 = softmax(axis = var_580, x = input_85_cast_fp16)[name = tensor<string, []>("op_730_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_87_cast_fp16 = add(x = w_69_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_732_cast_fp16 = softmax(axis = var_580, x = input_87_cast_fp16)[name = tensor<string, []>("op_732_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_89_cast_fp16 = add(x = w_71_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_734_cast_fp16 = softmax(axis = var_580, x = input_89_cast_fp16)[name = tensor<string, []>("op_734_cast_fp16")];
+            tensor<string, []> var_736_equation_0 = const()[name = tensor<string, []>("op_736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_736_cast_fp16 = einsum(equation = var_736_equation_0, values = (var_650_0, var_712_cast_fp16))[name = tensor<string, []>("op_736_cast_fp16")];
+            tensor<string, []> var_738_equation_0 = const()[name = tensor<string, []>("op_738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_738_cast_fp16 = einsum(equation = var_738_equation_0, values = (var_650_1, var_714_cast_fp16))[name = tensor<string, []>("op_738_cast_fp16")];
+            tensor<string, []> var_740_equation_0 = const()[name = tensor<string, []>("op_740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_740_cast_fp16 = einsum(equation = var_740_equation_0, values = (var_650_2, var_716_cast_fp16))[name = tensor<string, []>("op_740_cast_fp16")];
+            tensor<string, []> var_742_equation_0 = const()[name = tensor<string, []>("op_742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_742_cast_fp16 = einsum(equation = var_742_equation_0, values = (var_650_3, var_718_cast_fp16))[name = tensor<string, []>("op_742_cast_fp16")];
+            tensor<string, []> var_744_equation_0 = const()[name = tensor<string, []>("op_744_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_744_cast_fp16 = einsum(equation = var_744_equation_0, values = (var_650_4, var_720_cast_fp16))[name = tensor<string, []>("op_744_cast_fp16")];
+            tensor<string, []> var_746_equation_0 = const()[name = tensor<string, []>("op_746_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_746_cast_fp16 = einsum(equation = var_746_equation_0, values = (var_650_5, var_722_cast_fp16))[name = tensor<string, []>("op_746_cast_fp16")];
+            tensor<string, []> var_748_equation_0 = const()[name = tensor<string, []>("op_748_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_748_cast_fp16 = einsum(equation = var_748_equation_0, values = (var_650_6, var_724_cast_fp16))[name = tensor<string, []>("op_748_cast_fp16")];
             tensor<string, []> var_750_equation_0 = const()[name = tensor<string, []>("op_750_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_750_cast_fp16 = einsum(equation = var_750_equation_0, values = (var_664_0, var_726_cast_fp16))[name = tensor<string, []>("op_750_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_750_cast_fp16 = einsum(equation = var_750_equation_0, values = (var_650_7, var_726_cast_fp16))[name = tensor<string, []>("op_750_cast_fp16")];
             tensor<string, []> var_752_equation_0 = const()[name = tensor<string, []>("op_752_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_752_cast_fp16 = einsum(equation = var_752_equation_0, values = (var_664_1, var_728_cast_fp16))[name = tensor<string, []>("op_752_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_752_cast_fp16 = einsum(equation = var_752_equation_0, values = (var_650_8, var_728_cast_fp16))[name = tensor<string, []>("op_752_cast_fp16")];
             tensor<string, []> var_754_equation_0 = const()[name = tensor<string, []>("op_754_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_754_cast_fp16 = einsum(equation = var_754_equation_0, values = (var_664_2, var_730_cast_fp16))[name = tensor<string, []>("op_754_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_754_cast_fp16 = einsum(equation = var_754_equation_0, values = (var_650_9, var_730_cast_fp16))[name = tensor<string, []>("op_754_cast_fp16")];
             tensor<string, []> var_756_equation_0 = const()[name = tensor<string, []>("op_756_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_756_cast_fp16 = einsum(equation = var_756_equation_0, values = (var_664_3, var_732_cast_fp16))[name = tensor<string, []>("op_756_cast_fp16")];
+            tensor<fp16, [1, 32, 1, 512]> var_756_cast_fp16 = einsum(equation = var_756_equation_0, values = (var_650_10, var_732_cast_fp16))[name = tensor<string, []>("op_756_cast_fp16")];
             tensor<string, []> var_758_equation_0 = const()[name = tensor<string, []>("op_758_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_758_cast_fp16 = einsum(equation = var_758_equation_0, values = (var_664_4, var_734_cast_fp16))[name = tensor<string, []>("op_758_cast_fp16")];
-            tensor<string, []> var_760_equation_0 = const()[name = tensor<string, []>("op_760_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_760_cast_fp16 = einsum(equation = var_760_equation_0, values = (var_664_5, var_736_cast_fp16))[name = tensor<string, []>("op_760_cast_fp16")];
-            tensor<string, []> var_762_equation_0 = const()[name = tensor<string, []>("op_762_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_762_cast_fp16 = einsum(equation = var_762_equation_0, values = (var_664_6, var_738_cast_fp16))[name = tensor<string, []>("op_762_cast_fp16")];
-            tensor<string, []> var_764_equation_0 = const()[name = tensor<string, []>("op_764_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_764_cast_fp16 = einsum(equation = var_764_equation_0, values = (var_664_7, var_740_cast_fp16))[name = tensor<string, []>("op_764_cast_fp16")];
-            tensor<string, []> var_766_equation_0 = const()[name = tensor<string, []>("op_766_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_766_cast_fp16 = einsum(equation = var_766_equation_0, values = (var_664_8, var_742_cast_fp16))[name = tensor<string, []>("op_766_cast_fp16")];
-            tensor<string, []> var_768_equation_0 = const()[name = tensor<string, []>("op_768_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_768_cast_fp16 = einsum(equation = var_768_equation_0, values = (var_664_9, var_744_cast_fp16))[name = tensor<string, []>("op_768_cast_fp16")];
-            tensor<string, []> var_770_equation_0 = const()[name = tensor<string, []>("op_770_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_770_cast_fp16 = einsum(equation = var_770_equation_0, values = (var_664_10, var_746_cast_fp16))[name = tensor<string, []>("op_770_cast_fp16")];
-            tensor<string, []> var_772_equation_0 = const()[name = tensor<string, []>("op_772_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_772_cast_fp16 = einsum(equation = var_772_equation_0, values = (var_664_11, var_748_cast_fp16))[name = tensor<string, []>("op_772_cast_fp16")];
-            tensor<bool, []> var_774_interleave_0 = const()[name = tensor<string, []>("op_774_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_774_cast_fp16 = concat(axis = var_585, interleave = var_774_interleave_0, values = (var_750_cast_fp16, var_752_cast_fp16, var_754_cast_fp16, var_756_cast_fp16, var_758_cast_fp16, var_760_cast_fp16, var_762_cast_fp16, var_764_cast_fp16, var_766_cast_fp16, var_768_cast_fp16, var_770_cast_fp16, var_772_cast_fp16))[name = tensor<string, []>("op_774_cast_fp16")];
-            tensor<int32, [2]> var_778 = const()[name = tensor<string, []>("op_778"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_780 = const()[name = tensor<string, []>("op_780"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_782_pad_type_0 = const()[name = tensor<string, []>("op_782_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_782_pad_0 = const()[name = tensor<string, []>("op_782_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_782 = conv(bias = layers_2_attention_o_proj_bias, dilations = var_780, groups = var_585, pad = var_782_pad_0, pad_type = var_782_pad_type_0, strides = var_778, weight = layers_2_attention_o_proj_weight, x = var_774_cast_fp16)[name = tensor<string, []>("op_782")];
-            tensor<bool, []> var_784_interleave_0 = const()[name = tensor<string, []>("op_784_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_784 = concat(axis = var_586, interleave = var_784_interleave_0, values = var_782)[name = tensor<string, []>("op_784")];
-            tensor<fp16, [1, 384, 1, 512]> x_21 = add(x = var_580_cast_fp16, y = var_784)[name = tensor<string, []>("x_21")];
-            tensor<fp16, []> var_583_promoted = const()[name = tensor<string, []>("op_583_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_584_promoted = const()[name = tensor<string, []>("op_584_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_23 = clip(alpha = var_583_promoted, beta = var_584_promoted, x = x_21)[name = tensor<string, []>("x_23")];
-            tensor<int32, [1]> var_789 = const()[name = tensor<string, []>("op_789"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_11 = reduce_mean(axes = var_789, keep_dims = var_587, x = x_23)[name = tensor<string, []>("mean_11")];
+            tensor<fp16, [1, 32, 1, 512]> var_758_cast_fp16 = einsum(equation = var_758_equation_0, values = (var_650_11, var_734_cast_fp16))[name = tensor<string, []>("op_758_cast_fp16")];
+            tensor<bool, []> var_760_interleave_0 = const()[name = tensor<string, []>("op_760_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_760_cast_fp16 = concat(axis = var_580, interleave = var_760_interleave_0, values = (var_736_cast_fp16, var_738_cast_fp16, var_740_cast_fp16, var_742_cast_fp16, var_744_cast_fp16, var_746_cast_fp16, var_748_cast_fp16, var_750_cast_fp16, var_752_cast_fp16, var_754_cast_fp16, var_756_cast_fp16, var_758_cast_fp16))[name = tensor<string, []>("op_760_cast_fp16")];
+            tensor<int32, [2]> var_764 = const()[name = tensor<string, []>("op_764"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_766 = const()[name = tensor<string, []>("op_766"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_768_pad_type_0 = const()[name = tensor<string, []>("op_768_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_768_pad_0 = const()[name = tensor<string, []>("op_768_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_768 = conv(bias = layers_2_attention_o_proj_bias, dilations = var_766, groups = var_580, pad = var_768_pad_0, pad_type = var_768_pad_type_0, strides = var_764, weight = layers_2_attention_o_proj_weight, x = var_760_cast_fp16)[name = tensor<string, []>("op_768")];
+            tensor<bool, []> var_770_interleave_0 = const()[name = tensor<string, []>("op_770_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_770 = concat(axis = var_581, interleave = var_770_interleave_0, values = var_768)[name = tensor<string, []>("op_770")];
+            tensor<fp16, [1, 384, 1, 512]> x_21 = add(x = var_574_cast_fp16, y = var_770)[name = tensor<string, []>("x_21")];
+            tensor<fp16, []> var_577_promoted = const()[name = tensor<string, []>("op_577_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_578_promoted = const()[name = tensor<string, []>("op_578_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_23 = clip(alpha = var_577_promoted, beta = var_578_promoted, x = x_21)[name = tensor<string, []>("x_23")];
+            tensor<int32, [1]> var_775 = const()[name = tensor<string, []>("op_775"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_11 = reduce_mean(axes = var_775, keep_dims = var_582, x = x_23)[name = tensor<string, []>("mean_11")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_11 = sub(x = x_23, y = mean_11)[name = tensor<string, []>("zero_mean_11")];
-            tensor<fp16, []> var_592_promoted = const()[name = tensor<string, []>("op_592_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_792 = pow(x = zero_mean_11, y = var_592_promoted)[name = tensor<string, []>("op_792")];
-            tensor<int32, [1]> var_793 = const()[name = tensor<string, []>("op_793"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_794 = reduce_mean(axes = var_793, keep_dims = var_587, x = var_792)[name = tensor<string, []>("op_794")];
-            tensor<fp16, []> var_795_to_fp16 = const()[name = tensor<string, []>("op_795_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_796_cast_fp16 = add(x = var_794, y = var_795_to_fp16)[name = tensor<string, []>("op_796_cast_fp16")];
+            tensor<fp16, []> var_579_promoted = const()[name = tensor<string, []>("op_579_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_778 = pow(x = zero_mean_11, y = var_579_promoted)[name = tensor<string, []>("op_778")];
+            tensor<int32, [1]> var_779 = const()[name = tensor<string, []>("op_779"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_780 = reduce_mean(axes = var_779, keep_dims = var_582, x = var_778)[name = tensor<string, []>("op_780")];
+            tensor<fp16, []> var_781_to_fp16 = const()[name = tensor<string, []>("op_781_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_782_cast_fp16 = add(x = var_780, y = var_781_to_fp16)[name = tensor<string, []>("op_782_cast_fp16")];
             tensor<fp32, []> denom_11_epsilon_0 = const()[name = tensor<string, []>("denom_11_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0, x = var_796_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_798_cast_fp16 = mul(x = zero_mean_11, y = denom_11_cast_fp16)[name = tensor<string, []>("op_798_cast_fp16")];
-            tensor<fp16, [384]> var_800_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_800_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66797056)))];
-            tensor<fp16, [384]> var_800_beta_0_to_fp16 = const()[name = tensor<string, []>("op_800_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66797888)))];
-            tensor<fp16, []> var_800_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_800_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_800_cast_fp16 = batch_norm(beta = var_800_beta_0_to_fp16, epsilon = var_800_epsilon_0_to_fp16, gamma = var_800_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_798_cast_fp16)[name = tensor<string, []>("op_800_cast_fp16")];
-            tensor<int32, [2]> var_806 = const()[name = tensor<string, []>("op_806"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_808 = const()[name = tensor<string, []>("op_808"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_810_pad_type_0 = const()[name = tensor<string, []>("op_810_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_810_pad_0 = const()[name = tensor<string, []>("op_810_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_810 = conv(bias = layers_2_mlp_fc1_bias, dilations = var_808, groups = var_585, pad = var_810_pad_0, pad_type = var_810_pad_type_0, strides = var_806, weight = layers_2_mlp_fc1_weight, x = var_800_cast_fp16)[name = tensor<string, []>("op_810")];
-            tensor<string, []> input_23_mode_0 = const()[name = tensor<string, []>("input_23_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_23 = gelu(mode = input_23_mode_0, x = var_810)[name = tensor<string, []>("input_23")];
-            tensor<int32, [2]> var_814 = const()[name = tensor<string, []>("op_814"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_816 = const()[name = tensor<string, []>("op_816"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_818_pad_type_0 = const()[name = tensor<string, []>("op_818_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_818_pad_0 = const()[name = tensor<string, []>("op_818_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_818 = conv(bias = layers_2_mlp_fc2_bias, dilations = var_816, groups = var_585, pad = var_818_pad_0, pad_type = var_818_pad_type_0, strides = var_814, weight = layers_2_mlp_fc2_weight, x = input_23)[name = tensor<string, []>("op_818")];
-            tensor<fp16, [1, 384, 1, 512]> x_25 = add(x = var_800_cast_fp16, y = var_818)[name = tensor<string, []>("x_25")];
-            tensor<fp16, []> var_583_promoted_1 = const()[name = tensor<string, []>("op_583_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_584_promoted_1 = const()[name = tensor<string, []>("op_584_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_27 = clip(alpha = var_583_promoted_1, beta = var_584_promoted_1, x = x_25)[name = tensor<string, []>("x_27")];
-            tensor<int32, [1]> var_823 = const()[name = tensor<string, []>("op_823"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_13 = reduce_mean(axes = var_823, keep_dims = var_587, x = x_27)[name = tensor<string, []>("mean_13")];
+            tensor<fp16, [1, 1, 1, 512]> denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0, x = var_782_cast_fp16)[name = tensor<string, []>("denom_11_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_784_cast_fp16 = mul(x = zero_mean_11, y = denom_11_cast_fp16)[name = tensor<string, []>("op_784_cast_fp16")];
+            tensor<fp16, [384]> var_786_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_786_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66797056)))];
+            tensor<fp16, [384]> var_786_beta_0_to_fp16 = const()[name = tensor<string, []>("op_786_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66797888)))];
+            tensor<fp16, []> var_786_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_786_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_786_cast_fp16 = batch_norm(beta = var_786_beta_0_to_fp16, epsilon = var_786_epsilon_0_to_fp16, gamma = var_786_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_784_cast_fp16)[name = tensor<string, []>("op_786_cast_fp16")];
+            tensor<int32, [2]> var_792 = const()[name = tensor<string, []>("op_792"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_794 = const()[name = tensor<string, []>("op_794"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_796_pad_type_0 = const()[name = tensor<string, []>("op_796_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_796_pad_0 = const()[name = tensor<string, []>("op_796_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_796 = conv(bias = layers_2_mlp_fc1_bias, dilations = var_794, groups = var_580, pad = var_796_pad_0, pad_type = var_796_pad_type_0, strides = var_792, weight = layers_2_mlp_fc1_weight, x = var_786_cast_fp16)[name = tensor<string, []>("op_796")];
+            tensor<string, []> input_95_mode_0 = const()[name = tensor<string, []>("input_95_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_95 = gelu(mode = input_95_mode_0, x = var_796)[name = tensor<string, []>("input_95")];
+            tensor<int32, [2]> var_800 = const()[name = tensor<string, []>("op_800"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_802 = const()[name = tensor<string, []>("op_802"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_804_pad_type_0 = const()[name = tensor<string, []>("op_804_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_804_pad_0 = const()[name = tensor<string, []>("op_804_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_804 = conv(bias = layers_2_mlp_fc2_bias, dilations = var_802, groups = var_580, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_800, weight = layers_2_mlp_fc2_weight, x = input_95)[name = tensor<string, []>("op_804")];
+            tensor<fp16, [1, 384, 1, 512]> x_25 = add(x = var_786_cast_fp16, y = var_804)[name = tensor<string, []>("x_25")];
+            tensor<fp16, []> var_577_promoted_1 = const()[name = tensor<string, []>("op_577_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_578_promoted_1 = const()[name = tensor<string, []>("op_578_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_27 = clip(alpha = var_577_promoted_1, beta = var_578_promoted_1, x = x_25)[name = tensor<string, []>("x_27")];
+            tensor<int32, [1]> var_809 = const()[name = tensor<string, []>("op_809"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_13 = reduce_mean(axes = var_809, keep_dims = var_582, x = x_27)[name = tensor<string, []>("mean_13")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_13 = sub(x = x_27, y = mean_13)[name = tensor<string, []>("zero_mean_13")];
-            tensor<fp16, []> var_592_promoted_1 = const()[name = tensor<string, []>("op_592_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_826 = pow(x = zero_mean_13, y = var_592_promoted_1)[name = tensor<string, []>("op_826")];
-            tensor<int32, [1]> var_827 = const()[name = tensor<string, []>("op_827"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_828 = reduce_mean(axes = var_827, keep_dims = var_587, x = var_826)[name = tensor<string, []>("op_828")];
-            tensor<fp16, []> var_829_to_fp16 = const()[name = tensor<string, []>("op_829_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_830_cast_fp16 = add(x = var_828, y = var_829_to_fp16)[name = tensor<string, []>("op_830_cast_fp16")];
+            tensor<fp16, []> var_579_promoted_1 = const()[name = tensor<string, []>("op_579_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_812 = pow(x = zero_mean_13, y = var_579_promoted_1)[name = tensor<string, []>("op_812")];
+            tensor<int32, [1]> var_813 = const()[name = tensor<string, []>("op_813"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_814 = reduce_mean(axes = var_813, keep_dims = var_582, x = var_812)[name = tensor<string, []>("op_814")];
+            tensor<fp16, []> var_815_to_fp16 = const()[name = tensor<string, []>("op_815_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_816_cast_fp16 = add(x = var_814, y = var_815_to_fp16)[name = tensor<string, []>("op_816_cast_fp16")];
             tensor<fp32, []> denom_13_epsilon_0 = const()[name = tensor<string, []>("denom_13_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0, x = var_830_cast_fp16)[name = tensor<string, []>("denom_13_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_832_cast_fp16 = mul(x = zero_mean_13, y = denom_13_cast_fp16)[name = tensor<string, []>("op_832_cast_fp16")];
-            tensor<fp16, [384]> var_834_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_834_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66798720)))];
-            tensor<fp16, [384]> var_834_beta_0_to_fp16 = const()[name = tensor<string, []>("op_834_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66799552)))];
-            tensor<fp16, []> var_834_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_834_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_834_cast_fp16 = batch_norm(beta = var_834_beta_0_to_fp16, epsilon = var_834_epsilon_0_to_fp16, gamma = var_834_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_832_cast_fp16)[name = tensor<string, []>("op_834_cast_fp16")];
-            tensor<int32, []> var_839 = const()[name = tensor<string, []>("op_839"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_840 = const()[name = tensor<string, []>("op_840"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_841 = const()[name = tensor<string, []>("op_841"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 1, 1, 512]> denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0, x = var_816_cast_fp16)[name = tensor<string, []>("denom_13_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_818_cast_fp16 = mul(x = zero_mean_13, y = denom_13_cast_fp16)[name = tensor<string, []>("op_818_cast_fp16")];
+            tensor<fp16, [384]> var_820_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_820_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66798720)))];
+            tensor<fp16, [384]> var_820_beta_0_to_fp16 = const()[name = tensor<string, []>("op_820_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66799552)))];
+            tensor<fp16, []> var_820_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_820_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_820_cast_fp16 = batch_norm(beta = var_820_beta_0_to_fp16, epsilon = var_820_epsilon_0_to_fp16, gamma = var_820_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_818_cast_fp16)[name = tensor<string, []>("op_820_cast_fp16")];
+            tensor<int32, []> var_826 = const()[name = tensor<string, []>("op_826"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_827 = const()[name = tensor<string, []>("op_827"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_828 = const()[name = tensor<string, []>("op_828"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_850 = const()[name = tensor<string, []>("op_850"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_852 = const()[name = tensor<string, []>("op_852"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_854_pad_type_0 = const()[name = tensor<string, []>("op_854_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_854_pad_0 = const()[name = tensor<string, []>("op_854_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_854 = conv(bias = layers_3_attention_q_proj_bias, dilations = var_852, groups = var_826, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_850, weight = layers_3_attention_q_proj_weight, x = var_820_cast_fp16)[name = tensor<string, []>("op_854")];
+            tensor<int32, [2]> var_857 = const()[name = tensor<string, []>("op_857"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_859 = const()[name = tensor<string, []>("op_859"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_7_pad_type_0 = const()[name = tensor<string, []>("ks_7_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_7_pad_0 = const()[name = tensor<string, []>("ks_7_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_7 = conv(bias = layers_3_attention_k_proj_bias, dilations = var_859, groups = var_826, pad = ks_7_pad_0, pad_type = ks_7_pad_type_0, strides = var_857, weight = layers_3_attention_k_proj_weight, x = var_820_cast_fp16)[name = tensor<string, []>("ks_7")];
+            tensor<int32, [2]> var_864 = const()[name = tensor<string, []>("op_864"), val = tensor<int32, [2]>([1, 1])];
             tensor<int32, [2]> var_866 = const()[name = tensor<string, []>("op_866"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_868 = const()[name = tensor<string, []>("op_868"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_870_pad_type_0 = const()[name = tensor<string, []>("op_870_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_870_pad_0 = const()[name = tensor<string, []>("op_870_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_870 = conv(bias = layers_3_attention_q_proj_bias, dilations = var_868, groups = var_839, pad = var_870_pad_0, pad_type = var_870_pad_type_0, strides = var_866, weight = layers_3_attention_q_proj_weight, x = var_834_cast_fp16)[name = tensor<string, []>("op_870")];
-            tensor<int32, [4]> var_871 = const()[name = tensor<string, []>("op_871"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_872 = reshape(shape = var_871, x = var_870)[name = tensor<string, []>("op_872")];
-            tensor<int32, [2]> var_875 = const()[name = tensor<string, []>("op_875"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_877 = const()[name = tensor<string, []>("op_877"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_879_pad_type_0 = const()[name = tensor<string, []>("op_879_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_879_pad_0 = const()[name = tensor<string, []>("op_879_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_879 = conv(bias = layers_3_attention_k_proj_bias, dilations = var_877, groups = var_839, pad = var_879_pad_0, pad_type = var_879_pad_type_0, strides = var_875, weight = layers_3_attention_k_proj_weight, x = var_834_cast_fp16)[name = tensor<string, []>("op_879")];
-            tensor<int32, [4]> var_880 = const()[name = tensor<string, []>("op_880"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_7 = reshape(shape = var_880, x = var_879)[name = tensor<string, []>("ks_7")];
-            tensor<int32, [2]> var_884 = const()[name = tensor<string, []>("op_884"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_886 = const()[name = tensor<string, []>("op_886"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_888_pad_type_0 = const()[name = tensor<string, []>("op_888_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_888_pad_0 = const()[name = tensor<string, []>("op_888_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_888 = conv(bias = layers_3_attention_v_proj_bias, dilations = var_886, groups = var_839, pad = var_888_pad_0, pad_type = var_888_pad_type_0, strides = var_884, weight = layers_3_attention_v_proj_weight, x = var_834_cast_fp16)[name = tensor<string, []>("op_888")];
-            tensor<int32, [4]> var_889 = const()[name = tensor<string, []>("op_889"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_890 = reshape(shape = var_889, x = var_888)[name = tensor<string, []>("op_890")];
-            tensor<int32, [12]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_891_axis_0 = const()[name = tensor<string, []>("op_891_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_891_0, tensor<fp16, [1, 32, 1, 512]> var_891_1, tensor<fp16, [1, 32, 1, 512]> var_891_2, tensor<fp16, [1, 32, 1, 512]> var_891_3, tensor<fp16, [1, 32, 1, 512]> var_891_4, tensor<fp16, [1, 32, 1, 512]> var_891_5, tensor<fp16, [1, 32, 1, 512]> var_891_6, tensor<fp16, [1, 32, 1, 512]> var_891_7, tensor<fp16, [1, 32, 1, 512]> var_891_8, tensor<fp16, [1, 32, 1, 512]> var_891_9, tensor<fp16, [1, 32, 1, 512]> var_891_10, tensor<fp16, [1, 32, 1, 512]> var_891_11 = split(axis = var_891_axis_0, split_sizes = tile_17, x = var_872)[name = tensor<string, []>("op_891")];
-            tensor<int32, [4]> var_904_perm_0 = const()[name = tensor<string, []>("op_904_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_905_axis_0 = const()[name = tensor<string, []>("op_905_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_8 = transpose(perm = var_904_perm_0, x = ks_7)[name = tensor<string, []>("transpose_8")];
-            tensor<fp16, [1, 512, 1, 32]> var_905_0, tensor<fp16, [1, 512, 1, 32]> var_905_1, tensor<fp16, [1, 512, 1, 32]> var_905_2, tensor<fp16, [1, 512, 1, 32]> var_905_3, tensor<fp16, [1, 512, 1, 32]> var_905_4, tensor<fp16, [1, 512, 1, 32]> var_905_5, tensor<fp16, [1, 512, 1, 32]> var_905_6, tensor<fp16, [1, 512, 1, 32]> var_905_7, tensor<fp16, [1, 512, 1, 32]> var_905_8, tensor<fp16, [1, 512, 1, 32]> var_905_9, tensor<fp16, [1, 512, 1, 32]> var_905_10, tensor<fp16, [1, 512, 1, 32]> var_905_11 = split(axis = var_905_axis_0, split_sizes = tile_18, x = transpose_8)[name = tensor<string, []>("op_905")];
-            tensor<int32, [12]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_918_axis_0 = const()[name = tensor<string, []>("op_918_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_918_0, tensor<fp16, [1, 32, 1, 512]> var_918_1, tensor<fp16, [1, 32, 1, 512]> var_918_2, tensor<fp16, [1, 32, 1, 512]> var_918_3, tensor<fp16, [1, 32, 1, 512]> var_918_4, tensor<fp16, [1, 32, 1, 512]> var_918_5, tensor<fp16, [1, 32, 1, 512]> var_918_6, tensor<fp16, [1, 32, 1, 512]> var_918_7, tensor<fp16, [1, 32, 1, 512]> var_918_8, tensor<fp16, [1, 32, 1, 512]> var_918_9, tensor<fp16, [1, 32, 1, 512]> var_918_10, tensor<fp16, [1, 32, 1, 512]> var_918_11 = split(axis = var_918_axis_0, split_sizes = tile_19, x = var_890)[name = tensor<string, []>("op_918")];
-            tensor<string, []> var_932_equation_0 = const()[name = tensor<string, []>("op_932_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_932 = einsum(equation = var_932_equation_0, values = (var_905_0, var_891_0))[name = tensor<string, []>("op_932")];
-            tensor<fp16, []> var_933_to_fp16 = const()[name = tensor<string, []>("op_933_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_73_cast_fp16 = mul(x = var_932, y = var_933_to_fp16)[name = tensor<string, []>("w_73_cast_fp16")];
-            tensor<string, []> var_936_equation_0 = const()[name = tensor<string, []>("op_936_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_936 = einsum(equation = var_936_equation_0, values = (var_905_1, var_891_1))[name = tensor<string, []>("op_936")];
-            tensor<fp16, []> var_937_to_fp16 = const()[name = tensor<string, []>("op_937_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_75_cast_fp16 = mul(x = var_936, y = var_937_to_fp16)[name = tensor<string, []>("w_75_cast_fp16")];
-            tensor<string, []> var_940_equation_0 = const()[name = tensor<string, []>("op_940_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_940 = einsum(equation = var_940_equation_0, values = (var_905_2, var_891_2))[name = tensor<string, []>("op_940")];
-            tensor<fp16, []> var_941_to_fp16 = const()[name = tensor<string, []>("op_941_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_77_cast_fp16 = mul(x = var_940, y = var_941_to_fp16)[name = tensor<string, []>("w_77_cast_fp16")];
-            tensor<string, []> var_944_equation_0 = const()[name = tensor<string, []>("op_944_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_944 = einsum(equation = var_944_equation_0, values = (var_905_3, var_891_3))[name = tensor<string, []>("op_944")];
-            tensor<fp16, []> var_945_to_fp16 = const()[name = tensor<string, []>("op_945_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_79_cast_fp16 = mul(x = var_944, y = var_945_to_fp16)[name = tensor<string, []>("w_79_cast_fp16")];
-            tensor<string, []> var_948_equation_0 = const()[name = tensor<string, []>("op_948_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_948 = einsum(equation = var_948_equation_0, values = (var_905_4, var_891_4))[name = tensor<string, []>("op_948")];
-            tensor<fp16, []> var_949_to_fp16 = const()[name = tensor<string, []>("op_949_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_81_cast_fp16 = mul(x = var_948, y = var_949_to_fp16)[name = tensor<string, []>("w_81_cast_fp16")];
-            tensor<string, []> var_952_equation_0 = const()[name = tensor<string, []>("op_952_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_952 = einsum(equation = var_952_equation_0, values = (var_905_5, var_891_5))[name = tensor<string, []>("op_952")];
-            tensor<fp16, []> var_953_to_fp16 = const()[name = tensor<string, []>("op_953_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_83_cast_fp16 = mul(x = var_952, y = var_953_to_fp16)[name = tensor<string, []>("w_83_cast_fp16")];
-            tensor<string, []> var_956_equation_0 = const()[name = tensor<string, []>("op_956_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_956 = einsum(equation = var_956_equation_0, values = (var_905_6, var_891_6))[name = tensor<string, []>("op_956")];
-            tensor<fp16, []> var_957_to_fp16 = const()[name = tensor<string, []>("op_957_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_85_cast_fp16 = mul(x = var_956, y = var_957_to_fp16)[name = tensor<string, []>("w_85_cast_fp16")];
-            tensor<string, []> var_960_equation_0 = const()[name = tensor<string, []>("op_960_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_960 = einsum(equation = var_960_equation_0, values = (var_905_7, var_891_7))[name = tensor<string, []>("op_960")];
-            tensor<fp16, []> var_961_to_fp16 = const()[name = tensor<string, []>("op_961_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_87_cast_fp16 = mul(x = var_960, y = var_961_to_fp16)[name = tensor<string, []>("w_87_cast_fp16")];
-            tensor<string, []> var_964_equation_0 = const()[name = tensor<string, []>("op_964_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_964 = einsum(equation = var_964_equation_0, values = (var_905_8, var_891_8))[name = tensor<string, []>("op_964")];
-            tensor<fp16, []> var_965_to_fp16 = const()[name = tensor<string, []>("op_965_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_89_cast_fp16 = mul(x = var_964, y = var_965_to_fp16)[name = tensor<string, []>("w_89_cast_fp16")];
-            tensor<string, []> var_968_equation_0 = const()[name = tensor<string, []>("op_968_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_968 = einsum(equation = var_968_equation_0, values = (var_905_9, var_891_9))[name = tensor<string, []>("op_968")];
-            tensor<fp16, []> var_969_to_fp16 = const()[name = tensor<string, []>("op_969_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_91_cast_fp16 = mul(x = var_968, y = var_969_to_fp16)[name = tensor<string, []>("w_91_cast_fp16")];
-            tensor<string, []> var_972_equation_0 = const()[name = tensor<string, []>("op_972_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_972 = einsum(equation = var_972_equation_0, values = (var_905_10, var_891_10))[name = tensor<string, []>("op_972")];
-            tensor<fp16, []> var_973_to_fp16 = const()[name = tensor<string, []>("op_973_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_93_cast_fp16 = mul(x = var_972, y = var_973_to_fp16)[name = tensor<string, []>("w_93_cast_fp16")];
-            tensor<string, []> var_976_equation_0 = const()[name = tensor<string, []>("op_976_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_976 = einsum(equation = var_976_equation_0, values = (var_905_11, var_891_11))[name = tensor<string, []>("op_976")];
-            tensor<fp16, []> var_977_to_fp16 = const()[name = tensor<string, []>("op_977_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_95_cast_fp16 = mul(x = var_976, y = var_977_to_fp16)[name = tensor<string, []>("w_95_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_979_cast_fp16 = add(x = w_73_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_979_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_980_cast_fp16 = softmax(axis = var_839, x = var_979_cast_fp16)[name = tensor<string, []>("op_980_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_981_cast_fp16 = add(x = w_75_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_981_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_982_cast_fp16 = softmax(axis = var_839, x = var_981_cast_fp16)[name = tensor<string, []>("op_982_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_983_cast_fp16 = add(x = w_77_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_983_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_984_cast_fp16 = softmax(axis = var_839, x = var_983_cast_fp16)[name = tensor<string, []>("op_984_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_985_cast_fp16 = add(x = w_79_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_985_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_986_cast_fp16 = softmax(axis = var_839, x = var_985_cast_fp16)[name = tensor<string, []>("op_986_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_987_cast_fp16 = add(x = w_81_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_987_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_988_cast_fp16 = softmax(axis = var_839, x = var_987_cast_fp16)[name = tensor<string, []>("op_988_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_989_cast_fp16 = add(x = w_83_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_989_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_990_cast_fp16 = softmax(axis = var_839, x = var_989_cast_fp16)[name = tensor<string, []>("op_990_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_991_cast_fp16 = add(x = w_85_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_991_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_992_cast_fp16 = softmax(axis = var_839, x = var_991_cast_fp16)[name = tensor<string, []>("op_992_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_993_cast_fp16 = add(x = w_87_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_993_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_994_cast_fp16 = softmax(axis = var_839, x = var_993_cast_fp16)[name = tensor<string, []>("op_994_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_995_cast_fp16 = add(x = w_89_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_995_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_996_cast_fp16 = softmax(axis = var_839, x = var_995_cast_fp16)[name = tensor<string, []>("op_996_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_997_cast_fp16 = add(x = w_91_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_997_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_998_cast_fp16 = softmax(axis = var_839, x = var_997_cast_fp16)[name = tensor<string, []>("op_998_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_999_cast_fp16 = add(x = w_93_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_999_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1000_cast_fp16 = softmax(axis = var_839, x = var_999_cast_fp16)[name = tensor<string, []>("op_1000_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1001_cast_fp16 = add(x = w_95_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1001_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1002_cast_fp16 = softmax(axis = var_839, x = var_1001_cast_fp16)[name = tensor<string, []>("op_1002_cast_fp16")];
+            tensor<string, []> var_868_pad_type_0 = const()[name = tensor<string, []>("op_868_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_868_pad_0 = const()[name = tensor<string, []>("op_868_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_868 = conv(bias = layers_3_attention_v_proj_bias, dilations = var_866, groups = var_826, pad = var_868_pad_0, pad_type = var_868_pad_type_0, strides = var_864, weight = layers_3_attention_v_proj_weight, x = var_820_cast_fp16)[name = tensor<string, []>("op_868")];
+            tensor<int32, [12]> tile_17 = const()[name = tensor<string, []>("tile_17"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_869_axis_0 = const()[name = tensor<string, []>("op_869_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_869_0, tensor<fp16, [1, 32, 1, 512]> var_869_1, tensor<fp16, [1, 32, 1, 512]> var_869_2, tensor<fp16, [1, 32, 1, 512]> var_869_3, tensor<fp16, [1, 32, 1, 512]> var_869_4, tensor<fp16, [1, 32, 1, 512]> var_869_5, tensor<fp16, [1, 32, 1, 512]> var_869_6, tensor<fp16, [1, 32, 1, 512]> var_869_7, tensor<fp16, [1, 32, 1, 512]> var_869_8, tensor<fp16, [1, 32, 1, 512]> var_869_9, tensor<fp16, [1, 32, 1, 512]> var_869_10, tensor<fp16, [1, 32, 1, 512]> var_869_11 = split(axis = var_869_axis_0, split_sizes = tile_17, x = var_854)[name = tensor<string, []>("op_869")];
+            tensor<int32, [4]> var_882_perm_0 = const()[name = tensor<string, []>("op_882_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_18 = const()[name = tensor<string, []>("tile_18"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_883_axis_0 = const()[name = tensor<string, []>("op_883_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_8 = transpose(perm = var_882_perm_0, x = ks_7)[name = tensor<string, []>("transpose_8")];
+            tensor<fp16, [1, 512, 1, 32]> var_883_0, tensor<fp16, [1, 512, 1, 32]> var_883_1, tensor<fp16, [1, 512, 1, 32]> var_883_2, tensor<fp16, [1, 512, 1, 32]> var_883_3, tensor<fp16, [1, 512, 1, 32]> var_883_4, tensor<fp16, [1, 512, 1, 32]> var_883_5, tensor<fp16, [1, 512, 1, 32]> var_883_6, tensor<fp16, [1, 512, 1, 32]> var_883_7, tensor<fp16, [1, 512, 1, 32]> var_883_8, tensor<fp16, [1, 512, 1, 32]> var_883_9, tensor<fp16, [1, 512, 1, 32]> var_883_10, tensor<fp16, [1, 512, 1, 32]> var_883_11 = split(axis = var_883_axis_0, split_sizes = tile_18, x = transpose_8)[name = tensor<string, []>("op_883")];
+            tensor<int32, [12]> tile_19 = const()[name = tensor<string, []>("tile_19"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_896_axis_0 = const()[name = tensor<string, []>("op_896_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_896_0, tensor<fp16, [1, 32, 1, 512]> var_896_1, tensor<fp16, [1, 32, 1, 512]> var_896_2, tensor<fp16, [1, 32, 1, 512]> var_896_3, tensor<fp16, [1, 32, 1, 512]> var_896_4, tensor<fp16, [1, 32, 1, 512]> var_896_5, tensor<fp16, [1, 32, 1, 512]> var_896_6, tensor<fp16, [1, 32, 1, 512]> var_896_7, tensor<fp16, [1, 32, 1, 512]> var_896_8, tensor<fp16, [1, 32, 1, 512]> var_896_9, tensor<fp16, [1, 32, 1, 512]> var_896_10, tensor<fp16, [1, 32, 1, 512]> var_896_11 = split(axis = var_896_axis_0, split_sizes = tile_19, x = var_868)[name = tensor<string, []>("op_896")];
+            tensor<string, []> var_910_equation_0 = const()[name = tensor<string, []>("op_910_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_910 = einsum(equation = var_910_equation_0, values = (var_883_0, var_869_0))[name = tensor<string, []>("op_910")];
+            tensor<fp16, []> var_911_to_fp16 = const()[name = tensor<string, []>("op_911_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_73_cast_fp16 = mul(x = var_910, y = var_911_to_fp16)[name = tensor<string, []>("w_73_cast_fp16")];
+            tensor<string, []> var_914_equation_0 = const()[name = tensor<string, []>("op_914_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_914 = einsum(equation = var_914_equation_0, values = (var_883_1, var_869_1))[name = tensor<string, []>("op_914")];
+            tensor<fp16, []> var_915_to_fp16 = const()[name = tensor<string, []>("op_915_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_75_cast_fp16 = mul(x = var_914, y = var_915_to_fp16)[name = tensor<string, []>("w_75_cast_fp16")];
+            tensor<string, []> var_918_equation_0 = const()[name = tensor<string, []>("op_918_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_918 = einsum(equation = var_918_equation_0, values = (var_883_2, var_869_2))[name = tensor<string, []>("op_918")];
+            tensor<fp16, []> var_919_to_fp16 = const()[name = tensor<string, []>("op_919_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_77_cast_fp16 = mul(x = var_918, y = var_919_to_fp16)[name = tensor<string, []>("w_77_cast_fp16")];
+            tensor<string, []> var_922_equation_0 = const()[name = tensor<string, []>("op_922_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_922 = einsum(equation = var_922_equation_0, values = (var_883_3, var_869_3))[name = tensor<string, []>("op_922")];
+            tensor<fp16, []> var_923_to_fp16 = const()[name = tensor<string, []>("op_923_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_79_cast_fp16 = mul(x = var_922, y = var_923_to_fp16)[name = tensor<string, []>("w_79_cast_fp16")];
+            tensor<string, []> var_926_equation_0 = const()[name = tensor<string, []>("op_926_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_926 = einsum(equation = var_926_equation_0, values = (var_883_4, var_869_4))[name = tensor<string, []>("op_926")];
+            tensor<fp16, []> var_927_to_fp16 = const()[name = tensor<string, []>("op_927_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_81_cast_fp16 = mul(x = var_926, y = var_927_to_fp16)[name = tensor<string, []>("w_81_cast_fp16")];
+            tensor<string, []> var_930_equation_0 = const()[name = tensor<string, []>("op_930_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_930 = einsum(equation = var_930_equation_0, values = (var_883_5, var_869_5))[name = tensor<string, []>("op_930")];
+            tensor<fp16, []> var_931_to_fp16 = const()[name = tensor<string, []>("op_931_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_83_cast_fp16 = mul(x = var_930, y = var_931_to_fp16)[name = tensor<string, []>("w_83_cast_fp16")];
+            tensor<string, []> var_934_equation_0 = const()[name = tensor<string, []>("op_934_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_934 = einsum(equation = var_934_equation_0, values = (var_883_6, var_869_6))[name = tensor<string, []>("op_934")];
+            tensor<fp16, []> var_935_to_fp16 = const()[name = tensor<string, []>("op_935_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_85_cast_fp16 = mul(x = var_934, y = var_935_to_fp16)[name = tensor<string, []>("w_85_cast_fp16")];
+            tensor<string, []> var_938_equation_0 = const()[name = tensor<string, []>("op_938_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_938 = einsum(equation = var_938_equation_0, values = (var_883_7, var_869_7))[name = tensor<string, []>("op_938")];
+            tensor<fp16, []> var_939_to_fp16 = const()[name = tensor<string, []>("op_939_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_87_cast_fp16 = mul(x = var_938, y = var_939_to_fp16)[name = tensor<string, []>("w_87_cast_fp16")];
+            tensor<string, []> var_942_equation_0 = const()[name = tensor<string, []>("op_942_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_942 = einsum(equation = var_942_equation_0, values = (var_883_8, var_869_8))[name = tensor<string, []>("op_942")];
+            tensor<fp16, []> var_943_to_fp16 = const()[name = tensor<string, []>("op_943_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_89_cast_fp16 = mul(x = var_942, y = var_943_to_fp16)[name = tensor<string, []>("w_89_cast_fp16")];
+            tensor<string, []> var_946_equation_0 = const()[name = tensor<string, []>("op_946_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_946 = einsum(equation = var_946_equation_0, values = (var_883_9, var_869_9))[name = tensor<string, []>("op_946")];
+            tensor<fp16, []> var_947_to_fp16 = const()[name = tensor<string, []>("op_947_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_91_cast_fp16 = mul(x = var_946, y = var_947_to_fp16)[name = tensor<string, []>("w_91_cast_fp16")];
+            tensor<string, []> var_950_equation_0 = const()[name = tensor<string, []>("op_950_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_950 = einsum(equation = var_950_equation_0, values = (var_883_10, var_869_10))[name = tensor<string, []>("op_950")];
+            tensor<fp16, []> var_951_to_fp16 = const()[name = tensor<string, []>("op_951_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_93_cast_fp16 = mul(x = var_950, y = var_951_to_fp16)[name = tensor<string, []>("w_93_cast_fp16")];
+            tensor<string, []> var_954_equation_0 = const()[name = tensor<string, []>("op_954_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_954 = einsum(equation = var_954_equation_0, values = (var_883_11, var_869_11))[name = tensor<string, []>("op_954")];
+            tensor<fp16, []> var_955_to_fp16 = const()[name = tensor<string, []>("op_955_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_95_cast_fp16 = mul(x = var_954, y = var_955_to_fp16)[name = tensor<string, []>("w_95_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_99_cast_fp16 = add(x = w_73_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_958_cast_fp16 = softmax(axis = var_826, x = input_99_cast_fp16)[name = tensor<string, []>("op_958_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_101_cast_fp16 = add(x = w_75_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_960_cast_fp16 = softmax(axis = var_826, x = input_101_cast_fp16)[name = tensor<string, []>("op_960_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_103_cast_fp16 = add(x = w_77_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_103_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_962_cast_fp16 = softmax(axis = var_826, x = input_103_cast_fp16)[name = tensor<string, []>("op_962_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_105_cast_fp16 = add(x = w_79_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_964_cast_fp16 = softmax(axis = var_826, x = input_105_cast_fp16)[name = tensor<string, []>("op_964_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_107_cast_fp16 = add(x = w_81_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_966_cast_fp16 = softmax(axis = var_826, x = input_107_cast_fp16)[name = tensor<string, []>("op_966_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_109_cast_fp16 = add(x = w_83_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_968_cast_fp16 = softmax(axis = var_826, x = input_109_cast_fp16)[name = tensor<string, []>("op_968_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_111_cast_fp16 = add(x = w_85_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_970_cast_fp16 = softmax(axis = var_826, x = input_111_cast_fp16)[name = tensor<string, []>("op_970_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_113_cast_fp16 = add(x = w_87_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_972_cast_fp16 = softmax(axis = var_826, x = input_113_cast_fp16)[name = tensor<string, []>("op_972_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_115_cast_fp16 = add(x = w_89_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_115_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_974_cast_fp16 = softmax(axis = var_826, x = input_115_cast_fp16)[name = tensor<string, []>("op_974_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_117_cast_fp16 = add(x = w_91_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_976_cast_fp16 = softmax(axis = var_826, x = input_117_cast_fp16)[name = tensor<string, []>("op_976_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_119_cast_fp16 = add(x = w_93_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_978_cast_fp16 = softmax(axis = var_826, x = input_119_cast_fp16)[name = tensor<string, []>("op_978_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_121_cast_fp16 = add(x = w_95_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_980_cast_fp16 = softmax(axis = var_826, x = input_121_cast_fp16)[name = tensor<string, []>("op_980_cast_fp16")];
+            tensor<string, []> var_982_equation_0 = const()[name = tensor<string, []>("op_982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_896_0, var_958_cast_fp16))[name = tensor<string, []>("op_982_cast_fp16")];
+            tensor<string, []> var_984_equation_0 = const()[name = tensor<string, []>("op_984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_984_cast_fp16 = einsum(equation = var_984_equation_0, values = (var_896_1, var_960_cast_fp16))[name = tensor<string, []>("op_984_cast_fp16")];
+            tensor<string, []> var_986_equation_0 = const()[name = tensor<string, []>("op_986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_896_2, var_962_cast_fp16))[name = tensor<string, []>("op_986_cast_fp16")];
+            tensor<string, []> var_988_equation_0 = const()[name = tensor<string, []>("op_988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_988_cast_fp16 = einsum(equation = var_988_equation_0, values = (var_896_3, var_964_cast_fp16))[name = tensor<string, []>("op_988_cast_fp16")];
+            tensor<string, []> var_990_equation_0 = const()[name = tensor<string, []>("op_990_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_896_4, var_966_cast_fp16))[name = tensor<string, []>("op_990_cast_fp16")];
+            tensor<string, []> var_992_equation_0 = const()[name = tensor<string, []>("op_992_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_992_cast_fp16 = einsum(equation = var_992_equation_0, values = (var_896_5, var_968_cast_fp16))[name = tensor<string, []>("op_992_cast_fp16")];
+            tensor<string, []> var_994_equation_0 = const()[name = tensor<string, []>("op_994_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_896_6, var_970_cast_fp16))[name = tensor<string, []>("op_994_cast_fp16")];
+            tensor<string, []> var_996_equation_0 = const()[name = tensor<string, []>("op_996_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_996_cast_fp16 = einsum(equation = var_996_equation_0, values = (var_896_7, var_972_cast_fp16))[name = tensor<string, []>("op_996_cast_fp16")];
+            tensor<string, []> var_998_equation_0 = const()[name = tensor<string, []>("op_998_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_896_8, var_974_cast_fp16))[name = tensor<string, []>("op_998_cast_fp16")];
+            tensor<string, []> var_1000_equation_0 = const()[name = tensor<string, []>("op_1000_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1000_cast_fp16 = einsum(equation = var_1000_equation_0, values = (var_896_9, var_976_cast_fp16))[name = tensor<string, []>("op_1000_cast_fp16")];
+            tensor<string, []> var_1002_equation_0 = const()[name = tensor<string, []>("op_1002_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_896_10, var_978_cast_fp16))[name = tensor<string, []>("op_1002_cast_fp16")];
             tensor<string, []> var_1004_equation_0 = const()[name = tensor<string, []>("op_1004_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1004_cast_fp16 = einsum(equation = var_1004_equation_0, values = (var_918_0, var_980_cast_fp16))[name = tensor<string, []>("op_1004_cast_fp16")];
-            tensor<string, []> var_1006_equation_0 = const()[name = tensor<string, []>("op_1006_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1006_cast_fp16 = einsum(equation = var_1006_equation_0, values = (var_918_1, var_982_cast_fp16))[name = tensor<string, []>("op_1006_cast_fp16")];
-            tensor<string, []> var_1008_equation_0 = const()[name = tensor<string, []>("op_1008_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1008_cast_fp16 = einsum(equation = var_1008_equation_0, values = (var_918_2, var_984_cast_fp16))[name = tensor<string, []>("op_1008_cast_fp16")];
-            tensor<string, []> var_1010_equation_0 = const()[name = tensor<string, []>("op_1010_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1010_cast_fp16 = einsum(equation = var_1010_equation_0, values = (var_918_3, var_986_cast_fp16))[name = tensor<string, []>("op_1010_cast_fp16")];
-            tensor<string, []> var_1012_equation_0 = const()[name = tensor<string, []>("op_1012_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1012_cast_fp16 = einsum(equation = var_1012_equation_0, values = (var_918_4, var_988_cast_fp16))[name = tensor<string, []>("op_1012_cast_fp16")];
-            tensor<string, []> var_1014_equation_0 = const()[name = tensor<string, []>("op_1014_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1014_cast_fp16 = einsum(equation = var_1014_equation_0, values = (var_918_5, var_990_cast_fp16))[name = tensor<string, []>("op_1014_cast_fp16")];
-            tensor<string, []> var_1016_equation_0 = const()[name = tensor<string, []>("op_1016_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1016_cast_fp16 = einsum(equation = var_1016_equation_0, values = (var_918_6, var_992_cast_fp16))[name = tensor<string, []>("op_1016_cast_fp16")];
-            tensor<string, []> var_1018_equation_0 = const()[name = tensor<string, []>("op_1018_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1018_cast_fp16 = einsum(equation = var_1018_equation_0, values = (var_918_7, var_994_cast_fp16))[name = tensor<string, []>("op_1018_cast_fp16")];
-            tensor<string, []> var_1020_equation_0 = const()[name = tensor<string, []>("op_1020_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1020_cast_fp16 = einsum(equation = var_1020_equation_0, values = (var_918_8, var_996_cast_fp16))[name = tensor<string, []>("op_1020_cast_fp16")];
-            tensor<string, []> var_1022_equation_0 = const()[name = tensor<string, []>("op_1022_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1022_cast_fp16 = einsum(equation = var_1022_equation_0, values = (var_918_9, var_998_cast_fp16))[name = tensor<string, []>("op_1022_cast_fp16")];
-            tensor<string, []> var_1024_equation_0 = const()[name = tensor<string, []>("op_1024_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1024_cast_fp16 = einsum(equation = var_1024_equation_0, values = (var_918_10, var_1000_cast_fp16))[name = tensor<string, []>("op_1024_cast_fp16")];
-            tensor<string, []> var_1026_equation_0 = const()[name = tensor<string, []>("op_1026_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1026_cast_fp16 = einsum(equation = var_1026_equation_0, values = (var_918_11, var_1002_cast_fp16))[name = tensor<string, []>("op_1026_cast_fp16")];
-            tensor<bool, []> var_1028_interleave_0 = const()[name = tensor<string, []>("op_1028_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1028_cast_fp16 = concat(axis = var_839, interleave = var_1028_interleave_0, values = (var_1004_cast_fp16, var_1006_cast_fp16, var_1008_cast_fp16, var_1010_cast_fp16, var_1012_cast_fp16, var_1014_cast_fp16, var_1016_cast_fp16, var_1018_cast_fp16, var_1020_cast_fp16, var_1022_cast_fp16, var_1024_cast_fp16, var_1026_cast_fp16))[name = tensor<string, []>("op_1028_cast_fp16")];
-            tensor<int32, [2]> var_1032 = const()[name = tensor<string, []>("op_1032"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1034 = const()[name = tensor<string, []>("op_1034"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1036_pad_type_0 = const()[name = tensor<string, []>("op_1036_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1036_pad_0 = const()[name = tensor<string, []>("op_1036_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1036 = conv(bias = layers_3_attention_o_proj_bias, dilations = var_1034, groups = var_839, pad = var_1036_pad_0, pad_type = var_1036_pad_type_0, strides = var_1032, weight = layers_3_attention_o_proj_weight, x = var_1028_cast_fp16)[name = tensor<string, []>("op_1036")];
-            tensor<bool, []> var_1038_interleave_0 = const()[name = tensor<string, []>("op_1038_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1038 = concat(axis = var_840, interleave = var_1038_interleave_0, values = var_1036)[name = tensor<string, []>("op_1038")];
-            tensor<fp16, [1, 384, 1, 512]> x_29 = add(x = var_834_cast_fp16, y = var_1038)[name = tensor<string, []>("x_29")];
-            tensor<fp16, []> var_837_promoted = const()[name = tensor<string, []>("op_837_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_838_promoted = const()[name = tensor<string, []>("op_838_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_31 = clip(alpha = var_837_promoted, beta = var_838_promoted, x = x_29)[name = tensor<string, []>("x_31")];
-            tensor<int32, [1]> var_1043 = const()[name = tensor<string, []>("op_1043"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_15 = reduce_mean(axes = var_1043, keep_dims = var_841, x = x_31)[name = tensor<string, []>("mean_15")];
+            tensor<fp16, [1, 32, 1, 512]> var_1004_cast_fp16 = einsum(equation = var_1004_equation_0, values = (var_896_11, var_980_cast_fp16))[name = tensor<string, []>("op_1004_cast_fp16")];
+            tensor<bool, []> var_1006_interleave_0 = const()[name = tensor<string, []>("op_1006_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1006_cast_fp16 = concat(axis = var_826, interleave = var_1006_interleave_0, values = (var_982_cast_fp16, var_984_cast_fp16, var_986_cast_fp16, var_988_cast_fp16, var_990_cast_fp16, var_992_cast_fp16, var_994_cast_fp16, var_996_cast_fp16, var_998_cast_fp16, var_1000_cast_fp16, var_1002_cast_fp16, var_1004_cast_fp16))[name = tensor<string, []>("op_1006_cast_fp16")];
+            tensor<int32, [2]> var_1010 = const()[name = tensor<string, []>("op_1010"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1012 = const()[name = tensor<string, []>("op_1012"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1014_pad_type_0 = const()[name = tensor<string, []>("op_1014_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1014_pad_0 = const()[name = tensor<string, []>("op_1014_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1014 = conv(bias = layers_3_attention_o_proj_bias, dilations = var_1012, groups = var_826, pad = var_1014_pad_0, pad_type = var_1014_pad_type_0, strides = var_1010, weight = layers_3_attention_o_proj_weight, x = var_1006_cast_fp16)[name = tensor<string, []>("op_1014")];
+            tensor<bool, []> var_1016_interleave_0 = const()[name = tensor<string, []>("op_1016_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1016 = concat(axis = var_827, interleave = var_1016_interleave_0, values = var_1014)[name = tensor<string, []>("op_1016")];
+            tensor<fp16, [1, 384, 1, 512]> x_29 = add(x = var_820_cast_fp16, y = var_1016)[name = tensor<string, []>("x_29")];
+            tensor<fp16, []> var_823_promoted = const()[name = tensor<string, []>("op_823_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_824_promoted = const()[name = tensor<string, []>("op_824_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_31 = clip(alpha = var_823_promoted, beta = var_824_promoted, x = x_29)[name = tensor<string, []>("x_31")];
+            tensor<int32, [1]> var_1021 = const()[name = tensor<string, []>("op_1021"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_15 = reduce_mean(axes = var_1021, keep_dims = var_828, x = x_31)[name = tensor<string, []>("mean_15")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_15 = sub(x = x_31, y = mean_15)[name = tensor<string, []>("zero_mean_15")];
-            tensor<fp16, []> var_846_promoted = const()[name = tensor<string, []>("op_846_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1046 = pow(x = zero_mean_15, y = var_846_promoted)[name = tensor<string, []>("op_1046")];
-            tensor<int32, [1]> var_1047 = const()[name = tensor<string, []>("op_1047"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1048 = reduce_mean(axes = var_1047, keep_dims = var_841, x = var_1046)[name = tensor<string, []>("op_1048")];
-            tensor<fp16, []> var_1049_to_fp16 = const()[name = tensor<string, []>("op_1049_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1050_cast_fp16 = add(x = var_1048, y = var_1049_to_fp16)[name = tensor<string, []>("op_1050_cast_fp16")];
+            tensor<fp16, []> var_825_promoted = const()[name = tensor<string, []>("op_825_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1024 = pow(x = zero_mean_15, y = var_825_promoted)[name = tensor<string, []>("op_1024")];
+            tensor<int32, [1]> var_1025 = const()[name = tensor<string, []>("op_1025"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1026 = reduce_mean(axes = var_1025, keep_dims = var_828, x = var_1024)[name = tensor<string, []>("op_1026")];
+            tensor<fp16, []> var_1027_to_fp16 = const()[name = tensor<string, []>("op_1027_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1028_cast_fp16 = add(x = var_1026, y = var_1027_to_fp16)[name = tensor<string, []>("op_1028_cast_fp16")];
             tensor<fp32, []> denom_15_epsilon_0 = const()[name = tensor<string, []>("denom_15_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0, x = var_1050_cast_fp16)[name = tensor<string, []>("denom_15_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1052_cast_fp16 = mul(x = zero_mean_15, y = denom_15_cast_fp16)[name = tensor<string, []>("op_1052_cast_fp16")];
-            tensor<fp16, [384]> var_1054_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1054_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66800384)))];
-            tensor<fp16, [384]> var_1054_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1054_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66801216)))];
-            tensor<fp16, []> var_1054_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1054_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1054_cast_fp16 = batch_norm(beta = var_1054_beta_0_to_fp16, epsilon = var_1054_epsilon_0_to_fp16, gamma = var_1054_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1052_cast_fp16)[name = tensor<string, []>("op_1054_cast_fp16")];
-            tensor<int32, [2]> var_1060 = const()[name = tensor<string, []>("op_1060"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1062 = const()[name = tensor<string, []>("op_1062"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1064_pad_type_0 = const()[name = tensor<string, []>("op_1064_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1064_pad_0 = const()[name = tensor<string, []>("op_1064_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_1064 = conv(bias = layers_3_mlp_fc1_bias, dilations = var_1062, groups = var_839, pad = var_1064_pad_0, pad_type = var_1064_pad_type_0, strides = var_1060, weight = layers_3_mlp_fc1_weight, x = var_1054_cast_fp16)[name = tensor<string, []>("op_1064")];
-            tensor<string, []> input_31_mode_0 = const()[name = tensor<string, []>("input_31_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_31 = gelu(mode = input_31_mode_0, x = var_1064)[name = tensor<string, []>("input_31")];
-            tensor<int32, [2]> var_1068 = const()[name = tensor<string, []>("op_1068"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1070 = const()[name = tensor<string, []>("op_1070"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1072_pad_type_0 = const()[name = tensor<string, []>("op_1072_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1072_pad_0 = const()[name = tensor<string, []>("op_1072_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1072 = conv(bias = layers_3_mlp_fc2_bias, dilations = var_1070, groups = var_839, pad = var_1072_pad_0, pad_type = var_1072_pad_type_0, strides = var_1068, weight = layers_3_mlp_fc2_weight, x = input_31)[name = tensor<string, []>("op_1072")];
-            tensor<fp16, [1, 384, 1, 512]> x_33 = add(x = var_1054_cast_fp16, y = var_1072)[name = tensor<string, []>("x_33")];
-            tensor<fp16, []> var_837_promoted_1 = const()[name = tensor<string, []>("op_837_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_838_promoted_1 = const()[name = tensor<string, []>("op_838_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_35 = clip(alpha = var_837_promoted_1, beta = var_838_promoted_1, x = x_33)[name = tensor<string, []>("x_35")];
-            tensor<int32, [1]> var_1077 = const()[name = tensor<string, []>("op_1077"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_17 = reduce_mean(axes = var_1077, keep_dims = var_841, x = x_35)[name = tensor<string, []>("mean_17")];
+            tensor<fp16, [1, 1, 1, 512]> denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0, x = var_1028_cast_fp16)[name = tensor<string, []>("denom_15_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1030_cast_fp16 = mul(x = zero_mean_15, y = denom_15_cast_fp16)[name = tensor<string, []>("op_1030_cast_fp16")];
+            tensor<fp16, [384]> var_1032_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1032_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66800384)))];
+            tensor<fp16, [384]> var_1032_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1032_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66801216)))];
+            tensor<fp16, []> var_1032_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1032_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1032_cast_fp16 = batch_norm(beta = var_1032_beta_0_to_fp16, epsilon = var_1032_epsilon_0_to_fp16, gamma = var_1032_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1030_cast_fp16)[name = tensor<string, []>("op_1032_cast_fp16")];
+            tensor<int32, [2]> var_1038 = const()[name = tensor<string, []>("op_1038"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1040 = const()[name = tensor<string, []>("op_1040"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1042_pad_type_0 = const()[name = tensor<string, []>("op_1042_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1042_pad_0 = const()[name = tensor<string, []>("op_1042_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_1042 = conv(bias = layers_3_mlp_fc1_bias, dilations = var_1040, groups = var_826, pad = var_1042_pad_0, pad_type = var_1042_pad_type_0, strides = var_1038, weight = layers_3_mlp_fc1_weight, x = var_1032_cast_fp16)[name = tensor<string, []>("op_1042")];
+            tensor<string, []> input_127_mode_0 = const()[name = tensor<string, []>("input_127_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_127 = gelu(mode = input_127_mode_0, x = var_1042)[name = tensor<string, []>("input_127")];
+            tensor<int32, [2]> var_1046 = const()[name = tensor<string, []>("op_1046"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1048 = const()[name = tensor<string, []>("op_1048"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1050_pad_type_0 = const()[name = tensor<string, []>("op_1050_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1050_pad_0 = const()[name = tensor<string, []>("op_1050_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1050 = conv(bias = layers_3_mlp_fc2_bias, dilations = var_1048, groups = var_826, pad = var_1050_pad_0, pad_type = var_1050_pad_type_0, strides = var_1046, weight = layers_3_mlp_fc2_weight, x = input_127)[name = tensor<string, []>("op_1050")];
+            tensor<fp16, [1, 384, 1, 512]> x_33 = add(x = var_1032_cast_fp16, y = var_1050)[name = tensor<string, []>("x_33")];
+            tensor<fp16, []> var_823_promoted_1 = const()[name = tensor<string, []>("op_823_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_824_promoted_1 = const()[name = tensor<string, []>("op_824_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_35 = clip(alpha = var_823_promoted_1, beta = var_824_promoted_1, x = x_33)[name = tensor<string, []>("x_35")];
+            tensor<int32, [1]> var_1055 = const()[name = tensor<string, []>("op_1055"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_17 = reduce_mean(axes = var_1055, keep_dims = var_828, x = x_35)[name = tensor<string, []>("mean_17")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_17 = sub(x = x_35, y = mean_17)[name = tensor<string, []>("zero_mean_17")];
-            tensor<fp16, []> var_846_promoted_1 = const()[name = tensor<string, []>("op_846_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1080 = pow(x = zero_mean_17, y = var_846_promoted_1)[name = tensor<string, []>("op_1080")];
-            tensor<int32, [1]> var_1081 = const()[name = tensor<string, []>("op_1081"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1082 = reduce_mean(axes = var_1081, keep_dims = var_841, x = var_1080)[name = tensor<string, []>("op_1082")];
-            tensor<fp16, []> var_1083_to_fp16 = const()[name = tensor<string, []>("op_1083_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1084_cast_fp16 = add(x = var_1082, y = var_1083_to_fp16)[name = tensor<string, []>("op_1084_cast_fp16")];
+            tensor<fp16, []> var_825_promoted_1 = const()[name = tensor<string, []>("op_825_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1058 = pow(x = zero_mean_17, y = var_825_promoted_1)[name = tensor<string, []>("op_1058")];
+            tensor<int32, [1]> var_1059 = const()[name = tensor<string, []>("op_1059"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1060 = reduce_mean(axes = var_1059, keep_dims = var_828, x = var_1058)[name = tensor<string, []>("op_1060")];
+            tensor<fp16, []> var_1061_to_fp16 = const()[name = tensor<string, []>("op_1061_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1062_cast_fp16 = add(x = var_1060, y = var_1061_to_fp16)[name = tensor<string, []>("op_1062_cast_fp16")];
             tensor<fp32, []> denom_17_epsilon_0 = const()[name = tensor<string, []>("denom_17_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0, x = var_1084_cast_fp16)[name = tensor<string, []>("denom_17_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1086_cast_fp16 = mul(x = zero_mean_17, y = denom_17_cast_fp16)[name = tensor<string, []>("op_1086_cast_fp16")];
-            tensor<fp16, [384]> var_1088_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1088_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66802048)))];
-            tensor<fp16, [384]> var_1088_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1088_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66802880)))];
-            tensor<fp16, []> var_1088_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1088_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1088_cast_fp16 = batch_norm(beta = var_1088_beta_0_to_fp16, epsilon = var_1088_epsilon_0_to_fp16, gamma = var_1088_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1086_cast_fp16)[name = tensor<string, []>("op_1088_cast_fp16")];
-            tensor<int32, []> var_1093 = const()[name = tensor<string, []>("op_1093"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_1094 = const()[name = tensor<string, []>("op_1094"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_1095 = const()[name = tensor<string, []>("op_1095"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_1120 = const()[name = tensor<string, []>("op_1120"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1122 = const()[name = tensor<string, []>("op_1122"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1124_pad_type_0 = const()[name = tensor<string, []>("op_1124_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1124_pad_0 = const()[name = tensor<string, []>("op_1124_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1124 = conv(bias = layers_4_attention_q_proj_bias, dilations = var_1122, groups = var_1093, pad = var_1124_pad_0, pad_type = var_1124_pad_type_0, strides = var_1120, weight = layers_4_attention_q_proj_weight, x = var_1088_cast_fp16)[name = tensor<string, []>("op_1124")];
-            tensor<int32, [4]> var_1125 = const()[name = tensor<string, []>("op_1125"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1126 = reshape(shape = var_1125, x = var_1124)[name = tensor<string, []>("op_1126")];
-            tensor<int32, [2]> var_1129 = const()[name = tensor<string, []>("op_1129"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1131 = const()[name = tensor<string, []>("op_1131"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1133_pad_type_0 = const()[name = tensor<string, []>("op_1133_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1133_pad_0 = const()[name = tensor<string, []>("op_1133_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1133 = conv(bias = layers_4_attention_k_proj_bias, dilations = var_1131, groups = var_1093, pad = var_1133_pad_0, pad_type = var_1133_pad_type_0, strides = var_1129, weight = layers_4_attention_k_proj_weight, x = var_1088_cast_fp16)[name = tensor<string, []>("op_1133")];
-            tensor<int32, [4]> var_1134 = const()[name = tensor<string, []>("op_1134"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_9 = reshape(shape = var_1134, x = var_1133)[name = tensor<string, []>("ks_9")];
-            tensor<int32, [2]> var_1138 = const()[name = tensor<string, []>("op_1138"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1140 = const()[name = tensor<string, []>("op_1140"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1142_pad_type_0 = const()[name = tensor<string, []>("op_1142_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1142_pad_0 = const()[name = tensor<string, []>("op_1142_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1142 = conv(bias = layers_4_attention_v_proj_bias, dilations = var_1140, groups = var_1093, pad = var_1142_pad_0, pad_type = var_1142_pad_type_0, strides = var_1138, weight = layers_4_attention_v_proj_weight, x = var_1088_cast_fp16)[name = tensor<string, []>("op_1142")];
-            tensor<int32, [4]> var_1143 = const()[name = tensor<string, []>("op_1143"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1144 = reshape(shape = var_1143, x = var_1142)[name = tensor<string, []>("op_1144")];
-            tensor<int32, [12]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1145_axis_0 = const()[name = tensor<string, []>("op_1145_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1145_0, tensor<fp16, [1, 32, 1, 512]> var_1145_1, tensor<fp16, [1, 32, 1, 512]> var_1145_2, tensor<fp16, [1, 32, 1, 512]> var_1145_3, tensor<fp16, [1, 32, 1, 512]> var_1145_4, tensor<fp16, [1, 32, 1, 512]> var_1145_5, tensor<fp16, [1, 32, 1, 512]> var_1145_6, tensor<fp16, [1, 32, 1, 512]> var_1145_7, tensor<fp16, [1, 32, 1, 512]> var_1145_8, tensor<fp16, [1, 32, 1, 512]> var_1145_9, tensor<fp16, [1, 32, 1, 512]> var_1145_10, tensor<fp16, [1, 32, 1, 512]> var_1145_11 = split(axis = var_1145_axis_0, split_sizes = tile_22, x = var_1126)[name = tensor<string, []>("op_1145")];
-            tensor<int32, [4]> var_1158_perm_0 = const()[name = tensor<string, []>("op_1158_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1159_axis_0 = const()[name = tensor<string, []>("op_1159_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_7 = transpose(perm = var_1158_perm_0, x = ks_9)[name = tensor<string, []>("transpose_7")];
-            tensor<fp16, [1, 512, 1, 32]> var_1159_0, tensor<fp16, [1, 512, 1, 32]> var_1159_1, tensor<fp16, [1, 512, 1, 32]> var_1159_2, tensor<fp16, [1, 512, 1, 32]> var_1159_3, tensor<fp16, [1, 512, 1, 32]> var_1159_4, tensor<fp16, [1, 512, 1, 32]> var_1159_5, tensor<fp16, [1, 512, 1, 32]> var_1159_6, tensor<fp16, [1, 512, 1, 32]> var_1159_7, tensor<fp16, [1, 512, 1, 32]> var_1159_8, tensor<fp16, [1, 512, 1, 32]> var_1159_9, tensor<fp16, [1, 512, 1, 32]> var_1159_10, tensor<fp16, [1, 512, 1, 32]> var_1159_11 = split(axis = var_1159_axis_0, split_sizes = tile_23, x = transpose_7)[name = tensor<string, []>("op_1159")];
-            tensor<int32, [12]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1172_axis_0 = const()[name = tensor<string, []>("op_1172_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1172_0, tensor<fp16, [1, 32, 1, 512]> var_1172_1, tensor<fp16, [1, 32, 1, 512]> var_1172_2, tensor<fp16, [1, 32, 1, 512]> var_1172_3, tensor<fp16, [1, 32, 1, 512]> var_1172_4, tensor<fp16, [1, 32, 1, 512]> var_1172_5, tensor<fp16, [1, 32, 1, 512]> var_1172_6, tensor<fp16, [1, 32, 1, 512]> var_1172_7, tensor<fp16, [1, 32, 1, 512]> var_1172_8, tensor<fp16, [1, 32, 1, 512]> var_1172_9, tensor<fp16, [1, 32, 1, 512]> var_1172_10, tensor<fp16, [1, 32, 1, 512]> var_1172_11 = split(axis = var_1172_axis_0, split_sizes = tile_24, x = var_1144)[name = tensor<string, []>("op_1172")];
-            tensor<string, []> var_1186_equation_0 = const()[name = tensor<string, []>("op_1186_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1186 = einsum(equation = var_1186_equation_0, values = (var_1159_0, var_1145_0))[name = tensor<string, []>("op_1186")];
-            tensor<fp16, []> var_1187_to_fp16 = const()[name = tensor<string, []>("op_1187_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_97_cast_fp16 = mul(x = var_1186, y = var_1187_to_fp16)[name = tensor<string, []>("w_97_cast_fp16")];
-            tensor<string, []> var_1190_equation_0 = const()[name = tensor<string, []>("op_1190_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1190 = einsum(equation = var_1190_equation_0, values = (var_1159_1, var_1145_1))[name = tensor<string, []>("op_1190")];
-            tensor<fp16, []> var_1191_to_fp16 = const()[name = tensor<string, []>("op_1191_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_99_cast_fp16 = mul(x = var_1190, y = var_1191_to_fp16)[name = tensor<string, []>("w_99_cast_fp16")];
-            tensor<string, []> var_1194_equation_0 = const()[name = tensor<string, []>("op_1194_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1194 = einsum(equation = var_1194_equation_0, values = (var_1159_2, var_1145_2))[name = tensor<string, []>("op_1194")];
-            tensor<fp16, []> var_1195_to_fp16 = const()[name = tensor<string, []>("op_1195_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_101_cast_fp16 = mul(x = var_1194, y = var_1195_to_fp16)[name = tensor<string, []>("w_101_cast_fp16")];
-            tensor<string, []> var_1198_equation_0 = const()[name = tensor<string, []>("op_1198_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1198 = einsum(equation = var_1198_equation_0, values = (var_1159_3, var_1145_3))[name = tensor<string, []>("op_1198")];
-            tensor<fp16, []> var_1199_to_fp16 = const()[name = tensor<string, []>("op_1199_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_103_cast_fp16 = mul(x = var_1198, y = var_1199_to_fp16)[name = tensor<string, []>("w_103_cast_fp16")];
-            tensor<string, []> var_1202_equation_0 = const()[name = tensor<string, []>("op_1202_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1202 = einsum(equation = var_1202_equation_0, values = (var_1159_4, var_1145_4))[name = tensor<string, []>("op_1202")];
-            tensor<fp16, []> var_1203_to_fp16 = const()[name = tensor<string, []>("op_1203_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_105_cast_fp16 = mul(x = var_1202, y = var_1203_to_fp16)[name = tensor<string, []>("w_105_cast_fp16")];
-            tensor<string, []> var_1206_equation_0 = const()[name = tensor<string, []>("op_1206_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1206 = einsum(equation = var_1206_equation_0, values = (var_1159_5, var_1145_5))[name = tensor<string, []>("op_1206")];
-            tensor<fp16, []> var_1207_to_fp16 = const()[name = tensor<string, []>("op_1207_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_107_cast_fp16 = mul(x = var_1206, y = var_1207_to_fp16)[name = tensor<string, []>("w_107_cast_fp16")];
-            tensor<string, []> var_1210_equation_0 = const()[name = tensor<string, []>("op_1210_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1210 = einsum(equation = var_1210_equation_0, values = (var_1159_6, var_1145_6))[name = tensor<string, []>("op_1210")];
-            tensor<fp16, []> var_1211_to_fp16 = const()[name = tensor<string, []>("op_1211_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_109_cast_fp16 = mul(x = var_1210, y = var_1211_to_fp16)[name = tensor<string, []>("w_109_cast_fp16")];
-            tensor<string, []> var_1214_equation_0 = const()[name = tensor<string, []>("op_1214_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1214 = einsum(equation = var_1214_equation_0, values = (var_1159_7, var_1145_7))[name = tensor<string, []>("op_1214")];
-            tensor<fp16, []> var_1215_to_fp16 = const()[name = tensor<string, []>("op_1215_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_111_cast_fp16 = mul(x = var_1214, y = var_1215_to_fp16)[name = tensor<string, []>("w_111_cast_fp16")];
-            tensor<string, []> var_1218_equation_0 = const()[name = tensor<string, []>("op_1218_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1218 = einsum(equation = var_1218_equation_0, values = (var_1159_8, var_1145_8))[name = tensor<string, []>("op_1218")];
-            tensor<fp16, []> var_1219_to_fp16 = const()[name = tensor<string, []>("op_1219_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_113_cast_fp16 = mul(x = var_1218, y = var_1219_to_fp16)[name = tensor<string, []>("w_113_cast_fp16")];
-            tensor<string, []> var_1222_equation_0 = const()[name = tensor<string, []>("op_1222_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1222 = einsum(equation = var_1222_equation_0, values = (var_1159_9, var_1145_9))[name = tensor<string, []>("op_1222")];
-            tensor<fp16, []> var_1223_to_fp16 = const()[name = tensor<string, []>("op_1223_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_115_cast_fp16 = mul(x = var_1222, y = var_1223_to_fp16)[name = tensor<string, []>("w_115_cast_fp16")];
-            tensor<string, []> var_1226_equation_0 = const()[name = tensor<string, []>("op_1226_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1226 = einsum(equation = var_1226_equation_0, values = (var_1159_10, var_1145_10))[name = tensor<string, []>("op_1226")];
-            tensor<fp16, []> var_1227_to_fp16 = const()[name = tensor<string, []>("op_1227_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_117_cast_fp16 = mul(x = var_1226, y = var_1227_to_fp16)[name = tensor<string, []>("w_117_cast_fp16")];
-            tensor<string, []> var_1230_equation_0 = const()[name = tensor<string, []>("op_1230_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1230 = einsum(equation = var_1230_equation_0, values = (var_1159_11, var_1145_11))[name = tensor<string, []>("op_1230")];
-            tensor<fp16, []> var_1231_to_fp16 = const()[name = tensor<string, []>("op_1231_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_119_cast_fp16 = mul(x = var_1230, y = var_1231_to_fp16)[name = tensor<string, []>("w_119_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1233_cast_fp16 = add(x = w_97_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1233_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1234_cast_fp16 = softmax(axis = var_1093, x = var_1233_cast_fp16)[name = tensor<string, []>("op_1234_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1235_cast_fp16 = add(x = w_99_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1235_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1236_cast_fp16 = softmax(axis = var_1093, x = var_1235_cast_fp16)[name = tensor<string, []>("op_1236_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1237_cast_fp16 = add(x = w_101_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1237_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1238_cast_fp16 = softmax(axis = var_1093, x = var_1237_cast_fp16)[name = tensor<string, []>("op_1238_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1239_cast_fp16 = add(x = w_103_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1239_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1240_cast_fp16 = softmax(axis = var_1093, x = var_1239_cast_fp16)[name = tensor<string, []>("op_1240_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1241_cast_fp16 = add(x = w_105_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1241_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1242_cast_fp16 = softmax(axis = var_1093, x = var_1241_cast_fp16)[name = tensor<string, []>("op_1242_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1243_cast_fp16 = add(x = w_107_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1243_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1244_cast_fp16 = softmax(axis = var_1093, x = var_1243_cast_fp16)[name = tensor<string, []>("op_1244_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1245_cast_fp16 = add(x = w_109_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1245_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1246_cast_fp16 = softmax(axis = var_1093, x = var_1245_cast_fp16)[name = tensor<string, []>("op_1246_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1247_cast_fp16 = add(x = w_111_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1247_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1248_cast_fp16 = softmax(axis = var_1093, x = var_1247_cast_fp16)[name = tensor<string, []>("op_1248_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1249_cast_fp16 = add(x = w_113_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1249_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1250_cast_fp16 = softmax(axis = var_1093, x = var_1249_cast_fp16)[name = tensor<string, []>("op_1250_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1251_cast_fp16 = add(x = w_115_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1251_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1252_cast_fp16 = softmax(axis = var_1093, x = var_1251_cast_fp16)[name = tensor<string, []>("op_1252_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1253_cast_fp16 = add(x = w_117_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1253_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1254_cast_fp16 = softmax(axis = var_1093, x = var_1253_cast_fp16)[name = tensor<string, []>("op_1254_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1255_cast_fp16 = add(x = w_119_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1255_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1256_cast_fp16 = softmax(axis = var_1093, x = var_1255_cast_fp16)[name = tensor<string, []>("op_1256_cast_fp16")];
-            tensor<string, []> var_1258_equation_0 = const()[name = tensor<string, []>("op_1258_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1258_cast_fp16 = einsum(equation = var_1258_equation_0, values = (var_1172_0, var_1234_cast_fp16))[name = tensor<string, []>("op_1258_cast_fp16")];
-            tensor<string, []> var_1260_equation_0 = const()[name = tensor<string, []>("op_1260_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1172_1, var_1236_cast_fp16))[name = tensor<string, []>("op_1260_cast_fp16")];
-            tensor<string, []> var_1262_equation_0 = const()[name = tensor<string, []>("op_1262_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1262_cast_fp16 = einsum(equation = var_1262_equation_0, values = (var_1172_2, var_1238_cast_fp16))[name = tensor<string, []>("op_1262_cast_fp16")];
-            tensor<string, []> var_1264_equation_0 = const()[name = tensor<string, []>("op_1264_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1172_3, var_1240_cast_fp16))[name = tensor<string, []>("op_1264_cast_fp16")];
-            tensor<string, []> var_1266_equation_0 = const()[name = tensor<string, []>("op_1266_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1266_cast_fp16 = einsum(equation = var_1266_equation_0, values = (var_1172_4, var_1242_cast_fp16))[name = tensor<string, []>("op_1266_cast_fp16")];
-            tensor<string, []> var_1268_equation_0 = const()[name = tensor<string, []>("op_1268_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1172_5, var_1244_cast_fp16))[name = tensor<string, []>("op_1268_cast_fp16")];
-            tensor<string, []> var_1270_equation_0 = const()[name = tensor<string, []>("op_1270_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1270_cast_fp16 = einsum(equation = var_1270_equation_0, values = (var_1172_6, var_1246_cast_fp16))[name = tensor<string, []>("op_1270_cast_fp16")];
-            tensor<string, []> var_1272_equation_0 = const()[name = tensor<string, []>("op_1272_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1172_7, var_1248_cast_fp16))[name = tensor<string, []>("op_1272_cast_fp16")];
-            tensor<string, []> var_1274_equation_0 = const()[name = tensor<string, []>("op_1274_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1274_cast_fp16 = einsum(equation = var_1274_equation_0, values = (var_1172_8, var_1250_cast_fp16))[name = tensor<string, []>("op_1274_cast_fp16")];
-            tensor<string, []> var_1276_equation_0 = const()[name = tensor<string, []>("op_1276_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1172_9, var_1252_cast_fp16))[name = tensor<string, []>("op_1276_cast_fp16")];
-            tensor<string, []> var_1278_equation_0 = const()[name = tensor<string, []>("op_1278_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1278_cast_fp16 = einsum(equation = var_1278_equation_0, values = (var_1172_10, var_1254_cast_fp16))[name = tensor<string, []>("op_1278_cast_fp16")];
-            tensor<string, []> var_1280_equation_0 = const()[name = tensor<string, []>("op_1280_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1172_11, var_1256_cast_fp16))[name = tensor<string, []>("op_1280_cast_fp16")];
-            tensor<bool, []> var_1282_interleave_0 = const()[name = tensor<string, []>("op_1282_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1282_cast_fp16 = concat(axis = var_1093, interleave = var_1282_interleave_0, values = (var_1258_cast_fp16, var_1260_cast_fp16, var_1262_cast_fp16, var_1264_cast_fp16, var_1266_cast_fp16, var_1268_cast_fp16, var_1270_cast_fp16, var_1272_cast_fp16, var_1274_cast_fp16, var_1276_cast_fp16, var_1278_cast_fp16, var_1280_cast_fp16))[name = tensor<string, []>("op_1282_cast_fp16")];
-            tensor<int32, [2]> var_1286 = const()[name = tensor<string, []>("op_1286"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1288 = const()[name = tensor<string, []>("op_1288"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1290_pad_type_0 = const()[name = tensor<string, []>("op_1290_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1290_pad_0 = const()[name = tensor<string, []>("op_1290_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1290 = conv(bias = layers_4_attention_o_proj_bias, dilations = var_1288, groups = var_1093, pad = var_1290_pad_0, pad_type = var_1290_pad_type_0, strides = var_1286, weight = layers_4_attention_o_proj_weight, x = var_1282_cast_fp16)[name = tensor<string, []>("op_1290")];
-            tensor<bool, []> var_1292_interleave_0 = const()[name = tensor<string, []>("op_1292_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1292 = concat(axis = var_1094, interleave = var_1292_interleave_0, values = var_1290)[name = tensor<string, []>("op_1292")];
-            tensor<fp16, [1, 384, 1, 512]> x_37 = add(x = var_1088_cast_fp16, y = var_1292)[name = tensor<string, []>("x_37")];
-            tensor<fp16, []> var_1091_promoted = const()[name = tensor<string, []>("op_1091_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1092_promoted = const()[name = tensor<string, []>("op_1092_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_39 = clip(alpha = var_1091_promoted, beta = var_1092_promoted, x = x_37)[name = tensor<string, []>("x_39")];
-            tensor<int32, [1]> var_1297 = const()[name = tensor<string, []>("op_1297"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_19 = reduce_mean(axes = var_1297, keep_dims = var_1095, x = x_39)[name = tensor<string, []>("mean_19")];
+            tensor<fp16, [1, 1, 1, 512]> denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0, x = var_1062_cast_fp16)[name = tensor<string, []>("denom_17_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1064_cast_fp16 = mul(x = zero_mean_17, y = denom_17_cast_fp16)[name = tensor<string, []>("op_1064_cast_fp16")];
+            tensor<fp16, [384]> var_1066_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1066_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66802048)))];
+            tensor<fp16, [384]> var_1066_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1066_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66802880)))];
+            tensor<fp16, []> var_1066_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1066_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1066_cast_fp16 = batch_norm(beta = var_1066_beta_0_to_fp16, epsilon = var_1066_epsilon_0_to_fp16, gamma = var_1066_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1064_cast_fp16)[name = tensor<string, []>("op_1066_cast_fp16")];
+            tensor<int32, []> var_1072 = const()[name = tensor<string, []>("op_1072"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_1073 = const()[name = tensor<string, []>("op_1073"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_1074 = const()[name = tensor<string, []>("op_1074"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_1096 = const()[name = tensor<string, []>("op_1096"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1098 = const()[name = tensor<string, []>("op_1098"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1100_pad_type_0 = const()[name = tensor<string, []>("op_1100_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1100_pad_0 = const()[name = tensor<string, []>("op_1100_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1100 = conv(bias = layers_4_attention_q_proj_bias, dilations = var_1098, groups = var_1072, pad = var_1100_pad_0, pad_type = var_1100_pad_type_0, strides = var_1096, weight = layers_4_attention_q_proj_weight, x = var_1066_cast_fp16)[name = tensor<string, []>("op_1100")];
+            tensor<int32, [2]> var_1103 = const()[name = tensor<string, []>("op_1103"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1105 = const()[name = tensor<string, []>("op_1105"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_9_pad_type_0 = const()[name = tensor<string, []>("ks_9_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_9_pad_0 = const()[name = tensor<string, []>("ks_9_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_9 = conv(bias = layers_4_attention_k_proj_bias, dilations = var_1105, groups = var_1072, pad = ks_9_pad_0, pad_type = ks_9_pad_type_0, strides = var_1103, weight = layers_4_attention_k_proj_weight, x = var_1066_cast_fp16)[name = tensor<string, []>("ks_9")];
+            tensor<int32, [2]> var_1110 = const()[name = tensor<string, []>("op_1110"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1112 = const()[name = tensor<string, []>("op_1112"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1114_pad_type_0 = const()[name = tensor<string, []>("op_1114_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1114_pad_0 = const()[name = tensor<string, []>("op_1114_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1114 = conv(bias = layers_4_attention_v_proj_bias, dilations = var_1112, groups = var_1072, pad = var_1114_pad_0, pad_type = var_1114_pad_type_0, strides = var_1110, weight = layers_4_attention_v_proj_weight, x = var_1066_cast_fp16)[name = tensor<string, []>("op_1114")];
+            tensor<int32, [12]> tile_22 = const()[name = tensor<string, []>("tile_22"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1115_axis_0 = const()[name = tensor<string, []>("op_1115_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1115_0, tensor<fp16, [1, 32, 1, 512]> var_1115_1, tensor<fp16, [1, 32, 1, 512]> var_1115_2, tensor<fp16, [1, 32, 1, 512]> var_1115_3, tensor<fp16, [1, 32, 1, 512]> var_1115_4, tensor<fp16, [1, 32, 1, 512]> var_1115_5, tensor<fp16, [1, 32, 1, 512]> var_1115_6, tensor<fp16, [1, 32, 1, 512]> var_1115_7, tensor<fp16, [1, 32, 1, 512]> var_1115_8, tensor<fp16, [1, 32, 1, 512]> var_1115_9, tensor<fp16, [1, 32, 1, 512]> var_1115_10, tensor<fp16, [1, 32, 1, 512]> var_1115_11 = split(axis = var_1115_axis_0, split_sizes = tile_22, x = var_1100)[name = tensor<string, []>("op_1115")];
+            tensor<int32, [4]> var_1128_perm_0 = const()[name = tensor<string, []>("op_1128_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_23 = const()[name = tensor<string, []>("tile_23"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1129_axis_0 = const()[name = tensor<string, []>("op_1129_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_7 = transpose(perm = var_1128_perm_0, x = ks_9)[name = tensor<string, []>("transpose_7")];
+            tensor<fp16, [1, 512, 1, 32]> var_1129_0, tensor<fp16, [1, 512, 1, 32]> var_1129_1, tensor<fp16, [1, 512, 1, 32]> var_1129_2, tensor<fp16, [1, 512, 1, 32]> var_1129_3, tensor<fp16, [1, 512, 1, 32]> var_1129_4, tensor<fp16, [1, 512, 1, 32]> var_1129_5, tensor<fp16, [1, 512, 1, 32]> var_1129_6, tensor<fp16, [1, 512, 1, 32]> var_1129_7, tensor<fp16, [1, 512, 1, 32]> var_1129_8, tensor<fp16, [1, 512, 1, 32]> var_1129_9, tensor<fp16, [1, 512, 1, 32]> var_1129_10, tensor<fp16, [1, 512, 1, 32]> var_1129_11 = split(axis = var_1129_axis_0, split_sizes = tile_23, x = transpose_7)[name = tensor<string, []>("op_1129")];
+            tensor<int32, [12]> tile_24 = const()[name = tensor<string, []>("tile_24"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1142_axis_0 = const()[name = tensor<string, []>("op_1142_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1142_0, tensor<fp16, [1, 32, 1, 512]> var_1142_1, tensor<fp16, [1, 32, 1, 512]> var_1142_2, tensor<fp16, [1, 32, 1, 512]> var_1142_3, tensor<fp16, [1, 32, 1, 512]> var_1142_4, tensor<fp16, [1, 32, 1, 512]> var_1142_5, tensor<fp16, [1, 32, 1, 512]> var_1142_6, tensor<fp16, [1, 32, 1, 512]> var_1142_7, tensor<fp16, [1, 32, 1, 512]> var_1142_8, tensor<fp16, [1, 32, 1, 512]> var_1142_9, tensor<fp16, [1, 32, 1, 512]> var_1142_10, tensor<fp16, [1, 32, 1, 512]> var_1142_11 = split(axis = var_1142_axis_0, split_sizes = tile_24, x = var_1114)[name = tensor<string, []>("op_1142")];
+            tensor<string, []> var_1156_equation_0 = const()[name = tensor<string, []>("op_1156_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1156 = einsum(equation = var_1156_equation_0, values = (var_1129_0, var_1115_0))[name = tensor<string, []>("op_1156")];
+            tensor<fp16, []> var_1157_to_fp16 = const()[name = tensor<string, []>("op_1157_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_97_cast_fp16 = mul(x = var_1156, y = var_1157_to_fp16)[name = tensor<string, []>("w_97_cast_fp16")];
+            tensor<string, []> var_1160_equation_0 = const()[name = tensor<string, []>("op_1160_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1160 = einsum(equation = var_1160_equation_0, values = (var_1129_1, var_1115_1))[name = tensor<string, []>("op_1160")];
+            tensor<fp16, []> var_1161_to_fp16 = const()[name = tensor<string, []>("op_1161_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_99_cast_fp16 = mul(x = var_1160, y = var_1161_to_fp16)[name = tensor<string, []>("w_99_cast_fp16")];
+            tensor<string, []> var_1164_equation_0 = const()[name = tensor<string, []>("op_1164_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1164 = einsum(equation = var_1164_equation_0, values = (var_1129_2, var_1115_2))[name = tensor<string, []>("op_1164")];
+            tensor<fp16, []> var_1165_to_fp16 = const()[name = tensor<string, []>("op_1165_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_101_cast_fp16 = mul(x = var_1164, y = var_1165_to_fp16)[name = tensor<string, []>("w_101_cast_fp16")];
+            tensor<string, []> var_1168_equation_0 = const()[name = tensor<string, []>("op_1168_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1168 = einsum(equation = var_1168_equation_0, values = (var_1129_3, var_1115_3))[name = tensor<string, []>("op_1168")];
+            tensor<fp16, []> var_1169_to_fp16 = const()[name = tensor<string, []>("op_1169_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_103_cast_fp16 = mul(x = var_1168, y = var_1169_to_fp16)[name = tensor<string, []>("w_103_cast_fp16")];
+            tensor<string, []> var_1172_equation_0 = const()[name = tensor<string, []>("op_1172_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1172 = einsum(equation = var_1172_equation_0, values = (var_1129_4, var_1115_4))[name = tensor<string, []>("op_1172")];
+            tensor<fp16, []> var_1173_to_fp16 = const()[name = tensor<string, []>("op_1173_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_105_cast_fp16 = mul(x = var_1172, y = var_1173_to_fp16)[name = tensor<string, []>("w_105_cast_fp16")];
+            tensor<string, []> var_1176_equation_0 = const()[name = tensor<string, []>("op_1176_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1176 = einsum(equation = var_1176_equation_0, values = (var_1129_5, var_1115_5))[name = tensor<string, []>("op_1176")];
+            tensor<fp16, []> var_1177_to_fp16 = const()[name = tensor<string, []>("op_1177_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_107_cast_fp16 = mul(x = var_1176, y = var_1177_to_fp16)[name = tensor<string, []>("w_107_cast_fp16")];
+            tensor<string, []> var_1180_equation_0 = const()[name = tensor<string, []>("op_1180_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1180 = einsum(equation = var_1180_equation_0, values = (var_1129_6, var_1115_6))[name = tensor<string, []>("op_1180")];
+            tensor<fp16, []> var_1181_to_fp16 = const()[name = tensor<string, []>("op_1181_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_109_cast_fp16 = mul(x = var_1180, y = var_1181_to_fp16)[name = tensor<string, []>("w_109_cast_fp16")];
+            tensor<string, []> var_1184_equation_0 = const()[name = tensor<string, []>("op_1184_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1184 = einsum(equation = var_1184_equation_0, values = (var_1129_7, var_1115_7))[name = tensor<string, []>("op_1184")];
+            tensor<fp16, []> var_1185_to_fp16 = const()[name = tensor<string, []>("op_1185_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_111_cast_fp16 = mul(x = var_1184, y = var_1185_to_fp16)[name = tensor<string, []>("w_111_cast_fp16")];
+            tensor<string, []> var_1188_equation_0 = const()[name = tensor<string, []>("op_1188_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1188 = einsum(equation = var_1188_equation_0, values = (var_1129_8, var_1115_8))[name = tensor<string, []>("op_1188")];
+            tensor<fp16, []> var_1189_to_fp16 = const()[name = tensor<string, []>("op_1189_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_113_cast_fp16 = mul(x = var_1188, y = var_1189_to_fp16)[name = tensor<string, []>("w_113_cast_fp16")];
+            tensor<string, []> var_1192_equation_0 = const()[name = tensor<string, []>("op_1192_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1192 = einsum(equation = var_1192_equation_0, values = (var_1129_9, var_1115_9))[name = tensor<string, []>("op_1192")];
+            tensor<fp16, []> var_1193_to_fp16 = const()[name = tensor<string, []>("op_1193_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_115_cast_fp16 = mul(x = var_1192, y = var_1193_to_fp16)[name = tensor<string, []>("w_115_cast_fp16")];
+            tensor<string, []> var_1196_equation_0 = const()[name = tensor<string, []>("op_1196_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1196 = einsum(equation = var_1196_equation_0, values = (var_1129_10, var_1115_10))[name = tensor<string, []>("op_1196")];
+            tensor<fp16, []> var_1197_to_fp16 = const()[name = tensor<string, []>("op_1197_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_117_cast_fp16 = mul(x = var_1196, y = var_1197_to_fp16)[name = tensor<string, []>("w_117_cast_fp16")];
+            tensor<string, []> var_1200_equation_0 = const()[name = tensor<string, []>("op_1200_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1200 = einsum(equation = var_1200_equation_0, values = (var_1129_11, var_1115_11))[name = tensor<string, []>("op_1200")];
+            tensor<fp16, []> var_1201_to_fp16 = const()[name = tensor<string, []>("op_1201_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_119_cast_fp16 = mul(x = var_1200, y = var_1201_to_fp16)[name = tensor<string, []>("w_119_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_131_cast_fp16 = add(x = w_97_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1204_cast_fp16 = softmax(axis = var_1072, x = input_131_cast_fp16)[name = tensor<string, []>("op_1204_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_133_cast_fp16 = add(x = w_99_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1206_cast_fp16 = softmax(axis = var_1072, x = input_133_cast_fp16)[name = tensor<string, []>("op_1206_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_135_cast_fp16 = add(x = w_101_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1208_cast_fp16 = softmax(axis = var_1072, x = input_135_cast_fp16)[name = tensor<string, []>("op_1208_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_137_cast_fp16 = add(x = w_103_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1210_cast_fp16 = softmax(axis = var_1072, x = input_137_cast_fp16)[name = tensor<string, []>("op_1210_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_139_cast_fp16 = add(x = w_105_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_139_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1212_cast_fp16 = softmax(axis = var_1072, x = input_139_cast_fp16)[name = tensor<string, []>("op_1212_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_141_cast_fp16 = add(x = w_107_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1214_cast_fp16 = softmax(axis = var_1072, x = input_141_cast_fp16)[name = tensor<string, []>("op_1214_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_143_cast_fp16 = add(x = w_109_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1216_cast_fp16 = softmax(axis = var_1072, x = input_143_cast_fp16)[name = tensor<string, []>("op_1216_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_145_cast_fp16 = add(x = w_111_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1218_cast_fp16 = softmax(axis = var_1072, x = input_145_cast_fp16)[name = tensor<string, []>("op_1218_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_147_cast_fp16 = add(x = w_113_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1220_cast_fp16 = softmax(axis = var_1072, x = input_147_cast_fp16)[name = tensor<string, []>("op_1220_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_149_cast_fp16 = add(x = w_115_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1222_cast_fp16 = softmax(axis = var_1072, x = input_149_cast_fp16)[name = tensor<string, []>("op_1222_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_151_cast_fp16 = add(x = w_117_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_151_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1224_cast_fp16 = softmax(axis = var_1072, x = input_151_cast_fp16)[name = tensor<string, []>("op_1224_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_153_cast_fp16 = add(x = w_119_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1226_cast_fp16 = softmax(axis = var_1072, x = input_153_cast_fp16)[name = tensor<string, []>("op_1226_cast_fp16")];
+            tensor<string, []> var_1228_equation_0 = const()[name = tensor<string, []>("op_1228_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1142_0, var_1204_cast_fp16))[name = tensor<string, []>("op_1228_cast_fp16")];
+            tensor<string, []> var_1230_equation_0 = const()[name = tensor<string, []>("op_1230_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1230_cast_fp16 = einsum(equation = var_1230_equation_0, values = (var_1142_1, var_1206_cast_fp16))[name = tensor<string, []>("op_1230_cast_fp16")];
+            tensor<string, []> var_1232_equation_0 = const()[name = tensor<string, []>("op_1232_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1142_2, var_1208_cast_fp16))[name = tensor<string, []>("op_1232_cast_fp16")];
+            tensor<string, []> var_1234_equation_0 = const()[name = tensor<string, []>("op_1234_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1234_cast_fp16 = einsum(equation = var_1234_equation_0, values = (var_1142_3, var_1210_cast_fp16))[name = tensor<string, []>("op_1234_cast_fp16")];
+            tensor<string, []> var_1236_equation_0 = const()[name = tensor<string, []>("op_1236_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1142_4, var_1212_cast_fp16))[name = tensor<string, []>("op_1236_cast_fp16")];
+            tensor<string, []> var_1238_equation_0 = const()[name = tensor<string, []>("op_1238_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_1142_5, var_1214_cast_fp16))[name = tensor<string, []>("op_1238_cast_fp16")];
+            tensor<string, []> var_1240_equation_0 = const()[name = tensor<string, []>("op_1240_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1142_6, var_1216_cast_fp16))[name = tensor<string, []>("op_1240_cast_fp16")];
+            tensor<string, []> var_1242_equation_0 = const()[name = tensor<string, []>("op_1242_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_1142_7, var_1218_cast_fp16))[name = tensor<string, []>("op_1242_cast_fp16")];
+            tensor<string, []> var_1244_equation_0 = const()[name = tensor<string, []>("op_1244_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1142_8, var_1220_cast_fp16))[name = tensor<string, []>("op_1244_cast_fp16")];
+            tensor<string, []> var_1246_equation_0 = const()[name = tensor<string, []>("op_1246_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_1142_9, var_1222_cast_fp16))[name = tensor<string, []>("op_1246_cast_fp16")];
+            tensor<string, []> var_1248_equation_0 = const()[name = tensor<string, []>("op_1248_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1142_10, var_1224_cast_fp16))[name = tensor<string, []>("op_1248_cast_fp16")];
+            tensor<string, []> var_1250_equation_0 = const()[name = tensor<string, []>("op_1250_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_1142_11, var_1226_cast_fp16))[name = tensor<string, []>("op_1250_cast_fp16")];
+            tensor<bool, []> var_1252_interleave_0 = const()[name = tensor<string, []>("op_1252_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1252_cast_fp16 = concat(axis = var_1072, interleave = var_1252_interleave_0, values = (var_1228_cast_fp16, var_1230_cast_fp16, var_1232_cast_fp16, var_1234_cast_fp16, var_1236_cast_fp16, var_1238_cast_fp16, var_1240_cast_fp16, var_1242_cast_fp16, var_1244_cast_fp16, var_1246_cast_fp16, var_1248_cast_fp16, var_1250_cast_fp16))[name = tensor<string, []>("op_1252_cast_fp16")];
+            tensor<int32, [2]> var_1256 = const()[name = tensor<string, []>("op_1256"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1258 = const()[name = tensor<string, []>("op_1258"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1260_pad_type_0 = const()[name = tensor<string, []>("op_1260_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1260_pad_0 = const()[name = tensor<string, []>("op_1260_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1260 = conv(bias = layers_4_attention_o_proj_bias, dilations = var_1258, groups = var_1072, pad = var_1260_pad_0, pad_type = var_1260_pad_type_0, strides = var_1256, weight = layers_4_attention_o_proj_weight, x = var_1252_cast_fp16)[name = tensor<string, []>("op_1260")];
+            tensor<bool, []> var_1262_interleave_0 = const()[name = tensor<string, []>("op_1262_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1262 = concat(axis = var_1073, interleave = var_1262_interleave_0, values = var_1260)[name = tensor<string, []>("op_1262")];
+            tensor<fp16, [1, 384, 1, 512]> x_37 = add(x = var_1066_cast_fp16, y = var_1262)[name = tensor<string, []>("x_37")];
+            tensor<fp16, []> var_1069_promoted = const()[name = tensor<string, []>("op_1069_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1070_promoted = const()[name = tensor<string, []>("op_1070_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_39 = clip(alpha = var_1069_promoted, beta = var_1070_promoted, x = x_37)[name = tensor<string, []>("x_39")];
+            tensor<int32, [1]> var_1267 = const()[name = tensor<string, []>("op_1267"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_19 = reduce_mean(axes = var_1267, keep_dims = var_1074, x = x_39)[name = tensor<string, []>("mean_19")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_19 = sub(x = x_39, y = mean_19)[name = tensor<string, []>("zero_mean_19")];
-            tensor<fp16, []> var_1100_promoted = const()[name = tensor<string, []>("op_1100_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1300 = pow(x = zero_mean_19, y = var_1100_promoted)[name = tensor<string, []>("op_1300")];
-            tensor<int32, [1]> var_1301 = const()[name = tensor<string, []>("op_1301"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1302 = reduce_mean(axes = var_1301, keep_dims = var_1095, x = var_1300)[name = tensor<string, []>("op_1302")];
-            tensor<fp16, []> var_1303_to_fp16 = const()[name = tensor<string, []>("op_1303_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1304_cast_fp16 = add(x = var_1302, y = var_1303_to_fp16)[name = tensor<string, []>("op_1304_cast_fp16")];
+            tensor<fp16, []> var_1071_promoted = const()[name = tensor<string, []>("op_1071_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1270 = pow(x = zero_mean_19, y = var_1071_promoted)[name = tensor<string, []>("op_1270")];
+            tensor<int32, [1]> var_1271 = const()[name = tensor<string, []>("op_1271"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1272 = reduce_mean(axes = var_1271, keep_dims = var_1074, x = var_1270)[name = tensor<string, []>("op_1272")];
+            tensor<fp16, []> var_1273_to_fp16 = const()[name = tensor<string, []>("op_1273_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1274_cast_fp16 = add(x = var_1272, y = var_1273_to_fp16)[name = tensor<string, []>("op_1274_cast_fp16")];
             tensor<fp32, []> denom_19_epsilon_0 = const()[name = tensor<string, []>("denom_19_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0, x = var_1304_cast_fp16)[name = tensor<string, []>("denom_19_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1306_cast_fp16 = mul(x = zero_mean_19, y = denom_19_cast_fp16)[name = tensor<string, []>("op_1306_cast_fp16")];
-            tensor<fp16, [384]> var_1308_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1308_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66803712)))];
-            tensor<fp16, [384]> var_1308_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1308_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66804544)))];
-            tensor<fp16, []> var_1308_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1308_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1308_cast_fp16 = batch_norm(beta = var_1308_beta_0_to_fp16, epsilon = var_1308_epsilon_0_to_fp16, gamma = var_1308_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1306_cast_fp16)[name = tensor<string, []>("op_1308_cast_fp16")];
-            tensor<int32, [2]> var_1314 = const()[name = tensor<string, []>("op_1314"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1316 = const()[name = tensor<string, []>("op_1316"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1318_pad_type_0 = const()[name = tensor<string, []>("op_1318_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1318_pad_0 = const()[name = tensor<string, []>("op_1318_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_1318 = conv(bias = layers_4_mlp_fc1_bias, dilations = var_1316, groups = var_1093, pad = var_1318_pad_0, pad_type = var_1318_pad_type_0, strides = var_1314, weight = layers_4_mlp_fc1_weight, x = var_1308_cast_fp16)[name = tensor<string, []>("op_1318")];
-            tensor<string, []> input_39_mode_0 = const()[name = tensor<string, []>("input_39_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_39 = gelu(mode = input_39_mode_0, x = var_1318)[name = tensor<string, []>("input_39")];
-            tensor<int32, [2]> var_1322 = const()[name = tensor<string, []>("op_1322"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1324 = const()[name = tensor<string, []>("op_1324"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1326_pad_type_0 = const()[name = tensor<string, []>("op_1326_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1326_pad_0 = const()[name = tensor<string, []>("op_1326_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1326 = conv(bias = layers_4_mlp_fc2_bias, dilations = var_1324, groups = var_1093, pad = var_1326_pad_0, pad_type = var_1326_pad_type_0, strides = var_1322, weight = layers_4_mlp_fc2_weight, x = input_39)[name = tensor<string, []>("op_1326")];
-            tensor<fp16, [1, 384, 1, 512]> x_41 = add(x = var_1308_cast_fp16, y = var_1326)[name = tensor<string, []>("x_41")];
-            tensor<fp16, []> var_1091_promoted_1 = const()[name = tensor<string, []>("op_1091_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1092_promoted_1 = const()[name = tensor<string, []>("op_1092_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_43 = clip(alpha = var_1091_promoted_1, beta = var_1092_promoted_1, x = x_41)[name = tensor<string, []>("x_43")];
-            tensor<int32, [1]> var_1331 = const()[name = tensor<string, []>("op_1331"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_21 = reduce_mean(axes = var_1331, keep_dims = var_1095, x = x_43)[name = tensor<string, []>("mean_21")];
+            tensor<fp16, [1, 1, 1, 512]> denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0, x = var_1274_cast_fp16)[name = tensor<string, []>("denom_19_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1276_cast_fp16 = mul(x = zero_mean_19, y = denom_19_cast_fp16)[name = tensor<string, []>("op_1276_cast_fp16")];
+            tensor<fp16, [384]> var_1278_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1278_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66803712)))];
+            tensor<fp16, [384]> var_1278_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1278_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66804544)))];
+            tensor<fp16, []> var_1278_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1278_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1278_cast_fp16 = batch_norm(beta = var_1278_beta_0_to_fp16, epsilon = var_1278_epsilon_0_to_fp16, gamma = var_1278_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1276_cast_fp16)[name = tensor<string, []>("op_1278_cast_fp16")];
+            tensor<int32, [2]> var_1284 = const()[name = tensor<string, []>("op_1284"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1286 = const()[name = tensor<string, []>("op_1286"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1288_pad_type_0 = const()[name = tensor<string, []>("op_1288_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1288_pad_0 = const()[name = tensor<string, []>("op_1288_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_1288 = conv(bias = layers_4_mlp_fc1_bias, dilations = var_1286, groups = var_1072, pad = var_1288_pad_0, pad_type = var_1288_pad_type_0, strides = var_1284, weight = layers_4_mlp_fc1_weight, x = var_1278_cast_fp16)[name = tensor<string, []>("op_1288")];
+            tensor<string, []> input_159_mode_0 = const()[name = tensor<string, []>("input_159_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_159 = gelu(mode = input_159_mode_0, x = var_1288)[name = tensor<string, []>("input_159")];
+            tensor<int32, [2]> var_1292 = const()[name = tensor<string, []>("op_1292"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1294 = const()[name = tensor<string, []>("op_1294"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1296_pad_type_0 = const()[name = tensor<string, []>("op_1296_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1296_pad_0 = const()[name = tensor<string, []>("op_1296_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1296 = conv(bias = layers_4_mlp_fc2_bias, dilations = var_1294, groups = var_1072, pad = var_1296_pad_0, pad_type = var_1296_pad_type_0, strides = var_1292, weight = layers_4_mlp_fc2_weight, x = input_159)[name = tensor<string, []>("op_1296")];
+            tensor<fp16, [1, 384, 1, 512]> x_41 = add(x = var_1278_cast_fp16, y = var_1296)[name = tensor<string, []>("x_41")];
+            tensor<fp16, []> var_1069_promoted_1 = const()[name = tensor<string, []>("op_1069_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1070_promoted_1 = const()[name = tensor<string, []>("op_1070_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_43 = clip(alpha = var_1069_promoted_1, beta = var_1070_promoted_1, x = x_41)[name = tensor<string, []>("x_43")];
+            tensor<int32, [1]> var_1301 = const()[name = tensor<string, []>("op_1301"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_21 = reduce_mean(axes = var_1301, keep_dims = var_1074, x = x_43)[name = tensor<string, []>("mean_21")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_21 = sub(x = x_43, y = mean_21)[name = tensor<string, []>("zero_mean_21")];
-            tensor<fp16, []> var_1100_promoted_1 = const()[name = tensor<string, []>("op_1100_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1334 = pow(x = zero_mean_21, y = var_1100_promoted_1)[name = tensor<string, []>("op_1334")];
-            tensor<int32, [1]> var_1335 = const()[name = tensor<string, []>("op_1335"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1336 = reduce_mean(axes = var_1335, keep_dims = var_1095, x = var_1334)[name = tensor<string, []>("op_1336")];
-            tensor<fp16, []> var_1337_to_fp16 = const()[name = tensor<string, []>("op_1337_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1338_cast_fp16 = add(x = var_1336, y = var_1337_to_fp16)[name = tensor<string, []>("op_1338_cast_fp16")];
+            tensor<fp16, []> var_1071_promoted_1 = const()[name = tensor<string, []>("op_1071_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1304 = pow(x = zero_mean_21, y = var_1071_promoted_1)[name = tensor<string, []>("op_1304")];
+            tensor<int32, [1]> var_1305 = const()[name = tensor<string, []>("op_1305"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1306 = reduce_mean(axes = var_1305, keep_dims = var_1074, x = var_1304)[name = tensor<string, []>("op_1306")];
+            tensor<fp16, []> var_1307_to_fp16 = const()[name = tensor<string, []>("op_1307_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1308_cast_fp16 = add(x = var_1306, y = var_1307_to_fp16)[name = tensor<string, []>("op_1308_cast_fp16")];
             tensor<fp32, []> denom_21_epsilon_0 = const()[name = tensor<string, []>("denom_21_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0, x = var_1338_cast_fp16)[name = tensor<string, []>("denom_21_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1340_cast_fp16 = mul(x = zero_mean_21, y = denom_21_cast_fp16)[name = tensor<string, []>("op_1340_cast_fp16")];
-            tensor<fp16, [384]> var_1342_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1342_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66805376)))];
-            tensor<fp16, [384]> var_1342_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1342_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66806208)))];
-            tensor<fp16, []> var_1342_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1342_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1342_cast_fp16 = batch_norm(beta = var_1342_beta_0_to_fp16, epsilon = var_1342_epsilon_0_to_fp16, gamma = var_1342_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1340_cast_fp16)[name = tensor<string, []>("op_1342_cast_fp16")];
-            tensor<int32, []> var_1347 = const()[name = tensor<string, []>("op_1347"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_1348 = const()[name = tensor<string, []>("op_1348"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_1349 = const()[name = tensor<string, []>("op_1349"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_1374 = const()[name = tensor<string, []>("op_1374"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1376 = const()[name = tensor<string, []>("op_1376"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1378_pad_type_0 = const()[name = tensor<string, []>("op_1378_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1378_pad_0 = const()[name = tensor<string, []>("op_1378_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1378 = conv(bias = layers_5_attention_q_proj_bias, dilations = var_1376, groups = var_1347, pad = var_1378_pad_0, pad_type = var_1378_pad_type_0, strides = var_1374, weight = layers_5_attention_q_proj_weight, x = var_1342_cast_fp16)[name = tensor<string, []>("op_1378")];
-            tensor<int32, [4]> var_1379 = const()[name = tensor<string, []>("op_1379"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1380 = reshape(shape = var_1379, x = var_1378)[name = tensor<string, []>("op_1380")];
-            tensor<int32, [2]> var_1383 = const()[name = tensor<string, []>("op_1383"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1385 = const()[name = tensor<string, []>("op_1385"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1387_pad_type_0 = const()[name = tensor<string, []>("op_1387_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1387_pad_0 = const()[name = tensor<string, []>("op_1387_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1387 = conv(bias = layers_5_attention_k_proj_bias, dilations = var_1385, groups = var_1347, pad = var_1387_pad_0, pad_type = var_1387_pad_type_0, strides = var_1383, weight = layers_5_attention_k_proj_weight, x = var_1342_cast_fp16)[name = tensor<string, []>("op_1387")];
-            tensor<int32, [4]> var_1388 = const()[name = tensor<string, []>("op_1388"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_11 = reshape(shape = var_1388, x = var_1387)[name = tensor<string, []>("ks_11")];
-            tensor<int32, [2]> var_1392 = const()[name = tensor<string, []>("op_1392"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1394 = const()[name = tensor<string, []>("op_1394"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1396_pad_type_0 = const()[name = tensor<string, []>("op_1396_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1396_pad_0 = const()[name = tensor<string, []>("op_1396_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1396 = conv(bias = layers_5_attention_v_proj_bias, dilations = var_1394, groups = var_1347, pad = var_1396_pad_0, pad_type = var_1396_pad_type_0, strides = var_1392, weight = layers_5_attention_v_proj_weight, x = var_1342_cast_fp16)[name = tensor<string, []>("op_1396")];
-            tensor<int32, [4]> var_1397 = const()[name = tensor<string, []>("op_1397"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1398 = reshape(shape = var_1397, x = var_1396)[name = tensor<string, []>("op_1398")];
-            tensor<int32, [12]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1399_axis_0 = const()[name = tensor<string, []>("op_1399_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1399_0, tensor<fp16, [1, 32, 1, 512]> var_1399_1, tensor<fp16, [1, 32, 1, 512]> var_1399_2, tensor<fp16, [1, 32, 1, 512]> var_1399_3, tensor<fp16, [1, 32, 1, 512]> var_1399_4, tensor<fp16, [1, 32, 1, 512]> var_1399_5, tensor<fp16, [1, 32, 1, 512]> var_1399_6, tensor<fp16, [1, 32, 1, 512]> var_1399_7, tensor<fp16, [1, 32, 1, 512]> var_1399_8, tensor<fp16, [1, 32, 1, 512]> var_1399_9, tensor<fp16, [1, 32, 1, 512]> var_1399_10, tensor<fp16, [1, 32, 1, 512]> var_1399_11 = split(axis = var_1399_axis_0, split_sizes = tile_27, x = var_1380)[name = tensor<string, []>("op_1399")];
-            tensor<int32, [4]> var_1412_perm_0 = const()[name = tensor<string, []>("op_1412_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1413_axis_0 = const()[name = tensor<string, []>("op_1413_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_6 = transpose(perm = var_1412_perm_0, x = ks_11)[name = tensor<string, []>("transpose_6")];
-            tensor<fp16, [1, 512, 1, 32]> var_1413_0, tensor<fp16, [1, 512, 1, 32]> var_1413_1, tensor<fp16, [1, 512, 1, 32]> var_1413_2, tensor<fp16, [1, 512, 1, 32]> var_1413_3, tensor<fp16, [1, 512, 1, 32]> var_1413_4, tensor<fp16, [1, 512, 1, 32]> var_1413_5, tensor<fp16, [1, 512, 1, 32]> var_1413_6, tensor<fp16, [1, 512, 1, 32]> var_1413_7, tensor<fp16, [1, 512, 1, 32]> var_1413_8, tensor<fp16, [1, 512, 1, 32]> var_1413_9, tensor<fp16, [1, 512, 1, 32]> var_1413_10, tensor<fp16, [1, 512, 1, 32]> var_1413_11 = split(axis = var_1413_axis_0, split_sizes = tile_28, x = transpose_6)[name = tensor<string, []>("op_1413")];
-            tensor<int32, [12]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1426_axis_0 = const()[name = tensor<string, []>("op_1426_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1426_0, tensor<fp16, [1, 32, 1, 512]> var_1426_1, tensor<fp16, [1, 32, 1, 512]> var_1426_2, tensor<fp16, [1, 32, 1, 512]> var_1426_3, tensor<fp16, [1, 32, 1, 512]> var_1426_4, tensor<fp16, [1, 32, 1, 512]> var_1426_5, tensor<fp16, [1, 32, 1, 512]> var_1426_6, tensor<fp16, [1, 32, 1, 512]> var_1426_7, tensor<fp16, [1, 32, 1, 512]> var_1426_8, tensor<fp16, [1, 32, 1, 512]> var_1426_9, tensor<fp16, [1, 32, 1, 512]> var_1426_10, tensor<fp16, [1, 32, 1, 512]> var_1426_11 = split(axis = var_1426_axis_0, split_sizes = tile_29, x = var_1398)[name = tensor<string, []>("op_1426")];
-            tensor<string, []> var_1440_equation_0 = const()[name = tensor<string, []>("op_1440_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1440 = einsum(equation = var_1440_equation_0, values = (var_1413_0, var_1399_0))[name = tensor<string, []>("op_1440")];
-            tensor<fp16, []> var_1441_to_fp16 = const()[name = tensor<string, []>("op_1441_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_121_cast_fp16 = mul(x = var_1440, y = var_1441_to_fp16)[name = tensor<string, []>("w_121_cast_fp16")];
-            tensor<string, []> var_1444_equation_0 = const()[name = tensor<string, []>("op_1444_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1444 = einsum(equation = var_1444_equation_0, values = (var_1413_1, var_1399_1))[name = tensor<string, []>("op_1444")];
-            tensor<fp16, []> var_1445_to_fp16 = const()[name = tensor<string, []>("op_1445_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_123_cast_fp16 = mul(x = var_1444, y = var_1445_to_fp16)[name = tensor<string, []>("w_123_cast_fp16")];
-            tensor<string, []> var_1448_equation_0 = const()[name = tensor<string, []>("op_1448_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1448 = einsum(equation = var_1448_equation_0, values = (var_1413_2, var_1399_2))[name = tensor<string, []>("op_1448")];
-            tensor<fp16, []> var_1449_to_fp16 = const()[name = tensor<string, []>("op_1449_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_125_cast_fp16 = mul(x = var_1448, y = var_1449_to_fp16)[name = tensor<string, []>("w_125_cast_fp16")];
-            tensor<string, []> var_1452_equation_0 = const()[name = tensor<string, []>("op_1452_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1452 = einsum(equation = var_1452_equation_0, values = (var_1413_3, var_1399_3))[name = tensor<string, []>("op_1452")];
-            tensor<fp16, []> var_1453_to_fp16 = const()[name = tensor<string, []>("op_1453_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_127_cast_fp16 = mul(x = var_1452, y = var_1453_to_fp16)[name = tensor<string, []>("w_127_cast_fp16")];
-            tensor<string, []> var_1456_equation_0 = const()[name = tensor<string, []>("op_1456_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1456 = einsum(equation = var_1456_equation_0, values = (var_1413_4, var_1399_4))[name = tensor<string, []>("op_1456")];
-            tensor<fp16, []> var_1457_to_fp16 = const()[name = tensor<string, []>("op_1457_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_129_cast_fp16 = mul(x = var_1456, y = var_1457_to_fp16)[name = tensor<string, []>("w_129_cast_fp16")];
-            tensor<string, []> var_1460_equation_0 = const()[name = tensor<string, []>("op_1460_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1460 = einsum(equation = var_1460_equation_0, values = (var_1413_5, var_1399_5))[name = tensor<string, []>("op_1460")];
-            tensor<fp16, []> var_1461_to_fp16 = const()[name = tensor<string, []>("op_1461_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_131_cast_fp16 = mul(x = var_1460, y = var_1461_to_fp16)[name = tensor<string, []>("w_131_cast_fp16")];
-            tensor<string, []> var_1464_equation_0 = const()[name = tensor<string, []>("op_1464_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1464 = einsum(equation = var_1464_equation_0, values = (var_1413_6, var_1399_6))[name = tensor<string, []>("op_1464")];
-            tensor<fp16, []> var_1465_to_fp16 = const()[name = tensor<string, []>("op_1465_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_133_cast_fp16 = mul(x = var_1464, y = var_1465_to_fp16)[name = tensor<string, []>("w_133_cast_fp16")];
-            tensor<string, []> var_1468_equation_0 = const()[name = tensor<string, []>("op_1468_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1468 = einsum(equation = var_1468_equation_0, values = (var_1413_7, var_1399_7))[name = tensor<string, []>("op_1468")];
-            tensor<fp16, []> var_1469_to_fp16 = const()[name = tensor<string, []>("op_1469_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_135_cast_fp16 = mul(x = var_1468, y = var_1469_to_fp16)[name = tensor<string, []>("w_135_cast_fp16")];
-            tensor<string, []> var_1472_equation_0 = const()[name = tensor<string, []>("op_1472_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1472 = einsum(equation = var_1472_equation_0, values = (var_1413_8, var_1399_8))[name = tensor<string, []>("op_1472")];
-            tensor<fp16, []> var_1473_to_fp16 = const()[name = tensor<string, []>("op_1473_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_137_cast_fp16 = mul(x = var_1472, y = var_1473_to_fp16)[name = tensor<string, []>("w_137_cast_fp16")];
-            tensor<string, []> var_1476_equation_0 = const()[name = tensor<string, []>("op_1476_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1476 = einsum(equation = var_1476_equation_0, values = (var_1413_9, var_1399_9))[name = tensor<string, []>("op_1476")];
-            tensor<fp16, []> var_1477_to_fp16 = const()[name = tensor<string, []>("op_1477_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_139_cast_fp16 = mul(x = var_1476, y = var_1477_to_fp16)[name = tensor<string, []>("w_139_cast_fp16")];
-            tensor<string, []> var_1480_equation_0 = const()[name = tensor<string, []>("op_1480_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1480 = einsum(equation = var_1480_equation_0, values = (var_1413_10, var_1399_10))[name = tensor<string, []>("op_1480")];
-            tensor<fp16, []> var_1481_to_fp16 = const()[name = tensor<string, []>("op_1481_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_141_cast_fp16 = mul(x = var_1480, y = var_1481_to_fp16)[name = tensor<string, []>("w_141_cast_fp16")];
-            tensor<string, []> var_1484_equation_0 = const()[name = tensor<string, []>("op_1484_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1484 = einsum(equation = var_1484_equation_0, values = (var_1413_11, var_1399_11))[name = tensor<string, []>("op_1484")];
-            tensor<fp16, []> var_1485_to_fp16 = const()[name = tensor<string, []>("op_1485_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_143_cast_fp16 = mul(x = var_1484, y = var_1485_to_fp16)[name = tensor<string, []>("w_143_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1487_cast_fp16 = add(x = w_121_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1487_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1488_cast_fp16 = softmax(axis = var_1347, x = var_1487_cast_fp16)[name = tensor<string, []>("op_1488_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1489_cast_fp16 = add(x = w_123_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1489_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1490_cast_fp16 = softmax(axis = var_1347, x = var_1489_cast_fp16)[name = tensor<string, []>("op_1490_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1491_cast_fp16 = add(x = w_125_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1491_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1492_cast_fp16 = softmax(axis = var_1347, x = var_1491_cast_fp16)[name = tensor<string, []>("op_1492_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1493_cast_fp16 = add(x = w_127_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1493_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1494_cast_fp16 = softmax(axis = var_1347, x = var_1493_cast_fp16)[name = tensor<string, []>("op_1494_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1495_cast_fp16 = add(x = w_129_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1495_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1496_cast_fp16 = softmax(axis = var_1347, x = var_1495_cast_fp16)[name = tensor<string, []>("op_1496_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1497_cast_fp16 = add(x = w_131_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1497_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1498_cast_fp16 = softmax(axis = var_1347, x = var_1497_cast_fp16)[name = tensor<string, []>("op_1498_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1499_cast_fp16 = add(x = w_133_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1499_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1500_cast_fp16 = softmax(axis = var_1347, x = var_1499_cast_fp16)[name = tensor<string, []>("op_1500_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1501_cast_fp16 = add(x = w_135_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1501_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1502_cast_fp16 = softmax(axis = var_1347, x = var_1501_cast_fp16)[name = tensor<string, []>("op_1502_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1503_cast_fp16 = add(x = w_137_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1503_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1504_cast_fp16 = softmax(axis = var_1347, x = var_1503_cast_fp16)[name = tensor<string, []>("op_1504_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1505_cast_fp16 = add(x = w_139_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1505_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1506_cast_fp16 = softmax(axis = var_1347, x = var_1505_cast_fp16)[name = tensor<string, []>("op_1506_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1507_cast_fp16 = add(x = w_141_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1507_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1508_cast_fp16 = softmax(axis = var_1347, x = var_1507_cast_fp16)[name = tensor<string, []>("op_1508_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1509_cast_fp16 = add(x = w_143_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1509_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1510_cast_fp16 = softmax(axis = var_1347, x = var_1509_cast_fp16)[name = tensor<string, []>("op_1510_cast_fp16")];
-            tensor<string, []> var_1512_equation_0 = const()[name = tensor<string, []>("op_1512_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1512_cast_fp16 = einsum(equation = var_1512_equation_0, values = (var_1426_0, var_1488_cast_fp16))[name = tensor<string, []>("op_1512_cast_fp16")];
-            tensor<string, []> var_1514_equation_0 = const()[name = tensor<string, []>("op_1514_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1426_1, var_1490_cast_fp16))[name = tensor<string, []>("op_1514_cast_fp16")];
-            tensor<string, []> var_1516_equation_0 = const()[name = tensor<string, []>("op_1516_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1516_cast_fp16 = einsum(equation = var_1516_equation_0, values = (var_1426_2, var_1492_cast_fp16))[name = tensor<string, []>("op_1516_cast_fp16")];
-            tensor<string, []> var_1518_equation_0 = const()[name = tensor<string, []>("op_1518_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1426_3, var_1494_cast_fp16))[name = tensor<string, []>("op_1518_cast_fp16")];
-            tensor<string, []> var_1520_equation_0 = const()[name = tensor<string, []>("op_1520_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1520_cast_fp16 = einsum(equation = var_1520_equation_0, values = (var_1426_4, var_1496_cast_fp16))[name = tensor<string, []>("op_1520_cast_fp16")];
-            tensor<string, []> var_1522_equation_0 = const()[name = tensor<string, []>("op_1522_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1426_5, var_1498_cast_fp16))[name = tensor<string, []>("op_1522_cast_fp16")];
-            tensor<string, []> var_1524_equation_0 = const()[name = tensor<string, []>("op_1524_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1524_cast_fp16 = einsum(equation = var_1524_equation_0, values = (var_1426_6, var_1500_cast_fp16))[name = tensor<string, []>("op_1524_cast_fp16")];
-            tensor<string, []> var_1526_equation_0 = const()[name = tensor<string, []>("op_1526_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1426_7, var_1502_cast_fp16))[name = tensor<string, []>("op_1526_cast_fp16")];
-            tensor<string, []> var_1528_equation_0 = const()[name = tensor<string, []>("op_1528_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1528_cast_fp16 = einsum(equation = var_1528_equation_0, values = (var_1426_8, var_1504_cast_fp16))[name = tensor<string, []>("op_1528_cast_fp16")];
-            tensor<string, []> var_1530_equation_0 = const()[name = tensor<string, []>("op_1530_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1426_9, var_1506_cast_fp16))[name = tensor<string, []>("op_1530_cast_fp16")];
-            tensor<string, []> var_1532_equation_0 = const()[name = tensor<string, []>("op_1532_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1532_cast_fp16 = einsum(equation = var_1532_equation_0, values = (var_1426_10, var_1508_cast_fp16))[name = tensor<string, []>("op_1532_cast_fp16")];
-            tensor<string, []> var_1534_equation_0 = const()[name = tensor<string, []>("op_1534_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1426_11, var_1510_cast_fp16))[name = tensor<string, []>("op_1534_cast_fp16")];
-            tensor<bool, []> var_1536_interleave_0 = const()[name = tensor<string, []>("op_1536_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1536_cast_fp16 = concat(axis = var_1347, interleave = var_1536_interleave_0, values = (var_1512_cast_fp16, var_1514_cast_fp16, var_1516_cast_fp16, var_1518_cast_fp16, var_1520_cast_fp16, var_1522_cast_fp16, var_1524_cast_fp16, var_1526_cast_fp16, var_1528_cast_fp16, var_1530_cast_fp16, var_1532_cast_fp16, var_1534_cast_fp16))[name = tensor<string, []>("op_1536_cast_fp16")];
-            tensor<int32, [2]> var_1540 = const()[name = tensor<string, []>("op_1540"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1542 = const()[name = tensor<string, []>("op_1542"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1544_pad_type_0 = const()[name = tensor<string, []>("op_1544_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1544_pad_0 = const()[name = tensor<string, []>("op_1544_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1544 = conv(bias = layers_5_attention_o_proj_bias, dilations = var_1542, groups = var_1347, pad = var_1544_pad_0, pad_type = var_1544_pad_type_0, strides = var_1540, weight = layers_5_attention_o_proj_weight, x = var_1536_cast_fp16)[name = tensor<string, []>("op_1544")];
-            tensor<bool, []> var_1546_interleave_0 = const()[name = tensor<string, []>("op_1546_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1546 = concat(axis = var_1348, interleave = var_1546_interleave_0, values = var_1544)[name = tensor<string, []>("op_1546")];
-            tensor<fp16, [1, 384, 1, 512]> x_45 = add(x = var_1342_cast_fp16, y = var_1546)[name = tensor<string, []>("x_45")];
-            tensor<fp16, []> var_1345_promoted = const()[name = tensor<string, []>("op_1345_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1346_promoted = const()[name = tensor<string, []>("op_1346_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_47 = clip(alpha = var_1345_promoted, beta = var_1346_promoted, x = x_45)[name = tensor<string, []>("x_47")];
-            tensor<int32, [1]> var_1551 = const()[name = tensor<string, []>("op_1551"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_23 = reduce_mean(axes = var_1551, keep_dims = var_1349, x = x_47)[name = tensor<string, []>("mean_23")];
+            tensor<fp16, [1, 1, 1, 512]> denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0, x = var_1308_cast_fp16)[name = tensor<string, []>("denom_21_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1310_cast_fp16 = mul(x = zero_mean_21, y = denom_21_cast_fp16)[name = tensor<string, []>("op_1310_cast_fp16")];
+            tensor<fp16, [384]> var_1312_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1312_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66805376)))];
+            tensor<fp16, [384]> var_1312_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1312_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66806208)))];
+            tensor<fp16, []> var_1312_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1312_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1312_cast_fp16 = batch_norm(beta = var_1312_beta_0_to_fp16, epsilon = var_1312_epsilon_0_to_fp16, gamma = var_1312_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1310_cast_fp16)[name = tensor<string, []>("op_1312_cast_fp16")];
+            tensor<int32, []> var_1318 = const()[name = tensor<string, []>("op_1318"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_1319 = const()[name = tensor<string, []>("op_1319"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_1320 = const()[name = tensor<string, []>("op_1320"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_1342 = const()[name = tensor<string, []>("op_1342"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1344 = const()[name = tensor<string, []>("op_1344"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1346_pad_type_0 = const()[name = tensor<string, []>("op_1346_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1346_pad_0 = const()[name = tensor<string, []>("op_1346_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1346 = conv(bias = layers_5_attention_q_proj_bias, dilations = var_1344, groups = var_1318, pad = var_1346_pad_0, pad_type = var_1346_pad_type_0, strides = var_1342, weight = layers_5_attention_q_proj_weight, x = var_1312_cast_fp16)[name = tensor<string, []>("op_1346")];
+            tensor<int32, [2]> var_1349 = const()[name = tensor<string, []>("op_1349"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1351 = const()[name = tensor<string, []>("op_1351"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_11_pad_type_0 = const()[name = tensor<string, []>("ks_11_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_11_pad_0 = const()[name = tensor<string, []>("ks_11_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_11 = conv(bias = layers_5_attention_k_proj_bias, dilations = var_1351, groups = var_1318, pad = ks_11_pad_0, pad_type = ks_11_pad_type_0, strides = var_1349, weight = layers_5_attention_k_proj_weight, x = var_1312_cast_fp16)[name = tensor<string, []>("ks_11")];
+            tensor<int32, [2]> var_1356 = const()[name = tensor<string, []>("op_1356"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1358 = const()[name = tensor<string, []>("op_1358"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1360_pad_type_0 = const()[name = tensor<string, []>("op_1360_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1360_pad_0 = const()[name = tensor<string, []>("op_1360_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1360 = conv(bias = layers_5_attention_v_proj_bias, dilations = var_1358, groups = var_1318, pad = var_1360_pad_0, pad_type = var_1360_pad_type_0, strides = var_1356, weight = layers_5_attention_v_proj_weight, x = var_1312_cast_fp16)[name = tensor<string, []>("op_1360")];
+            tensor<int32, [12]> tile_27 = const()[name = tensor<string, []>("tile_27"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1361_axis_0 = const()[name = tensor<string, []>("op_1361_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1361_0, tensor<fp16, [1, 32, 1, 512]> var_1361_1, tensor<fp16, [1, 32, 1, 512]> var_1361_2, tensor<fp16, [1, 32, 1, 512]> var_1361_3, tensor<fp16, [1, 32, 1, 512]> var_1361_4, tensor<fp16, [1, 32, 1, 512]> var_1361_5, tensor<fp16, [1, 32, 1, 512]> var_1361_6, tensor<fp16, [1, 32, 1, 512]> var_1361_7, tensor<fp16, [1, 32, 1, 512]> var_1361_8, tensor<fp16, [1, 32, 1, 512]> var_1361_9, tensor<fp16, [1, 32, 1, 512]> var_1361_10, tensor<fp16, [1, 32, 1, 512]> var_1361_11 = split(axis = var_1361_axis_0, split_sizes = tile_27, x = var_1346)[name = tensor<string, []>("op_1361")];
+            tensor<int32, [4]> var_1374_perm_0 = const()[name = tensor<string, []>("op_1374_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_28 = const()[name = tensor<string, []>("tile_28"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1375_axis_0 = const()[name = tensor<string, []>("op_1375_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_6 = transpose(perm = var_1374_perm_0, x = ks_11)[name = tensor<string, []>("transpose_6")];
+            tensor<fp16, [1, 512, 1, 32]> var_1375_0, tensor<fp16, [1, 512, 1, 32]> var_1375_1, tensor<fp16, [1, 512, 1, 32]> var_1375_2, tensor<fp16, [1, 512, 1, 32]> var_1375_3, tensor<fp16, [1, 512, 1, 32]> var_1375_4, tensor<fp16, [1, 512, 1, 32]> var_1375_5, tensor<fp16, [1, 512, 1, 32]> var_1375_6, tensor<fp16, [1, 512, 1, 32]> var_1375_7, tensor<fp16, [1, 512, 1, 32]> var_1375_8, tensor<fp16, [1, 512, 1, 32]> var_1375_9, tensor<fp16, [1, 512, 1, 32]> var_1375_10, tensor<fp16, [1, 512, 1, 32]> var_1375_11 = split(axis = var_1375_axis_0, split_sizes = tile_28, x = transpose_6)[name = tensor<string, []>("op_1375")];
+            tensor<int32, [12]> tile_29 = const()[name = tensor<string, []>("tile_29"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1388_axis_0 = const()[name = tensor<string, []>("op_1388_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1388_0, tensor<fp16, [1, 32, 1, 512]> var_1388_1, tensor<fp16, [1, 32, 1, 512]> var_1388_2, tensor<fp16, [1, 32, 1, 512]> var_1388_3, tensor<fp16, [1, 32, 1, 512]> var_1388_4, tensor<fp16, [1, 32, 1, 512]> var_1388_5, tensor<fp16, [1, 32, 1, 512]> var_1388_6, tensor<fp16, [1, 32, 1, 512]> var_1388_7, tensor<fp16, [1, 32, 1, 512]> var_1388_8, tensor<fp16, [1, 32, 1, 512]> var_1388_9, tensor<fp16, [1, 32, 1, 512]> var_1388_10, tensor<fp16, [1, 32, 1, 512]> var_1388_11 = split(axis = var_1388_axis_0, split_sizes = tile_29, x = var_1360)[name = tensor<string, []>("op_1388")];
+            tensor<string, []> var_1402_equation_0 = const()[name = tensor<string, []>("op_1402_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1402 = einsum(equation = var_1402_equation_0, values = (var_1375_0, var_1361_0))[name = tensor<string, []>("op_1402")];
+            tensor<fp16, []> var_1403_to_fp16 = const()[name = tensor<string, []>("op_1403_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_121_cast_fp16 = mul(x = var_1402, y = var_1403_to_fp16)[name = tensor<string, []>("w_121_cast_fp16")];
+            tensor<string, []> var_1406_equation_0 = const()[name = tensor<string, []>("op_1406_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1406 = einsum(equation = var_1406_equation_0, values = (var_1375_1, var_1361_1))[name = tensor<string, []>("op_1406")];
+            tensor<fp16, []> var_1407_to_fp16 = const()[name = tensor<string, []>("op_1407_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_123_cast_fp16 = mul(x = var_1406, y = var_1407_to_fp16)[name = tensor<string, []>("w_123_cast_fp16")];
+            tensor<string, []> var_1410_equation_0 = const()[name = tensor<string, []>("op_1410_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1410 = einsum(equation = var_1410_equation_0, values = (var_1375_2, var_1361_2))[name = tensor<string, []>("op_1410")];
+            tensor<fp16, []> var_1411_to_fp16 = const()[name = tensor<string, []>("op_1411_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_125_cast_fp16 = mul(x = var_1410, y = var_1411_to_fp16)[name = tensor<string, []>("w_125_cast_fp16")];
+            tensor<string, []> var_1414_equation_0 = const()[name = tensor<string, []>("op_1414_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1414 = einsum(equation = var_1414_equation_0, values = (var_1375_3, var_1361_3))[name = tensor<string, []>("op_1414")];
+            tensor<fp16, []> var_1415_to_fp16 = const()[name = tensor<string, []>("op_1415_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_127_cast_fp16 = mul(x = var_1414, y = var_1415_to_fp16)[name = tensor<string, []>("w_127_cast_fp16")];
+            tensor<string, []> var_1418_equation_0 = const()[name = tensor<string, []>("op_1418_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1418 = einsum(equation = var_1418_equation_0, values = (var_1375_4, var_1361_4))[name = tensor<string, []>("op_1418")];
+            tensor<fp16, []> var_1419_to_fp16 = const()[name = tensor<string, []>("op_1419_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_129_cast_fp16 = mul(x = var_1418, y = var_1419_to_fp16)[name = tensor<string, []>("w_129_cast_fp16")];
+            tensor<string, []> var_1422_equation_0 = const()[name = tensor<string, []>("op_1422_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1422 = einsum(equation = var_1422_equation_0, values = (var_1375_5, var_1361_5))[name = tensor<string, []>("op_1422")];
+            tensor<fp16, []> var_1423_to_fp16 = const()[name = tensor<string, []>("op_1423_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_131_cast_fp16 = mul(x = var_1422, y = var_1423_to_fp16)[name = tensor<string, []>("w_131_cast_fp16")];
+            tensor<string, []> var_1426_equation_0 = const()[name = tensor<string, []>("op_1426_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1426 = einsum(equation = var_1426_equation_0, values = (var_1375_6, var_1361_6))[name = tensor<string, []>("op_1426")];
+            tensor<fp16, []> var_1427_to_fp16 = const()[name = tensor<string, []>("op_1427_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_133_cast_fp16 = mul(x = var_1426, y = var_1427_to_fp16)[name = tensor<string, []>("w_133_cast_fp16")];
+            tensor<string, []> var_1430_equation_0 = const()[name = tensor<string, []>("op_1430_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1430 = einsum(equation = var_1430_equation_0, values = (var_1375_7, var_1361_7))[name = tensor<string, []>("op_1430")];
+            tensor<fp16, []> var_1431_to_fp16 = const()[name = tensor<string, []>("op_1431_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_135_cast_fp16 = mul(x = var_1430, y = var_1431_to_fp16)[name = tensor<string, []>("w_135_cast_fp16")];
+            tensor<string, []> var_1434_equation_0 = const()[name = tensor<string, []>("op_1434_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1434 = einsum(equation = var_1434_equation_0, values = (var_1375_8, var_1361_8))[name = tensor<string, []>("op_1434")];
+            tensor<fp16, []> var_1435_to_fp16 = const()[name = tensor<string, []>("op_1435_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_137_cast_fp16 = mul(x = var_1434, y = var_1435_to_fp16)[name = tensor<string, []>("w_137_cast_fp16")];
+            tensor<string, []> var_1438_equation_0 = const()[name = tensor<string, []>("op_1438_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1438 = einsum(equation = var_1438_equation_0, values = (var_1375_9, var_1361_9))[name = tensor<string, []>("op_1438")];
+            tensor<fp16, []> var_1439_to_fp16 = const()[name = tensor<string, []>("op_1439_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_139_cast_fp16 = mul(x = var_1438, y = var_1439_to_fp16)[name = tensor<string, []>("w_139_cast_fp16")];
+            tensor<string, []> var_1442_equation_0 = const()[name = tensor<string, []>("op_1442_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1442 = einsum(equation = var_1442_equation_0, values = (var_1375_10, var_1361_10))[name = tensor<string, []>("op_1442")];
+            tensor<fp16, []> var_1443_to_fp16 = const()[name = tensor<string, []>("op_1443_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_141_cast_fp16 = mul(x = var_1442, y = var_1443_to_fp16)[name = tensor<string, []>("w_141_cast_fp16")];
+            tensor<string, []> var_1446_equation_0 = const()[name = tensor<string, []>("op_1446_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1446 = einsum(equation = var_1446_equation_0, values = (var_1375_11, var_1361_11))[name = tensor<string, []>("op_1446")];
+            tensor<fp16, []> var_1447_to_fp16 = const()[name = tensor<string, []>("op_1447_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_143_cast_fp16 = mul(x = var_1446, y = var_1447_to_fp16)[name = tensor<string, []>("w_143_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_163_cast_fp16 = add(x = w_121_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_163_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1450_cast_fp16 = softmax(axis = var_1318, x = input_163_cast_fp16)[name = tensor<string, []>("op_1450_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_165_cast_fp16 = add(x = w_123_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_165_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1452_cast_fp16 = softmax(axis = var_1318, x = input_165_cast_fp16)[name = tensor<string, []>("op_1452_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_167_cast_fp16 = add(x = w_125_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_167_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1454_cast_fp16 = softmax(axis = var_1318, x = input_167_cast_fp16)[name = tensor<string, []>("op_1454_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_169_cast_fp16 = add(x = w_127_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_169_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1456_cast_fp16 = softmax(axis = var_1318, x = input_169_cast_fp16)[name = tensor<string, []>("op_1456_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_171_cast_fp16 = add(x = w_129_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_171_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1458_cast_fp16 = softmax(axis = var_1318, x = input_171_cast_fp16)[name = tensor<string, []>("op_1458_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_173_cast_fp16 = add(x = w_131_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_173_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1460_cast_fp16 = softmax(axis = var_1318, x = input_173_cast_fp16)[name = tensor<string, []>("op_1460_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_175_cast_fp16 = add(x = w_133_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_175_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1462_cast_fp16 = softmax(axis = var_1318, x = input_175_cast_fp16)[name = tensor<string, []>("op_1462_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_177_cast_fp16 = add(x = w_135_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_177_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1464_cast_fp16 = softmax(axis = var_1318, x = input_177_cast_fp16)[name = tensor<string, []>("op_1464_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_179_cast_fp16 = add(x = w_137_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_179_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1466_cast_fp16 = softmax(axis = var_1318, x = input_179_cast_fp16)[name = tensor<string, []>("op_1466_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_181_cast_fp16 = add(x = w_139_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_181_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1468_cast_fp16 = softmax(axis = var_1318, x = input_181_cast_fp16)[name = tensor<string, []>("op_1468_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_183_cast_fp16 = add(x = w_141_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_183_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1470_cast_fp16 = softmax(axis = var_1318, x = input_183_cast_fp16)[name = tensor<string, []>("op_1470_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_185_cast_fp16 = add(x = w_143_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_185_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1472_cast_fp16 = softmax(axis = var_1318, x = input_185_cast_fp16)[name = tensor<string, []>("op_1472_cast_fp16")];
+            tensor<string, []> var_1474_equation_0 = const()[name = tensor<string, []>("op_1474_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1388_0, var_1450_cast_fp16))[name = tensor<string, []>("op_1474_cast_fp16")];
+            tensor<string, []> var_1476_equation_0 = const()[name = tensor<string, []>("op_1476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1388_1, var_1452_cast_fp16))[name = tensor<string, []>("op_1476_cast_fp16")];
+            tensor<string, []> var_1478_equation_0 = const()[name = tensor<string, []>("op_1478_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1478_cast_fp16 = einsum(equation = var_1478_equation_0, values = (var_1388_2, var_1454_cast_fp16))[name = tensor<string, []>("op_1478_cast_fp16")];
+            tensor<string, []> var_1480_equation_0 = const()[name = tensor<string, []>("op_1480_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1480_cast_fp16 = einsum(equation = var_1480_equation_0, values = (var_1388_3, var_1456_cast_fp16))[name = tensor<string, []>("op_1480_cast_fp16")];
+            tensor<string, []> var_1482_equation_0 = const()[name = tensor<string, []>("op_1482_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1482_cast_fp16 = einsum(equation = var_1482_equation_0, values = (var_1388_4, var_1458_cast_fp16))[name = tensor<string, []>("op_1482_cast_fp16")];
+            tensor<string, []> var_1484_equation_0 = const()[name = tensor<string, []>("op_1484_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1484_cast_fp16 = einsum(equation = var_1484_equation_0, values = (var_1388_5, var_1460_cast_fp16))[name = tensor<string, []>("op_1484_cast_fp16")];
+            tensor<string, []> var_1486_equation_0 = const()[name = tensor<string, []>("op_1486_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1486_cast_fp16 = einsum(equation = var_1486_equation_0, values = (var_1388_6, var_1462_cast_fp16))[name = tensor<string, []>("op_1486_cast_fp16")];
+            tensor<string, []> var_1488_equation_0 = const()[name = tensor<string, []>("op_1488_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1488_cast_fp16 = einsum(equation = var_1488_equation_0, values = (var_1388_7, var_1464_cast_fp16))[name = tensor<string, []>("op_1488_cast_fp16")];
+            tensor<string, []> var_1490_equation_0 = const()[name = tensor<string, []>("op_1490_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1490_cast_fp16 = einsum(equation = var_1490_equation_0, values = (var_1388_8, var_1466_cast_fp16))[name = tensor<string, []>("op_1490_cast_fp16")];
+            tensor<string, []> var_1492_equation_0 = const()[name = tensor<string, []>("op_1492_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1492_cast_fp16 = einsum(equation = var_1492_equation_0, values = (var_1388_9, var_1468_cast_fp16))[name = tensor<string, []>("op_1492_cast_fp16")];
+            tensor<string, []> var_1494_equation_0 = const()[name = tensor<string, []>("op_1494_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1494_cast_fp16 = einsum(equation = var_1494_equation_0, values = (var_1388_10, var_1470_cast_fp16))[name = tensor<string, []>("op_1494_cast_fp16")];
+            tensor<string, []> var_1496_equation_0 = const()[name = tensor<string, []>("op_1496_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1496_cast_fp16 = einsum(equation = var_1496_equation_0, values = (var_1388_11, var_1472_cast_fp16))[name = tensor<string, []>("op_1496_cast_fp16")];
+            tensor<bool, []> var_1498_interleave_0 = const()[name = tensor<string, []>("op_1498_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1498_cast_fp16 = concat(axis = var_1318, interleave = var_1498_interleave_0, values = (var_1474_cast_fp16, var_1476_cast_fp16, var_1478_cast_fp16, var_1480_cast_fp16, var_1482_cast_fp16, var_1484_cast_fp16, var_1486_cast_fp16, var_1488_cast_fp16, var_1490_cast_fp16, var_1492_cast_fp16, var_1494_cast_fp16, var_1496_cast_fp16))[name = tensor<string, []>("op_1498_cast_fp16")];
+            tensor<int32, [2]> var_1502 = const()[name = tensor<string, []>("op_1502"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1504 = const()[name = tensor<string, []>("op_1504"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1506_pad_type_0 = const()[name = tensor<string, []>("op_1506_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1506_pad_0 = const()[name = tensor<string, []>("op_1506_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1506 = conv(bias = layers_5_attention_o_proj_bias, dilations = var_1504, groups = var_1318, pad = var_1506_pad_0, pad_type = var_1506_pad_type_0, strides = var_1502, weight = layers_5_attention_o_proj_weight, x = var_1498_cast_fp16)[name = tensor<string, []>("op_1506")];
+            tensor<bool, []> var_1508_interleave_0 = const()[name = tensor<string, []>("op_1508_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1508 = concat(axis = var_1319, interleave = var_1508_interleave_0, values = var_1506)[name = tensor<string, []>("op_1508")];
+            tensor<fp16, [1, 384, 1, 512]> x_45 = add(x = var_1312_cast_fp16, y = var_1508)[name = tensor<string, []>("x_45")];
+            tensor<fp16, []> var_1315_promoted = const()[name = tensor<string, []>("op_1315_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1316_promoted = const()[name = tensor<string, []>("op_1316_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_47 = clip(alpha = var_1315_promoted, beta = var_1316_promoted, x = x_45)[name = tensor<string, []>("x_47")];
+            tensor<int32, [1]> var_1513 = const()[name = tensor<string, []>("op_1513"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_23 = reduce_mean(axes = var_1513, keep_dims = var_1320, x = x_47)[name = tensor<string, []>("mean_23")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_23 = sub(x = x_47, y = mean_23)[name = tensor<string, []>("zero_mean_23")];
-            tensor<fp16, []> var_1354_promoted = const()[name = tensor<string, []>("op_1354_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1554 = pow(x = zero_mean_23, y = var_1354_promoted)[name = tensor<string, []>("op_1554")];
-            tensor<int32, [1]> var_1555 = const()[name = tensor<string, []>("op_1555"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1556 = reduce_mean(axes = var_1555, keep_dims = var_1349, x = var_1554)[name = tensor<string, []>("op_1556")];
-            tensor<fp16, []> var_1557_to_fp16 = const()[name = tensor<string, []>("op_1557_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1558_cast_fp16 = add(x = var_1556, y = var_1557_to_fp16)[name = tensor<string, []>("op_1558_cast_fp16")];
+            tensor<fp16, []> var_1317_promoted = const()[name = tensor<string, []>("op_1317_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1516 = pow(x = zero_mean_23, y = var_1317_promoted)[name = tensor<string, []>("op_1516")];
+            tensor<int32, [1]> var_1517 = const()[name = tensor<string, []>("op_1517"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1518 = reduce_mean(axes = var_1517, keep_dims = var_1320, x = var_1516)[name = tensor<string, []>("op_1518")];
+            tensor<fp16, []> var_1519_to_fp16 = const()[name = tensor<string, []>("op_1519_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1520_cast_fp16 = add(x = var_1518, y = var_1519_to_fp16)[name = tensor<string, []>("op_1520_cast_fp16")];
             tensor<fp32, []> denom_23_epsilon_0 = const()[name = tensor<string, []>("denom_23_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0, x = var_1558_cast_fp16)[name = tensor<string, []>("denom_23_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1560_cast_fp16 = mul(x = zero_mean_23, y = denom_23_cast_fp16)[name = tensor<string, []>("op_1560_cast_fp16")];
-            tensor<fp16, [384]> var_1562_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1562_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66807040)))];
-            tensor<fp16, [384]> var_1562_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1562_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66807872)))];
-            tensor<fp16, []> var_1562_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1562_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1562_cast_fp16 = batch_norm(beta = var_1562_beta_0_to_fp16, epsilon = var_1562_epsilon_0_to_fp16, gamma = var_1562_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1560_cast_fp16)[name = tensor<string, []>("op_1562_cast_fp16")];
-            tensor<int32, [2]> var_1568 = const()[name = tensor<string, []>("op_1568"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1570 = const()[name = tensor<string, []>("op_1570"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1572_pad_type_0 = const()[name = tensor<string, []>("op_1572_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1572_pad_0 = const()[name = tensor<string, []>("op_1572_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_1572 = conv(bias = layers_5_mlp_fc1_bias, dilations = var_1570, groups = var_1347, pad = var_1572_pad_0, pad_type = var_1572_pad_type_0, strides = var_1568, weight = layers_5_mlp_fc1_weight, x = var_1562_cast_fp16)[name = tensor<string, []>("op_1572")];
-            tensor<string, []> input_47_mode_0 = const()[name = tensor<string, []>("input_47_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_47 = gelu(mode = input_47_mode_0, x = var_1572)[name = tensor<string, []>("input_47")];
-            tensor<int32, [2]> var_1576 = const()[name = tensor<string, []>("op_1576"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1578 = const()[name = tensor<string, []>("op_1578"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1580_pad_type_0 = const()[name = tensor<string, []>("op_1580_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1580_pad_0 = const()[name = tensor<string, []>("op_1580_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1580 = conv(bias = layers_5_mlp_fc2_bias, dilations = var_1578, groups = var_1347, pad = var_1580_pad_0, pad_type = var_1580_pad_type_0, strides = var_1576, weight = layers_5_mlp_fc2_weight, x = input_47)[name = tensor<string, []>("op_1580")];
-            tensor<fp16, [1, 384, 1, 512]> x_49 = add(x = var_1562_cast_fp16, y = var_1580)[name = tensor<string, []>("x_49")];
-            tensor<fp16, []> var_1345_promoted_1 = const()[name = tensor<string, []>("op_1345_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1346_promoted_1 = const()[name = tensor<string, []>("op_1346_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_51 = clip(alpha = var_1345_promoted_1, beta = var_1346_promoted_1, x = x_49)[name = tensor<string, []>("x_51")];
-            tensor<int32, [1]> var_1585 = const()[name = tensor<string, []>("op_1585"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_25 = reduce_mean(axes = var_1585, keep_dims = var_1349, x = x_51)[name = tensor<string, []>("mean_25")];
+            tensor<fp16, [1, 1, 1, 512]> denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0, x = var_1520_cast_fp16)[name = tensor<string, []>("denom_23_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1522_cast_fp16 = mul(x = zero_mean_23, y = denom_23_cast_fp16)[name = tensor<string, []>("op_1522_cast_fp16")];
+            tensor<fp16, [384]> var_1524_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1524_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66807040)))];
+            tensor<fp16, [384]> var_1524_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1524_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66807872)))];
+            tensor<fp16, []> var_1524_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1524_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1524_cast_fp16 = batch_norm(beta = var_1524_beta_0_to_fp16, epsilon = var_1524_epsilon_0_to_fp16, gamma = var_1524_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1522_cast_fp16)[name = tensor<string, []>("op_1524_cast_fp16")];
+            tensor<int32, [2]> var_1530 = const()[name = tensor<string, []>("op_1530"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1532 = const()[name = tensor<string, []>("op_1532"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1534_pad_type_0 = const()[name = tensor<string, []>("op_1534_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1534_pad_0 = const()[name = tensor<string, []>("op_1534_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_1534 = conv(bias = layers_5_mlp_fc1_bias, dilations = var_1532, groups = var_1318, pad = var_1534_pad_0, pad_type = var_1534_pad_type_0, strides = var_1530, weight = layers_5_mlp_fc1_weight, x = var_1524_cast_fp16)[name = tensor<string, []>("op_1534")];
+            tensor<string, []> input_191_mode_0 = const()[name = tensor<string, []>("input_191_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_191 = gelu(mode = input_191_mode_0, x = var_1534)[name = tensor<string, []>("input_191")];
+            tensor<int32, [2]> var_1538 = const()[name = tensor<string, []>("op_1538"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1540 = const()[name = tensor<string, []>("op_1540"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1542_pad_type_0 = const()[name = tensor<string, []>("op_1542_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1542_pad_0 = const()[name = tensor<string, []>("op_1542_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1542 = conv(bias = layers_5_mlp_fc2_bias, dilations = var_1540, groups = var_1318, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1538, weight = layers_5_mlp_fc2_weight, x = input_191)[name = tensor<string, []>("op_1542")];
+            tensor<fp16, [1, 384, 1, 512]> x_49 = add(x = var_1524_cast_fp16, y = var_1542)[name = tensor<string, []>("x_49")];
+            tensor<fp16, []> var_1315_promoted_1 = const()[name = tensor<string, []>("op_1315_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1316_promoted_1 = const()[name = tensor<string, []>("op_1316_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_51 = clip(alpha = var_1315_promoted_1, beta = var_1316_promoted_1, x = x_49)[name = tensor<string, []>("x_51")];
+            tensor<int32, [1]> var_1547 = const()[name = tensor<string, []>("op_1547"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_25 = reduce_mean(axes = var_1547, keep_dims = var_1320, x = x_51)[name = tensor<string, []>("mean_25")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_25 = sub(x = x_51, y = mean_25)[name = tensor<string, []>("zero_mean_25")];
-            tensor<fp16, []> var_1354_promoted_1 = const()[name = tensor<string, []>("op_1354_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1588 = pow(x = zero_mean_25, y = var_1354_promoted_1)[name = tensor<string, []>("op_1588")];
-            tensor<int32, [1]> var_1589 = const()[name = tensor<string, []>("op_1589"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1590 = reduce_mean(axes = var_1589, keep_dims = var_1349, x = var_1588)[name = tensor<string, []>("op_1590")];
-            tensor<fp16, []> var_1591_to_fp16 = const()[name = tensor<string, []>("op_1591_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1592_cast_fp16 = add(x = var_1590, y = var_1591_to_fp16)[name = tensor<string, []>("op_1592_cast_fp16")];
+            tensor<fp16, []> var_1317_promoted_1 = const()[name = tensor<string, []>("op_1317_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1550 = pow(x = zero_mean_25, y = var_1317_promoted_1)[name = tensor<string, []>("op_1550")];
+            tensor<int32, [1]> var_1551 = const()[name = tensor<string, []>("op_1551"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1552 = reduce_mean(axes = var_1551, keep_dims = var_1320, x = var_1550)[name = tensor<string, []>("op_1552")];
+            tensor<fp16, []> var_1553_to_fp16 = const()[name = tensor<string, []>("op_1553_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1554_cast_fp16 = add(x = var_1552, y = var_1553_to_fp16)[name = tensor<string, []>("op_1554_cast_fp16")];
             tensor<fp32, []> denom_25_epsilon_0 = const()[name = tensor<string, []>("denom_25_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0, x = var_1592_cast_fp16)[name = tensor<string, []>("denom_25_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1594_cast_fp16 = mul(x = zero_mean_25, y = denom_25_cast_fp16)[name = tensor<string, []>("op_1594_cast_fp16")];
-            tensor<fp16, [384]> var_1596_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1596_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66808704)))];
-            tensor<fp16, [384]> var_1596_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1596_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66809536)))];
-            tensor<fp16, []> var_1596_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1596_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1596_cast_fp16 = batch_norm(beta = var_1596_beta_0_to_fp16, epsilon = var_1596_epsilon_0_to_fp16, gamma = var_1596_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1594_cast_fp16)[name = tensor<string, []>("op_1596_cast_fp16")];
-            tensor<int32, []> var_1601 = const()[name = tensor<string, []>("op_1601"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_1602 = const()[name = tensor<string, []>("op_1602"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_1603 = const()[name = tensor<string, []>("op_1603"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_1628 = const()[name = tensor<string, []>("op_1628"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1630 = const()[name = tensor<string, []>("op_1630"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1632_pad_type_0 = const()[name = tensor<string, []>("op_1632_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1632_pad_0 = const()[name = tensor<string, []>("op_1632_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1632 = conv(bias = layers_6_attention_q_proj_bias, dilations = var_1630, groups = var_1601, pad = var_1632_pad_0, pad_type = var_1632_pad_type_0, strides = var_1628, weight = layers_6_attention_q_proj_weight, x = var_1596_cast_fp16)[name = tensor<string, []>("op_1632")];
-            tensor<int32, [4]> var_1633 = const()[name = tensor<string, []>("op_1633"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1634 = reshape(shape = var_1633, x = var_1632)[name = tensor<string, []>("op_1634")];
-            tensor<int32, [2]> var_1637 = const()[name = tensor<string, []>("op_1637"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1639 = const()[name = tensor<string, []>("op_1639"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1641_pad_type_0 = const()[name = tensor<string, []>("op_1641_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1641_pad_0 = const()[name = tensor<string, []>("op_1641_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1641 = conv(bias = layers_6_attention_k_proj_bias, dilations = var_1639, groups = var_1601, pad = var_1641_pad_0, pad_type = var_1641_pad_type_0, strides = var_1637, weight = layers_6_attention_k_proj_weight, x = var_1596_cast_fp16)[name = tensor<string, []>("op_1641")];
-            tensor<int32, [4]> var_1642 = const()[name = tensor<string, []>("op_1642"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_13 = reshape(shape = var_1642, x = var_1641)[name = tensor<string, []>("ks_13")];
-            tensor<int32, [2]> var_1646 = const()[name = tensor<string, []>("op_1646"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1648 = const()[name = tensor<string, []>("op_1648"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1650_pad_type_0 = const()[name = tensor<string, []>("op_1650_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1650_pad_0 = const()[name = tensor<string, []>("op_1650_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1650 = conv(bias = layers_6_attention_v_proj_bias, dilations = var_1648, groups = var_1601, pad = var_1650_pad_0, pad_type = var_1650_pad_type_0, strides = var_1646, weight = layers_6_attention_v_proj_weight, x = var_1596_cast_fp16)[name = tensor<string, []>("op_1650")];
-            tensor<int32, [4]> var_1651 = const()[name = tensor<string, []>("op_1651"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1652 = reshape(shape = var_1651, x = var_1650)[name = tensor<string, []>("op_1652")];
-            tensor<int32, [12]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1653_axis_0 = const()[name = tensor<string, []>("op_1653_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1653_0, tensor<fp16, [1, 32, 1, 512]> var_1653_1, tensor<fp16, [1, 32, 1, 512]> var_1653_2, tensor<fp16, [1, 32, 1, 512]> var_1653_3, tensor<fp16, [1, 32, 1, 512]> var_1653_4, tensor<fp16, [1, 32, 1, 512]> var_1653_5, tensor<fp16, [1, 32, 1, 512]> var_1653_6, tensor<fp16, [1, 32, 1, 512]> var_1653_7, tensor<fp16, [1, 32, 1, 512]> var_1653_8, tensor<fp16, [1, 32, 1, 512]> var_1653_9, tensor<fp16, [1, 32, 1, 512]> var_1653_10, tensor<fp16, [1, 32, 1, 512]> var_1653_11 = split(axis = var_1653_axis_0, split_sizes = tile_32, x = var_1634)[name = tensor<string, []>("op_1653")];
-            tensor<int32, [4]> var_1666_perm_0 = const()[name = tensor<string, []>("op_1666_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1667_axis_0 = const()[name = tensor<string, []>("op_1667_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_5 = transpose(perm = var_1666_perm_0, x = ks_13)[name = tensor<string, []>("transpose_5")];
-            tensor<fp16, [1, 512, 1, 32]> var_1667_0, tensor<fp16, [1, 512, 1, 32]> var_1667_1, tensor<fp16, [1, 512, 1, 32]> var_1667_2, tensor<fp16, [1, 512, 1, 32]> var_1667_3, tensor<fp16, [1, 512, 1, 32]> var_1667_4, tensor<fp16, [1, 512, 1, 32]> var_1667_5, tensor<fp16, [1, 512, 1, 32]> var_1667_6, tensor<fp16, [1, 512, 1, 32]> var_1667_7, tensor<fp16, [1, 512, 1, 32]> var_1667_8, tensor<fp16, [1, 512, 1, 32]> var_1667_9, tensor<fp16, [1, 512, 1, 32]> var_1667_10, tensor<fp16, [1, 512, 1, 32]> var_1667_11 = split(axis = var_1667_axis_0, split_sizes = tile_33, x = transpose_5)[name = tensor<string, []>("op_1667")];
-            tensor<int32, [12]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1680_axis_0 = const()[name = tensor<string, []>("op_1680_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1680_0, tensor<fp16, [1, 32, 1, 512]> var_1680_1, tensor<fp16, [1, 32, 1, 512]> var_1680_2, tensor<fp16, [1, 32, 1, 512]> var_1680_3, tensor<fp16, [1, 32, 1, 512]> var_1680_4, tensor<fp16, [1, 32, 1, 512]> var_1680_5, tensor<fp16, [1, 32, 1, 512]> var_1680_6, tensor<fp16, [1, 32, 1, 512]> var_1680_7, tensor<fp16, [1, 32, 1, 512]> var_1680_8, tensor<fp16, [1, 32, 1, 512]> var_1680_9, tensor<fp16, [1, 32, 1, 512]> var_1680_10, tensor<fp16, [1, 32, 1, 512]> var_1680_11 = split(axis = var_1680_axis_0, split_sizes = tile_34, x = var_1652)[name = tensor<string, []>("op_1680")];
-            tensor<string, []> var_1694_equation_0 = const()[name = tensor<string, []>("op_1694_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1694 = einsum(equation = var_1694_equation_0, values = (var_1667_0, var_1653_0))[name = tensor<string, []>("op_1694")];
-            tensor<fp16, []> var_1695_to_fp16 = const()[name = tensor<string, []>("op_1695_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_145_cast_fp16 = mul(x = var_1694, y = var_1695_to_fp16)[name = tensor<string, []>("w_145_cast_fp16")];
-            tensor<string, []> var_1698_equation_0 = const()[name = tensor<string, []>("op_1698_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1698 = einsum(equation = var_1698_equation_0, values = (var_1667_1, var_1653_1))[name = tensor<string, []>("op_1698")];
-            tensor<fp16, []> var_1699_to_fp16 = const()[name = tensor<string, []>("op_1699_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_147_cast_fp16 = mul(x = var_1698, y = var_1699_to_fp16)[name = tensor<string, []>("w_147_cast_fp16")];
-            tensor<string, []> var_1702_equation_0 = const()[name = tensor<string, []>("op_1702_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1702 = einsum(equation = var_1702_equation_0, values = (var_1667_2, var_1653_2))[name = tensor<string, []>("op_1702")];
-            tensor<fp16, []> var_1703_to_fp16 = const()[name = tensor<string, []>("op_1703_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_149_cast_fp16 = mul(x = var_1702, y = var_1703_to_fp16)[name = tensor<string, []>("w_149_cast_fp16")];
-            tensor<string, []> var_1706_equation_0 = const()[name = tensor<string, []>("op_1706_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1706 = einsum(equation = var_1706_equation_0, values = (var_1667_3, var_1653_3))[name = tensor<string, []>("op_1706")];
-            tensor<fp16, []> var_1707_to_fp16 = const()[name = tensor<string, []>("op_1707_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_151_cast_fp16 = mul(x = var_1706, y = var_1707_to_fp16)[name = tensor<string, []>("w_151_cast_fp16")];
-            tensor<string, []> var_1710_equation_0 = const()[name = tensor<string, []>("op_1710_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1710 = einsum(equation = var_1710_equation_0, values = (var_1667_4, var_1653_4))[name = tensor<string, []>("op_1710")];
-            tensor<fp16, []> var_1711_to_fp16 = const()[name = tensor<string, []>("op_1711_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_153_cast_fp16 = mul(x = var_1710, y = var_1711_to_fp16)[name = tensor<string, []>("w_153_cast_fp16")];
-            tensor<string, []> var_1714_equation_0 = const()[name = tensor<string, []>("op_1714_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1714 = einsum(equation = var_1714_equation_0, values = (var_1667_5, var_1653_5))[name = tensor<string, []>("op_1714")];
-            tensor<fp16, []> var_1715_to_fp16 = const()[name = tensor<string, []>("op_1715_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_155_cast_fp16 = mul(x = var_1714, y = var_1715_to_fp16)[name = tensor<string, []>("w_155_cast_fp16")];
-            tensor<string, []> var_1718_equation_0 = const()[name = tensor<string, []>("op_1718_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1718 = einsum(equation = var_1718_equation_0, values = (var_1667_6, var_1653_6))[name = tensor<string, []>("op_1718")];
-            tensor<fp16, []> var_1719_to_fp16 = const()[name = tensor<string, []>("op_1719_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_157_cast_fp16 = mul(x = var_1718, y = var_1719_to_fp16)[name = tensor<string, []>("w_157_cast_fp16")];
-            tensor<string, []> var_1722_equation_0 = const()[name = tensor<string, []>("op_1722_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1722 = einsum(equation = var_1722_equation_0, values = (var_1667_7, var_1653_7))[name = tensor<string, []>("op_1722")];
-            tensor<fp16, []> var_1723_to_fp16 = const()[name = tensor<string, []>("op_1723_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_159_cast_fp16 = mul(x = var_1722, y = var_1723_to_fp16)[name = tensor<string, []>("w_159_cast_fp16")];
-            tensor<string, []> var_1726_equation_0 = const()[name = tensor<string, []>("op_1726_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1726 = einsum(equation = var_1726_equation_0, values = (var_1667_8, var_1653_8))[name = tensor<string, []>("op_1726")];
-            tensor<fp16, []> var_1727_to_fp16 = const()[name = tensor<string, []>("op_1727_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_161_cast_fp16 = mul(x = var_1726, y = var_1727_to_fp16)[name = tensor<string, []>("w_161_cast_fp16")];
-            tensor<string, []> var_1730_equation_0 = const()[name = tensor<string, []>("op_1730_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1730 = einsum(equation = var_1730_equation_0, values = (var_1667_9, var_1653_9))[name = tensor<string, []>("op_1730")];
-            tensor<fp16, []> var_1731_to_fp16 = const()[name = tensor<string, []>("op_1731_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_163_cast_fp16 = mul(x = var_1730, y = var_1731_to_fp16)[name = tensor<string, []>("w_163_cast_fp16")];
-            tensor<string, []> var_1734_equation_0 = const()[name = tensor<string, []>("op_1734_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1734 = einsum(equation = var_1734_equation_0, values = (var_1667_10, var_1653_10))[name = tensor<string, []>("op_1734")];
-            tensor<fp16, []> var_1735_to_fp16 = const()[name = tensor<string, []>("op_1735_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_165_cast_fp16 = mul(x = var_1734, y = var_1735_to_fp16)[name = tensor<string, []>("w_165_cast_fp16")];
-            tensor<string, []> var_1738_equation_0 = const()[name = tensor<string, []>("op_1738_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1738 = einsum(equation = var_1738_equation_0, values = (var_1667_11, var_1653_11))[name = tensor<string, []>("op_1738")];
-            tensor<fp16, []> var_1739_to_fp16 = const()[name = tensor<string, []>("op_1739_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_167_cast_fp16 = mul(x = var_1738, y = var_1739_to_fp16)[name = tensor<string, []>("w_167_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1741_cast_fp16 = add(x = w_145_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1741_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1742_cast_fp16 = softmax(axis = var_1601, x = var_1741_cast_fp16)[name = tensor<string, []>("op_1742_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1743_cast_fp16 = add(x = w_147_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1743_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1744_cast_fp16 = softmax(axis = var_1601, x = var_1743_cast_fp16)[name = tensor<string, []>("op_1744_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1745_cast_fp16 = add(x = w_149_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1745_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1746_cast_fp16 = softmax(axis = var_1601, x = var_1745_cast_fp16)[name = tensor<string, []>("op_1746_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1747_cast_fp16 = add(x = w_151_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1747_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1748_cast_fp16 = softmax(axis = var_1601, x = var_1747_cast_fp16)[name = tensor<string, []>("op_1748_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1749_cast_fp16 = add(x = w_153_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1749_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1750_cast_fp16 = softmax(axis = var_1601, x = var_1749_cast_fp16)[name = tensor<string, []>("op_1750_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1751_cast_fp16 = add(x = w_155_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1751_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1752_cast_fp16 = softmax(axis = var_1601, x = var_1751_cast_fp16)[name = tensor<string, []>("op_1752_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1753_cast_fp16 = add(x = w_157_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1753_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1754_cast_fp16 = softmax(axis = var_1601, x = var_1753_cast_fp16)[name = tensor<string, []>("op_1754_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1755_cast_fp16 = add(x = w_159_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1755_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1756_cast_fp16 = softmax(axis = var_1601, x = var_1755_cast_fp16)[name = tensor<string, []>("op_1756_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1757_cast_fp16 = add(x = w_161_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1757_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1758_cast_fp16 = softmax(axis = var_1601, x = var_1757_cast_fp16)[name = tensor<string, []>("op_1758_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1759_cast_fp16 = add(x = w_163_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1759_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1760_cast_fp16 = softmax(axis = var_1601, x = var_1759_cast_fp16)[name = tensor<string, []>("op_1760_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1761_cast_fp16 = add(x = w_165_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1761_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1762_cast_fp16 = softmax(axis = var_1601, x = var_1761_cast_fp16)[name = tensor<string, []>("op_1762_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1763_cast_fp16 = add(x = w_167_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1763_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1764_cast_fp16 = softmax(axis = var_1601, x = var_1763_cast_fp16)[name = tensor<string, []>("op_1764_cast_fp16")];
-            tensor<string, []> var_1766_equation_0 = const()[name = tensor<string, []>("op_1766_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1766_cast_fp16 = einsum(equation = var_1766_equation_0, values = (var_1680_0, var_1742_cast_fp16))[name = tensor<string, []>("op_1766_cast_fp16")];
-            tensor<string, []> var_1768_equation_0 = const()[name = tensor<string, []>("op_1768_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1768_cast_fp16 = einsum(equation = var_1768_equation_0, values = (var_1680_1, var_1744_cast_fp16))[name = tensor<string, []>("op_1768_cast_fp16")];
-            tensor<string, []> var_1770_equation_0 = const()[name = tensor<string, []>("op_1770_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1770_cast_fp16 = einsum(equation = var_1770_equation_0, values = (var_1680_2, var_1746_cast_fp16))[name = tensor<string, []>("op_1770_cast_fp16")];
-            tensor<string, []> var_1772_equation_0 = const()[name = tensor<string, []>("op_1772_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1772_cast_fp16 = einsum(equation = var_1772_equation_0, values = (var_1680_3, var_1748_cast_fp16))[name = tensor<string, []>("op_1772_cast_fp16")];
-            tensor<string, []> var_1774_equation_0 = const()[name = tensor<string, []>("op_1774_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1774_cast_fp16 = einsum(equation = var_1774_equation_0, values = (var_1680_4, var_1750_cast_fp16))[name = tensor<string, []>("op_1774_cast_fp16")];
-            tensor<string, []> var_1776_equation_0 = const()[name = tensor<string, []>("op_1776_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1776_cast_fp16 = einsum(equation = var_1776_equation_0, values = (var_1680_5, var_1752_cast_fp16))[name = tensor<string, []>("op_1776_cast_fp16")];
-            tensor<string, []> var_1778_equation_0 = const()[name = tensor<string, []>("op_1778_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1778_cast_fp16 = einsum(equation = var_1778_equation_0, values = (var_1680_6, var_1754_cast_fp16))[name = tensor<string, []>("op_1778_cast_fp16")];
-            tensor<string, []> var_1780_equation_0 = const()[name = tensor<string, []>("op_1780_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1780_cast_fp16 = einsum(equation = var_1780_equation_0, values = (var_1680_7, var_1756_cast_fp16))[name = tensor<string, []>("op_1780_cast_fp16")];
-            tensor<string, []> var_1782_equation_0 = const()[name = tensor<string, []>("op_1782_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1782_cast_fp16 = einsum(equation = var_1782_equation_0, values = (var_1680_8, var_1758_cast_fp16))[name = tensor<string, []>("op_1782_cast_fp16")];
-            tensor<string, []> var_1784_equation_0 = const()[name = tensor<string, []>("op_1784_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1784_cast_fp16 = einsum(equation = var_1784_equation_0, values = (var_1680_9, var_1760_cast_fp16))[name = tensor<string, []>("op_1784_cast_fp16")];
-            tensor<string, []> var_1786_equation_0 = const()[name = tensor<string, []>("op_1786_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1786_cast_fp16 = einsum(equation = var_1786_equation_0, values = (var_1680_10, var_1762_cast_fp16))[name = tensor<string, []>("op_1786_cast_fp16")];
-            tensor<string, []> var_1788_equation_0 = const()[name = tensor<string, []>("op_1788_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_1788_cast_fp16 = einsum(equation = var_1788_equation_0, values = (var_1680_11, var_1764_cast_fp16))[name = tensor<string, []>("op_1788_cast_fp16")];
-            tensor<bool, []> var_1790_interleave_0 = const()[name = tensor<string, []>("op_1790_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1790_cast_fp16 = concat(axis = var_1601, interleave = var_1790_interleave_0, values = (var_1766_cast_fp16, var_1768_cast_fp16, var_1770_cast_fp16, var_1772_cast_fp16, var_1774_cast_fp16, var_1776_cast_fp16, var_1778_cast_fp16, var_1780_cast_fp16, var_1782_cast_fp16, var_1784_cast_fp16, var_1786_cast_fp16, var_1788_cast_fp16))[name = tensor<string, []>("op_1790_cast_fp16")];
-            tensor<int32, [2]> var_1794 = const()[name = tensor<string, []>("op_1794"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1796 = const()[name = tensor<string, []>("op_1796"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1798_pad_type_0 = const()[name = tensor<string, []>("op_1798_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1798_pad_0 = const()[name = tensor<string, []>("op_1798_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1798 = conv(bias = layers_6_attention_o_proj_bias, dilations = var_1796, groups = var_1601, pad = var_1798_pad_0, pad_type = var_1798_pad_type_0, strides = var_1794, weight = layers_6_attention_o_proj_weight, x = var_1790_cast_fp16)[name = tensor<string, []>("op_1798")];
-            tensor<bool, []> var_1800_interleave_0 = const()[name = tensor<string, []>("op_1800_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_1800 = concat(axis = var_1602, interleave = var_1800_interleave_0, values = var_1798)[name = tensor<string, []>("op_1800")];
-            tensor<fp16, [1, 384, 1, 512]> x_53 = add(x = var_1596_cast_fp16, y = var_1800)[name = tensor<string, []>("x_53")];
-            tensor<fp16, []> var_1599_promoted = const()[name = tensor<string, []>("op_1599_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1600_promoted = const()[name = tensor<string, []>("op_1600_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_55 = clip(alpha = var_1599_promoted, beta = var_1600_promoted, x = x_53)[name = tensor<string, []>("x_55")];
-            tensor<int32, [1]> var_1805 = const()[name = tensor<string, []>("op_1805"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_27 = reduce_mean(axes = var_1805, keep_dims = var_1603, x = x_55)[name = tensor<string, []>("mean_27")];
+            tensor<fp16, [1, 1, 1, 512]> denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0, x = var_1554_cast_fp16)[name = tensor<string, []>("denom_25_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1556_cast_fp16 = mul(x = zero_mean_25, y = denom_25_cast_fp16)[name = tensor<string, []>("op_1556_cast_fp16")];
+            tensor<fp16, [384]> var_1558_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1558_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66808704)))];
+            tensor<fp16, [384]> var_1558_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1558_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66809536)))];
+            tensor<fp16, []> var_1558_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1558_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1558_cast_fp16 = batch_norm(beta = var_1558_beta_0_to_fp16, epsilon = var_1558_epsilon_0_to_fp16, gamma = var_1558_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1556_cast_fp16)[name = tensor<string, []>("op_1558_cast_fp16")];
+            tensor<int32, []> var_1564 = const()[name = tensor<string, []>("op_1564"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_1565 = const()[name = tensor<string, []>("op_1565"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_1566 = const()[name = tensor<string, []>("op_1566"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_1588 = const()[name = tensor<string, []>("op_1588"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1590 = const()[name = tensor<string, []>("op_1590"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1592_pad_type_0 = const()[name = tensor<string, []>("op_1592_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1592_pad_0 = const()[name = tensor<string, []>("op_1592_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1592 = conv(bias = layers_6_attention_q_proj_bias, dilations = var_1590, groups = var_1564, pad = var_1592_pad_0, pad_type = var_1592_pad_type_0, strides = var_1588, weight = layers_6_attention_q_proj_weight, x = var_1558_cast_fp16)[name = tensor<string, []>("op_1592")];
+            tensor<int32, [2]> var_1595 = const()[name = tensor<string, []>("op_1595"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1597 = const()[name = tensor<string, []>("op_1597"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_13_pad_type_0 = const()[name = tensor<string, []>("ks_13_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_13_pad_0 = const()[name = tensor<string, []>("ks_13_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_13 = conv(bias = layers_6_attention_k_proj_bias, dilations = var_1597, groups = var_1564, pad = ks_13_pad_0, pad_type = ks_13_pad_type_0, strides = var_1595, weight = layers_6_attention_k_proj_weight, x = var_1558_cast_fp16)[name = tensor<string, []>("ks_13")];
+            tensor<int32, [2]> var_1602 = const()[name = tensor<string, []>("op_1602"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1604 = const()[name = tensor<string, []>("op_1604"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1606_pad_type_0 = const()[name = tensor<string, []>("op_1606_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1606_pad_0 = const()[name = tensor<string, []>("op_1606_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1606 = conv(bias = layers_6_attention_v_proj_bias, dilations = var_1604, groups = var_1564, pad = var_1606_pad_0, pad_type = var_1606_pad_type_0, strides = var_1602, weight = layers_6_attention_v_proj_weight, x = var_1558_cast_fp16)[name = tensor<string, []>("op_1606")];
+            tensor<int32, [12]> tile_32 = const()[name = tensor<string, []>("tile_32"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1607_axis_0 = const()[name = tensor<string, []>("op_1607_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1607_0, tensor<fp16, [1, 32, 1, 512]> var_1607_1, tensor<fp16, [1, 32, 1, 512]> var_1607_2, tensor<fp16, [1, 32, 1, 512]> var_1607_3, tensor<fp16, [1, 32, 1, 512]> var_1607_4, tensor<fp16, [1, 32, 1, 512]> var_1607_5, tensor<fp16, [1, 32, 1, 512]> var_1607_6, tensor<fp16, [1, 32, 1, 512]> var_1607_7, tensor<fp16, [1, 32, 1, 512]> var_1607_8, tensor<fp16, [1, 32, 1, 512]> var_1607_9, tensor<fp16, [1, 32, 1, 512]> var_1607_10, tensor<fp16, [1, 32, 1, 512]> var_1607_11 = split(axis = var_1607_axis_0, split_sizes = tile_32, x = var_1592)[name = tensor<string, []>("op_1607")];
+            tensor<int32, [4]> var_1620_perm_0 = const()[name = tensor<string, []>("op_1620_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_33 = const()[name = tensor<string, []>("tile_33"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1621_axis_0 = const()[name = tensor<string, []>("op_1621_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_5 = transpose(perm = var_1620_perm_0, x = ks_13)[name = tensor<string, []>("transpose_5")];
+            tensor<fp16, [1, 512, 1, 32]> var_1621_0, tensor<fp16, [1, 512, 1, 32]> var_1621_1, tensor<fp16, [1, 512, 1, 32]> var_1621_2, tensor<fp16, [1, 512, 1, 32]> var_1621_3, tensor<fp16, [1, 512, 1, 32]> var_1621_4, tensor<fp16, [1, 512, 1, 32]> var_1621_5, tensor<fp16, [1, 512, 1, 32]> var_1621_6, tensor<fp16, [1, 512, 1, 32]> var_1621_7, tensor<fp16, [1, 512, 1, 32]> var_1621_8, tensor<fp16, [1, 512, 1, 32]> var_1621_9, tensor<fp16, [1, 512, 1, 32]> var_1621_10, tensor<fp16, [1, 512, 1, 32]> var_1621_11 = split(axis = var_1621_axis_0, split_sizes = tile_33, x = transpose_5)[name = tensor<string, []>("op_1621")];
+            tensor<int32, [12]> tile_34 = const()[name = tensor<string, []>("tile_34"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1634_axis_0 = const()[name = tensor<string, []>("op_1634_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1634_0, tensor<fp16, [1, 32, 1, 512]> var_1634_1, tensor<fp16, [1, 32, 1, 512]> var_1634_2, tensor<fp16, [1, 32, 1, 512]> var_1634_3, tensor<fp16, [1, 32, 1, 512]> var_1634_4, tensor<fp16, [1, 32, 1, 512]> var_1634_5, tensor<fp16, [1, 32, 1, 512]> var_1634_6, tensor<fp16, [1, 32, 1, 512]> var_1634_7, tensor<fp16, [1, 32, 1, 512]> var_1634_8, tensor<fp16, [1, 32, 1, 512]> var_1634_9, tensor<fp16, [1, 32, 1, 512]> var_1634_10, tensor<fp16, [1, 32, 1, 512]> var_1634_11 = split(axis = var_1634_axis_0, split_sizes = tile_34, x = var_1606)[name = tensor<string, []>("op_1634")];
+            tensor<string, []> var_1648_equation_0 = const()[name = tensor<string, []>("op_1648_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1648 = einsum(equation = var_1648_equation_0, values = (var_1621_0, var_1607_0))[name = tensor<string, []>("op_1648")];
+            tensor<fp16, []> var_1649_to_fp16 = const()[name = tensor<string, []>("op_1649_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_145_cast_fp16 = mul(x = var_1648, y = var_1649_to_fp16)[name = tensor<string, []>("w_145_cast_fp16")];
+            tensor<string, []> var_1652_equation_0 = const()[name = tensor<string, []>("op_1652_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1652 = einsum(equation = var_1652_equation_0, values = (var_1621_1, var_1607_1))[name = tensor<string, []>("op_1652")];
+            tensor<fp16, []> var_1653_to_fp16 = const()[name = tensor<string, []>("op_1653_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_147_cast_fp16 = mul(x = var_1652, y = var_1653_to_fp16)[name = tensor<string, []>("w_147_cast_fp16")];
+            tensor<string, []> var_1656_equation_0 = const()[name = tensor<string, []>("op_1656_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1656 = einsum(equation = var_1656_equation_0, values = (var_1621_2, var_1607_2))[name = tensor<string, []>("op_1656")];
+            tensor<fp16, []> var_1657_to_fp16 = const()[name = tensor<string, []>("op_1657_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_149_cast_fp16 = mul(x = var_1656, y = var_1657_to_fp16)[name = tensor<string, []>("w_149_cast_fp16")];
+            tensor<string, []> var_1660_equation_0 = const()[name = tensor<string, []>("op_1660_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1660 = einsum(equation = var_1660_equation_0, values = (var_1621_3, var_1607_3))[name = tensor<string, []>("op_1660")];
+            tensor<fp16, []> var_1661_to_fp16 = const()[name = tensor<string, []>("op_1661_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_151_cast_fp16 = mul(x = var_1660, y = var_1661_to_fp16)[name = tensor<string, []>("w_151_cast_fp16")];
+            tensor<string, []> var_1664_equation_0 = const()[name = tensor<string, []>("op_1664_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1664 = einsum(equation = var_1664_equation_0, values = (var_1621_4, var_1607_4))[name = tensor<string, []>("op_1664")];
+            tensor<fp16, []> var_1665_to_fp16 = const()[name = tensor<string, []>("op_1665_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_153_cast_fp16 = mul(x = var_1664, y = var_1665_to_fp16)[name = tensor<string, []>("w_153_cast_fp16")];
+            tensor<string, []> var_1668_equation_0 = const()[name = tensor<string, []>("op_1668_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1668 = einsum(equation = var_1668_equation_0, values = (var_1621_5, var_1607_5))[name = tensor<string, []>("op_1668")];
+            tensor<fp16, []> var_1669_to_fp16 = const()[name = tensor<string, []>("op_1669_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_155_cast_fp16 = mul(x = var_1668, y = var_1669_to_fp16)[name = tensor<string, []>("w_155_cast_fp16")];
+            tensor<string, []> var_1672_equation_0 = const()[name = tensor<string, []>("op_1672_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1672 = einsum(equation = var_1672_equation_0, values = (var_1621_6, var_1607_6))[name = tensor<string, []>("op_1672")];
+            tensor<fp16, []> var_1673_to_fp16 = const()[name = tensor<string, []>("op_1673_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_157_cast_fp16 = mul(x = var_1672, y = var_1673_to_fp16)[name = tensor<string, []>("w_157_cast_fp16")];
+            tensor<string, []> var_1676_equation_0 = const()[name = tensor<string, []>("op_1676_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1676 = einsum(equation = var_1676_equation_0, values = (var_1621_7, var_1607_7))[name = tensor<string, []>("op_1676")];
+            tensor<fp16, []> var_1677_to_fp16 = const()[name = tensor<string, []>("op_1677_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_159_cast_fp16 = mul(x = var_1676, y = var_1677_to_fp16)[name = tensor<string, []>("w_159_cast_fp16")];
+            tensor<string, []> var_1680_equation_0 = const()[name = tensor<string, []>("op_1680_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1680 = einsum(equation = var_1680_equation_0, values = (var_1621_8, var_1607_8))[name = tensor<string, []>("op_1680")];
+            tensor<fp16, []> var_1681_to_fp16 = const()[name = tensor<string, []>("op_1681_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_161_cast_fp16 = mul(x = var_1680, y = var_1681_to_fp16)[name = tensor<string, []>("w_161_cast_fp16")];
+            tensor<string, []> var_1684_equation_0 = const()[name = tensor<string, []>("op_1684_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1684 = einsum(equation = var_1684_equation_0, values = (var_1621_9, var_1607_9))[name = tensor<string, []>("op_1684")];
+            tensor<fp16, []> var_1685_to_fp16 = const()[name = tensor<string, []>("op_1685_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_163_cast_fp16 = mul(x = var_1684, y = var_1685_to_fp16)[name = tensor<string, []>("w_163_cast_fp16")];
+            tensor<string, []> var_1688_equation_0 = const()[name = tensor<string, []>("op_1688_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1688 = einsum(equation = var_1688_equation_0, values = (var_1621_10, var_1607_10))[name = tensor<string, []>("op_1688")];
+            tensor<fp16, []> var_1689_to_fp16 = const()[name = tensor<string, []>("op_1689_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_165_cast_fp16 = mul(x = var_1688, y = var_1689_to_fp16)[name = tensor<string, []>("w_165_cast_fp16")];
+            tensor<string, []> var_1692_equation_0 = const()[name = tensor<string, []>("op_1692_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1692 = einsum(equation = var_1692_equation_0, values = (var_1621_11, var_1607_11))[name = tensor<string, []>("op_1692")];
+            tensor<fp16, []> var_1693_to_fp16 = const()[name = tensor<string, []>("op_1693_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_167_cast_fp16 = mul(x = var_1692, y = var_1693_to_fp16)[name = tensor<string, []>("w_167_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_195_cast_fp16 = add(x = w_145_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_195_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1696_cast_fp16 = softmax(axis = var_1564, x = input_195_cast_fp16)[name = tensor<string, []>("op_1696_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_197_cast_fp16 = add(x = w_147_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_197_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1698_cast_fp16 = softmax(axis = var_1564, x = input_197_cast_fp16)[name = tensor<string, []>("op_1698_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_199_cast_fp16 = add(x = w_149_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_199_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1700_cast_fp16 = softmax(axis = var_1564, x = input_199_cast_fp16)[name = tensor<string, []>("op_1700_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_201_cast_fp16 = add(x = w_151_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_201_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1702_cast_fp16 = softmax(axis = var_1564, x = input_201_cast_fp16)[name = tensor<string, []>("op_1702_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_203_cast_fp16 = add(x = w_153_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_203_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1704_cast_fp16 = softmax(axis = var_1564, x = input_203_cast_fp16)[name = tensor<string, []>("op_1704_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_205_cast_fp16 = add(x = w_155_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_205_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1706_cast_fp16 = softmax(axis = var_1564, x = input_205_cast_fp16)[name = tensor<string, []>("op_1706_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_207_cast_fp16 = add(x = w_157_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_207_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1708_cast_fp16 = softmax(axis = var_1564, x = input_207_cast_fp16)[name = tensor<string, []>("op_1708_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_209_cast_fp16 = add(x = w_159_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_209_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1710_cast_fp16 = softmax(axis = var_1564, x = input_209_cast_fp16)[name = tensor<string, []>("op_1710_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_211_cast_fp16 = add(x = w_161_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_211_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1712_cast_fp16 = softmax(axis = var_1564, x = input_211_cast_fp16)[name = tensor<string, []>("op_1712_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_213_cast_fp16 = add(x = w_163_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_213_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1714_cast_fp16 = softmax(axis = var_1564, x = input_213_cast_fp16)[name = tensor<string, []>("op_1714_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_215_cast_fp16 = add(x = w_165_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_215_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1716_cast_fp16 = softmax(axis = var_1564, x = input_215_cast_fp16)[name = tensor<string, []>("op_1716_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_217_cast_fp16 = add(x = w_167_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_217_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1718_cast_fp16 = softmax(axis = var_1564, x = input_217_cast_fp16)[name = tensor<string, []>("op_1718_cast_fp16")];
+            tensor<string, []> var_1720_equation_0 = const()[name = tensor<string, []>("op_1720_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1634_0, var_1696_cast_fp16))[name = tensor<string, []>("op_1720_cast_fp16")];
+            tensor<string, []> var_1722_equation_0 = const()[name = tensor<string, []>("op_1722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1634_1, var_1698_cast_fp16))[name = tensor<string, []>("op_1722_cast_fp16")];
+            tensor<string, []> var_1724_equation_0 = const()[name = tensor<string, []>("op_1724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1634_2, var_1700_cast_fp16))[name = tensor<string, []>("op_1724_cast_fp16")];
+            tensor<string, []> var_1726_equation_0 = const()[name = tensor<string, []>("op_1726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1634_3, var_1702_cast_fp16))[name = tensor<string, []>("op_1726_cast_fp16")];
+            tensor<string, []> var_1728_equation_0 = const()[name = tensor<string, []>("op_1728_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1634_4, var_1704_cast_fp16))[name = tensor<string, []>("op_1728_cast_fp16")];
+            tensor<string, []> var_1730_equation_0 = const()[name = tensor<string, []>("op_1730_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1634_5, var_1706_cast_fp16))[name = tensor<string, []>("op_1730_cast_fp16")];
+            tensor<string, []> var_1732_equation_0 = const()[name = tensor<string, []>("op_1732_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1634_6, var_1708_cast_fp16))[name = tensor<string, []>("op_1732_cast_fp16")];
+            tensor<string, []> var_1734_equation_0 = const()[name = tensor<string, []>("op_1734_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1634_7, var_1710_cast_fp16))[name = tensor<string, []>("op_1734_cast_fp16")];
+            tensor<string, []> var_1736_equation_0 = const()[name = tensor<string, []>("op_1736_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1634_8, var_1712_cast_fp16))[name = tensor<string, []>("op_1736_cast_fp16")];
+            tensor<string, []> var_1738_equation_0 = const()[name = tensor<string, []>("op_1738_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1634_9, var_1714_cast_fp16))[name = tensor<string, []>("op_1738_cast_fp16")];
+            tensor<string, []> var_1740_equation_0 = const()[name = tensor<string, []>("op_1740_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1634_10, var_1716_cast_fp16))[name = tensor<string, []>("op_1740_cast_fp16")];
+            tensor<string, []> var_1742_equation_0 = const()[name = tensor<string, []>("op_1742_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1634_11, var_1718_cast_fp16))[name = tensor<string, []>("op_1742_cast_fp16")];
+            tensor<bool, []> var_1744_interleave_0 = const()[name = tensor<string, []>("op_1744_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1744_cast_fp16 = concat(axis = var_1564, interleave = var_1744_interleave_0, values = (var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16))[name = tensor<string, []>("op_1744_cast_fp16")];
+            tensor<int32, [2]> var_1748 = const()[name = tensor<string, []>("op_1748"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1750 = const()[name = tensor<string, []>("op_1750"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1752_pad_type_0 = const()[name = tensor<string, []>("op_1752_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1752_pad_0 = const()[name = tensor<string, []>("op_1752_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1752 = conv(bias = layers_6_attention_o_proj_bias, dilations = var_1750, groups = var_1564, pad = var_1752_pad_0, pad_type = var_1752_pad_type_0, strides = var_1748, weight = layers_6_attention_o_proj_weight, x = var_1744_cast_fp16)[name = tensor<string, []>("op_1752")];
+            tensor<bool, []> var_1754_interleave_0 = const()[name = tensor<string, []>("op_1754_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1754 = concat(axis = var_1565, interleave = var_1754_interleave_0, values = var_1752)[name = tensor<string, []>("op_1754")];
+            tensor<fp16, [1, 384, 1, 512]> x_53 = add(x = var_1558_cast_fp16, y = var_1754)[name = tensor<string, []>("x_53")];
+            tensor<fp16, []> var_1561_promoted = const()[name = tensor<string, []>("op_1561_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1562_promoted = const()[name = tensor<string, []>("op_1562_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_55 = clip(alpha = var_1561_promoted, beta = var_1562_promoted, x = x_53)[name = tensor<string, []>("x_55")];
+            tensor<int32, [1]> var_1759 = const()[name = tensor<string, []>("op_1759"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_27 = reduce_mean(axes = var_1759, keep_dims = var_1566, x = x_55)[name = tensor<string, []>("mean_27")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_27 = sub(x = x_55, y = mean_27)[name = tensor<string, []>("zero_mean_27")];
-            tensor<fp16, []> var_1608_promoted = const()[name = tensor<string, []>("op_1608_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1808 = pow(x = zero_mean_27, y = var_1608_promoted)[name = tensor<string, []>("op_1808")];
-            tensor<int32, [1]> var_1809 = const()[name = tensor<string, []>("op_1809"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1810 = reduce_mean(axes = var_1809, keep_dims = var_1603, x = var_1808)[name = tensor<string, []>("op_1810")];
-            tensor<fp16, []> var_1811_to_fp16 = const()[name = tensor<string, []>("op_1811_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1812_cast_fp16 = add(x = var_1810, y = var_1811_to_fp16)[name = tensor<string, []>("op_1812_cast_fp16")];
+            tensor<fp16, []> var_1563_promoted = const()[name = tensor<string, []>("op_1563_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1762 = pow(x = zero_mean_27, y = var_1563_promoted)[name = tensor<string, []>("op_1762")];
+            tensor<int32, [1]> var_1763 = const()[name = tensor<string, []>("op_1763"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1764 = reduce_mean(axes = var_1763, keep_dims = var_1566, x = var_1762)[name = tensor<string, []>("op_1764")];
+            tensor<fp16, []> var_1765_to_fp16 = const()[name = tensor<string, []>("op_1765_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1766_cast_fp16 = add(x = var_1764, y = var_1765_to_fp16)[name = tensor<string, []>("op_1766_cast_fp16")];
             tensor<fp32, []> denom_27_epsilon_0 = const()[name = tensor<string, []>("denom_27_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0, x = var_1812_cast_fp16)[name = tensor<string, []>("denom_27_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1814_cast_fp16 = mul(x = zero_mean_27, y = denom_27_cast_fp16)[name = tensor<string, []>("op_1814_cast_fp16")];
-            tensor<fp16, [384]> var_1816_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1816_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66810368)))];
-            tensor<fp16, [384]> var_1816_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1816_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66811200)))];
-            tensor<fp16, []> var_1816_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1816_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1816_cast_fp16 = batch_norm(beta = var_1816_beta_0_to_fp16, epsilon = var_1816_epsilon_0_to_fp16, gamma = var_1816_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1814_cast_fp16)[name = tensor<string, []>("op_1816_cast_fp16")];
-            tensor<int32, [2]> var_1822 = const()[name = tensor<string, []>("op_1822"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1824 = const()[name = tensor<string, []>("op_1824"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1826_pad_type_0 = const()[name = tensor<string, []>("op_1826_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1826_pad_0 = const()[name = tensor<string, []>("op_1826_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_1826 = conv(bias = layers_6_mlp_fc1_bias, dilations = var_1824, groups = var_1601, pad = var_1826_pad_0, pad_type = var_1826_pad_type_0, strides = var_1822, weight = layers_6_mlp_fc1_weight, x = var_1816_cast_fp16)[name = tensor<string, []>("op_1826")];
-            tensor<string, []> input_55_mode_0 = const()[name = tensor<string, []>("input_55_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_55 = gelu(mode = input_55_mode_0, x = var_1826)[name = tensor<string, []>("input_55")];
-            tensor<int32, [2]> var_1830 = const()[name = tensor<string, []>("op_1830"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1832 = const()[name = tensor<string, []>("op_1832"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1834_pad_type_0 = const()[name = tensor<string, []>("op_1834_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1834_pad_0 = const()[name = tensor<string, []>("op_1834_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1834 = conv(bias = layers_6_mlp_fc2_bias, dilations = var_1832, groups = var_1601, pad = var_1834_pad_0, pad_type = var_1834_pad_type_0, strides = var_1830, weight = layers_6_mlp_fc2_weight, x = input_55)[name = tensor<string, []>("op_1834")];
-            tensor<fp16, [1, 384, 1, 512]> x_57 = add(x = var_1816_cast_fp16, y = var_1834)[name = tensor<string, []>("x_57")];
-            tensor<fp16, []> var_1599_promoted_1 = const()[name = tensor<string, []>("op_1599_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1600_promoted_1 = const()[name = tensor<string, []>("op_1600_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_59 = clip(alpha = var_1599_promoted_1, beta = var_1600_promoted_1, x = x_57)[name = tensor<string, []>("x_59")];
-            tensor<int32, [1]> var_1839 = const()[name = tensor<string, []>("op_1839"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_29 = reduce_mean(axes = var_1839, keep_dims = var_1603, x = x_59)[name = tensor<string, []>("mean_29")];
+            tensor<fp16, [1, 1, 1, 512]> denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0, x = var_1766_cast_fp16)[name = tensor<string, []>("denom_27_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1768_cast_fp16 = mul(x = zero_mean_27, y = denom_27_cast_fp16)[name = tensor<string, []>("op_1768_cast_fp16")];
+            tensor<fp16, [384]> var_1770_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1770_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66810368)))];
+            tensor<fp16, [384]> var_1770_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1770_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66811200)))];
+            tensor<fp16, []> var_1770_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1770_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1770_cast_fp16 = batch_norm(beta = var_1770_beta_0_to_fp16, epsilon = var_1770_epsilon_0_to_fp16, gamma = var_1770_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1768_cast_fp16)[name = tensor<string, []>("op_1770_cast_fp16")];
+            tensor<int32, [2]> var_1776 = const()[name = tensor<string, []>("op_1776"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1778 = const()[name = tensor<string, []>("op_1778"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1780_pad_type_0 = const()[name = tensor<string, []>("op_1780_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1780_pad_0 = const()[name = tensor<string, []>("op_1780_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_1780 = conv(bias = layers_6_mlp_fc1_bias, dilations = var_1778, groups = var_1564, pad = var_1780_pad_0, pad_type = var_1780_pad_type_0, strides = var_1776, weight = layers_6_mlp_fc1_weight, x = var_1770_cast_fp16)[name = tensor<string, []>("op_1780")];
+            tensor<string, []> input_223_mode_0 = const()[name = tensor<string, []>("input_223_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_223 = gelu(mode = input_223_mode_0, x = var_1780)[name = tensor<string, []>("input_223")];
+            tensor<int32, [2]> var_1784 = const()[name = tensor<string, []>("op_1784"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1786 = const()[name = tensor<string, []>("op_1786"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1788_pad_type_0 = const()[name = tensor<string, []>("op_1788_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1788_pad_0 = const()[name = tensor<string, []>("op_1788_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1788 = conv(bias = layers_6_mlp_fc2_bias, dilations = var_1786, groups = var_1564, pad = var_1788_pad_0, pad_type = var_1788_pad_type_0, strides = var_1784, weight = layers_6_mlp_fc2_weight, x = input_223)[name = tensor<string, []>("op_1788")];
+            tensor<fp16, [1, 384, 1, 512]> x_57 = add(x = var_1770_cast_fp16, y = var_1788)[name = tensor<string, []>("x_57")];
+            tensor<fp16, []> var_1561_promoted_1 = const()[name = tensor<string, []>("op_1561_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1562_promoted_1 = const()[name = tensor<string, []>("op_1562_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_59 = clip(alpha = var_1561_promoted_1, beta = var_1562_promoted_1, x = x_57)[name = tensor<string, []>("x_59")];
+            tensor<int32, [1]> var_1793 = const()[name = tensor<string, []>("op_1793"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_29 = reduce_mean(axes = var_1793, keep_dims = var_1566, x = x_59)[name = tensor<string, []>("mean_29")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_29 = sub(x = x_59, y = mean_29)[name = tensor<string, []>("zero_mean_29")];
-            tensor<fp16, []> var_1608_promoted_1 = const()[name = tensor<string, []>("op_1608_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_1842 = pow(x = zero_mean_29, y = var_1608_promoted_1)[name = tensor<string, []>("op_1842")];
-            tensor<int32, [1]> var_1843 = const()[name = tensor<string, []>("op_1843"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_1844 = reduce_mean(axes = var_1843, keep_dims = var_1603, x = var_1842)[name = tensor<string, []>("op_1844")];
-            tensor<fp16, []> var_1845_to_fp16 = const()[name = tensor<string, []>("op_1845_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_1846_cast_fp16 = add(x = var_1844, y = var_1845_to_fp16)[name = tensor<string, []>("op_1846_cast_fp16")];
+            tensor<fp16, []> var_1563_promoted_1 = const()[name = tensor<string, []>("op_1563_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_1796 = pow(x = zero_mean_29, y = var_1563_promoted_1)[name = tensor<string, []>("op_1796")];
+            tensor<int32, [1]> var_1797 = const()[name = tensor<string, []>("op_1797"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_1798 = reduce_mean(axes = var_1797, keep_dims = var_1566, x = var_1796)[name = tensor<string, []>("op_1798")];
+            tensor<fp16, []> var_1799_to_fp16 = const()[name = tensor<string, []>("op_1799_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_1800_cast_fp16 = add(x = var_1798, y = var_1799_to_fp16)[name = tensor<string, []>("op_1800_cast_fp16")];
             tensor<fp32, []> denom_29_epsilon_0 = const()[name = tensor<string, []>("denom_29_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0, x = var_1846_cast_fp16)[name = tensor<string, []>("denom_29_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_1848_cast_fp16 = mul(x = zero_mean_29, y = denom_29_cast_fp16)[name = tensor<string, []>("op_1848_cast_fp16")];
-            tensor<fp16, [384]> var_1850_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1850_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66812032)))];
-            tensor<fp16, [384]> var_1850_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1850_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66812864)))];
-            tensor<fp16, []> var_1850_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1850_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_1850_cast_fp16 = batch_norm(beta = var_1850_beta_0_to_fp16, epsilon = var_1850_epsilon_0_to_fp16, gamma = var_1850_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1848_cast_fp16)[name = tensor<string, []>("op_1850_cast_fp16")];
-            tensor<int32, []> var_1855 = const()[name = tensor<string, []>("op_1855"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_1856 = const()[name = tensor<string, []>("op_1856"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_1857 = const()[name = tensor<string, []>("op_1857"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_1882 = const()[name = tensor<string, []>("op_1882"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1884 = const()[name = tensor<string, []>("op_1884"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1886_pad_type_0 = const()[name = tensor<string, []>("op_1886_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1886_pad_0 = const()[name = tensor<string, []>("op_1886_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1886 = conv(bias = layers_7_attention_q_proj_bias, dilations = var_1884, groups = var_1855, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1882, weight = layers_7_attention_q_proj_weight, x = var_1850_cast_fp16)[name = tensor<string, []>("op_1886")];
-            tensor<int32, [4]> var_1887 = const()[name = tensor<string, []>("op_1887"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1888 = reshape(shape = var_1887, x = var_1886)[name = tensor<string, []>("op_1888")];
-            tensor<int32, [2]> var_1891 = const()[name = tensor<string, []>("op_1891"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1893 = const()[name = tensor<string, []>("op_1893"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1895_pad_type_0 = const()[name = tensor<string, []>("op_1895_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1895_pad_0 = const()[name = tensor<string, []>("op_1895_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1895 = conv(bias = layers_7_attention_k_proj_bias, dilations = var_1893, groups = var_1855, pad = var_1895_pad_0, pad_type = var_1895_pad_type_0, strides = var_1891, weight = layers_7_attention_k_proj_weight, x = var_1850_cast_fp16)[name = tensor<string, []>("op_1895")];
-            tensor<int32, [4]> var_1896 = const()[name = tensor<string, []>("op_1896"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_15 = reshape(shape = var_1896, x = var_1895)[name = tensor<string, []>("ks_15")];
-            tensor<int32, [2]> var_1900 = const()[name = tensor<string, []>("op_1900"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_1902 = const()[name = tensor<string, []>("op_1902"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_1904_pad_type_0 = const()[name = tensor<string, []>("op_1904_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_1904_pad_0 = const()[name = tensor<string, []>("op_1904_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_1904 = conv(bias = layers_7_attention_v_proj_bias, dilations = var_1902, groups = var_1855, pad = var_1904_pad_0, pad_type = var_1904_pad_type_0, strides = var_1900, weight = layers_7_attention_v_proj_weight, x = var_1850_cast_fp16)[name = tensor<string, []>("op_1904")];
-            tensor<int32, [4]> var_1905 = const()[name = tensor<string, []>("op_1905"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_1906 = reshape(shape = var_1905, x = var_1904)[name = tensor<string, []>("op_1906")];
-            tensor<int32, [12]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1907_axis_0 = const()[name = tensor<string, []>("op_1907_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1907_0, tensor<fp16, [1, 32, 1, 512]> var_1907_1, tensor<fp16, [1, 32, 1, 512]> var_1907_2, tensor<fp16, [1, 32, 1, 512]> var_1907_3, tensor<fp16, [1, 32, 1, 512]> var_1907_4, tensor<fp16, [1, 32, 1, 512]> var_1907_5, tensor<fp16, [1, 32, 1, 512]> var_1907_6, tensor<fp16, [1, 32, 1, 512]> var_1907_7, tensor<fp16, [1, 32, 1, 512]> var_1907_8, tensor<fp16, [1, 32, 1, 512]> var_1907_9, tensor<fp16, [1, 32, 1, 512]> var_1907_10, tensor<fp16, [1, 32, 1, 512]> var_1907_11 = split(axis = var_1907_axis_0, split_sizes = tile_37, x = var_1888)[name = tensor<string, []>("op_1907")];
-            tensor<int32, [4]> var_1920_perm_0 = const()[name = tensor<string, []>("op_1920_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1921_axis_0 = const()[name = tensor<string, []>("op_1921_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_4 = transpose(perm = var_1920_perm_0, x = ks_15)[name = tensor<string, []>("transpose_4")];
-            tensor<fp16, [1, 512, 1, 32]> var_1921_0, tensor<fp16, [1, 512, 1, 32]> var_1921_1, tensor<fp16, [1, 512, 1, 32]> var_1921_2, tensor<fp16, [1, 512, 1, 32]> var_1921_3, tensor<fp16, [1, 512, 1, 32]> var_1921_4, tensor<fp16, [1, 512, 1, 32]> var_1921_5, tensor<fp16, [1, 512, 1, 32]> var_1921_6, tensor<fp16, [1, 512, 1, 32]> var_1921_7, tensor<fp16, [1, 512, 1, 32]> var_1921_8, tensor<fp16, [1, 512, 1, 32]> var_1921_9, tensor<fp16, [1, 512, 1, 32]> var_1921_10, tensor<fp16, [1, 512, 1, 32]> var_1921_11 = split(axis = var_1921_axis_0, split_sizes = tile_38, x = transpose_4)[name = tensor<string, []>("op_1921")];
-            tensor<int32, [12]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_1934_axis_0 = const()[name = tensor<string, []>("op_1934_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_1934_0, tensor<fp16, [1, 32, 1, 512]> var_1934_1, tensor<fp16, [1, 32, 1, 512]> var_1934_2, tensor<fp16, [1, 32, 1, 512]> var_1934_3, tensor<fp16, [1, 32, 1, 512]> var_1934_4, tensor<fp16, [1, 32, 1, 512]> var_1934_5, tensor<fp16, [1, 32, 1, 512]> var_1934_6, tensor<fp16, [1, 32, 1, 512]> var_1934_7, tensor<fp16, [1, 32, 1, 512]> var_1934_8, tensor<fp16, [1, 32, 1, 512]> var_1934_9, tensor<fp16, [1, 32, 1, 512]> var_1934_10, tensor<fp16, [1, 32, 1, 512]> var_1934_11 = split(axis = var_1934_axis_0, split_sizes = tile_39, x = var_1906)[name = tensor<string, []>("op_1934")];
-            tensor<string, []> var_1948_equation_0 = const()[name = tensor<string, []>("op_1948_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1948 = einsum(equation = var_1948_equation_0, values = (var_1921_0, var_1907_0))[name = tensor<string, []>("op_1948")];
-            tensor<fp16, []> var_1949_to_fp16 = const()[name = tensor<string, []>("op_1949_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_169_cast_fp16 = mul(x = var_1948, y = var_1949_to_fp16)[name = tensor<string, []>("w_169_cast_fp16")];
-            tensor<string, []> var_1952_equation_0 = const()[name = tensor<string, []>("op_1952_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1952 = einsum(equation = var_1952_equation_0, values = (var_1921_1, var_1907_1))[name = tensor<string, []>("op_1952")];
-            tensor<fp16, []> var_1953_to_fp16 = const()[name = tensor<string, []>("op_1953_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_171_cast_fp16 = mul(x = var_1952, y = var_1953_to_fp16)[name = tensor<string, []>("w_171_cast_fp16")];
-            tensor<string, []> var_1956_equation_0 = const()[name = tensor<string, []>("op_1956_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1956 = einsum(equation = var_1956_equation_0, values = (var_1921_2, var_1907_2))[name = tensor<string, []>("op_1956")];
-            tensor<fp16, []> var_1957_to_fp16 = const()[name = tensor<string, []>("op_1957_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_173_cast_fp16 = mul(x = var_1956, y = var_1957_to_fp16)[name = tensor<string, []>("w_173_cast_fp16")];
-            tensor<string, []> var_1960_equation_0 = const()[name = tensor<string, []>("op_1960_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1960 = einsum(equation = var_1960_equation_0, values = (var_1921_3, var_1907_3))[name = tensor<string, []>("op_1960")];
-            tensor<fp16, []> var_1961_to_fp16 = const()[name = tensor<string, []>("op_1961_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_175_cast_fp16 = mul(x = var_1960, y = var_1961_to_fp16)[name = tensor<string, []>("w_175_cast_fp16")];
-            tensor<string, []> var_1964_equation_0 = const()[name = tensor<string, []>("op_1964_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1964 = einsum(equation = var_1964_equation_0, values = (var_1921_4, var_1907_4))[name = tensor<string, []>("op_1964")];
-            tensor<fp16, []> var_1965_to_fp16 = const()[name = tensor<string, []>("op_1965_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_177_cast_fp16 = mul(x = var_1964, y = var_1965_to_fp16)[name = tensor<string, []>("w_177_cast_fp16")];
-            tensor<string, []> var_1968_equation_0 = const()[name = tensor<string, []>("op_1968_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1968 = einsum(equation = var_1968_equation_0, values = (var_1921_5, var_1907_5))[name = tensor<string, []>("op_1968")];
-            tensor<fp16, []> var_1969_to_fp16 = const()[name = tensor<string, []>("op_1969_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_179_cast_fp16 = mul(x = var_1968, y = var_1969_to_fp16)[name = tensor<string, []>("w_179_cast_fp16")];
-            tensor<string, []> var_1972_equation_0 = const()[name = tensor<string, []>("op_1972_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1972 = einsum(equation = var_1972_equation_0, values = (var_1921_6, var_1907_6))[name = tensor<string, []>("op_1972")];
-            tensor<fp16, []> var_1973_to_fp16 = const()[name = tensor<string, []>("op_1973_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_181_cast_fp16 = mul(x = var_1972, y = var_1973_to_fp16)[name = tensor<string, []>("w_181_cast_fp16")];
-            tensor<string, []> var_1976_equation_0 = const()[name = tensor<string, []>("op_1976_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1976 = einsum(equation = var_1976_equation_0, values = (var_1921_7, var_1907_7))[name = tensor<string, []>("op_1976")];
-            tensor<fp16, []> var_1977_to_fp16 = const()[name = tensor<string, []>("op_1977_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_183_cast_fp16 = mul(x = var_1976, y = var_1977_to_fp16)[name = tensor<string, []>("w_183_cast_fp16")];
-            tensor<string, []> var_1980_equation_0 = const()[name = tensor<string, []>("op_1980_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1980 = einsum(equation = var_1980_equation_0, values = (var_1921_8, var_1907_8))[name = tensor<string, []>("op_1980")];
-            tensor<fp16, []> var_1981_to_fp16 = const()[name = tensor<string, []>("op_1981_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_185_cast_fp16 = mul(x = var_1980, y = var_1981_to_fp16)[name = tensor<string, []>("w_185_cast_fp16")];
-            tensor<string, []> var_1984_equation_0 = const()[name = tensor<string, []>("op_1984_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1984 = einsum(equation = var_1984_equation_0, values = (var_1921_9, var_1907_9))[name = tensor<string, []>("op_1984")];
-            tensor<fp16, []> var_1985_to_fp16 = const()[name = tensor<string, []>("op_1985_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_187_cast_fp16 = mul(x = var_1984, y = var_1985_to_fp16)[name = tensor<string, []>("w_187_cast_fp16")];
-            tensor<string, []> var_1988_equation_0 = const()[name = tensor<string, []>("op_1988_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1988 = einsum(equation = var_1988_equation_0, values = (var_1921_10, var_1907_10))[name = tensor<string, []>("op_1988")];
-            tensor<fp16, []> var_1989_to_fp16 = const()[name = tensor<string, []>("op_1989_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_189_cast_fp16 = mul(x = var_1988, y = var_1989_to_fp16)[name = tensor<string, []>("w_189_cast_fp16")];
-            tensor<string, []> var_1992_equation_0 = const()[name = tensor<string, []>("op_1992_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_1992 = einsum(equation = var_1992_equation_0, values = (var_1921_11, var_1907_11))[name = tensor<string, []>("op_1992")];
-            tensor<fp16, []> var_1993_to_fp16 = const()[name = tensor<string, []>("op_1993_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_191_cast_fp16 = mul(x = var_1992, y = var_1993_to_fp16)[name = tensor<string, []>("w_191_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1995_cast_fp16 = add(x = w_169_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1995_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1996_cast_fp16 = softmax(axis = var_1855, x = var_1995_cast_fp16)[name = tensor<string, []>("op_1996_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1997_cast_fp16 = add(x = w_171_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1997_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1998_cast_fp16 = softmax(axis = var_1855, x = var_1997_cast_fp16)[name = tensor<string, []>("op_1998_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_1999_cast_fp16 = add(x = w_173_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_1999_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2000_cast_fp16 = softmax(axis = var_1855, x = var_1999_cast_fp16)[name = tensor<string, []>("op_2000_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2001_cast_fp16 = add(x = w_175_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2001_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2002_cast_fp16 = softmax(axis = var_1855, x = var_2001_cast_fp16)[name = tensor<string, []>("op_2002_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2003_cast_fp16 = add(x = w_177_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2003_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2004_cast_fp16 = softmax(axis = var_1855, x = var_2003_cast_fp16)[name = tensor<string, []>("op_2004_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2005_cast_fp16 = add(x = w_179_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2005_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2006_cast_fp16 = softmax(axis = var_1855, x = var_2005_cast_fp16)[name = tensor<string, []>("op_2006_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2007_cast_fp16 = add(x = w_181_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2007_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2008_cast_fp16 = softmax(axis = var_1855, x = var_2007_cast_fp16)[name = tensor<string, []>("op_2008_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2009_cast_fp16 = add(x = w_183_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2009_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2010_cast_fp16 = softmax(axis = var_1855, x = var_2009_cast_fp16)[name = tensor<string, []>("op_2010_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2011_cast_fp16 = add(x = w_185_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2011_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2012_cast_fp16 = softmax(axis = var_1855, x = var_2011_cast_fp16)[name = tensor<string, []>("op_2012_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2013_cast_fp16 = add(x = w_187_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2013_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2014_cast_fp16 = softmax(axis = var_1855, x = var_2013_cast_fp16)[name = tensor<string, []>("op_2014_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2015_cast_fp16 = add(x = w_189_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2015_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2016_cast_fp16 = softmax(axis = var_1855, x = var_2015_cast_fp16)[name = tensor<string, []>("op_2016_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2017_cast_fp16 = add(x = w_191_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2017_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2018_cast_fp16 = softmax(axis = var_1855, x = var_2017_cast_fp16)[name = tensor<string, []>("op_2018_cast_fp16")];
-            tensor<string, []> var_2020_equation_0 = const()[name = tensor<string, []>("op_2020_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2020_cast_fp16 = einsum(equation = var_2020_equation_0, values = (var_1934_0, var_1996_cast_fp16))[name = tensor<string, []>("op_2020_cast_fp16")];
-            tensor<string, []> var_2022_equation_0 = const()[name = tensor<string, []>("op_2022_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2022_cast_fp16 = einsum(equation = var_2022_equation_0, values = (var_1934_1, var_1998_cast_fp16))[name = tensor<string, []>("op_2022_cast_fp16")];
-            tensor<string, []> var_2024_equation_0 = const()[name = tensor<string, []>("op_2024_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2024_cast_fp16 = einsum(equation = var_2024_equation_0, values = (var_1934_2, var_2000_cast_fp16))[name = tensor<string, []>("op_2024_cast_fp16")];
-            tensor<string, []> var_2026_equation_0 = const()[name = tensor<string, []>("op_2026_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2026_cast_fp16 = einsum(equation = var_2026_equation_0, values = (var_1934_3, var_2002_cast_fp16))[name = tensor<string, []>("op_2026_cast_fp16")];
-            tensor<string, []> var_2028_equation_0 = const()[name = tensor<string, []>("op_2028_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2028_cast_fp16 = einsum(equation = var_2028_equation_0, values = (var_1934_4, var_2004_cast_fp16))[name = tensor<string, []>("op_2028_cast_fp16")];
-            tensor<string, []> var_2030_equation_0 = const()[name = tensor<string, []>("op_2030_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2030_cast_fp16 = einsum(equation = var_2030_equation_0, values = (var_1934_5, var_2006_cast_fp16))[name = tensor<string, []>("op_2030_cast_fp16")];
-            tensor<string, []> var_2032_equation_0 = const()[name = tensor<string, []>("op_2032_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2032_cast_fp16 = einsum(equation = var_2032_equation_0, values = (var_1934_6, var_2008_cast_fp16))[name = tensor<string, []>("op_2032_cast_fp16")];
-            tensor<string, []> var_2034_equation_0 = const()[name = tensor<string, []>("op_2034_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2034_cast_fp16 = einsum(equation = var_2034_equation_0, values = (var_1934_7, var_2010_cast_fp16))[name = tensor<string, []>("op_2034_cast_fp16")];
-            tensor<string, []> var_2036_equation_0 = const()[name = tensor<string, []>("op_2036_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2036_cast_fp16 = einsum(equation = var_2036_equation_0, values = (var_1934_8, var_2012_cast_fp16))[name = tensor<string, []>("op_2036_cast_fp16")];
-            tensor<string, []> var_2038_equation_0 = const()[name = tensor<string, []>("op_2038_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2038_cast_fp16 = einsum(equation = var_2038_equation_0, values = (var_1934_9, var_2014_cast_fp16))[name = tensor<string, []>("op_2038_cast_fp16")];
-            tensor<string, []> var_2040_equation_0 = const()[name = tensor<string, []>("op_2040_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2040_cast_fp16 = einsum(equation = var_2040_equation_0, values = (var_1934_10, var_2016_cast_fp16))[name = tensor<string, []>("op_2040_cast_fp16")];
-            tensor<string, []> var_2042_equation_0 = const()[name = tensor<string, []>("op_2042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2042_cast_fp16 = einsum(equation = var_2042_equation_0, values = (var_1934_11, var_2018_cast_fp16))[name = tensor<string, []>("op_2042_cast_fp16")];
-            tensor<bool, []> var_2044_interleave_0 = const()[name = tensor<string, []>("op_2044_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2044_cast_fp16 = concat(axis = var_1855, interleave = var_2044_interleave_0, values = (var_2020_cast_fp16, var_2022_cast_fp16, var_2024_cast_fp16, var_2026_cast_fp16, var_2028_cast_fp16, var_2030_cast_fp16, var_2032_cast_fp16, var_2034_cast_fp16, var_2036_cast_fp16, var_2038_cast_fp16, var_2040_cast_fp16, var_2042_cast_fp16))[name = tensor<string, []>("op_2044_cast_fp16")];
-            tensor<int32, [2]> var_2048 = const()[name = tensor<string, []>("op_2048"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2050 = const()[name = tensor<string, []>("op_2050"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2052_pad_type_0 = const()[name = tensor<string, []>("op_2052_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2052_pad_0 = const()[name = tensor<string, []>("op_2052_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2052 = conv(bias = layers_7_attention_o_proj_bias, dilations = var_2050, groups = var_1855, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2048, weight = layers_7_attention_o_proj_weight, x = var_2044_cast_fp16)[name = tensor<string, []>("op_2052")];
-            tensor<bool, []> var_2054_interleave_0 = const()[name = tensor<string, []>("op_2054_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2054 = concat(axis = var_1856, interleave = var_2054_interleave_0, values = var_2052)[name = tensor<string, []>("op_2054")];
-            tensor<fp16, [1, 384, 1, 512]> x_61 = add(x = var_1850_cast_fp16, y = var_2054)[name = tensor<string, []>("x_61")];
-            tensor<fp16, []> var_1853_promoted = const()[name = tensor<string, []>("op_1853_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1854_promoted = const()[name = tensor<string, []>("op_1854_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_63 = clip(alpha = var_1853_promoted, beta = var_1854_promoted, x = x_61)[name = tensor<string, []>("x_63")];
-            tensor<int32, [1]> var_2059 = const()[name = tensor<string, []>("op_2059"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_31 = reduce_mean(axes = var_2059, keep_dims = var_1857, x = x_63)[name = tensor<string, []>("mean_31")];
+            tensor<fp16, [1, 1, 1, 512]> denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0, x = var_1800_cast_fp16)[name = tensor<string, []>("denom_29_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_1802_cast_fp16 = mul(x = zero_mean_29, y = denom_29_cast_fp16)[name = tensor<string, []>("op_1802_cast_fp16")];
+            tensor<fp16, [384]> var_1804_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_1804_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66812032)))];
+            tensor<fp16, [384]> var_1804_beta_0_to_fp16 = const()[name = tensor<string, []>("op_1804_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66812864)))];
+            tensor<fp16, []> var_1804_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_1804_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_1804_cast_fp16 = batch_norm(beta = var_1804_beta_0_to_fp16, epsilon = var_1804_epsilon_0_to_fp16, gamma = var_1804_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1802_cast_fp16)[name = tensor<string, []>("op_1804_cast_fp16")];
+            tensor<int32, []> var_1810 = const()[name = tensor<string, []>("op_1810"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_1811 = const()[name = tensor<string, []>("op_1811"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_1812 = const()[name = tensor<string, []>("op_1812"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_1834 = const()[name = tensor<string, []>("op_1834"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1836 = const()[name = tensor<string, []>("op_1836"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1838_pad_type_0 = const()[name = tensor<string, []>("op_1838_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1838_pad_0 = const()[name = tensor<string, []>("op_1838_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1838 = conv(bias = layers_7_attention_q_proj_bias, dilations = var_1836, groups = var_1810, pad = var_1838_pad_0, pad_type = var_1838_pad_type_0, strides = var_1834, weight = layers_7_attention_q_proj_weight, x = var_1804_cast_fp16)[name = tensor<string, []>("op_1838")];
+            tensor<int32, [2]> var_1841 = const()[name = tensor<string, []>("op_1841"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1843 = const()[name = tensor<string, []>("op_1843"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_15_pad_type_0 = const()[name = tensor<string, []>("ks_15_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_15_pad_0 = const()[name = tensor<string, []>("ks_15_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_15 = conv(bias = layers_7_attention_k_proj_bias, dilations = var_1843, groups = var_1810, pad = ks_15_pad_0, pad_type = ks_15_pad_type_0, strides = var_1841, weight = layers_7_attention_k_proj_weight, x = var_1804_cast_fp16)[name = tensor<string, []>("ks_15")];
+            tensor<int32, [2]> var_1848 = const()[name = tensor<string, []>("op_1848"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1850 = const()[name = tensor<string, []>("op_1850"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1852_pad_type_0 = const()[name = tensor<string, []>("op_1852_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1852_pad_0 = const()[name = tensor<string, []>("op_1852_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1852 = conv(bias = layers_7_attention_v_proj_bias, dilations = var_1850, groups = var_1810, pad = var_1852_pad_0, pad_type = var_1852_pad_type_0, strides = var_1848, weight = layers_7_attention_v_proj_weight, x = var_1804_cast_fp16)[name = tensor<string, []>("op_1852")];
+            tensor<int32, [12]> tile_37 = const()[name = tensor<string, []>("tile_37"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1853_axis_0 = const()[name = tensor<string, []>("op_1853_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1853_0, tensor<fp16, [1, 32, 1, 512]> var_1853_1, tensor<fp16, [1, 32, 1, 512]> var_1853_2, tensor<fp16, [1, 32, 1, 512]> var_1853_3, tensor<fp16, [1, 32, 1, 512]> var_1853_4, tensor<fp16, [1, 32, 1, 512]> var_1853_5, tensor<fp16, [1, 32, 1, 512]> var_1853_6, tensor<fp16, [1, 32, 1, 512]> var_1853_7, tensor<fp16, [1, 32, 1, 512]> var_1853_8, tensor<fp16, [1, 32, 1, 512]> var_1853_9, tensor<fp16, [1, 32, 1, 512]> var_1853_10, tensor<fp16, [1, 32, 1, 512]> var_1853_11 = split(axis = var_1853_axis_0, split_sizes = tile_37, x = var_1838)[name = tensor<string, []>("op_1853")];
+            tensor<int32, [4]> var_1866_perm_0 = const()[name = tensor<string, []>("op_1866_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_38 = const()[name = tensor<string, []>("tile_38"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1867_axis_0 = const()[name = tensor<string, []>("op_1867_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_4 = transpose(perm = var_1866_perm_0, x = ks_15)[name = tensor<string, []>("transpose_4")];
+            tensor<fp16, [1, 512, 1, 32]> var_1867_0, tensor<fp16, [1, 512, 1, 32]> var_1867_1, tensor<fp16, [1, 512, 1, 32]> var_1867_2, tensor<fp16, [1, 512, 1, 32]> var_1867_3, tensor<fp16, [1, 512, 1, 32]> var_1867_4, tensor<fp16, [1, 512, 1, 32]> var_1867_5, tensor<fp16, [1, 512, 1, 32]> var_1867_6, tensor<fp16, [1, 512, 1, 32]> var_1867_7, tensor<fp16, [1, 512, 1, 32]> var_1867_8, tensor<fp16, [1, 512, 1, 32]> var_1867_9, tensor<fp16, [1, 512, 1, 32]> var_1867_10, tensor<fp16, [1, 512, 1, 32]> var_1867_11 = split(axis = var_1867_axis_0, split_sizes = tile_38, x = transpose_4)[name = tensor<string, []>("op_1867")];
+            tensor<int32, [12]> tile_39 = const()[name = tensor<string, []>("tile_39"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_1880_axis_0 = const()[name = tensor<string, []>("op_1880_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_1880_0, tensor<fp16, [1, 32, 1, 512]> var_1880_1, tensor<fp16, [1, 32, 1, 512]> var_1880_2, tensor<fp16, [1, 32, 1, 512]> var_1880_3, tensor<fp16, [1, 32, 1, 512]> var_1880_4, tensor<fp16, [1, 32, 1, 512]> var_1880_5, tensor<fp16, [1, 32, 1, 512]> var_1880_6, tensor<fp16, [1, 32, 1, 512]> var_1880_7, tensor<fp16, [1, 32, 1, 512]> var_1880_8, tensor<fp16, [1, 32, 1, 512]> var_1880_9, tensor<fp16, [1, 32, 1, 512]> var_1880_10, tensor<fp16, [1, 32, 1, 512]> var_1880_11 = split(axis = var_1880_axis_0, split_sizes = tile_39, x = var_1852)[name = tensor<string, []>("op_1880")];
+            tensor<string, []> var_1894_equation_0 = const()[name = tensor<string, []>("op_1894_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1894 = einsum(equation = var_1894_equation_0, values = (var_1867_0, var_1853_0))[name = tensor<string, []>("op_1894")];
+            tensor<fp16, []> var_1895_to_fp16 = const()[name = tensor<string, []>("op_1895_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_169_cast_fp16 = mul(x = var_1894, y = var_1895_to_fp16)[name = tensor<string, []>("w_169_cast_fp16")];
+            tensor<string, []> var_1898_equation_0 = const()[name = tensor<string, []>("op_1898_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1898 = einsum(equation = var_1898_equation_0, values = (var_1867_1, var_1853_1))[name = tensor<string, []>("op_1898")];
+            tensor<fp16, []> var_1899_to_fp16 = const()[name = tensor<string, []>("op_1899_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_171_cast_fp16 = mul(x = var_1898, y = var_1899_to_fp16)[name = tensor<string, []>("w_171_cast_fp16")];
+            tensor<string, []> var_1902_equation_0 = const()[name = tensor<string, []>("op_1902_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1902 = einsum(equation = var_1902_equation_0, values = (var_1867_2, var_1853_2))[name = tensor<string, []>("op_1902")];
+            tensor<fp16, []> var_1903_to_fp16 = const()[name = tensor<string, []>("op_1903_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_173_cast_fp16 = mul(x = var_1902, y = var_1903_to_fp16)[name = tensor<string, []>("w_173_cast_fp16")];
+            tensor<string, []> var_1906_equation_0 = const()[name = tensor<string, []>("op_1906_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1906 = einsum(equation = var_1906_equation_0, values = (var_1867_3, var_1853_3))[name = tensor<string, []>("op_1906")];
+            tensor<fp16, []> var_1907_to_fp16 = const()[name = tensor<string, []>("op_1907_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_175_cast_fp16 = mul(x = var_1906, y = var_1907_to_fp16)[name = tensor<string, []>("w_175_cast_fp16")];
+            tensor<string, []> var_1910_equation_0 = const()[name = tensor<string, []>("op_1910_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1910 = einsum(equation = var_1910_equation_0, values = (var_1867_4, var_1853_4))[name = tensor<string, []>("op_1910")];
+            tensor<fp16, []> var_1911_to_fp16 = const()[name = tensor<string, []>("op_1911_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_177_cast_fp16 = mul(x = var_1910, y = var_1911_to_fp16)[name = tensor<string, []>("w_177_cast_fp16")];
+            tensor<string, []> var_1914_equation_0 = const()[name = tensor<string, []>("op_1914_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1914 = einsum(equation = var_1914_equation_0, values = (var_1867_5, var_1853_5))[name = tensor<string, []>("op_1914")];
+            tensor<fp16, []> var_1915_to_fp16 = const()[name = tensor<string, []>("op_1915_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_179_cast_fp16 = mul(x = var_1914, y = var_1915_to_fp16)[name = tensor<string, []>("w_179_cast_fp16")];
+            tensor<string, []> var_1918_equation_0 = const()[name = tensor<string, []>("op_1918_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1918 = einsum(equation = var_1918_equation_0, values = (var_1867_6, var_1853_6))[name = tensor<string, []>("op_1918")];
+            tensor<fp16, []> var_1919_to_fp16 = const()[name = tensor<string, []>("op_1919_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_181_cast_fp16 = mul(x = var_1918, y = var_1919_to_fp16)[name = tensor<string, []>("w_181_cast_fp16")];
+            tensor<string, []> var_1922_equation_0 = const()[name = tensor<string, []>("op_1922_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1922 = einsum(equation = var_1922_equation_0, values = (var_1867_7, var_1853_7))[name = tensor<string, []>("op_1922")];
+            tensor<fp16, []> var_1923_to_fp16 = const()[name = tensor<string, []>("op_1923_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_183_cast_fp16 = mul(x = var_1922, y = var_1923_to_fp16)[name = tensor<string, []>("w_183_cast_fp16")];
+            tensor<string, []> var_1926_equation_0 = const()[name = tensor<string, []>("op_1926_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1926 = einsum(equation = var_1926_equation_0, values = (var_1867_8, var_1853_8))[name = tensor<string, []>("op_1926")];
+            tensor<fp16, []> var_1927_to_fp16 = const()[name = tensor<string, []>("op_1927_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_185_cast_fp16 = mul(x = var_1926, y = var_1927_to_fp16)[name = tensor<string, []>("w_185_cast_fp16")];
+            tensor<string, []> var_1930_equation_0 = const()[name = tensor<string, []>("op_1930_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1930 = einsum(equation = var_1930_equation_0, values = (var_1867_9, var_1853_9))[name = tensor<string, []>("op_1930")];
+            tensor<fp16, []> var_1931_to_fp16 = const()[name = tensor<string, []>("op_1931_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_187_cast_fp16 = mul(x = var_1930, y = var_1931_to_fp16)[name = tensor<string, []>("w_187_cast_fp16")];
+            tensor<string, []> var_1934_equation_0 = const()[name = tensor<string, []>("op_1934_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1934 = einsum(equation = var_1934_equation_0, values = (var_1867_10, var_1853_10))[name = tensor<string, []>("op_1934")];
+            tensor<fp16, []> var_1935_to_fp16 = const()[name = tensor<string, []>("op_1935_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_189_cast_fp16 = mul(x = var_1934, y = var_1935_to_fp16)[name = tensor<string, []>("w_189_cast_fp16")];
+            tensor<string, []> var_1938_equation_0 = const()[name = tensor<string, []>("op_1938_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_1938 = einsum(equation = var_1938_equation_0, values = (var_1867_11, var_1853_11))[name = tensor<string, []>("op_1938")];
+            tensor<fp16, []> var_1939_to_fp16 = const()[name = tensor<string, []>("op_1939_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_191_cast_fp16 = mul(x = var_1938, y = var_1939_to_fp16)[name = tensor<string, []>("w_191_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_227_cast_fp16 = add(x = w_169_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_227_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1942_cast_fp16 = softmax(axis = var_1810, x = input_227_cast_fp16)[name = tensor<string, []>("op_1942_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_229_cast_fp16 = add(x = w_171_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_229_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1944_cast_fp16 = softmax(axis = var_1810, x = input_229_cast_fp16)[name = tensor<string, []>("op_1944_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_231_cast_fp16 = add(x = w_173_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_231_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1946_cast_fp16 = softmax(axis = var_1810, x = input_231_cast_fp16)[name = tensor<string, []>("op_1946_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_233_cast_fp16 = add(x = w_175_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_233_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1948_cast_fp16 = softmax(axis = var_1810, x = input_233_cast_fp16)[name = tensor<string, []>("op_1948_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_235_cast_fp16 = add(x = w_177_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_235_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1950_cast_fp16 = softmax(axis = var_1810, x = input_235_cast_fp16)[name = tensor<string, []>("op_1950_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_237_cast_fp16 = add(x = w_179_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_237_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1952_cast_fp16 = softmax(axis = var_1810, x = input_237_cast_fp16)[name = tensor<string, []>("op_1952_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_239_cast_fp16 = add(x = w_181_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_239_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1954_cast_fp16 = softmax(axis = var_1810, x = input_239_cast_fp16)[name = tensor<string, []>("op_1954_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_241_cast_fp16 = add(x = w_183_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_241_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1956_cast_fp16 = softmax(axis = var_1810, x = input_241_cast_fp16)[name = tensor<string, []>("op_1956_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_243_cast_fp16 = add(x = w_185_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_243_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1958_cast_fp16 = softmax(axis = var_1810, x = input_243_cast_fp16)[name = tensor<string, []>("op_1958_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_245_cast_fp16 = add(x = w_187_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_245_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1960_cast_fp16 = softmax(axis = var_1810, x = input_245_cast_fp16)[name = tensor<string, []>("op_1960_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_247_cast_fp16 = add(x = w_189_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_247_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1962_cast_fp16 = softmax(axis = var_1810, x = input_247_cast_fp16)[name = tensor<string, []>("op_1962_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_249_cast_fp16 = add(x = w_191_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_249_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_1964_cast_fp16 = softmax(axis = var_1810, x = input_249_cast_fp16)[name = tensor<string, []>("op_1964_cast_fp16")];
+            tensor<string, []> var_1966_equation_0 = const()[name = tensor<string, []>("op_1966_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1966_cast_fp16 = einsum(equation = var_1966_equation_0, values = (var_1880_0, var_1942_cast_fp16))[name = tensor<string, []>("op_1966_cast_fp16")];
+            tensor<string, []> var_1968_equation_0 = const()[name = tensor<string, []>("op_1968_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1968_cast_fp16 = einsum(equation = var_1968_equation_0, values = (var_1880_1, var_1944_cast_fp16))[name = tensor<string, []>("op_1968_cast_fp16")];
+            tensor<string, []> var_1970_equation_0 = const()[name = tensor<string, []>("op_1970_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1970_cast_fp16 = einsum(equation = var_1970_equation_0, values = (var_1880_2, var_1946_cast_fp16))[name = tensor<string, []>("op_1970_cast_fp16")];
+            tensor<string, []> var_1972_equation_0 = const()[name = tensor<string, []>("op_1972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1972_cast_fp16 = einsum(equation = var_1972_equation_0, values = (var_1880_3, var_1948_cast_fp16))[name = tensor<string, []>("op_1972_cast_fp16")];
+            tensor<string, []> var_1974_equation_0 = const()[name = tensor<string, []>("op_1974_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1974_cast_fp16 = einsum(equation = var_1974_equation_0, values = (var_1880_4, var_1950_cast_fp16))[name = tensor<string, []>("op_1974_cast_fp16")];
+            tensor<string, []> var_1976_equation_0 = const()[name = tensor<string, []>("op_1976_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1976_cast_fp16 = einsum(equation = var_1976_equation_0, values = (var_1880_5, var_1952_cast_fp16))[name = tensor<string, []>("op_1976_cast_fp16")];
+            tensor<string, []> var_1978_equation_0 = const()[name = tensor<string, []>("op_1978_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1978_cast_fp16 = einsum(equation = var_1978_equation_0, values = (var_1880_6, var_1954_cast_fp16))[name = tensor<string, []>("op_1978_cast_fp16")];
+            tensor<string, []> var_1980_equation_0 = const()[name = tensor<string, []>("op_1980_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1980_cast_fp16 = einsum(equation = var_1980_equation_0, values = (var_1880_7, var_1956_cast_fp16))[name = tensor<string, []>("op_1980_cast_fp16")];
+            tensor<string, []> var_1982_equation_0 = const()[name = tensor<string, []>("op_1982_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1982_cast_fp16 = einsum(equation = var_1982_equation_0, values = (var_1880_8, var_1958_cast_fp16))[name = tensor<string, []>("op_1982_cast_fp16")];
+            tensor<string, []> var_1984_equation_0 = const()[name = tensor<string, []>("op_1984_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1984_cast_fp16 = einsum(equation = var_1984_equation_0, values = (var_1880_9, var_1960_cast_fp16))[name = tensor<string, []>("op_1984_cast_fp16")];
+            tensor<string, []> var_1986_equation_0 = const()[name = tensor<string, []>("op_1986_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1986_cast_fp16 = einsum(equation = var_1986_equation_0, values = (var_1880_10, var_1962_cast_fp16))[name = tensor<string, []>("op_1986_cast_fp16")];
+            tensor<string, []> var_1988_equation_0 = const()[name = tensor<string, []>("op_1988_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_1988_cast_fp16 = einsum(equation = var_1988_equation_0, values = (var_1880_11, var_1964_cast_fp16))[name = tensor<string, []>("op_1988_cast_fp16")];
+            tensor<bool, []> var_1990_interleave_0 = const()[name = tensor<string, []>("op_1990_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_1990_cast_fp16 = concat(axis = var_1810, interleave = var_1990_interleave_0, values = (var_1966_cast_fp16, var_1968_cast_fp16, var_1970_cast_fp16, var_1972_cast_fp16, var_1974_cast_fp16, var_1976_cast_fp16, var_1978_cast_fp16, var_1980_cast_fp16, var_1982_cast_fp16, var_1984_cast_fp16, var_1986_cast_fp16, var_1988_cast_fp16))[name = tensor<string, []>("op_1990_cast_fp16")];
+            tensor<int32, [2]> var_1994 = const()[name = tensor<string, []>("op_1994"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_1996 = const()[name = tensor<string, []>("op_1996"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_1998_pad_type_0 = const()[name = tensor<string, []>("op_1998_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_1998_pad_0 = const()[name = tensor<string, []>("op_1998_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_1998 = conv(bias = layers_7_attention_o_proj_bias, dilations = var_1996, groups = var_1810, pad = var_1998_pad_0, pad_type = var_1998_pad_type_0, strides = var_1994, weight = layers_7_attention_o_proj_weight, x = var_1990_cast_fp16)[name = tensor<string, []>("op_1998")];
+            tensor<bool, []> var_2000_interleave_0 = const()[name = tensor<string, []>("op_2000_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2000 = concat(axis = var_1811, interleave = var_2000_interleave_0, values = var_1998)[name = tensor<string, []>("op_2000")];
+            tensor<fp16, [1, 384, 1, 512]> x_61 = add(x = var_1804_cast_fp16, y = var_2000)[name = tensor<string, []>("x_61")];
+            tensor<fp16, []> var_1807_promoted = const()[name = tensor<string, []>("op_1807_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1808_promoted = const()[name = tensor<string, []>("op_1808_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_63 = clip(alpha = var_1807_promoted, beta = var_1808_promoted, x = x_61)[name = tensor<string, []>("x_63")];
+            tensor<int32, [1]> var_2005 = const()[name = tensor<string, []>("op_2005"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_31 = reduce_mean(axes = var_2005, keep_dims = var_1812, x = x_63)[name = tensor<string, []>("mean_31")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_31 = sub(x = x_63, y = mean_31)[name = tensor<string, []>("zero_mean_31")];
-            tensor<fp16, []> var_1862_promoted = const()[name = tensor<string, []>("op_1862_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2062 = pow(x = zero_mean_31, y = var_1862_promoted)[name = tensor<string, []>("op_2062")];
-            tensor<int32, [1]> var_2063 = const()[name = tensor<string, []>("op_2063"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2064 = reduce_mean(axes = var_2063, keep_dims = var_1857, x = var_2062)[name = tensor<string, []>("op_2064")];
-            tensor<fp16, []> var_2065_to_fp16 = const()[name = tensor<string, []>("op_2065_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2066_cast_fp16 = add(x = var_2064, y = var_2065_to_fp16)[name = tensor<string, []>("op_2066_cast_fp16")];
+            tensor<fp16, []> var_1809_promoted = const()[name = tensor<string, []>("op_1809_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2008 = pow(x = zero_mean_31, y = var_1809_promoted)[name = tensor<string, []>("op_2008")];
+            tensor<int32, [1]> var_2009 = const()[name = tensor<string, []>("op_2009"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2010 = reduce_mean(axes = var_2009, keep_dims = var_1812, x = var_2008)[name = tensor<string, []>("op_2010")];
+            tensor<fp16, []> var_2011_to_fp16 = const()[name = tensor<string, []>("op_2011_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2012_cast_fp16 = add(x = var_2010, y = var_2011_to_fp16)[name = tensor<string, []>("op_2012_cast_fp16")];
             tensor<fp32, []> denom_31_epsilon_0 = const()[name = tensor<string, []>("denom_31_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0, x = var_2066_cast_fp16)[name = tensor<string, []>("denom_31_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2068_cast_fp16 = mul(x = zero_mean_31, y = denom_31_cast_fp16)[name = tensor<string, []>("op_2068_cast_fp16")];
-            tensor<fp16, [384]> var_2070_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2070_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66813696)))];
-            tensor<fp16, [384]> var_2070_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2070_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66814528)))];
-            tensor<fp16, []> var_2070_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2070_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2070_cast_fp16 = batch_norm(beta = var_2070_beta_0_to_fp16, epsilon = var_2070_epsilon_0_to_fp16, gamma = var_2070_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2068_cast_fp16)[name = tensor<string, []>("op_2070_cast_fp16")];
-            tensor<int32, [2]> var_2076 = const()[name = tensor<string, []>("op_2076"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2078 = const()[name = tensor<string, []>("op_2078"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2080_pad_type_0 = const()[name = tensor<string, []>("op_2080_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2080_pad_0 = const()[name = tensor<string, []>("op_2080_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_2080 = conv(bias = layers_7_mlp_fc1_bias, dilations = var_2078, groups = var_1855, pad = var_2080_pad_0, pad_type = var_2080_pad_type_0, strides = var_2076, weight = layers_7_mlp_fc1_weight, x = var_2070_cast_fp16)[name = tensor<string, []>("op_2080")];
-            tensor<string, []> input_63_mode_0 = const()[name = tensor<string, []>("input_63_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_63 = gelu(mode = input_63_mode_0, x = var_2080)[name = tensor<string, []>("input_63")];
-            tensor<int32, [2]> var_2084 = const()[name = tensor<string, []>("op_2084"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2086 = const()[name = tensor<string, []>("op_2086"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2088_pad_type_0 = const()[name = tensor<string, []>("op_2088_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2088_pad_0 = const()[name = tensor<string, []>("op_2088_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2088 = conv(bias = layers_7_mlp_fc2_bias, dilations = var_2086, groups = var_1855, pad = var_2088_pad_0, pad_type = var_2088_pad_type_0, strides = var_2084, weight = layers_7_mlp_fc2_weight, x = input_63)[name = tensor<string, []>("op_2088")];
-            tensor<fp16, [1, 384, 1, 512]> x_65 = add(x = var_2070_cast_fp16, y = var_2088)[name = tensor<string, []>("x_65")];
-            tensor<fp16, []> var_1853_promoted_1 = const()[name = tensor<string, []>("op_1853_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_1854_promoted_1 = const()[name = tensor<string, []>("op_1854_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_67 = clip(alpha = var_1853_promoted_1, beta = var_1854_promoted_1, x = x_65)[name = tensor<string, []>("x_67")];
-            tensor<int32, [1]> var_2093 = const()[name = tensor<string, []>("op_2093"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_33 = reduce_mean(axes = var_2093, keep_dims = var_1857, x = x_67)[name = tensor<string, []>("mean_33")];
+            tensor<fp16, [1, 1, 1, 512]> denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0, x = var_2012_cast_fp16)[name = tensor<string, []>("denom_31_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2014_cast_fp16 = mul(x = zero_mean_31, y = denom_31_cast_fp16)[name = tensor<string, []>("op_2014_cast_fp16")];
+            tensor<fp16, [384]> var_2016_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2016_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66813696)))];
+            tensor<fp16, [384]> var_2016_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2016_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66814528)))];
+            tensor<fp16, []> var_2016_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2016_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2016_cast_fp16 = batch_norm(beta = var_2016_beta_0_to_fp16, epsilon = var_2016_epsilon_0_to_fp16, gamma = var_2016_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2014_cast_fp16)[name = tensor<string, []>("op_2016_cast_fp16")];
+            tensor<int32, [2]> var_2022 = const()[name = tensor<string, []>("op_2022"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2024 = const()[name = tensor<string, []>("op_2024"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2026_pad_type_0 = const()[name = tensor<string, []>("op_2026_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2026_pad_0 = const()[name = tensor<string, []>("op_2026_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_2026 = conv(bias = layers_7_mlp_fc1_bias, dilations = var_2024, groups = var_1810, pad = var_2026_pad_0, pad_type = var_2026_pad_type_0, strides = var_2022, weight = layers_7_mlp_fc1_weight, x = var_2016_cast_fp16)[name = tensor<string, []>("op_2026")];
+            tensor<string, []> input_255_mode_0 = const()[name = tensor<string, []>("input_255_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_255 = gelu(mode = input_255_mode_0, x = var_2026)[name = tensor<string, []>("input_255")];
+            tensor<int32, [2]> var_2030 = const()[name = tensor<string, []>("op_2030"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2032 = const()[name = tensor<string, []>("op_2032"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2034_pad_type_0 = const()[name = tensor<string, []>("op_2034_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2034_pad_0 = const()[name = tensor<string, []>("op_2034_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2034 = conv(bias = layers_7_mlp_fc2_bias, dilations = var_2032, groups = var_1810, pad = var_2034_pad_0, pad_type = var_2034_pad_type_0, strides = var_2030, weight = layers_7_mlp_fc2_weight, x = input_255)[name = tensor<string, []>("op_2034")];
+            tensor<fp16, [1, 384, 1, 512]> x_65 = add(x = var_2016_cast_fp16, y = var_2034)[name = tensor<string, []>("x_65")];
+            tensor<fp16, []> var_1807_promoted_1 = const()[name = tensor<string, []>("op_1807_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_1808_promoted_1 = const()[name = tensor<string, []>("op_1808_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_67 = clip(alpha = var_1807_promoted_1, beta = var_1808_promoted_1, x = x_65)[name = tensor<string, []>("x_67")];
+            tensor<int32, [1]> var_2039 = const()[name = tensor<string, []>("op_2039"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_33 = reduce_mean(axes = var_2039, keep_dims = var_1812, x = x_67)[name = tensor<string, []>("mean_33")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_33 = sub(x = x_67, y = mean_33)[name = tensor<string, []>("zero_mean_33")];
-            tensor<fp16, []> var_1862_promoted_1 = const()[name = tensor<string, []>("op_1862_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2096 = pow(x = zero_mean_33, y = var_1862_promoted_1)[name = tensor<string, []>("op_2096")];
-            tensor<int32, [1]> var_2097 = const()[name = tensor<string, []>("op_2097"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2098 = reduce_mean(axes = var_2097, keep_dims = var_1857, x = var_2096)[name = tensor<string, []>("op_2098")];
-            tensor<fp16, []> var_2099_to_fp16 = const()[name = tensor<string, []>("op_2099_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2100_cast_fp16 = add(x = var_2098, y = var_2099_to_fp16)[name = tensor<string, []>("op_2100_cast_fp16")];
+            tensor<fp16, []> var_1809_promoted_1 = const()[name = tensor<string, []>("op_1809_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2042 = pow(x = zero_mean_33, y = var_1809_promoted_1)[name = tensor<string, []>("op_2042")];
+            tensor<int32, [1]> var_2043 = const()[name = tensor<string, []>("op_2043"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2044 = reduce_mean(axes = var_2043, keep_dims = var_1812, x = var_2042)[name = tensor<string, []>("op_2044")];
+            tensor<fp16, []> var_2045_to_fp16 = const()[name = tensor<string, []>("op_2045_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2046_cast_fp16 = add(x = var_2044, y = var_2045_to_fp16)[name = tensor<string, []>("op_2046_cast_fp16")];
             tensor<fp32, []> denom_33_epsilon_0 = const()[name = tensor<string, []>("denom_33_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0, x = var_2100_cast_fp16)[name = tensor<string, []>("denom_33_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2102_cast_fp16 = mul(x = zero_mean_33, y = denom_33_cast_fp16)[name = tensor<string, []>("op_2102_cast_fp16")];
-            tensor<fp16, [384]> var_2104_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2104_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66815360)))];
-            tensor<fp16, [384]> var_2104_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2104_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66816192)))];
-            tensor<fp16, []> var_2104_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2104_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2104_cast_fp16 = batch_norm(beta = var_2104_beta_0_to_fp16, epsilon = var_2104_epsilon_0_to_fp16, gamma = var_2104_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2102_cast_fp16)[name = tensor<string, []>("op_2104_cast_fp16")];
-            tensor<int32, []> var_2109 = const()[name = tensor<string, []>("op_2109"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_2110 = const()[name = tensor<string, []>("op_2110"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_2111 = const()[name = tensor<string, []>("op_2111"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_2136 = const()[name = tensor<string, []>("op_2136"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2138 = const()[name = tensor<string, []>("op_2138"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2140_pad_type_0 = const()[name = tensor<string, []>("op_2140_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2140_pad_0 = const()[name = tensor<string, []>("op_2140_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2140 = conv(bias = layers_8_attention_q_proj_bias, dilations = var_2138, groups = var_2109, pad = var_2140_pad_0, pad_type = var_2140_pad_type_0, strides = var_2136, weight = layers_8_attention_q_proj_weight, x = var_2104_cast_fp16)[name = tensor<string, []>("op_2140")];
-            tensor<int32, [4]> var_2141 = const()[name = tensor<string, []>("op_2141"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2142 = reshape(shape = var_2141, x = var_2140)[name = tensor<string, []>("op_2142")];
-            tensor<int32, [2]> var_2145 = const()[name = tensor<string, []>("op_2145"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2147 = const()[name = tensor<string, []>("op_2147"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2149_pad_type_0 = const()[name = tensor<string, []>("op_2149_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2149_pad_0 = const()[name = tensor<string, []>("op_2149_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2149 = conv(bias = layers_8_attention_k_proj_bias, dilations = var_2147, groups = var_2109, pad = var_2149_pad_0, pad_type = var_2149_pad_type_0, strides = var_2145, weight = layers_8_attention_k_proj_weight, x = var_2104_cast_fp16)[name = tensor<string, []>("op_2149")];
-            tensor<int32, [4]> var_2150 = const()[name = tensor<string, []>("op_2150"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_17 = reshape(shape = var_2150, x = var_2149)[name = tensor<string, []>("ks_17")];
-            tensor<int32, [2]> var_2154 = const()[name = tensor<string, []>("op_2154"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2156 = const()[name = tensor<string, []>("op_2156"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2158_pad_type_0 = const()[name = tensor<string, []>("op_2158_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2158_pad_0 = const()[name = tensor<string, []>("op_2158_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2158 = conv(bias = layers_8_attention_v_proj_bias, dilations = var_2156, groups = var_2109, pad = var_2158_pad_0, pad_type = var_2158_pad_type_0, strides = var_2154, weight = layers_8_attention_v_proj_weight, x = var_2104_cast_fp16)[name = tensor<string, []>("op_2158")];
-            tensor<int32, [4]> var_2159 = const()[name = tensor<string, []>("op_2159"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2160 = reshape(shape = var_2159, x = var_2158)[name = tensor<string, []>("op_2160")];
-            tensor<int32, [12]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2161_axis_0 = const()[name = tensor<string, []>("op_2161_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2161_0, tensor<fp16, [1, 32, 1, 512]> var_2161_1, tensor<fp16, [1, 32, 1, 512]> var_2161_2, tensor<fp16, [1, 32, 1, 512]> var_2161_3, tensor<fp16, [1, 32, 1, 512]> var_2161_4, tensor<fp16, [1, 32, 1, 512]> var_2161_5, tensor<fp16, [1, 32, 1, 512]> var_2161_6, tensor<fp16, [1, 32, 1, 512]> var_2161_7, tensor<fp16, [1, 32, 1, 512]> var_2161_8, tensor<fp16, [1, 32, 1, 512]> var_2161_9, tensor<fp16, [1, 32, 1, 512]> var_2161_10, tensor<fp16, [1, 32, 1, 512]> var_2161_11 = split(axis = var_2161_axis_0, split_sizes = tile_42, x = var_2142)[name = tensor<string, []>("op_2161")];
-            tensor<int32, [4]> var_2174_perm_0 = const()[name = tensor<string, []>("op_2174_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2175_axis_0 = const()[name = tensor<string, []>("op_2175_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_3 = transpose(perm = var_2174_perm_0, x = ks_17)[name = tensor<string, []>("transpose_3")];
-            tensor<fp16, [1, 512, 1, 32]> var_2175_0, tensor<fp16, [1, 512, 1, 32]> var_2175_1, tensor<fp16, [1, 512, 1, 32]> var_2175_2, tensor<fp16, [1, 512, 1, 32]> var_2175_3, tensor<fp16, [1, 512, 1, 32]> var_2175_4, tensor<fp16, [1, 512, 1, 32]> var_2175_5, tensor<fp16, [1, 512, 1, 32]> var_2175_6, tensor<fp16, [1, 512, 1, 32]> var_2175_7, tensor<fp16, [1, 512, 1, 32]> var_2175_8, tensor<fp16, [1, 512, 1, 32]> var_2175_9, tensor<fp16, [1, 512, 1, 32]> var_2175_10, tensor<fp16, [1, 512, 1, 32]> var_2175_11 = split(axis = var_2175_axis_0, split_sizes = tile_43, x = transpose_3)[name = tensor<string, []>("op_2175")];
-            tensor<int32, [12]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2188_axis_0 = const()[name = tensor<string, []>("op_2188_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2188_0, tensor<fp16, [1, 32, 1, 512]> var_2188_1, tensor<fp16, [1, 32, 1, 512]> var_2188_2, tensor<fp16, [1, 32, 1, 512]> var_2188_3, tensor<fp16, [1, 32, 1, 512]> var_2188_4, tensor<fp16, [1, 32, 1, 512]> var_2188_5, tensor<fp16, [1, 32, 1, 512]> var_2188_6, tensor<fp16, [1, 32, 1, 512]> var_2188_7, tensor<fp16, [1, 32, 1, 512]> var_2188_8, tensor<fp16, [1, 32, 1, 512]> var_2188_9, tensor<fp16, [1, 32, 1, 512]> var_2188_10, tensor<fp16, [1, 32, 1, 512]> var_2188_11 = split(axis = var_2188_axis_0, split_sizes = tile_44, x = var_2160)[name = tensor<string, []>("op_2188")];
-            tensor<string, []> var_2202_equation_0 = const()[name = tensor<string, []>("op_2202_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2202 = einsum(equation = var_2202_equation_0, values = (var_2175_0, var_2161_0))[name = tensor<string, []>("op_2202")];
-            tensor<fp16, []> var_2203_to_fp16 = const()[name = tensor<string, []>("op_2203_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_193_cast_fp16 = mul(x = var_2202, y = var_2203_to_fp16)[name = tensor<string, []>("w_193_cast_fp16")];
-            tensor<string, []> var_2206_equation_0 = const()[name = tensor<string, []>("op_2206_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2206 = einsum(equation = var_2206_equation_0, values = (var_2175_1, var_2161_1))[name = tensor<string, []>("op_2206")];
-            tensor<fp16, []> var_2207_to_fp16 = const()[name = tensor<string, []>("op_2207_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_195_cast_fp16 = mul(x = var_2206, y = var_2207_to_fp16)[name = tensor<string, []>("w_195_cast_fp16")];
-            tensor<string, []> var_2210_equation_0 = const()[name = tensor<string, []>("op_2210_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2210 = einsum(equation = var_2210_equation_0, values = (var_2175_2, var_2161_2))[name = tensor<string, []>("op_2210")];
-            tensor<fp16, []> var_2211_to_fp16 = const()[name = tensor<string, []>("op_2211_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_197_cast_fp16 = mul(x = var_2210, y = var_2211_to_fp16)[name = tensor<string, []>("w_197_cast_fp16")];
-            tensor<string, []> var_2214_equation_0 = const()[name = tensor<string, []>("op_2214_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2214 = einsum(equation = var_2214_equation_0, values = (var_2175_3, var_2161_3))[name = tensor<string, []>("op_2214")];
-            tensor<fp16, []> var_2215_to_fp16 = const()[name = tensor<string, []>("op_2215_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_199_cast_fp16 = mul(x = var_2214, y = var_2215_to_fp16)[name = tensor<string, []>("w_199_cast_fp16")];
-            tensor<string, []> var_2218_equation_0 = const()[name = tensor<string, []>("op_2218_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2218 = einsum(equation = var_2218_equation_0, values = (var_2175_4, var_2161_4))[name = tensor<string, []>("op_2218")];
-            tensor<fp16, []> var_2219_to_fp16 = const()[name = tensor<string, []>("op_2219_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_201_cast_fp16 = mul(x = var_2218, y = var_2219_to_fp16)[name = tensor<string, []>("w_201_cast_fp16")];
-            tensor<string, []> var_2222_equation_0 = const()[name = tensor<string, []>("op_2222_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2222 = einsum(equation = var_2222_equation_0, values = (var_2175_5, var_2161_5))[name = tensor<string, []>("op_2222")];
-            tensor<fp16, []> var_2223_to_fp16 = const()[name = tensor<string, []>("op_2223_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_203_cast_fp16 = mul(x = var_2222, y = var_2223_to_fp16)[name = tensor<string, []>("w_203_cast_fp16")];
-            tensor<string, []> var_2226_equation_0 = const()[name = tensor<string, []>("op_2226_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2226 = einsum(equation = var_2226_equation_0, values = (var_2175_6, var_2161_6))[name = tensor<string, []>("op_2226")];
-            tensor<fp16, []> var_2227_to_fp16 = const()[name = tensor<string, []>("op_2227_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_205_cast_fp16 = mul(x = var_2226, y = var_2227_to_fp16)[name = tensor<string, []>("w_205_cast_fp16")];
-            tensor<string, []> var_2230_equation_0 = const()[name = tensor<string, []>("op_2230_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2230 = einsum(equation = var_2230_equation_0, values = (var_2175_7, var_2161_7))[name = tensor<string, []>("op_2230")];
-            tensor<fp16, []> var_2231_to_fp16 = const()[name = tensor<string, []>("op_2231_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_207_cast_fp16 = mul(x = var_2230, y = var_2231_to_fp16)[name = tensor<string, []>("w_207_cast_fp16")];
-            tensor<string, []> var_2234_equation_0 = const()[name = tensor<string, []>("op_2234_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2234 = einsum(equation = var_2234_equation_0, values = (var_2175_8, var_2161_8))[name = tensor<string, []>("op_2234")];
-            tensor<fp16, []> var_2235_to_fp16 = const()[name = tensor<string, []>("op_2235_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_209_cast_fp16 = mul(x = var_2234, y = var_2235_to_fp16)[name = tensor<string, []>("w_209_cast_fp16")];
-            tensor<string, []> var_2238_equation_0 = const()[name = tensor<string, []>("op_2238_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2238 = einsum(equation = var_2238_equation_0, values = (var_2175_9, var_2161_9))[name = tensor<string, []>("op_2238")];
-            tensor<fp16, []> var_2239_to_fp16 = const()[name = tensor<string, []>("op_2239_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_211_cast_fp16 = mul(x = var_2238, y = var_2239_to_fp16)[name = tensor<string, []>("w_211_cast_fp16")];
-            tensor<string, []> var_2242_equation_0 = const()[name = tensor<string, []>("op_2242_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2242 = einsum(equation = var_2242_equation_0, values = (var_2175_10, var_2161_10))[name = tensor<string, []>("op_2242")];
-            tensor<fp16, []> var_2243_to_fp16 = const()[name = tensor<string, []>("op_2243_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_213_cast_fp16 = mul(x = var_2242, y = var_2243_to_fp16)[name = tensor<string, []>("w_213_cast_fp16")];
-            tensor<string, []> var_2246_equation_0 = const()[name = tensor<string, []>("op_2246_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2246 = einsum(equation = var_2246_equation_0, values = (var_2175_11, var_2161_11))[name = tensor<string, []>("op_2246")];
-            tensor<fp16, []> var_2247_to_fp16 = const()[name = tensor<string, []>("op_2247_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_215_cast_fp16 = mul(x = var_2246, y = var_2247_to_fp16)[name = tensor<string, []>("w_215_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2249_cast_fp16 = add(x = w_193_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2249_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2250_cast_fp16 = softmax(axis = var_2109, x = var_2249_cast_fp16)[name = tensor<string, []>("op_2250_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2251_cast_fp16 = add(x = w_195_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2251_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2252_cast_fp16 = softmax(axis = var_2109, x = var_2251_cast_fp16)[name = tensor<string, []>("op_2252_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2253_cast_fp16 = add(x = w_197_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2253_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2254_cast_fp16 = softmax(axis = var_2109, x = var_2253_cast_fp16)[name = tensor<string, []>("op_2254_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2255_cast_fp16 = add(x = w_199_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2255_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2256_cast_fp16 = softmax(axis = var_2109, x = var_2255_cast_fp16)[name = tensor<string, []>("op_2256_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2257_cast_fp16 = add(x = w_201_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2257_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2258_cast_fp16 = softmax(axis = var_2109, x = var_2257_cast_fp16)[name = tensor<string, []>("op_2258_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2259_cast_fp16 = add(x = w_203_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2259_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2260_cast_fp16 = softmax(axis = var_2109, x = var_2259_cast_fp16)[name = tensor<string, []>("op_2260_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2261_cast_fp16 = add(x = w_205_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2261_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2262_cast_fp16 = softmax(axis = var_2109, x = var_2261_cast_fp16)[name = tensor<string, []>("op_2262_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2263_cast_fp16 = add(x = w_207_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2263_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2264_cast_fp16 = softmax(axis = var_2109, x = var_2263_cast_fp16)[name = tensor<string, []>("op_2264_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2265_cast_fp16 = add(x = w_209_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2265_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2266_cast_fp16 = softmax(axis = var_2109, x = var_2265_cast_fp16)[name = tensor<string, []>("op_2266_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2267_cast_fp16 = add(x = w_211_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2267_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2268_cast_fp16 = softmax(axis = var_2109, x = var_2267_cast_fp16)[name = tensor<string, []>("op_2268_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2269_cast_fp16 = add(x = w_213_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2269_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2270_cast_fp16 = softmax(axis = var_2109, x = var_2269_cast_fp16)[name = tensor<string, []>("op_2270_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2271_cast_fp16 = add(x = w_215_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2271_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2272_cast_fp16 = softmax(axis = var_2109, x = var_2271_cast_fp16)[name = tensor<string, []>("op_2272_cast_fp16")];
-            tensor<string, []> var_2274_equation_0 = const()[name = tensor<string, []>("op_2274_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2274_cast_fp16 = einsum(equation = var_2274_equation_0, values = (var_2188_0, var_2250_cast_fp16))[name = tensor<string, []>("op_2274_cast_fp16")];
-            tensor<string, []> var_2276_equation_0 = const()[name = tensor<string, []>("op_2276_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2276_cast_fp16 = einsum(equation = var_2276_equation_0, values = (var_2188_1, var_2252_cast_fp16))[name = tensor<string, []>("op_2276_cast_fp16")];
-            tensor<string, []> var_2278_equation_0 = const()[name = tensor<string, []>("op_2278_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2278_cast_fp16 = einsum(equation = var_2278_equation_0, values = (var_2188_2, var_2254_cast_fp16))[name = tensor<string, []>("op_2278_cast_fp16")];
-            tensor<string, []> var_2280_equation_0 = const()[name = tensor<string, []>("op_2280_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2280_cast_fp16 = einsum(equation = var_2280_equation_0, values = (var_2188_3, var_2256_cast_fp16))[name = tensor<string, []>("op_2280_cast_fp16")];
-            tensor<string, []> var_2282_equation_0 = const()[name = tensor<string, []>("op_2282_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2282_cast_fp16 = einsum(equation = var_2282_equation_0, values = (var_2188_4, var_2258_cast_fp16))[name = tensor<string, []>("op_2282_cast_fp16")];
-            tensor<string, []> var_2284_equation_0 = const()[name = tensor<string, []>("op_2284_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2284_cast_fp16 = einsum(equation = var_2284_equation_0, values = (var_2188_5, var_2260_cast_fp16))[name = tensor<string, []>("op_2284_cast_fp16")];
-            tensor<string, []> var_2286_equation_0 = const()[name = tensor<string, []>("op_2286_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2286_cast_fp16 = einsum(equation = var_2286_equation_0, values = (var_2188_6, var_2262_cast_fp16))[name = tensor<string, []>("op_2286_cast_fp16")];
-            tensor<string, []> var_2288_equation_0 = const()[name = tensor<string, []>("op_2288_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2288_cast_fp16 = einsum(equation = var_2288_equation_0, values = (var_2188_7, var_2264_cast_fp16))[name = tensor<string, []>("op_2288_cast_fp16")];
-            tensor<string, []> var_2290_equation_0 = const()[name = tensor<string, []>("op_2290_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2290_cast_fp16 = einsum(equation = var_2290_equation_0, values = (var_2188_8, var_2266_cast_fp16))[name = tensor<string, []>("op_2290_cast_fp16")];
-            tensor<string, []> var_2292_equation_0 = const()[name = tensor<string, []>("op_2292_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2292_cast_fp16 = einsum(equation = var_2292_equation_0, values = (var_2188_9, var_2268_cast_fp16))[name = tensor<string, []>("op_2292_cast_fp16")];
-            tensor<string, []> var_2294_equation_0 = const()[name = tensor<string, []>("op_2294_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2294_cast_fp16 = einsum(equation = var_2294_equation_0, values = (var_2188_10, var_2270_cast_fp16))[name = tensor<string, []>("op_2294_cast_fp16")];
-            tensor<string, []> var_2296_equation_0 = const()[name = tensor<string, []>("op_2296_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2296_cast_fp16 = einsum(equation = var_2296_equation_0, values = (var_2188_11, var_2272_cast_fp16))[name = tensor<string, []>("op_2296_cast_fp16")];
-            tensor<bool, []> var_2298_interleave_0 = const()[name = tensor<string, []>("op_2298_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2298_cast_fp16 = concat(axis = var_2109, interleave = var_2298_interleave_0, values = (var_2274_cast_fp16, var_2276_cast_fp16, var_2278_cast_fp16, var_2280_cast_fp16, var_2282_cast_fp16, var_2284_cast_fp16, var_2286_cast_fp16, var_2288_cast_fp16, var_2290_cast_fp16, var_2292_cast_fp16, var_2294_cast_fp16, var_2296_cast_fp16))[name = tensor<string, []>("op_2298_cast_fp16")];
-            tensor<int32, [2]> var_2302 = const()[name = tensor<string, []>("op_2302"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2304 = const()[name = tensor<string, []>("op_2304"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2306_pad_type_0 = const()[name = tensor<string, []>("op_2306_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2306_pad_0 = const()[name = tensor<string, []>("op_2306_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2306 = conv(bias = layers_8_attention_o_proj_bias, dilations = var_2304, groups = var_2109, pad = var_2306_pad_0, pad_type = var_2306_pad_type_0, strides = var_2302, weight = layers_8_attention_o_proj_weight, x = var_2298_cast_fp16)[name = tensor<string, []>("op_2306")];
-            tensor<bool, []> var_2308_interleave_0 = const()[name = tensor<string, []>("op_2308_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2308 = concat(axis = var_2110, interleave = var_2308_interleave_0, values = var_2306)[name = tensor<string, []>("op_2308")];
-            tensor<fp16, [1, 384, 1, 512]> x_69 = add(x = var_2104_cast_fp16, y = var_2308)[name = tensor<string, []>("x_69")];
-            tensor<fp16, []> var_2107_promoted = const()[name = tensor<string, []>("op_2107_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2108_promoted = const()[name = tensor<string, []>("op_2108_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_71 = clip(alpha = var_2107_promoted, beta = var_2108_promoted, x = x_69)[name = tensor<string, []>("x_71")];
-            tensor<int32, [1]> var_2313 = const()[name = tensor<string, []>("op_2313"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_35 = reduce_mean(axes = var_2313, keep_dims = var_2111, x = x_71)[name = tensor<string, []>("mean_35")];
+            tensor<fp16, [1, 1, 1, 512]> denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0, x = var_2046_cast_fp16)[name = tensor<string, []>("denom_33_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2048_cast_fp16 = mul(x = zero_mean_33, y = denom_33_cast_fp16)[name = tensor<string, []>("op_2048_cast_fp16")];
+            tensor<fp16, [384]> var_2050_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2050_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66815360)))];
+            tensor<fp16, [384]> var_2050_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2050_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66816192)))];
+            tensor<fp16, []> var_2050_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2050_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2050_cast_fp16 = batch_norm(beta = var_2050_beta_0_to_fp16, epsilon = var_2050_epsilon_0_to_fp16, gamma = var_2050_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2048_cast_fp16)[name = tensor<string, []>("op_2050_cast_fp16")];
+            tensor<int32, []> var_2056 = const()[name = tensor<string, []>("op_2056"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_2057 = const()[name = tensor<string, []>("op_2057"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_2058 = const()[name = tensor<string, []>("op_2058"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_2080 = const()[name = tensor<string, []>("op_2080"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2082 = const()[name = tensor<string, []>("op_2082"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2084_pad_type_0 = const()[name = tensor<string, []>("op_2084_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2084_pad_0 = const()[name = tensor<string, []>("op_2084_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2084 = conv(bias = layers_8_attention_q_proj_bias, dilations = var_2082, groups = var_2056, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2080, weight = layers_8_attention_q_proj_weight, x = var_2050_cast_fp16)[name = tensor<string, []>("op_2084")];
+            tensor<int32, [2]> var_2087 = const()[name = tensor<string, []>("op_2087"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2089 = const()[name = tensor<string, []>("op_2089"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_17_pad_type_0 = const()[name = tensor<string, []>("ks_17_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_17_pad_0 = const()[name = tensor<string, []>("ks_17_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_17 = conv(bias = layers_8_attention_k_proj_bias, dilations = var_2089, groups = var_2056, pad = ks_17_pad_0, pad_type = ks_17_pad_type_0, strides = var_2087, weight = layers_8_attention_k_proj_weight, x = var_2050_cast_fp16)[name = tensor<string, []>("ks_17")];
+            tensor<int32, [2]> var_2094 = const()[name = tensor<string, []>("op_2094"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2096 = const()[name = tensor<string, []>("op_2096"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2098_pad_type_0 = const()[name = tensor<string, []>("op_2098_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2098_pad_0 = const()[name = tensor<string, []>("op_2098_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2098 = conv(bias = layers_8_attention_v_proj_bias, dilations = var_2096, groups = var_2056, pad = var_2098_pad_0, pad_type = var_2098_pad_type_0, strides = var_2094, weight = layers_8_attention_v_proj_weight, x = var_2050_cast_fp16)[name = tensor<string, []>("op_2098")];
+            tensor<int32, [12]> tile_42 = const()[name = tensor<string, []>("tile_42"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2099_axis_0 = const()[name = tensor<string, []>("op_2099_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2099_0, tensor<fp16, [1, 32, 1, 512]> var_2099_1, tensor<fp16, [1, 32, 1, 512]> var_2099_2, tensor<fp16, [1, 32, 1, 512]> var_2099_3, tensor<fp16, [1, 32, 1, 512]> var_2099_4, tensor<fp16, [1, 32, 1, 512]> var_2099_5, tensor<fp16, [1, 32, 1, 512]> var_2099_6, tensor<fp16, [1, 32, 1, 512]> var_2099_7, tensor<fp16, [1, 32, 1, 512]> var_2099_8, tensor<fp16, [1, 32, 1, 512]> var_2099_9, tensor<fp16, [1, 32, 1, 512]> var_2099_10, tensor<fp16, [1, 32, 1, 512]> var_2099_11 = split(axis = var_2099_axis_0, split_sizes = tile_42, x = var_2084)[name = tensor<string, []>("op_2099")];
+            tensor<int32, [4]> var_2112_perm_0 = const()[name = tensor<string, []>("op_2112_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_43 = const()[name = tensor<string, []>("tile_43"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2113_axis_0 = const()[name = tensor<string, []>("op_2113_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_3 = transpose(perm = var_2112_perm_0, x = ks_17)[name = tensor<string, []>("transpose_3")];
+            tensor<fp16, [1, 512, 1, 32]> var_2113_0, tensor<fp16, [1, 512, 1, 32]> var_2113_1, tensor<fp16, [1, 512, 1, 32]> var_2113_2, tensor<fp16, [1, 512, 1, 32]> var_2113_3, tensor<fp16, [1, 512, 1, 32]> var_2113_4, tensor<fp16, [1, 512, 1, 32]> var_2113_5, tensor<fp16, [1, 512, 1, 32]> var_2113_6, tensor<fp16, [1, 512, 1, 32]> var_2113_7, tensor<fp16, [1, 512, 1, 32]> var_2113_8, tensor<fp16, [1, 512, 1, 32]> var_2113_9, tensor<fp16, [1, 512, 1, 32]> var_2113_10, tensor<fp16, [1, 512, 1, 32]> var_2113_11 = split(axis = var_2113_axis_0, split_sizes = tile_43, x = transpose_3)[name = tensor<string, []>("op_2113")];
+            tensor<int32, [12]> tile_44 = const()[name = tensor<string, []>("tile_44"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2126_axis_0 = const()[name = tensor<string, []>("op_2126_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2126_0, tensor<fp16, [1, 32, 1, 512]> var_2126_1, tensor<fp16, [1, 32, 1, 512]> var_2126_2, tensor<fp16, [1, 32, 1, 512]> var_2126_3, tensor<fp16, [1, 32, 1, 512]> var_2126_4, tensor<fp16, [1, 32, 1, 512]> var_2126_5, tensor<fp16, [1, 32, 1, 512]> var_2126_6, tensor<fp16, [1, 32, 1, 512]> var_2126_7, tensor<fp16, [1, 32, 1, 512]> var_2126_8, tensor<fp16, [1, 32, 1, 512]> var_2126_9, tensor<fp16, [1, 32, 1, 512]> var_2126_10, tensor<fp16, [1, 32, 1, 512]> var_2126_11 = split(axis = var_2126_axis_0, split_sizes = tile_44, x = var_2098)[name = tensor<string, []>("op_2126")];
+            tensor<string, []> var_2140_equation_0 = const()[name = tensor<string, []>("op_2140_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2140 = einsum(equation = var_2140_equation_0, values = (var_2113_0, var_2099_0))[name = tensor<string, []>("op_2140")];
+            tensor<fp16, []> var_2141_to_fp16 = const()[name = tensor<string, []>("op_2141_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_193_cast_fp16 = mul(x = var_2140, y = var_2141_to_fp16)[name = tensor<string, []>("w_193_cast_fp16")];
+            tensor<string, []> var_2144_equation_0 = const()[name = tensor<string, []>("op_2144_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2144 = einsum(equation = var_2144_equation_0, values = (var_2113_1, var_2099_1))[name = tensor<string, []>("op_2144")];
+            tensor<fp16, []> var_2145_to_fp16 = const()[name = tensor<string, []>("op_2145_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_195_cast_fp16 = mul(x = var_2144, y = var_2145_to_fp16)[name = tensor<string, []>("w_195_cast_fp16")];
+            tensor<string, []> var_2148_equation_0 = const()[name = tensor<string, []>("op_2148_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2148 = einsum(equation = var_2148_equation_0, values = (var_2113_2, var_2099_2))[name = tensor<string, []>("op_2148")];
+            tensor<fp16, []> var_2149_to_fp16 = const()[name = tensor<string, []>("op_2149_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_197_cast_fp16 = mul(x = var_2148, y = var_2149_to_fp16)[name = tensor<string, []>("w_197_cast_fp16")];
+            tensor<string, []> var_2152_equation_0 = const()[name = tensor<string, []>("op_2152_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2152 = einsum(equation = var_2152_equation_0, values = (var_2113_3, var_2099_3))[name = tensor<string, []>("op_2152")];
+            tensor<fp16, []> var_2153_to_fp16 = const()[name = tensor<string, []>("op_2153_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_199_cast_fp16 = mul(x = var_2152, y = var_2153_to_fp16)[name = tensor<string, []>("w_199_cast_fp16")];
+            tensor<string, []> var_2156_equation_0 = const()[name = tensor<string, []>("op_2156_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2156 = einsum(equation = var_2156_equation_0, values = (var_2113_4, var_2099_4))[name = tensor<string, []>("op_2156")];
+            tensor<fp16, []> var_2157_to_fp16 = const()[name = tensor<string, []>("op_2157_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_201_cast_fp16 = mul(x = var_2156, y = var_2157_to_fp16)[name = tensor<string, []>("w_201_cast_fp16")];
+            tensor<string, []> var_2160_equation_0 = const()[name = tensor<string, []>("op_2160_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2160 = einsum(equation = var_2160_equation_0, values = (var_2113_5, var_2099_5))[name = tensor<string, []>("op_2160")];
+            tensor<fp16, []> var_2161_to_fp16 = const()[name = tensor<string, []>("op_2161_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_203_cast_fp16 = mul(x = var_2160, y = var_2161_to_fp16)[name = tensor<string, []>("w_203_cast_fp16")];
+            tensor<string, []> var_2164_equation_0 = const()[name = tensor<string, []>("op_2164_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2164 = einsum(equation = var_2164_equation_0, values = (var_2113_6, var_2099_6))[name = tensor<string, []>("op_2164")];
+            tensor<fp16, []> var_2165_to_fp16 = const()[name = tensor<string, []>("op_2165_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_205_cast_fp16 = mul(x = var_2164, y = var_2165_to_fp16)[name = tensor<string, []>("w_205_cast_fp16")];
+            tensor<string, []> var_2168_equation_0 = const()[name = tensor<string, []>("op_2168_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2168 = einsum(equation = var_2168_equation_0, values = (var_2113_7, var_2099_7))[name = tensor<string, []>("op_2168")];
+            tensor<fp16, []> var_2169_to_fp16 = const()[name = tensor<string, []>("op_2169_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_207_cast_fp16 = mul(x = var_2168, y = var_2169_to_fp16)[name = tensor<string, []>("w_207_cast_fp16")];
+            tensor<string, []> var_2172_equation_0 = const()[name = tensor<string, []>("op_2172_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2172 = einsum(equation = var_2172_equation_0, values = (var_2113_8, var_2099_8))[name = tensor<string, []>("op_2172")];
+            tensor<fp16, []> var_2173_to_fp16 = const()[name = tensor<string, []>("op_2173_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_209_cast_fp16 = mul(x = var_2172, y = var_2173_to_fp16)[name = tensor<string, []>("w_209_cast_fp16")];
+            tensor<string, []> var_2176_equation_0 = const()[name = tensor<string, []>("op_2176_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2176 = einsum(equation = var_2176_equation_0, values = (var_2113_9, var_2099_9))[name = tensor<string, []>("op_2176")];
+            tensor<fp16, []> var_2177_to_fp16 = const()[name = tensor<string, []>("op_2177_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_211_cast_fp16 = mul(x = var_2176, y = var_2177_to_fp16)[name = tensor<string, []>("w_211_cast_fp16")];
+            tensor<string, []> var_2180_equation_0 = const()[name = tensor<string, []>("op_2180_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2180 = einsum(equation = var_2180_equation_0, values = (var_2113_10, var_2099_10))[name = tensor<string, []>("op_2180")];
+            tensor<fp16, []> var_2181_to_fp16 = const()[name = tensor<string, []>("op_2181_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_213_cast_fp16 = mul(x = var_2180, y = var_2181_to_fp16)[name = tensor<string, []>("w_213_cast_fp16")];
+            tensor<string, []> var_2184_equation_0 = const()[name = tensor<string, []>("op_2184_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2184 = einsum(equation = var_2184_equation_0, values = (var_2113_11, var_2099_11))[name = tensor<string, []>("op_2184")];
+            tensor<fp16, []> var_2185_to_fp16 = const()[name = tensor<string, []>("op_2185_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_215_cast_fp16 = mul(x = var_2184, y = var_2185_to_fp16)[name = tensor<string, []>("w_215_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_259_cast_fp16 = add(x = w_193_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_259_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2188_cast_fp16 = softmax(axis = var_2056, x = input_259_cast_fp16)[name = tensor<string, []>("op_2188_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_261_cast_fp16 = add(x = w_195_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_261_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2190_cast_fp16 = softmax(axis = var_2056, x = input_261_cast_fp16)[name = tensor<string, []>("op_2190_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_263_cast_fp16 = add(x = w_197_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_263_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2192_cast_fp16 = softmax(axis = var_2056, x = input_263_cast_fp16)[name = tensor<string, []>("op_2192_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_265_cast_fp16 = add(x = w_199_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_265_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2194_cast_fp16 = softmax(axis = var_2056, x = input_265_cast_fp16)[name = tensor<string, []>("op_2194_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_267_cast_fp16 = add(x = w_201_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_267_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2196_cast_fp16 = softmax(axis = var_2056, x = input_267_cast_fp16)[name = tensor<string, []>("op_2196_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_269_cast_fp16 = add(x = w_203_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_269_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2198_cast_fp16 = softmax(axis = var_2056, x = input_269_cast_fp16)[name = tensor<string, []>("op_2198_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_271_cast_fp16 = add(x = w_205_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_271_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2200_cast_fp16 = softmax(axis = var_2056, x = input_271_cast_fp16)[name = tensor<string, []>("op_2200_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_273_cast_fp16 = add(x = w_207_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_273_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2202_cast_fp16 = softmax(axis = var_2056, x = input_273_cast_fp16)[name = tensor<string, []>("op_2202_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_275_cast_fp16 = add(x = w_209_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_275_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2204_cast_fp16 = softmax(axis = var_2056, x = input_275_cast_fp16)[name = tensor<string, []>("op_2204_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_277_cast_fp16 = add(x = w_211_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_277_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2206_cast_fp16 = softmax(axis = var_2056, x = input_277_cast_fp16)[name = tensor<string, []>("op_2206_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_279_cast_fp16 = add(x = w_213_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_279_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2208_cast_fp16 = softmax(axis = var_2056, x = input_279_cast_fp16)[name = tensor<string, []>("op_2208_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_281_cast_fp16 = add(x = w_215_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_281_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2210_cast_fp16 = softmax(axis = var_2056, x = input_281_cast_fp16)[name = tensor<string, []>("op_2210_cast_fp16")];
+            tensor<string, []> var_2212_equation_0 = const()[name = tensor<string, []>("op_2212_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2212_cast_fp16 = einsum(equation = var_2212_equation_0, values = (var_2126_0, var_2188_cast_fp16))[name = tensor<string, []>("op_2212_cast_fp16")];
+            tensor<string, []> var_2214_equation_0 = const()[name = tensor<string, []>("op_2214_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2214_cast_fp16 = einsum(equation = var_2214_equation_0, values = (var_2126_1, var_2190_cast_fp16))[name = tensor<string, []>("op_2214_cast_fp16")];
+            tensor<string, []> var_2216_equation_0 = const()[name = tensor<string, []>("op_2216_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2216_cast_fp16 = einsum(equation = var_2216_equation_0, values = (var_2126_2, var_2192_cast_fp16))[name = tensor<string, []>("op_2216_cast_fp16")];
+            tensor<string, []> var_2218_equation_0 = const()[name = tensor<string, []>("op_2218_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2218_cast_fp16 = einsum(equation = var_2218_equation_0, values = (var_2126_3, var_2194_cast_fp16))[name = tensor<string, []>("op_2218_cast_fp16")];
+            tensor<string, []> var_2220_equation_0 = const()[name = tensor<string, []>("op_2220_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2220_cast_fp16 = einsum(equation = var_2220_equation_0, values = (var_2126_4, var_2196_cast_fp16))[name = tensor<string, []>("op_2220_cast_fp16")];
+            tensor<string, []> var_2222_equation_0 = const()[name = tensor<string, []>("op_2222_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2222_cast_fp16 = einsum(equation = var_2222_equation_0, values = (var_2126_5, var_2198_cast_fp16))[name = tensor<string, []>("op_2222_cast_fp16")];
+            tensor<string, []> var_2224_equation_0 = const()[name = tensor<string, []>("op_2224_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2224_cast_fp16 = einsum(equation = var_2224_equation_0, values = (var_2126_6, var_2200_cast_fp16))[name = tensor<string, []>("op_2224_cast_fp16")];
+            tensor<string, []> var_2226_equation_0 = const()[name = tensor<string, []>("op_2226_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2226_cast_fp16 = einsum(equation = var_2226_equation_0, values = (var_2126_7, var_2202_cast_fp16))[name = tensor<string, []>("op_2226_cast_fp16")];
+            tensor<string, []> var_2228_equation_0 = const()[name = tensor<string, []>("op_2228_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2228_cast_fp16 = einsum(equation = var_2228_equation_0, values = (var_2126_8, var_2204_cast_fp16))[name = tensor<string, []>("op_2228_cast_fp16")];
+            tensor<string, []> var_2230_equation_0 = const()[name = tensor<string, []>("op_2230_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2230_cast_fp16 = einsum(equation = var_2230_equation_0, values = (var_2126_9, var_2206_cast_fp16))[name = tensor<string, []>("op_2230_cast_fp16")];
+            tensor<string, []> var_2232_equation_0 = const()[name = tensor<string, []>("op_2232_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2232_cast_fp16 = einsum(equation = var_2232_equation_0, values = (var_2126_10, var_2208_cast_fp16))[name = tensor<string, []>("op_2232_cast_fp16")];
+            tensor<string, []> var_2234_equation_0 = const()[name = tensor<string, []>("op_2234_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2234_cast_fp16 = einsum(equation = var_2234_equation_0, values = (var_2126_11, var_2210_cast_fp16))[name = tensor<string, []>("op_2234_cast_fp16")];
+            tensor<bool, []> var_2236_interleave_0 = const()[name = tensor<string, []>("op_2236_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2236_cast_fp16 = concat(axis = var_2056, interleave = var_2236_interleave_0, values = (var_2212_cast_fp16, var_2214_cast_fp16, var_2216_cast_fp16, var_2218_cast_fp16, var_2220_cast_fp16, var_2222_cast_fp16, var_2224_cast_fp16, var_2226_cast_fp16, var_2228_cast_fp16, var_2230_cast_fp16, var_2232_cast_fp16, var_2234_cast_fp16))[name = tensor<string, []>("op_2236_cast_fp16")];
+            tensor<int32, [2]> var_2240 = const()[name = tensor<string, []>("op_2240"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2242 = const()[name = tensor<string, []>("op_2242"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2244_pad_type_0 = const()[name = tensor<string, []>("op_2244_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2244_pad_0 = const()[name = tensor<string, []>("op_2244_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2244 = conv(bias = layers_8_attention_o_proj_bias, dilations = var_2242, groups = var_2056, pad = var_2244_pad_0, pad_type = var_2244_pad_type_0, strides = var_2240, weight = layers_8_attention_o_proj_weight, x = var_2236_cast_fp16)[name = tensor<string, []>("op_2244")];
+            tensor<bool, []> var_2246_interleave_0 = const()[name = tensor<string, []>("op_2246_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2246 = concat(axis = var_2057, interleave = var_2246_interleave_0, values = var_2244)[name = tensor<string, []>("op_2246")];
+            tensor<fp16, [1, 384, 1, 512]> x_69 = add(x = var_2050_cast_fp16, y = var_2246)[name = tensor<string, []>("x_69")];
+            tensor<fp16, []> var_2053_promoted = const()[name = tensor<string, []>("op_2053_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2054_promoted = const()[name = tensor<string, []>("op_2054_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_71 = clip(alpha = var_2053_promoted, beta = var_2054_promoted, x = x_69)[name = tensor<string, []>("x_71")];
+            tensor<int32, [1]> var_2251 = const()[name = tensor<string, []>("op_2251"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_35 = reduce_mean(axes = var_2251, keep_dims = var_2058, x = x_71)[name = tensor<string, []>("mean_35")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_35 = sub(x = x_71, y = mean_35)[name = tensor<string, []>("zero_mean_35")];
-            tensor<fp16, []> var_2116_promoted = const()[name = tensor<string, []>("op_2116_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2316 = pow(x = zero_mean_35, y = var_2116_promoted)[name = tensor<string, []>("op_2316")];
-            tensor<int32, [1]> var_2317 = const()[name = tensor<string, []>("op_2317"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2318 = reduce_mean(axes = var_2317, keep_dims = var_2111, x = var_2316)[name = tensor<string, []>("op_2318")];
-            tensor<fp16, []> var_2319_to_fp16 = const()[name = tensor<string, []>("op_2319_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2320_cast_fp16 = add(x = var_2318, y = var_2319_to_fp16)[name = tensor<string, []>("op_2320_cast_fp16")];
+            tensor<fp16, []> var_2055_promoted = const()[name = tensor<string, []>("op_2055_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2254 = pow(x = zero_mean_35, y = var_2055_promoted)[name = tensor<string, []>("op_2254")];
+            tensor<int32, [1]> var_2255 = const()[name = tensor<string, []>("op_2255"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2256 = reduce_mean(axes = var_2255, keep_dims = var_2058, x = var_2254)[name = tensor<string, []>("op_2256")];
+            tensor<fp16, []> var_2257_to_fp16 = const()[name = tensor<string, []>("op_2257_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2258_cast_fp16 = add(x = var_2256, y = var_2257_to_fp16)[name = tensor<string, []>("op_2258_cast_fp16")];
             tensor<fp32, []> denom_35_epsilon_0 = const()[name = tensor<string, []>("denom_35_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0, x = var_2320_cast_fp16)[name = tensor<string, []>("denom_35_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2322_cast_fp16 = mul(x = zero_mean_35, y = denom_35_cast_fp16)[name = tensor<string, []>("op_2322_cast_fp16")];
-            tensor<fp16, [384]> var_2324_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2324_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66817024)))];
-            tensor<fp16, [384]> var_2324_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2324_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66817856)))];
-            tensor<fp16, []> var_2324_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2324_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2324_cast_fp16 = batch_norm(beta = var_2324_beta_0_to_fp16, epsilon = var_2324_epsilon_0_to_fp16, gamma = var_2324_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2322_cast_fp16)[name = tensor<string, []>("op_2324_cast_fp16")];
-            tensor<int32, [2]> var_2330 = const()[name = tensor<string, []>("op_2330"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2332 = const()[name = tensor<string, []>("op_2332"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2334_pad_type_0 = const()[name = tensor<string, []>("op_2334_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2334_pad_0 = const()[name = tensor<string, []>("op_2334_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_2334 = conv(bias = layers_8_mlp_fc1_bias, dilations = var_2332, groups = var_2109, pad = var_2334_pad_0, pad_type = var_2334_pad_type_0, strides = var_2330, weight = layers_8_mlp_fc1_weight, x = var_2324_cast_fp16)[name = tensor<string, []>("op_2334")];
-            tensor<string, []> input_71_mode_0 = const()[name = tensor<string, []>("input_71_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_71 = gelu(mode = input_71_mode_0, x = var_2334)[name = tensor<string, []>("input_71")];
-            tensor<int32, [2]> var_2338 = const()[name = tensor<string, []>("op_2338"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2340 = const()[name = tensor<string, []>("op_2340"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2342_pad_type_0 = const()[name = tensor<string, []>("op_2342_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2342_pad_0 = const()[name = tensor<string, []>("op_2342_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2342 = conv(bias = layers_8_mlp_fc2_bias, dilations = var_2340, groups = var_2109, pad = var_2342_pad_0, pad_type = var_2342_pad_type_0, strides = var_2338, weight = layers_8_mlp_fc2_weight, x = input_71)[name = tensor<string, []>("op_2342")];
-            tensor<fp16, [1, 384, 1, 512]> x_73 = add(x = var_2324_cast_fp16, y = var_2342)[name = tensor<string, []>("x_73")];
-            tensor<fp16, []> var_2107_promoted_1 = const()[name = tensor<string, []>("op_2107_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2108_promoted_1 = const()[name = tensor<string, []>("op_2108_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_75 = clip(alpha = var_2107_promoted_1, beta = var_2108_promoted_1, x = x_73)[name = tensor<string, []>("x_75")];
-            tensor<int32, [1]> var_2347 = const()[name = tensor<string, []>("op_2347"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_37 = reduce_mean(axes = var_2347, keep_dims = var_2111, x = x_75)[name = tensor<string, []>("mean_37")];
+            tensor<fp16, [1, 1, 1, 512]> denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0, x = var_2258_cast_fp16)[name = tensor<string, []>("denom_35_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2260_cast_fp16 = mul(x = zero_mean_35, y = denom_35_cast_fp16)[name = tensor<string, []>("op_2260_cast_fp16")];
+            tensor<fp16, [384]> var_2262_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2262_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66817024)))];
+            tensor<fp16, [384]> var_2262_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2262_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66817856)))];
+            tensor<fp16, []> var_2262_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2262_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2262_cast_fp16 = batch_norm(beta = var_2262_beta_0_to_fp16, epsilon = var_2262_epsilon_0_to_fp16, gamma = var_2262_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2260_cast_fp16)[name = tensor<string, []>("op_2262_cast_fp16")];
+            tensor<int32, [2]> var_2268 = const()[name = tensor<string, []>("op_2268"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2270 = const()[name = tensor<string, []>("op_2270"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2272_pad_type_0 = const()[name = tensor<string, []>("op_2272_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2272_pad_0 = const()[name = tensor<string, []>("op_2272_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_2272 = conv(bias = layers_8_mlp_fc1_bias, dilations = var_2270, groups = var_2056, pad = var_2272_pad_0, pad_type = var_2272_pad_type_0, strides = var_2268, weight = layers_8_mlp_fc1_weight, x = var_2262_cast_fp16)[name = tensor<string, []>("op_2272")];
+            tensor<string, []> input_287_mode_0 = const()[name = tensor<string, []>("input_287_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_287 = gelu(mode = input_287_mode_0, x = var_2272)[name = tensor<string, []>("input_287")];
+            tensor<int32, [2]> var_2276 = const()[name = tensor<string, []>("op_2276"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2278 = const()[name = tensor<string, []>("op_2278"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2280_pad_type_0 = const()[name = tensor<string, []>("op_2280_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2280_pad_0 = const()[name = tensor<string, []>("op_2280_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2280 = conv(bias = layers_8_mlp_fc2_bias, dilations = var_2278, groups = var_2056, pad = var_2280_pad_0, pad_type = var_2280_pad_type_0, strides = var_2276, weight = layers_8_mlp_fc2_weight, x = input_287)[name = tensor<string, []>("op_2280")];
+            tensor<fp16, [1, 384, 1, 512]> x_73 = add(x = var_2262_cast_fp16, y = var_2280)[name = tensor<string, []>("x_73")];
+            tensor<fp16, []> var_2053_promoted_1 = const()[name = tensor<string, []>("op_2053_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2054_promoted_1 = const()[name = tensor<string, []>("op_2054_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_75 = clip(alpha = var_2053_promoted_1, beta = var_2054_promoted_1, x = x_73)[name = tensor<string, []>("x_75")];
+            tensor<int32, [1]> var_2285 = const()[name = tensor<string, []>("op_2285"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_37 = reduce_mean(axes = var_2285, keep_dims = var_2058, x = x_75)[name = tensor<string, []>("mean_37")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_37 = sub(x = x_75, y = mean_37)[name = tensor<string, []>("zero_mean_37")];
-            tensor<fp16, []> var_2116_promoted_1 = const()[name = tensor<string, []>("op_2116_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2350 = pow(x = zero_mean_37, y = var_2116_promoted_1)[name = tensor<string, []>("op_2350")];
-            tensor<int32, [1]> var_2351 = const()[name = tensor<string, []>("op_2351"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2352 = reduce_mean(axes = var_2351, keep_dims = var_2111, x = var_2350)[name = tensor<string, []>("op_2352")];
-            tensor<fp16, []> var_2353_to_fp16 = const()[name = tensor<string, []>("op_2353_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2354_cast_fp16 = add(x = var_2352, y = var_2353_to_fp16)[name = tensor<string, []>("op_2354_cast_fp16")];
+            tensor<fp16, []> var_2055_promoted_1 = const()[name = tensor<string, []>("op_2055_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2288 = pow(x = zero_mean_37, y = var_2055_promoted_1)[name = tensor<string, []>("op_2288")];
+            tensor<int32, [1]> var_2289 = const()[name = tensor<string, []>("op_2289"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2290 = reduce_mean(axes = var_2289, keep_dims = var_2058, x = var_2288)[name = tensor<string, []>("op_2290")];
+            tensor<fp16, []> var_2291_to_fp16 = const()[name = tensor<string, []>("op_2291_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2292_cast_fp16 = add(x = var_2290, y = var_2291_to_fp16)[name = tensor<string, []>("op_2292_cast_fp16")];
             tensor<fp32, []> denom_37_epsilon_0 = const()[name = tensor<string, []>("denom_37_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0, x = var_2354_cast_fp16)[name = tensor<string, []>("denom_37_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2356_cast_fp16 = mul(x = zero_mean_37, y = denom_37_cast_fp16)[name = tensor<string, []>("op_2356_cast_fp16")];
-            tensor<fp16, [384]> var_2358_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2358_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66818688)))];
-            tensor<fp16, [384]> var_2358_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2358_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66819520)))];
-            tensor<fp16, []> var_2358_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2358_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2358_cast_fp16 = batch_norm(beta = var_2358_beta_0_to_fp16, epsilon = var_2358_epsilon_0_to_fp16, gamma = var_2358_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2356_cast_fp16)[name = tensor<string, []>("op_2358_cast_fp16")];
-            tensor<int32, []> var_2363 = const()[name = tensor<string, []>("op_2363"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_2364 = const()[name = tensor<string, []>("op_2364"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_2365 = const()[name = tensor<string, []>("op_2365"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_2390 = const()[name = tensor<string, []>("op_2390"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2392 = const()[name = tensor<string, []>("op_2392"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2394_pad_type_0 = const()[name = tensor<string, []>("op_2394_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2394_pad_0 = const()[name = tensor<string, []>("op_2394_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2394 = conv(bias = layers_9_attention_q_proj_bias, dilations = var_2392, groups = var_2363, pad = var_2394_pad_0, pad_type = var_2394_pad_type_0, strides = var_2390, weight = layers_9_attention_q_proj_weight, x = var_2358_cast_fp16)[name = tensor<string, []>("op_2394")];
-            tensor<int32, [4]> var_2395 = const()[name = tensor<string, []>("op_2395"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2396 = reshape(shape = var_2395, x = var_2394)[name = tensor<string, []>("op_2396")];
-            tensor<int32, [2]> var_2399 = const()[name = tensor<string, []>("op_2399"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2401 = const()[name = tensor<string, []>("op_2401"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2403_pad_type_0 = const()[name = tensor<string, []>("op_2403_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2403_pad_0 = const()[name = tensor<string, []>("op_2403_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2403 = conv(bias = layers_9_attention_k_proj_bias, dilations = var_2401, groups = var_2363, pad = var_2403_pad_0, pad_type = var_2403_pad_type_0, strides = var_2399, weight = layers_9_attention_k_proj_weight, x = var_2358_cast_fp16)[name = tensor<string, []>("op_2403")];
-            tensor<int32, [4]> var_2404 = const()[name = tensor<string, []>("op_2404"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_19 = reshape(shape = var_2404, x = var_2403)[name = tensor<string, []>("ks_19")];
-            tensor<int32, [2]> var_2408 = const()[name = tensor<string, []>("op_2408"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2410 = const()[name = tensor<string, []>("op_2410"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2412_pad_type_0 = const()[name = tensor<string, []>("op_2412_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2412_pad_0 = const()[name = tensor<string, []>("op_2412_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2412 = conv(bias = layers_9_attention_v_proj_bias, dilations = var_2410, groups = var_2363, pad = var_2412_pad_0, pad_type = var_2412_pad_type_0, strides = var_2408, weight = layers_9_attention_v_proj_weight, x = var_2358_cast_fp16)[name = tensor<string, []>("op_2412")];
-            tensor<int32, [4]> var_2413 = const()[name = tensor<string, []>("op_2413"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2414 = reshape(shape = var_2413, x = var_2412)[name = tensor<string, []>("op_2414")];
-            tensor<int32, [12]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2415_axis_0 = const()[name = tensor<string, []>("op_2415_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2415_0, tensor<fp16, [1, 32, 1, 512]> var_2415_1, tensor<fp16, [1, 32, 1, 512]> var_2415_2, tensor<fp16, [1, 32, 1, 512]> var_2415_3, tensor<fp16, [1, 32, 1, 512]> var_2415_4, tensor<fp16, [1, 32, 1, 512]> var_2415_5, tensor<fp16, [1, 32, 1, 512]> var_2415_6, tensor<fp16, [1, 32, 1, 512]> var_2415_7, tensor<fp16, [1, 32, 1, 512]> var_2415_8, tensor<fp16, [1, 32, 1, 512]> var_2415_9, tensor<fp16, [1, 32, 1, 512]> var_2415_10, tensor<fp16, [1, 32, 1, 512]> var_2415_11 = split(axis = var_2415_axis_0, split_sizes = tile_47, x = var_2396)[name = tensor<string, []>("op_2415")];
-            tensor<int32, [4]> var_2428_perm_0 = const()[name = tensor<string, []>("op_2428_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2429_axis_0 = const()[name = tensor<string, []>("op_2429_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_2 = transpose(perm = var_2428_perm_0, x = ks_19)[name = tensor<string, []>("transpose_2")];
-            tensor<fp16, [1, 512, 1, 32]> var_2429_0, tensor<fp16, [1, 512, 1, 32]> var_2429_1, tensor<fp16, [1, 512, 1, 32]> var_2429_2, tensor<fp16, [1, 512, 1, 32]> var_2429_3, tensor<fp16, [1, 512, 1, 32]> var_2429_4, tensor<fp16, [1, 512, 1, 32]> var_2429_5, tensor<fp16, [1, 512, 1, 32]> var_2429_6, tensor<fp16, [1, 512, 1, 32]> var_2429_7, tensor<fp16, [1, 512, 1, 32]> var_2429_8, tensor<fp16, [1, 512, 1, 32]> var_2429_9, tensor<fp16, [1, 512, 1, 32]> var_2429_10, tensor<fp16, [1, 512, 1, 32]> var_2429_11 = split(axis = var_2429_axis_0, split_sizes = tile_48, x = transpose_2)[name = tensor<string, []>("op_2429")];
-            tensor<int32, [12]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2442_axis_0 = const()[name = tensor<string, []>("op_2442_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2442_0, tensor<fp16, [1, 32, 1, 512]> var_2442_1, tensor<fp16, [1, 32, 1, 512]> var_2442_2, tensor<fp16, [1, 32, 1, 512]> var_2442_3, tensor<fp16, [1, 32, 1, 512]> var_2442_4, tensor<fp16, [1, 32, 1, 512]> var_2442_5, tensor<fp16, [1, 32, 1, 512]> var_2442_6, tensor<fp16, [1, 32, 1, 512]> var_2442_7, tensor<fp16, [1, 32, 1, 512]> var_2442_8, tensor<fp16, [1, 32, 1, 512]> var_2442_9, tensor<fp16, [1, 32, 1, 512]> var_2442_10, tensor<fp16, [1, 32, 1, 512]> var_2442_11 = split(axis = var_2442_axis_0, split_sizes = tile_49, x = var_2414)[name = tensor<string, []>("op_2442")];
-            tensor<string, []> var_2456_equation_0 = const()[name = tensor<string, []>("op_2456_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2456 = einsum(equation = var_2456_equation_0, values = (var_2429_0, var_2415_0))[name = tensor<string, []>("op_2456")];
-            tensor<fp16, []> var_2457_to_fp16 = const()[name = tensor<string, []>("op_2457_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_217_cast_fp16 = mul(x = var_2456, y = var_2457_to_fp16)[name = tensor<string, []>("w_217_cast_fp16")];
-            tensor<string, []> var_2460_equation_0 = const()[name = tensor<string, []>("op_2460_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2460 = einsum(equation = var_2460_equation_0, values = (var_2429_1, var_2415_1))[name = tensor<string, []>("op_2460")];
-            tensor<fp16, []> var_2461_to_fp16 = const()[name = tensor<string, []>("op_2461_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_219_cast_fp16 = mul(x = var_2460, y = var_2461_to_fp16)[name = tensor<string, []>("w_219_cast_fp16")];
-            tensor<string, []> var_2464_equation_0 = const()[name = tensor<string, []>("op_2464_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2464 = einsum(equation = var_2464_equation_0, values = (var_2429_2, var_2415_2))[name = tensor<string, []>("op_2464")];
-            tensor<fp16, []> var_2465_to_fp16 = const()[name = tensor<string, []>("op_2465_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_221_cast_fp16 = mul(x = var_2464, y = var_2465_to_fp16)[name = tensor<string, []>("w_221_cast_fp16")];
-            tensor<string, []> var_2468_equation_0 = const()[name = tensor<string, []>("op_2468_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2468 = einsum(equation = var_2468_equation_0, values = (var_2429_3, var_2415_3))[name = tensor<string, []>("op_2468")];
-            tensor<fp16, []> var_2469_to_fp16 = const()[name = tensor<string, []>("op_2469_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_223_cast_fp16 = mul(x = var_2468, y = var_2469_to_fp16)[name = tensor<string, []>("w_223_cast_fp16")];
-            tensor<string, []> var_2472_equation_0 = const()[name = tensor<string, []>("op_2472_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2472 = einsum(equation = var_2472_equation_0, values = (var_2429_4, var_2415_4))[name = tensor<string, []>("op_2472")];
-            tensor<fp16, []> var_2473_to_fp16 = const()[name = tensor<string, []>("op_2473_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_225_cast_fp16 = mul(x = var_2472, y = var_2473_to_fp16)[name = tensor<string, []>("w_225_cast_fp16")];
-            tensor<string, []> var_2476_equation_0 = const()[name = tensor<string, []>("op_2476_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2476 = einsum(equation = var_2476_equation_0, values = (var_2429_5, var_2415_5))[name = tensor<string, []>("op_2476")];
-            tensor<fp16, []> var_2477_to_fp16 = const()[name = tensor<string, []>("op_2477_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_227_cast_fp16 = mul(x = var_2476, y = var_2477_to_fp16)[name = tensor<string, []>("w_227_cast_fp16")];
-            tensor<string, []> var_2480_equation_0 = const()[name = tensor<string, []>("op_2480_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2480 = einsum(equation = var_2480_equation_0, values = (var_2429_6, var_2415_6))[name = tensor<string, []>("op_2480")];
-            tensor<fp16, []> var_2481_to_fp16 = const()[name = tensor<string, []>("op_2481_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_229_cast_fp16 = mul(x = var_2480, y = var_2481_to_fp16)[name = tensor<string, []>("w_229_cast_fp16")];
-            tensor<string, []> var_2484_equation_0 = const()[name = tensor<string, []>("op_2484_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2484 = einsum(equation = var_2484_equation_0, values = (var_2429_7, var_2415_7))[name = tensor<string, []>("op_2484")];
-            tensor<fp16, []> var_2485_to_fp16 = const()[name = tensor<string, []>("op_2485_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_231_cast_fp16 = mul(x = var_2484, y = var_2485_to_fp16)[name = tensor<string, []>("w_231_cast_fp16")];
-            tensor<string, []> var_2488_equation_0 = const()[name = tensor<string, []>("op_2488_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2488 = einsum(equation = var_2488_equation_0, values = (var_2429_8, var_2415_8))[name = tensor<string, []>("op_2488")];
-            tensor<fp16, []> var_2489_to_fp16 = const()[name = tensor<string, []>("op_2489_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_233_cast_fp16 = mul(x = var_2488, y = var_2489_to_fp16)[name = tensor<string, []>("w_233_cast_fp16")];
-            tensor<string, []> var_2492_equation_0 = const()[name = tensor<string, []>("op_2492_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2492 = einsum(equation = var_2492_equation_0, values = (var_2429_9, var_2415_9))[name = tensor<string, []>("op_2492")];
-            tensor<fp16, []> var_2493_to_fp16 = const()[name = tensor<string, []>("op_2493_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_235_cast_fp16 = mul(x = var_2492, y = var_2493_to_fp16)[name = tensor<string, []>("w_235_cast_fp16")];
-            tensor<string, []> var_2496_equation_0 = const()[name = tensor<string, []>("op_2496_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2496 = einsum(equation = var_2496_equation_0, values = (var_2429_10, var_2415_10))[name = tensor<string, []>("op_2496")];
-            tensor<fp16, []> var_2497_to_fp16 = const()[name = tensor<string, []>("op_2497_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_237_cast_fp16 = mul(x = var_2496, y = var_2497_to_fp16)[name = tensor<string, []>("w_237_cast_fp16")];
-            tensor<string, []> var_2500_equation_0 = const()[name = tensor<string, []>("op_2500_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2500 = einsum(equation = var_2500_equation_0, values = (var_2429_11, var_2415_11))[name = tensor<string, []>("op_2500")];
-            tensor<fp16, []> var_2501_to_fp16 = const()[name = tensor<string, []>("op_2501_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_239_cast_fp16 = mul(x = var_2500, y = var_2501_to_fp16)[name = tensor<string, []>("w_239_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2503_cast_fp16 = add(x = w_217_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2503_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2504_cast_fp16 = softmax(axis = var_2363, x = var_2503_cast_fp16)[name = tensor<string, []>("op_2504_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2505_cast_fp16 = add(x = w_219_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2505_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2506_cast_fp16 = softmax(axis = var_2363, x = var_2505_cast_fp16)[name = tensor<string, []>("op_2506_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2507_cast_fp16 = add(x = w_221_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2507_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2508_cast_fp16 = softmax(axis = var_2363, x = var_2507_cast_fp16)[name = tensor<string, []>("op_2508_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2509_cast_fp16 = add(x = w_223_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2509_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2510_cast_fp16 = softmax(axis = var_2363, x = var_2509_cast_fp16)[name = tensor<string, []>("op_2510_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2511_cast_fp16 = add(x = w_225_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2511_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2512_cast_fp16 = softmax(axis = var_2363, x = var_2511_cast_fp16)[name = tensor<string, []>("op_2512_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2513_cast_fp16 = add(x = w_227_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2513_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2514_cast_fp16 = softmax(axis = var_2363, x = var_2513_cast_fp16)[name = tensor<string, []>("op_2514_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2515_cast_fp16 = add(x = w_229_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2515_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2516_cast_fp16 = softmax(axis = var_2363, x = var_2515_cast_fp16)[name = tensor<string, []>("op_2516_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2517_cast_fp16 = add(x = w_231_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2517_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2518_cast_fp16 = softmax(axis = var_2363, x = var_2517_cast_fp16)[name = tensor<string, []>("op_2518_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2519_cast_fp16 = add(x = w_233_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2519_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2520_cast_fp16 = softmax(axis = var_2363, x = var_2519_cast_fp16)[name = tensor<string, []>("op_2520_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2521_cast_fp16 = add(x = w_235_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2521_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2522_cast_fp16 = softmax(axis = var_2363, x = var_2521_cast_fp16)[name = tensor<string, []>("op_2522_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2523_cast_fp16 = add(x = w_237_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2523_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2524_cast_fp16 = softmax(axis = var_2363, x = var_2523_cast_fp16)[name = tensor<string, []>("op_2524_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2525_cast_fp16 = add(x = w_239_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2525_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2526_cast_fp16 = softmax(axis = var_2363, x = var_2525_cast_fp16)[name = tensor<string, []>("op_2526_cast_fp16")];
-            tensor<string, []> var_2528_equation_0 = const()[name = tensor<string, []>("op_2528_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2528_cast_fp16 = einsum(equation = var_2528_equation_0, values = (var_2442_0, var_2504_cast_fp16))[name = tensor<string, []>("op_2528_cast_fp16")];
-            tensor<string, []> var_2530_equation_0 = const()[name = tensor<string, []>("op_2530_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2442_1, var_2506_cast_fp16))[name = tensor<string, []>("op_2530_cast_fp16")];
-            tensor<string, []> var_2532_equation_0 = const()[name = tensor<string, []>("op_2532_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2532_cast_fp16 = einsum(equation = var_2532_equation_0, values = (var_2442_2, var_2508_cast_fp16))[name = tensor<string, []>("op_2532_cast_fp16")];
-            tensor<string, []> var_2534_equation_0 = const()[name = tensor<string, []>("op_2534_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2442_3, var_2510_cast_fp16))[name = tensor<string, []>("op_2534_cast_fp16")];
-            tensor<string, []> var_2536_equation_0 = const()[name = tensor<string, []>("op_2536_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2536_cast_fp16 = einsum(equation = var_2536_equation_0, values = (var_2442_4, var_2512_cast_fp16))[name = tensor<string, []>("op_2536_cast_fp16")];
-            tensor<string, []> var_2538_equation_0 = const()[name = tensor<string, []>("op_2538_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2442_5, var_2514_cast_fp16))[name = tensor<string, []>("op_2538_cast_fp16")];
-            tensor<string, []> var_2540_equation_0 = const()[name = tensor<string, []>("op_2540_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2540_cast_fp16 = einsum(equation = var_2540_equation_0, values = (var_2442_6, var_2516_cast_fp16))[name = tensor<string, []>("op_2540_cast_fp16")];
-            tensor<string, []> var_2542_equation_0 = const()[name = tensor<string, []>("op_2542_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2442_7, var_2518_cast_fp16))[name = tensor<string, []>("op_2542_cast_fp16")];
-            tensor<string, []> var_2544_equation_0 = const()[name = tensor<string, []>("op_2544_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2544_cast_fp16 = einsum(equation = var_2544_equation_0, values = (var_2442_8, var_2520_cast_fp16))[name = tensor<string, []>("op_2544_cast_fp16")];
-            tensor<string, []> var_2546_equation_0 = const()[name = tensor<string, []>("op_2546_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2546_cast_fp16 = einsum(equation = var_2546_equation_0, values = (var_2442_9, var_2522_cast_fp16))[name = tensor<string, []>("op_2546_cast_fp16")];
-            tensor<string, []> var_2548_equation_0 = const()[name = tensor<string, []>("op_2548_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2548_cast_fp16 = einsum(equation = var_2548_equation_0, values = (var_2442_10, var_2524_cast_fp16))[name = tensor<string, []>("op_2548_cast_fp16")];
-            tensor<string, []> var_2550_equation_0 = const()[name = tensor<string, []>("op_2550_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2550_cast_fp16 = einsum(equation = var_2550_equation_0, values = (var_2442_11, var_2526_cast_fp16))[name = tensor<string, []>("op_2550_cast_fp16")];
-            tensor<bool, []> var_2552_interleave_0 = const()[name = tensor<string, []>("op_2552_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2552_cast_fp16 = concat(axis = var_2363, interleave = var_2552_interleave_0, values = (var_2528_cast_fp16, var_2530_cast_fp16, var_2532_cast_fp16, var_2534_cast_fp16, var_2536_cast_fp16, var_2538_cast_fp16, var_2540_cast_fp16, var_2542_cast_fp16, var_2544_cast_fp16, var_2546_cast_fp16, var_2548_cast_fp16, var_2550_cast_fp16))[name = tensor<string, []>("op_2552_cast_fp16")];
-            tensor<int32, [2]> var_2556 = const()[name = tensor<string, []>("op_2556"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2558 = const()[name = tensor<string, []>("op_2558"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2560_pad_type_0 = const()[name = tensor<string, []>("op_2560_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2560_pad_0 = const()[name = tensor<string, []>("op_2560_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2560 = conv(bias = layers_9_attention_o_proj_bias, dilations = var_2558, groups = var_2363, pad = var_2560_pad_0, pad_type = var_2560_pad_type_0, strides = var_2556, weight = layers_9_attention_o_proj_weight, x = var_2552_cast_fp16)[name = tensor<string, []>("op_2560")];
-            tensor<bool, []> var_2562_interleave_0 = const()[name = tensor<string, []>("op_2562_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2562 = concat(axis = var_2364, interleave = var_2562_interleave_0, values = var_2560)[name = tensor<string, []>("op_2562")];
-            tensor<fp16, [1, 384, 1, 512]> x_77 = add(x = var_2358_cast_fp16, y = var_2562)[name = tensor<string, []>("x_77")];
-            tensor<fp16, []> var_2361_promoted = const()[name = tensor<string, []>("op_2361_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2362_promoted = const()[name = tensor<string, []>("op_2362_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_79 = clip(alpha = var_2361_promoted, beta = var_2362_promoted, x = x_77)[name = tensor<string, []>("x_79")];
-            tensor<int32, [1]> var_2567 = const()[name = tensor<string, []>("op_2567"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_39 = reduce_mean(axes = var_2567, keep_dims = var_2365, x = x_79)[name = tensor<string, []>("mean_39")];
+            tensor<fp16, [1, 1, 1, 512]> denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0, x = var_2292_cast_fp16)[name = tensor<string, []>("denom_37_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2294_cast_fp16 = mul(x = zero_mean_37, y = denom_37_cast_fp16)[name = tensor<string, []>("op_2294_cast_fp16")];
+            tensor<fp16, [384]> var_2296_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2296_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66818688)))];
+            tensor<fp16, [384]> var_2296_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2296_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66819520)))];
+            tensor<fp16, []> var_2296_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2296_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2296_cast_fp16 = batch_norm(beta = var_2296_beta_0_to_fp16, epsilon = var_2296_epsilon_0_to_fp16, gamma = var_2296_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2294_cast_fp16)[name = tensor<string, []>("op_2296_cast_fp16")];
+            tensor<int32, []> var_2302 = const()[name = tensor<string, []>("op_2302"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_2303 = const()[name = tensor<string, []>("op_2303"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_2304 = const()[name = tensor<string, []>("op_2304"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_2326 = const()[name = tensor<string, []>("op_2326"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2328 = const()[name = tensor<string, []>("op_2328"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2330_pad_type_0 = const()[name = tensor<string, []>("op_2330_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2330_pad_0 = const()[name = tensor<string, []>("op_2330_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2330 = conv(bias = layers_9_attention_q_proj_bias, dilations = var_2328, groups = var_2302, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2326, weight = layers_9_attention_q_proj_weight, x = var_2296_cast_fp16)[name = tensor<string, []>("op_2330")];
+            tensor<int32, [2]> var_2333 = const()[name = tensor<string, []>("op_2333"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2335 = const()[name = tensor<string, []>("op_2335"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_19_pad_type_0 = const()[name = tensor<string, []>("ks_19_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_19_pad_0 = const()[name = tensor<string, []>("ks_19_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_19 = conv(bias = layers_9_attention_k_proj_bias, dilations = var_2335, groups = var_2302, pad = ks_19_pad_0, pad_type = ks_19_pad_type_0, strides = var_2333, weight = layers_9_attention_k_proj_weight, x = var_2296_cast_fp16)[name = tensor<string, []>("ks_19")];
+            tensor<int32, [2]> var_2340 = const()[name = tensor<string, []>("op_2340"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2342 = const()[name = tensor<string, []>("op_2342"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2344_pad_type_0 = const()[name = tensor<string, []>("op_2344_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2344_pad_0 = const()[name = tensor<string, []>("op_2344_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2344 = conv(bias = layers_9_attention_v_proj_bias, dilations = var_2342, groups = var_2302, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2340, weight = layers_9_attention_v_proj_weight, x = var_2296_cast_fp16)[name = tensor<string, []>("op_2344")];
+            tensor<int32, [12]> tile_47 = const()[name = tensor<string, []>("tile_47"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2345_axis_0 = const()[name = tensor<string, []>("op_2345_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2345_0, tensor<fp16, [1, 32, 1, 512]> var_2345_1, tensor<fp16, [1, 32, 1, 512]> var_2345_2, tensor<fp16, [1, 32, 1, 512]> var_2345_3, tensor<fp16, [1, 32, 1, 512]> var_2345_4, tensor<fp16, [1, 32, 1, 512]> var_2345_5, tensor<fp16, [1, 32, 1, 512]> var_2345_6, tensor<fp16, [1, 32, 1, 512]> var_2345_7, tensor<fp16, [1, 32, 1, 512]> var_2345_8, tensor<fp16, [1, 32, 1, 512]> var_2345_9, tensor<fp16, [1, 32, 1, 512]> var_2345_10, tensor<fp16, [1, 32, 1, 512]> var_2345_11 = split(axis = var_2345_axis_0, split_sizes = tile_47, x = var_2330)[name = tensor<string, []>("op_2345")];
+            tensor<int32, [4]> var_2358_perm_0 = const()[name = tensor<string, []>("op_2358_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_48 = const()[name = tensor<string, []>("tile_48"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2359_axis_0 = const()[name = tensor<string, []>("op_2359_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_2 = transpose(perm = var_2358_perm_0, x = ks_19)[name = tensor<string, []>("transpose_2")];
+            tensor<fp16, [1, 512, 1, 32]> var_2359_0, tensor<fp16, [1, 512, 1, 32]> var_2359_1, tensor<fp16, [1, 512, 1, 32]> var_2359_2, tensor<fp16, [1, 512, 1, 32]> var_2359_3, tensor<fp16, [1, 512, 1, 32]> var_2359_4, tensor<fp16, [1, 512, 1, 32]> var_2359_5, tensor<fp16, [1, 512, 1, 32]> var_2359_6, tensor<fp16, [1, 512, 1, 32]> var_2359_7, tensor<fp16, [1, 512, 1, 32]> var_2359_8, tensor<fp16, [1, 512, 1, 32]> var_2359_9, tensor<fp16, [1, 512, 1, 32]> var_2359_10, tensor<fp16, [1, 512, 1, 32]> var_2359_11 = split(axis = var_2359_axis_0, split_sizes = tile_48, x = transpose_2)[name = tensor<string, []>("op_2359")];
+            tensor<int32, [12]> tile_49 = const()[name = tensor<string, []>("tile_49"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2372_axis_0 = const()[name = tensor<string, []>("op_2372_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2372_0, tensor<fp16, [1, 32, 1, 512]> var_2372_1, tensor<fp16, [1, 32, 1, 512]> var_2372_2, tensor<fp16, [1, 32, 1, 512]> var_2372_3, tensor<fp16, [1, 32, 1, 512]> var_2372_4, tensor<fp16, [1, 32, 1, 512]> var_2372_5, tensor<fp16, [1, 32, 1, 512]> var_2372_6, tensor<fp16, [1, 32, 1, 512]> var_2372_7, tensor<fp16, [1, 32, 1, 512]> var_2372_8, tensor<fp16, [1, 32, 1, 512]> var_2372_9, tensor<fp16, [1, 32, 1, 512]> var_2372_10, tensor<fp16, [1, 32, 1, 512]> var_2372_11 = split(axis = var_2372_axis_0, split_sizes = tile_49, x = var_2344)[name = tensor<string, []>("op_2372")];
+            tensor<string, []> var_2386_equation_0 = const()[name = tensor<string, []>("op_2386_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2386 = einsum(equation = var_2386_equation_0, values = (var_2359_0, var_2345_0))[name = tensor<string, []>("op_2386")];
+            tensor<fp16, []> var_2387_to_fp16 = const()[name = tensor<string, []>("op_2387_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_217_cast_fp16 = mul(x = var_2386, y = var_2387_to_fp16)[name = tensor<string, []>("w_217_cast_fp16")];
+            tensor<string, []> var_2390_equation_0 = const()[name = tensor<string, []>("op_2390_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2390 = einsum(equation = var_2390_equation_0, values = (var_2359_1, var_2345_1))[name = tensor<string, []>("op_2390")];
+            tensor<fp16, []> var_2391_to_fp16 = const()[name = tensor<string, []>("op_2391_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_219_cast_fp16 = mul(x = var_2390, y = var_2391_to_fp16)[name = tensor<string, []>("w_219_cast_fp16")];
+            tensor<string, []> var_2394_equation_0 = const()[name = tensor<string, []>("op_2394_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2394 = einsum(equation = var_2394_equation_0, values = (var_2359_2, var_2345_2))[name = tensor<string, []>("op_2394")];
+            tensor<fp16, []> var_2395_to_fp16 = const()[name = tensor<string, []>("op_2395_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_221_cast_fp16 = mul(x = var_2394, y = var_2395_to_fp16)[name = tensor<string, []>("w_221_cast_fp16")];
+            tensor<string, []> var_2398_equation_0 = const()[name = tensor<string, []>("op_2398_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2398 = einsum(equation = var_2398_equation_0, values = (var_2359_3, var_2345_3))[name = tensor<string, []>("op_2398")];
+            tensor<fp16, []> var_2399_to_fp16 = const()[name = tensor<string, []>("op_2399_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_223_cast_fp16 = mul(x = var_2398, y = var_2399_to_fp16)[name = tensor<string, []>("w_223_cast_fp16")];
+            tensor<string, []> var_2402_equation_0 = const()[name = tensor<string, []>("op_2402_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2402 = einsum(equation = var_2402_equation_0, values = (var_2359_4, var_2345_4))[name = tensor<string, []>("op_2402")];
+            tensor<fp16, []> var_2403_to_fp16 = const()[name = tensor<string, []>("op_2403_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_225_cast_fp16 = mul(x = var_2402, y = var_2403_to_fp16)[name = tensor<string, []>("w_225_cast_fp16")];
+            tensor<string, []> var_2406_equation_0 = const()[name = tensor<string, []>("op_2406_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2406 = einsum(equation = var_2406_equation_0, values = (var_2359_5, var_2345_5))[name = tensor<string, []>("op_2406")];
+            tensor<fp16, []> var_2407_to_fp16 = const()[name = tensor<string, []>("op_2407_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_227_cast_fp16 = mul(x = var_2406, y = var_2407_to_fp16)[name = tensor<string, []>("w_227_cast_fp16")];
+            tensor<string, []> var_2410_equation_0 = const()[name = tensor<string, []>("op_2410_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2410 = einsum(equation = var_2410_equation_0, values = (var_2359_6, var_2345_6))[name = tensor<string, []>("op_2410")];
+            tensor<fp16, []> var_2411_to_fp16 = const()[name = tensor<string, []>("op_2411_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_229_cast_fp16 = mul(x = var_2410, y = var_2411_to_fp16)[name = tensor<string, []>("w_229_cast_fp16")];
+            tensor<string, []> var_2414_equation_0 = const()[name = tensor<string, []>("op_2414_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2414 = einsum(equation = var_2414_equation_0, values = (var_2359_7, var_2345_7))[name = tensor<string, []>("op_2414")];
+            tensor<fp16, []> var_2415_to_fp16 = const()[name = tensor<string, []>("op_2415_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_231_cast_fp16 = mul(x = var_2414, y = var_2415_to_fp16)[name = tensor<string, []>("w_231_cast_fp16")];
+            tensor<string, []> var_2418_equation_0 = const()[name = tensor<string, []>("op_2418_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2418 = einsum(equation = var_2418_equation_0, values = (var_2359_8, var_2345_8))[name = tensor<string, []>("op_2418")];
+            tensor<fp16, []> var_2419_to_fp16 = const()[name = tensor<string, []>("op_2419_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_233_cast_fp16 = mul(x = var_2418, y = var_2419_to_fp16)[name = tensor<string, []>("w_233_cast_fp16")];
+            tensor<string, []> var_2422_equation_0 = const()[name = tensor<string, []>("op_2422_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2422 = einsum(equation = var_2422_equation_0, values = (var_2359_9, var_2345_9))[name = tensor<string, []>("op_2422")];
+            tensor<fp16, []> var_2423_to_fp16 = const()[name = tensor<string, []>("op_2423_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_235_cast_fp16 = mul(x = var_2422, y = var_2423_to_fp16)[name = tensor<string, []>("w_235_cast_fp16")];
+            tensor<string, []> var_2426_equation_0 = const()[name = tensor<string, []>("op_2426_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2426 = einsum(equation = var_2426_equation_0, values = (var_2359_10, var_2345_10))[name = tensor<string, []>("op_2426")];
+            tensor<fp16, []> var_2427_to_fp16 = const()[name = tensor<string, []>("op_2427_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_237_cast_fp16 = mul(x = var_2426, y = var_2427_to_fp16)[name = tensor<string, []>("w_237_cast_fp16")];
+            tensor<string, []> var_2430_equation_0 = const()[name = tensor<string, []>("op_2430_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2430 = einsum(equation = var_2430_equation_0, values = (var_2359_11, var_2345_11))[name = tensor<string, []>("op_2430")];
+            tensor<fp16, []> var_2431_to_fp16 = const()[name = tensor<string, []>("op_2431_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_239_cast_fp16 = mul(x = var_2430, y = var_2431_to_fp16)[name = tensor<string, []>("w_239_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_291_cast_fp16 = add(x = w_217_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_291_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2434_cast_fp16 = softmax(axis = var_2302, x = input_291_cast_fp16)[name = tensor<string, []>("op_2434_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_293_cast_fp16 = add(x = w_219_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_293_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2436_cast_fp16 = softmax(axis = var_2302, x = input_293_cast_fp16)[name = tensor<string, []>("op_2436_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_295_cast_fp16 = add(x = w_221_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_295_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2438_cast_fp16 = softmax(axis = var_2302, x = input_295_cast_fp16)[name = tensor<string, []>("op_2438_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_297_cast_fp16 = add(x = w_223_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_297_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2440_cast_fp16 = softmax(axis = var_2302, x = input_297_cast_fp16)[name = tensor<string, []>("op_2440_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_299_cast_fp16 = add(x = w_225_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_299_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2442_cast_fp16 = softmax(axis = var_2302, x = input_299_cast_fp16)[name = tensor<string, []>("op_2442_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_301_cast_fp16 = add(x = w_227_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_301_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2444_cast_fp16 = softmax(axis = var_2302, x = input_301_cast_fp16)[name = tensor<string, []>("op_2444_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_303_cast_fp16 = add(x = w_229_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_303_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2446_cast_fp16 = softmax(axis = var_2302, x = input_303_cast_fp16)[name = tensor<string, []>("op_2446_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_305_cast_fp16 = add(x = w_231_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_305_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2448_cast_fp16 = softmax(axis = var_2302, x = input_305_cast_fp16)[name = tensor<string, []>("op_2448_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_307_cast_fp16 = add(x = w_233_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_307_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2450_cast_fp16 = softmax(axis = var_2302, x = input_307_cast_fp16)[name = tensor<string, []>("op_2450_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_309_cast_fp16 = add(x = w_235_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_309_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2452_cast_fp16 = softmax(axis = var_2302, x = input_309_cast_fp16)[name = tensor<string, []>("op_2452_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_311_cast_fp16 = add(x = w_237_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_311_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2454_cast_fp16 = softmax(axis = var_2302, x = input_311_cast_fp16)[name = tensor<string, []>("op_2454_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_313_cast_fp16 = add(x = w_239_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_313_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2456_cast_fp16 = softmax(axis = var_2302, x = input_313_cast_fp16)[name = tensor<string, []>("op_2456_cast_fp16")];
+            tensor<string, []> var_2458_equation_0 = const()[name = tensor<string, []>("op_2458_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2372_0, var_2434_cast_fp16))[name = tensor<string, []>("op_2458_cast_fp16")];
+            tensor<string, []> var_2460_equation_0 = const()[name = tensor<string, []>("op_2460_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2460_cast_fp16 = einsum(equation = var_2460_equation_0, values = (var_2372_1, var_2436_cast_fp16))[name = tensor<string, []>("op_2460_cast_fp16")];
+            tensor<string, []> var_2462_equation_0 = const()[name = tensor<string, []>("op_2462_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2372_2, var_2438_cast_fp16))[name = tensor<string, []>("op_2462_cast_fp16")];
+            tensor<string, []> var_2464_equation_0 = const()[name = tensor<string, []>("op_2464_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2464_cast_fp16 = einsum(equation = var_2464_equation_0, values = (var_2372_3, var_2440_cast_fp16))[name = tensor<string, []>("op_2464_cast_fp16")];
+            tensor<string, []> var_2466_equation_0 = const()[name = tensor<string, []>("op_2466_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2372_4, var_2442_cast_fp16))[name = tensor<string, []>("op_2466_cast_fp16")];
+            tensor<string, []> var_2468_equation_0 = const()[name = tensor<string, []>("op_2468_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2372_5, var_2444_cast_fp16))[name = tensor<string, []>("op_2468_cast_fp16")];
+            tensor<string, []> var_2470_equation_0 = const()[name = tensor<string, []>("op_2470_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2372_6, var_2446_cast_fp16))[name = tensor<string, []>("op_2470_cast_fp16")];
+            tensor<string, []> var_2472_equation_0 = const()[name = tensor<string, []>("op_2472_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2372_7, var_2448_cast_fp16))[name = tensor<string, []>("op_2472_cast_fp16")];
+            tensor<string, []> var_2474_equation_0 = const()[name = tensor<string, []>("op_2474_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2372_8, var_2450_cast_fp16))[name = tensor<string, []>("op_2474_cast_fp16")];
+            tensor<string, []> var_2476_equation_0 = const()[name = tensor<string, []>("op_2476_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2372_9, var_2452_cast_fp16))[name = tensor<string, []>("op_2476_cast_fp16")];
+            tensor<string, []> var_2478_equation_0 = const()[name = tensor<string, []>("op_2478_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2372_10, var_2454_cast_fp16))[name = tensor<string, []>("op_2478_cast_fp16")];
+            tensor<string, []> var_2480_equation_0 = const()[name = tensor<string, []>("op_2480_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2372_11, var_2456_cast_fp16))[name = tensor<string, []>("op_2480_cast_fp16")];
+            tensor<bool, []> var_2482_interleave_0 = const()[name = tensor<string, []>("op_2482_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2482_cast_fp16 = concat(axis = var_2302, interleave = var_2482_interleave_0, values = (var_2458_cast_fp16, var_2460_cast_fp16, var_2462_cast_fp16, var_2464_cast_fp16, var_2466_cast_fp16, var_2468_cast_fp16, var_2470_cast_fp16, var_2472_cast_fp16, var_2474_cast_fp16, var_2476_cast_fp16, var_2478_cast_fp16, var_2480_cast_fp16))[name = tensor<string, []>("op_2482_cast_fp16")];
+            tensor<int32, [2]> var_2486 = const()[name = tensor<string, []>("op_2486"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2488 = const()[name = tensor<string, []>("op_2488"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2490_pad_type_0 = const()[name = tensor<string, []>("op_2490_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2490_pad_0 = const()[name = tensor<string, []>("op_2490_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2490 = conv(bias = layers_9_attention_o_proj_bias, dilations = var_2488, groups = var_2302, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2486, weight = layers_9_attention_o_proj_weight, x = var_2482_cast_fp16)[name = tensor<string, []>("op_2490")];
+            tensor<bool, []> var_2492_interleave_0 = const()[name = tensor<string, []>("op_2492_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2492 = concat(axis = var_2303, interleave = var_2492_interleave_0, values = var_2490)[name = tensor<string, []>("op_2492")];
+            tensor<fp16, [1, 384, 1, 512]> x_77 = add(x = var_2296_cast_fp16, y = var_2492)[name = tensor<string, []>("x_77")];
+            tensor<fp16, []> var_2299_promoted = const()[name = tensor<string, []>("op_2299_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2300_promoted = const()[name = tensor<string, []>("op_2300_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_79 = clip(alpha = var_2299_promoted, beta = var_2300_promoted, x = x_77)[name = tensor<string, []>("x_79")];
+            tensor<int32, [1]> var_2497 = const()[name = tensor<string, []>("op_2497"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_39 = reduce_mean(axes = var_2497, keep_dims = var_2304, x = x_79)[name = tensor<string, []>("mean_39")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_39 = sub(x = x_79, y = mean_39)[name = tensor<string, []>("zero_mean_39")];
-            tensor<fp16, []> var_2370_promoted = const()[name = tensor<string, []>("op_2370_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2570 = pow(x = zero_mean_39, y = var_2370_promoted)[name = tensor<string, []>("op_2570")];
-            tensor<int32, [1]> var_2571 = const()[name = tensor<string, []>("op_2571"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2572 = reduce_mean(axes = var_2571, keep_dims = var_2365, x = var_2570)[name = tensor<string, []>("op_2572")];
-            tensor<fp16, []> var_2573_to_fp16 = const()[name = tensor<string, []>("op_2573_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2574_cast_fp16 = add(x = var_2572, y = var_2573_to_fp16)[name = tensor<string, []>("op_2574_cast_fp16")];
+            tensor<fp16, []> var_2301_promoted = const()[name = tensor<string, []>("op_2301_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2500 = pow(x = zero_mean_39, y = var_2301_promoted)[name = tensor<string, []>("op_2500")];
+            tensor<int32, [1]> var_2501 = const()[name = tensor<string, []>("op_2501"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2502 = reduce_mean(axes = var_2501, keep_dims = var_2304, x = var_2500)[name = tensor<string, []>("op_2502")];
+            tensor<fp16, []> var_2503_to_fp16 = const()[name = tensor<string, []>("op_2503_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2504_cast_fp16 = add(x = var_2502, y = var_2503_to_fp16)[name = tensor<string, []>("op_2504_cast_fp16")];
             tensor<fp32, []> denom_39_epsilon_0 = const()[name = tensor<string, []>("denom_39_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0, x = var_2574_cast_fp16)[name = tensor<string, []>("denom_39_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2576_cast_fp16 = mul(x = zero_mean_39, y = denom_39_cast_fp16)[name = tensor<string, []>("op_2576_cast_fp16")];
-            tensor<fp16, [384]> var_2578_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2578_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66820352)))];
-            tensor<fp16, [384]> var_2578_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2578_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66821184)))];
-            tensor<fp16, []> var_2578_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2578_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2578_cast_fp16 = batch_norm(beta = var_2578_beta_0_to_fp16, epsilon = var_2578_epsilon_0_to_fp16, gamma = var_2578_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2576_cast_fp16)[name = tensor<string, []>("op_2578_cast_fp16")];
-            tensor<int32, [2]> var_2584 = const()[name = tensor<string, []>("op_2584"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2586 = const()[name = tensor<string, []>("op_2586"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2588_pad_type_0 = const()[name = tensor<string, []>("op_2588_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2588_pad_0 = const()[name = tensor<string, []>("op_2588_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_2588 = conv(bias = layers_9_mlp_fc1_bias, dilations = var_2586, groups = var_2363, pad = var_2588_pad_0, pad_type = var_2588_pad_type_0, strides = var_2584, weight = layers_9_mlp_fc1_weight, x = var_2578_cast_fp16)[name = tensor<string, []>("op_2588")];
-            tensor<string, []> input_79_mode_0 = const()[name = tensor<string, []>("input_79_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_79 = gelu(mode = input_79_mode_0, x = var_2588)[name = tensor<string, []>("input_79")];
-            tensor<int32, [2]> var_2592 = const()[name = tensor<string, []>("op_2592"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2594 = const()[name = tensor<string, []>("op_2594"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2596_pad_type_0 = const()[name = tensor<string, []>("op_2596_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2596_pad_0 = const()[name = tensor<string, []>("op_2596_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2596 = conv(bias = layers_9_mlp_fc2_bias, dilations = var_2594, groups = var_2363, pad = var_2596_pad_0, pad_type = var_2596_pad_type_0, strides = var_2592, weight = layers_9_mlp_fc2_weight, x = input_79)[name = tensor<string, []>("op_2596")];
-            tensor<fp16, [1, 384, 1, 512]> x_81 = add(x = var_2578_cast_fp16, y = var_2596)[name = tensor<string, []>("x_81")];
-            tensor<fp16, []> var_2361_promoted_1 = const()[name = tensor<string, []>("op_2361_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2362_promoted_1 = const()[name = tensor<string, []>("op_2362_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_83 = clip(alpha = var_2361_promoted_1, beta = var_2362_promoted_1, x = x_81)[name = tensor<string, []>("x_83")];
-            tensor<int32, [1]> var_2601 = const()[name = tensor<string, []>("op_2601"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_41 = reduce_mean(axes = var_2601, keep_dims = var_2365, x = x_83)[name = tensor<string, []>("mean_41")];
+            tensor<fp16, [1, 1, 1, 512]> denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0, x = var_2504_cast_fp16)[name = tensor<string, []>("denom_39_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2506_cast_fp16 = mul(x = zero_mean_39, y = denom_39_cast_fp16)[name = tensor<string, []>("op_2506_cast_fp16")];
+            tensor<fp16, [384]> var_2508_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2508_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66820352)))];
+            tensor<fp16, [384]> var_2508_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2508_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66821184)))];
+            tensor<fp16, []> var_2508_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2508_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2508_cast_fp16 = batch_norm(beta = var_2508_beta_0_to_fp16, epsilon = var_2508_epsilon_0_to_fp16, gamma = var_2508_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2506_cast_fp16)[name = tensor<string, []>("op_2508_cast_fp16")];
+            tensor<int32, [2]> var_2514 = const()[name = tensor<string, []>("op_2514"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2516 = const()[name = tensor<string, []>("op_2516"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2518_pad_type_0 = const()[name = tensor<string, []>("op_2518_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2518_pad_0 = const()[name = tensor<string, []>("op_2518_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_2518 = conv(bias = layers_9_mlp_fc1_bias, dilations = var_2516, groups = var_2302, pad = var_2518_pad_0, pad_type = var_2518_pad_type_0, strides = var_2514, weight = layers_9_mlp_fc1_weight, x = var_2508_cast_fp16)[name = tensor<string, []>("op_2518")];
+            tensor<string, []> input_319_mode_0 = const()[name = tensor<string, []>("input_319_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_319 = gelu(mode = input_319_mode_0, x = var_2518)[name = tensor<string, []>("input_319")];
+            tensor<int32, [2]> var_2522 = const()[name = tensor<string, []>("op_2522"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2524 = const()[name = tensor<string, []>("op_2524"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2526_pad_type_0 = const()[name = tensor<string, []>("op_2526_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2526_pad_0 = const()[name = tensor<string, []>("op_2526_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2526 = conv(bias = layers_9_mlp_fc2_bias, dilations = var_2524, groups = var_2302, pad = var_2526_pad_0, pad_type = var_2526_pad_type_0, strides = var_2522, weight = layers_9_mlp_fc2_weight, x = input_319)[name = tensor<string, []>("op_2526")];
+            tensor<fp16, [1, 384, 1, 512]> x_81 = add(x = var_2508_cast_fp16, y = var_2526)[name = tensor<string, []>("x_81")];
+            tensor<fp16, []> var_2299_promoted_1 = const()[name = tensor<string, []>("op_2299_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2300_promoted_1 = const()[name = tensor<string, []>("op_2300_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_83 = clip(alpha = var_2299_promoted_1, beta = var_2300_promoted_1, x = x_81)[name = tensor<string, []>("x_83")];
+            tensor<int32, [1]> var_2531 = const()[name = tensor<string, []>("op_2531"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_41 = reduce_mean(axes = var_2531, keep_dims = var_2304, x = x_83)[name = tensor<string, []>("mean_41")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_41 = sub(x = x_83, y = mean_41)[name = tensor<string, []>("zero_mean_41")];
-            tensor<fp16, []> var_2370_promoted_1 = const()[name = tensor<string, []>("op_2370_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2604 = pow(x = zero_mean_41, y = var_2370_promoted_1)[name = tensor<string, []>("op_2604")];
-            tensor<int32, [1]> var_2605 = const()[name = tensor<string, []>("op_2605"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2606 = reduce_mean(axes = var_2605, keep_dims = var_2365, x = var_2604)[name = tensor<string, []>("op_2606")];
-            tensor<fp16, []> var_2607_to_fp16 = const()[name = tensor<string, []>("op_2607_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2608_cast_fp16 = add(x = var_2606, y = var_2607_to_fp16)[name = tensor<string, []>("op_2608_cast_fp16")];
+            tensor<fp16, []> var_2301_promoted_1 = const()[name = tensor<string, []>("op_2301_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2534 = pow(x = zero_mean_41, y = var_2301_promoted_1)[name = tensor<string, []>("op_2534")];
+            tensor<int32, [1]> var_2535 = const()[name = tensor<string, []>("op_2535"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2536 = reduce_mean(axes = var_2535, keep_dims = var_2304, x = var_2534)[name = tensor<string, []>("op_2536")];
+            tensor<fp16, []> var_2537_to_fp16 = const()[name = tensor<string, []>("op_2537_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2538_cast_fp16 = add(x = var_2536, y = var_2537_to_fp16)[name = tensor<string, []>("op_2538_cast_fp16")];
             tensor<fp32, []> denom_41_epsilon_0 = const()[name = tensor<string, []>("denom_41_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0, x = var_2608_cast_fp16)[name = tensor<string, []>("denom_41_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2610_cast_fp16 = mul(x = zero_mean_41, y = denom_41_cast_fp16)[name = tensor<string, []>("op_2610_cast_fp16")];
-            tensor<fp16, [384]> var_2612_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2612_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66822016)))];
-            tensor<fp16, [384]> var_2612_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2612_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66822848)))];
-            tensor<fp16, []> var_2612_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2612_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2612_cast_fp16 = batch_norm(beta = var_2612_beta_0_to_fp16, epsilon = var_2612_epsilon_0_to_fp16, gamma = var_2612_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2610_cast_fp16)[name = tensor<string, []>("op_2612_cast_fp16")];
-            tensor<int32, []> var_2617 = const()[name = tensor<string, []>("op_2617"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_2618 = const()[name = tensor<string, []>("op_2618"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_2619 = const()[name = tensor<string, []>("op_2619"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_2644 = const()[name = tensor<string, []>("op_2644"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2646 = const()[name = tensor<string, []>("op_2646"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2648_pad_type_0 = const()[name = tensor<string, []>("op_2648_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2648_pad_0 = const()[name = tensor<string, []>("op_2648_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2648 = conv(bias = layers_10_attention_q_proj_bias, dilations = var_2646, groups = var_2617, pad = var_2648_pad_0, pad_type = var_2648_pad_type_0, strides = var_2644, weight = layers_10_attention_q_proj_weight, x = var_2612_cast_fp16)[name = tensor<string, []>("op_2648")];
-            tensor<int32, [4]> var_2649 = const()[name = tensor<string, []>("op_2649"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2650 = reshape(shape = var_2649, x = var_2648)[name = tensor<string, []>("op_2650")];
-            tensor<int32, [2]> var_2653 = const()[name = tensor<string, []>("op_2653"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2655 = const()[name = tensor<string, []>("op_2655"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2657_pad_type_0 = const()[name = tensor<string, []>("op_2657_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2657_pad_0 = const()[name = tensor<string, []>("op_2657_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2657 = conv(bias = layers_10_attention_k_proj_bias, dilations = var_2655, groups = var_2617, pad = var_2657_pad_0, pad_type = var_2657_pad_type_0, strides = var_2653, weight = layers_10_attention_k_proj_weight, x = var_2612_cast_fp16)[name = tensor<string, []>("op_2657")];
-            tensor<int32, [4]> var_2658 = const()[name = tensor<string, []>("op_2658"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks_21 = reshape(shape = var_2658, x = var_2657)[name = tensor<string, []>("ks_21")];
-            tensor<int32, [2]> var_2662 = const()[name = tensor<string, []>("op_2662"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2664 = const()[name = tensor<string, []>("op_2664"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2666_pad_type_0 = const()[name = tensor<string, []>("op_2666_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2666_pad_0 = const()[name = tensor<string, []>("op_2666_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2666 = conv(bias = layers_10_attention_v_proj_bias, dilations = var_2664, groups = var_2617, pad = var_2666_pad_0, pad_type = var_2666_pad_type_0, strides = var_2662, weight = layers_10_attention_v_proj_weight, x = var_2612_cast_fp16)[name = tensor<string, []>("op_2666")];
-            tensor<int32, [4]> var_2667 = const()[name = tensor<string, []>("op_2667"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2668 = reshape(shape = var_2667, x = var_2666)[name = tensor<string, []>("op_2668")];
-            tensor<int32, [12]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2669_axis_0 = const()[name = tensor<string, []>("op_2669_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2669_0, tensor<fp16, [1, 32, 1, 512]> var_2669_1, tensor<fp16, [1, 32, 1, 512]> var_2669_2, tensor<fp16, [1, 32, 1, 512]> var_2669_3, tensor<fp16, [1, 32, 1, 512]> var_2669_4, tensor<fp16, [1, 32, 1, 512]> var_2669_5, tensor<fp16, [1, 32, 1, 512]> var_2669_6, tensor<fp16, [1, 32, 1, 512]> var_2669_7, tensor<fp16, [1, 32, 1, 512]> var_2669_8, tensor<fp16, [1, 32, 1, 512]> var_2669_9, tensor<fp16, [1, 32, 1, 512]> var_2669_10, tensor<fp16, [1, 32, 1, 512]> var_2669_11 = split(axis = var_2669_axis_0, split_sizes = tile_52, x = var_2650)[name = tensor<string, []>("op_2669")];
-            tensor<int32, [4]> var_2682_perm_0 = const()[name = tensor<string, []>("op_2682_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2683_axis_0 = const()[name = tensor<string, []>("op_2683_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_1 = transpose(perm = var_2682_perm_0, x = ks_21)[name = tensor<string, []>("transpose_1")];
-            tensor<fp16, [1, 512, 1, 32]> var_2683_0, tensor<fp16, [1, 512, 1, 32]> var_2683_1, tensor<fp16, [1, 512, 1, 32]> var_2683_2, tensor<fp16, [1, 512, 1, 32]> var_2683_3, tensor<fp16, [1, 512, 1, 32]> var_2683_4, tensor<fp16, [1, 512, 1, 32]> var_2683_5, tensor<fp16, [1, 512, 1, 32]> var_2683_6, tensor<fp16, [1, 512, 1, 32]> var_2683_7, tensor<fp16, [1, 512, 1, 32]> var_2683_8, tensor<fp16, [1, 512, 1, 32]> var_2683_9, tensor<fp16, [1, 512, 1, 32]> var_2683_10, tensor<fp16, [1, 512, 1, 32]> var_2683_11 = split(axis = var_2683_axis_0, split_sizes = tile_53, x = transpose_1)[name = tensor<string, []>("op_2683")];
-            tensor<int32, [12]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2696_axis_0 = const()[name = tensor<string, []>("op_2696_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2696_0, tensor<fp16, [1, 32, 1, 512]> var_2696_1, tensor<fp16, [1, 32, 1, 512]> var_2696_2, tensor<fp16, [1, 32, 1, 512]> var_2696_3, tensor<fp16, [1, 32, 1, 512]> var_2696_4, tensor<fp16, [1, 32, 1, 512]> var_2696_5, tensor<fp16, [1, 32, 1, 512]> var_2696_6, tensor<fp16, [1, 32, 1, 512]> var_2696_7, tensor<fp16, [1, 32, 1, 512]> var_2696_8, tensor<fp16, [1, 32, 1, 512]> var_2696_9, tensor<fp16, [1, 32, 1, 512]> var_2696_10, tensor<fp16, [1, 32, 1, 512]> var_2696_11 = split(axis = var_2696_axis_0, split_sizes = tile_54, x = var_2668)[name = tensor<string, []>("op_2696")];
-            tensor<string, []> var_2710_equation_0 = const()[name = tensor<string, []>("op_2710_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2710 = einsum(equation = var_2710_equation_0, values = (var_2683_0, var_2669_0))[name = tensor<string, []>("op_2710")];
-            tensor<fp16, []> var_2711_to_fp16 = const()[name = tensor<string, []>("op_2711_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_241_cast_fp16 = mul(x = var_2710, y = var_2711_to_fp16)[name = tensor<string, []>("w_241_cast_fp16")];
-            tensor<string, []> var_2714_equation_0 = const()[name = tensor<string, []>("op_2714_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2714 = einsum(equation = var_2714_equation_0, values = (var_2683_1, var_2669_1))[name = tensor<string, []>("op_2714")];
-            tensor<fp16, []> var_2715_to_fp16 = const()[name = tensor<string, []>("op_2715_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_243_cast_fp16 = mul(x = var_2714, y = var_2715_to_fp16)[name = tensor<string, []>("w_243_cast_fp16")];
-            tensor<string, []> var_2718_equation_0 = const()[name = tensor<string, []>("op_2718_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2718 = einsum(equation = var_2718_equation_0, values = (var_2683_2, var_2669_2))[name = tensor<string, []>("op_2718")];
-            tensor<fp16, []> var_2719_to_fp16 = const()[name = tensor<string, []>("op_2719_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_245_cast_fp16 = mul(x = var_2718, y = var_2719_to_fp16)[name = tensor<string, []>("w_245_cast_fp16")];
-            tensor<string, []> var_2722_equation_0 = const()[name = tensor<string, []>("op_2722_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2722 = einsum(equation = var_2722_equation_0, values = (var_2683_3, var_2669_3))[name = tensor<string, []>("op_2722")];
-            tensor<fp16, []> var_2723_to_fp16 = const()[name = tensor<string, []>("op_2723_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_247_cast_fp16 = mul(x = var_2722, y = var_2723_to_fp16)[name = tensor<string, []>("w_247_cast_fp16")];
-            tensor<string, []> var_2726_equation_0 = const()[name = tensor<string, []>("op_2726_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2726 = einsum(equation = var_2726_equation_0, values = (var_2683_4, var_2669_4))[name = tensor<string, []>("op_2726")];
-            tensor<fp16, []> var_2727_to_fp16 = const()[name = tensor<string, []>("op_2727_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_249_cast_fp16 = mul(x = var_2726, y = var_2727_to_fp16)[name = tensor<string, []>("w_249_cast_fp16")];
-            tensor<string, []> var_2730_equation_0 = const()[name = tensor<string, []>("op_2730_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2730 = einsum(equation = var_2730_equation_0, values = (var_2683_5, var_2669_5))[name = tensor<string, []>("op_2730")];
-            tensor<fp16, []> var_2731_to_fp16 = const()[name = tensor<string, []>("op_2731_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_251_cast_fp16 = mul(x = var_2730, y = var_2731_to_fp16)[name = tensor<string, []>("w_251_cast_fp16")];
-            tensor<string, []> var_2734_equation_0 = const()[name = tensor<string, []>("op_2734_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2734 = einsum(equation = var_2734_equation_0, values = (var_2683_6, var_2669_6))[name = tensor<string, []>("op_2734")];
-            tensor<fp16, []> var_2735_to_fp16 = const()[name = tensor<string, []>("op_2735_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_253_cast_fp16 = mul(x = var_2734, y = var_2735_to_fp16)[name = tensor<string, []>("w_253_cast_fp16")];
-            tensor<string, []> var_2738_equation_0 = const()[name = tensor<string, []>("op_2738_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2738 = einsum(equation = var_2738_equation_0, values = (var_2683_7, var_2669_7))[name = tensor<string, []>("op_2738")];
-            tensor<fp16, []> var_2739_to_fp16 = const()[name = tensor<string, []>("op_2739_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_255_cast_fp16 = mul(x = var_2738, y = var_2739_to_fp16)[name = tensor<string, []>("w_255_cast_fp16")];
-            tensor<string, []> var_2742_equation_0 = const()[name = tensor<string, []>("op_2742_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2742 = einsum(equation = var_2742_equation_0, values = (var_2683_8, var_2669_8))[name = tensor<string, []>("op_2742")];
-            tensor<fp16, []> var_2743_to_fp16 = const()[name = tensor<string, []>("op_2743_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_257_cast_fp16 = mul(x = var_2742, y = var_2743_to_fp16)[name = tensor<string, []>("w_257_cast_fp16")];
-            tensor<string, []> var_2746_equation_0 = const()[name = tensor<string, []>("op_2746_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2746 = einsum(equation = var_2746_equation_0, values = (var_2683_9, var_2669_9))[name = tensor<string, []>("op_2746")];
-            tensor<fp16, []> var_2747_to_fp16 = const()[name = tensor<string, []>("op_2747_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_259_cast_fp16 = mul(x = var_2746, y = var_2747_to_fp16)[name = tensor<string, []>("w_259_cast_fp16")];
-            tensor<string, []> var_2750_equation_0 = const()[name = tensor<string, []>("op_2750_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2750 = einsum(equation = var_2750_equation_0, values = (var_2683_10, var_2669_10))[name = tensor<string, []>("op_2750")];
-            tensor<fp16, []> var_2751_to_fp16 = const()[name = tensor<string, []>("op_2751_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_261_cast_fp16 = mul(x = var_2750, y = var_2751_to_fp16)[name = tensor<string, []>("w_261_cast_fp16")];
-            tensor<string, []> var_2754_equation_0 = const()[name = tensor<string, []>("op_2754_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2754 = einsum(equation = var_2754_equation_0, values = (var_2683_11, var_2669_11))[name = tensor<string, []>("op_2754")];
-            tensor<fp16, []> var_2755_to_fp16 = const()[name = tensor<string, []>("op_2755_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_263_cast_fp16 = mul(x = var_2754, y = var_2755_to_fp16)[name = tensor<string, []>("w_263_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2757_cast_fp16 = add(x = w_241_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2757_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2758_cast_fp16 = softmax(axis = var_2617, x = var_2757_cast_fp16)[name = tensor<string, []>("op_2758_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2759_cast_fp16 = add(x = w_243_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2759_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2760_cast_fp16 = softmax(axis = var_2617, x = var_2759_cast_fp16)[name = tensor<string, []>("op_2760_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2761_cast_fp16 = add(x = w_245_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2761_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2762_cast_fp16 = softmax(axis = var_2617, x = var_2761_cast_fp16)[name = tensor<string, []>("op_2762_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2763_cast_fp16 = add(x = w_247_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2763_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2764_cast_fp16 = softmax(axis = var_2617, x = var_2763_cast_fp16)[name = tensor<string, []>("op_2764_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2765_cast_fp16 = add(x = w_249_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2765_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2766_cast_fp16 = softmax(axis = var_2617, x = var_2765_cast_fp16)[name = tensor<string, []>("op_2766_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2767_cast_fp16 = add(x = w_251_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2767_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2768_cast_fp16 = softmax(axis = var_2617, x = var_2767_cast_fp16)[name = tensor<string, []>("op_2768_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2769_cast_fp16 = add(x = w_253_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2769_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2770_cast_fp16 = softmax(axis = var_2617, x = var_2769_cast_fp16)[name = tensor<string, []>("op_2770_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2771_cast_fp16 = add(x = w_255_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2771_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2772_cast_fp16 = softmax(axis = var_2617, x = var_2771_cast_fp16)[name = tensor<string, []>("op_2772_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2773_cast_fp16 = add(x = w_257_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2773_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2774_cast_fp16 = softmax(axis = var_2617, x = var_2773_cast_fp16)[name = tensor<string, []>("op_2774_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2775_cast_fp16 = add(x = w_259_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2775_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2776_cast_fp16 = softmax(axis = var_2617, x = var_2775_cast_fp16)[name = tensor<string, []>("op_2776_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2777_cast_fp16 = add(x = w_261_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2777_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2778_cast_fp16 = softmax(axis = var_2617, x = var_2777_cast_fp16)[name = tensor<string, []>("op_2778_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2779_cast_fp16 = add(x = w_263_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_2779_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_2780_cast_fp16 = softmax(axis = var_2617, x = var_2779_cast_fp16)[name = tensor<string, []>("op_2780_cast_fp16")];
-            tensor<string, []> var_2782_equation_0 = const()[name = tensor<string, []>("op_2782_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2782_cast_fp16 = einsum(equation = var_2782_equation_0, values = (var_2696_0, var_2758_cast_fp16))[name = tensor<string, []>("op_2782_cast_fp16")];
-            tensor<string, []> var_2784_equation_0 = const()[name = tensor<string, []>("op_2784_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2784_cast_fp16 = einsum(equation = var_2784_equation_0, values = (var_2696_1, var_2760_cast_fp16))[name = tensor<string, []>("op_2784_cast_fp16")];
-            tensor<string, []> var_2786_equation_0 = const()[name = tensor<string, []>("op_2786_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2786_cast_fp16 = einsum(equation = var_2786_equation_0, values = (var_2696_2, var_2762_cast_fp16))[name = tensor<string, []>("op_2786_cast_fp16")];
-            tensor<string, []> var_2788_equation_0 = const()[name = tensor<string, []>("op_2788_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2788_cast_fp16 = einsum(equation = var_2788_equation_0, values = (var_2696_3, var_2764_cast_fp16))[name = tensor<string, []>("op_2788_cast_fp16")];
-            tensor<string, []> var_2790_equation_0 = const()[name = tensor<string, []>("op_2790_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2790_cast_fp16 = einsum(equation = var_2790_equation_0, values = (var_2696_4, var_2766_cast_fp16))[name = tensor<string, []>("op_2790_cast_fp16")];
-            tensor<string, []> var_2792_equation_0 = const()[name = tensor<string, []>("op_2792_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2792_cast_fp16 = einsum(equation = var_2792_equation_0, values = (var_2696_5, var_2768_cast_fp16))[name = tensor<string, []>("op_2792_cast_fp16")];
-            tensor<string, []> var_2794_equation_0 = const()[name = tensor<string, []>("op_2794_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2794_cast_fp16 = einsum(equation = var_2794_equation_0, values = (var_2696_6, var_2770_cast_fp16))[name = tensor<string, []>("op_2794_cast_fp16")];
-            tensor<string, []> var_2796_equation_0 = const()[name = tensor<string, []>("op_2796_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2796_cast_fp16 = einsum(equation = var_2796_equation_0, values = (var_2696_7, var_2772_cast_fp16))[name = tensor<string, []>("op_2796_cast_fp16")];
-            tensor<string, []> var_2798_equation_0 = const()[name = tensor<string, []>("op_2798_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2798_cast_fp16 = einsum(equation = var_2798_equation_0, values = (var_2696_8, var_2774_cast_fp16))[name = tensor<string, []>("op_2798_cast_fp16")];
-            tensor<string, []> var_2800_equation_0 = const()[name = tensor<string, []>("op_2800_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2800_cast_fp16 = einsum(equation = var_2800_equation_0, values = (var_2696_9, var_2776_cast_fp16))[name = tensor<string, []>("op_2800_cast_fp16")];
-            tensor<string, []> var_2802_equation_0 = const()[name = tensor<string, []>("op_2802_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2802_cast_fp16 = einsum(equation = var_2802_equation_0, values = (var_2696_10, var_2778_cast_fp16))[name = tensor<string, []>("op_2802_cast_fp16")];
-            tensor<string, []> var_2804_equation_0 = const()[name = tensor<string, []>("op_2804_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_2804_cast_fp16 = einsum(equation = var_2804_equation_0, values = (var_2696_11, var_2780_cast_fp16))[name = tensor<string, []>("op_2804_cast_fp16")];
-            tensor<bool, []> var_2806_interleave_0 = const()[name = tensor<string, []>("op_2806_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2806_cast_fp16 = concat(axis = var_2617, interleave = var_2806_interleave_0, values = (var_2782_cast_fp16, var_2784_cast_fp16, var_2786_cast_fp16, var_2788_cast_fp16, var_2790_cast_fp16, var_2792_cast_fp16, var_2794_cast_fp16, var_2796_cast_fp16, var_2798_cast_fp16, var_2800_cast_fp16, var_2802_cast_fp16, var_2804_cast_fp16))[name = tensor<string, []>("op_2806_cast_fp16")];
-            tensor<int32, [2]> var_2810 = const()[name = tensor<string, []>("op_2810"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2812 = const()[name = tensor<string, []>("op_2812"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2814_pad_type_0 = const()[name = tensor<string, []>("op_2814_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2814_pad_0 = const()[name = tensor<string, []>("op_2814_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2814 = conv(bias = layers_10_attention_o_proj_bias, dilations = var_2812, groups = var_2617, pad = var_2814_pad_0, pad_type = var_2814_pad_type_0, strides = var_2810, weight = layers_10_attention_o_proj_weight, x = var_2806_cast_fp16)[name = tensor<string, []>("op_2814")];
-            tensor<bool, []> var_2816_interleave_0 = const()[name = tensor<string, []>("op_2816_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_2816 = concat(axis = var_2618, interleave = var_2816_interleave_0, values = var_2814)[name = tensor<string, []>("op_2816")];
-            tensor<fp16, [1, 384, 1, 512]> x_85 = add(x = var_2612_cast_fp16, y = var_2816)[name = tensor<string, []>("x_85")];
-            tensor<fp16, []> var_2615_promoted = const()[name = tensor<string, []>("op_2615_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2616_promoted = const()[name = tensor<string, []>("op_2616_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_87 = clip(alpha = var_2615_promoted, beta = var_2616_promoted, x = x_85)[name = tensor<string, []>("x_87")];
-            tensor<int32, [1]> var_2821 = const()[name = tensor<string, []>("op_2821"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_43 = reduce_mean(axes = var_2821, keep_dims = var_2619, x = x_87)[name = tensor<string, []>("mean_43")];
+            tensor<fp16, [1, 1, 1, 512]> denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0, x = var_2538_cast_fp16)[name = tensor<string, []>("denom_41_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2540_cast_fp16 = mul(x = zero_mean_41, y = denom_41_cast_fp16)[name = tensor<string, []>("op_2540_cast_fp16")];
+            tensor<fp16, [384]> var_2542_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2542_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66822016)))];
+            tensor<fp16, [384]> var_2542_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2542_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66822848)))];
+            tensor<fp16, []> var_2542_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2542_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2542_cast_fp16 = batch_norm(beta = var_2542_beta_0_to_fp16, epsilon = var_2542_epsilon_0_to_fp16, gamma = var_2542_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2540_cast_fp16)[name = tensor<string, []>("op_2542_cast_fp16")];
+            tensor<int32, []> var_2548 = const()[name = tensor<string, []>("op_2548"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_2549 = const()[name = tensor<string, []>("op_2549"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_2550 = const()[name = tensor<string, []>("op_2550"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_2572 = const()[name = tensor<string, []>("op_2572"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2574 = const()[name = tensor<string, []>("op_2574"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2576_pad_type_0 = const()[name = tensor<string, []>("op_2576_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2576_pad_0 = const()[name = tensor<string, []>("op_2576_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2576 = conv(bias = layers_10_attention_q_proj_bias, dilations = var_2574, groups = var_2548, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2572, weight = layers_10_attention_q_proj_weight, x = var_2542_cast_fp16)[name = tensor<string, []>("op_2576")];
+            tensor<int32, [2]> var_2579 = const()[name = tensor<string, []>("op_2579"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2581 = const()[name = tensor<string, []>("op_2581"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_21_pad_type_0 = const()[name = tensor<string, []>("ks_21_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_21_pad_0 = const()[name = tensor<string, []>("ks_21_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks_21 = conv(bias = layers_10_attention_k_proj_bias, dilations = var_2581, groups = var_2548, pad = ks_21_pad_0, pad_type = ks_21_pad_type_0, strides = var_2579, weight = layers_10_attention_k_proj_weight, x = var_2542_cast_fp16)[name = tensor<string, []>("ks_21")];
+            tensor<int32, [2]> var_2586 = const()[name = tensor<string, []>("op_2586"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2588 = const()[name = tensor<string, []>("op_2588"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2590_pad_type_0 = const()[name = tensor<string, []>("op_2590_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2590_pad_0 = const()[name = tensor<string, []>("op_2590_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2590 = conv(bias = layers_10_attention_v_proj_bias, dilations = var_2588, groups = var_2548, pad = var_2590_pad_0, pad_type = var_2590_pad_type_0, strides = var_2586, weight = layers_10_attention_v_proj_weight, x = var_2542_cast_fp16)[name = tensor<string, []>("op_2590")];
+            tensor<int32, [12]> tile_52 = const()[name = tensor<string, []>("tile_52"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2591_axis_0 = const()[name = tensor<string, []>("op_2591_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2591_0, tensor<fp16, [1, 32, 1, 512]> var_2591_1, tensor<fp16, [1, 32, 1, 512]> var_2591_2, tensor<fp16, [1, 32, 1, 512]> var_2591_3, tensor<fp16, [1, 32, 1, 512]> var_2591_4, tensor<fp16, [1, 32, 1, 512]> var_2591_5, tensor<fp16, [1, 32, 1, 512]> var_2591_6, tensor<fp16, [1, 32, 1, 512]> var_2591_7, tensor<fp16, [1, 32, 1, 512]> var_2591_8, tensor<fp16, [1, 32, 1, 512]> var_2591_9, tensor<fp16, [1, 32, 1, 512]> var_2591_10, tensor<fp16, [1, 32, 1, 512]> var_2591_11 = split(axis = var_2591_axis_0, split_sizes = tile_52, x = var_2576)[name = tensor<string, []>("op_2591")];
+            tensor<int32, [4]> var_2604_perm_0 = const()[name = tensor<string, []>("op_2604_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_53 = const()[name = tensor<string, []>("tile_53"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2605_axis_0 = const()[name = tensor<string, []>("op_2605_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_1 = transpose(perm = var_2604_perm_0, x = ks_21)[name = tensor<string, []>("transpose_1")];
+            tensor<fp16, [1, 512, 1, 32]> var_2605_0, tensor<fp16, [1, 512, 1, 32]> var_2605_1, tensor<fp16, [1, 512, 1, 32]> var_2605_2, tensor<fp16, [1, 512, 1, 32]> var_2605_3, tensor<fp16, [1, 512, 1, 32]> var_2605_4, tensor<fp16, [1, 512, 1, 32]> var_2605_5, tensor<fp16, [1, 512, 1, 32]> var_2605_6, tensor<fp16, [1, 512, 1, 32]> var_2605_7, tensor<fp16, [1, 512, 1, 32]> var_2605_8, tensor<fp16, [1, 512, 1, 32]> var_2605_9, tensor<fp16, [1, 512, 1, 32]> var_2605_10, tensor<fp16, [1, 512, 1, 32]> var_2605_11 = split(axis = var_2605_axis_0, split_sizes = tile_53, x = transpose_1)[name = tensor<string, []>("op_2605")];
+            tensor<int32, [12]> tile_54 = const()[name = tensor<string, []>("tile_54"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2618_axis_0 = const()[name = tensor<string, []>("op_2618_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2618_0, tensor<fp16, [1, 32, 1, 512]> var_2618_1, tensor<fp16, [1, 32, 1, 512]> var_2618_2, tensor<fp16, [1, 32, 1, 512]> var_2618_3, tensor<fp16, [1, 32, 1, 512]> var_2618_4, tensor<fp16, [1, 32, 1, 512]> var_2618_5, tensor<fp16, [1, 32, 1, 512]> var_2618_6, tensor<fp16, [1, 32, 1, 512]> var_2618_7, tensor<fp16, [1, 32, 1, 512]> var_2618_8, tensor<fp16, [1, 32, 1, 512]> var_2618_9, tensor<fp16, [1, 32, 1, 512]> var_2618_10, tensor<fp16, [1, 32, 1, 512]> var_2618_11 = split(axis = var_2618_axis_0, split_sizes = tile_54, x = var_2590)[name = tensor<string, []>("op_2618")];
+            tensor<string, []> var_2632_equation_0 = const()[name = tensor<string, []>("op_2632_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2632 = einsum(equation = var_2632_equation_0, values = (var_2605_0, var_2591_0))[name = tensor<string, []>("op_2632")];
+            tensor<fp16, []> var_2633_to_fp16 = const()[name = tensor<string, []>("op_2633_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_241_cast_fp16 = mul(x = var_2632, y = var_2633_to_fp16)[name = tensor<string, []>("w_241_cast_fp16")];
+            tensor<string, []> var_2636_equation_0 = const()[name = tensor<string, []>("op_2636_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2636 = einsum(equation = var_2636_equation_0, values = (var_2605_1, var_2591_1))[name = tensor<string, []>("op_2636")];
+            tensor<fp16, []> var_2637_to_fp16 = const()[name = tensor<string, []>("op_2637_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_243_cast_fp16 = mul(x = var_2636, y = var_2637_to_fp16)[name = tensor<string, []>("w_243_cast_fp16")];
+            tensor<string, []> var_2640_equation_0 = const()[name = tensor<string, []>("op_2640_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2640 = einsum(equation = var_2640_equation_0, values = (var_2605_2, var_2591_2))[name = tensor<string, []>("op_2640")];
+            tensor<fp16, []> var_2641_to_fp16 = const()[name = tensor<string, []>("op_2641_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_245_cast_fp16 = mul(x = var_2640, y = var_2641_to_fp16)[name = tensor<string, []>("w_245_cast_fp16")];
+            tensor<string, []> var_2644_equation_0 = const()[name = tensor<string, []>("op_2644_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2644 = einsum(equation = var_2644_equation_0, values = (var_2605_3, var_2591_3))[name = tensor<string, []>("op_2644")];
+            tensor<fp16, []> var_2645_to_fp16 = const()[name = tensor<string, []>("op_2645_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_247_cast_fp16 = mul(x = var_2644, y = var_2645_to_fp16)[name = tensor<string, []>("w_247_cast_fp16")];
+            tensor<string, []> var_2648_equation_0 = const()[name = tensor<string, []>("op_2648_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2648 = einsum(equation = var_2648_equation_0, values = (var_2605_4, var_2591_4))[name = tensor<string, []>("op_2648")];
+            tensor<fp16, []> var_2649_to_fp16 = const()[name = tensor<string, []>("op_2649_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_249_cast_fp16 = mul(x = var_2648, y = var_2649_to_fp16)[name = tensor<string, []>("w_249_cast_fp16")];
+            tensor<string, []> var_2652_equation_0 = const()[name = tensor<string, []>("op_2652_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2652 = einsum(equation = var_2652_equation_0, values = (var_2605_5, var_2591_5))[name = tensor<string, []>("op_2652")];
+            tensor<fp16, []> var_2653_to_fp16 = const()[name = tensor<string, []>("op_2653_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_251_cast_fp16 = mul(x = var_2652, y = var_2653_to_fp16)[name = tensor<string, []>("w_251_cast_fp16")];
+            tensor<string, []> var_2656_equation_0 = const()[name = tensor<string, []>("op_2656_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2656 = einsum(equation = var_2656_equation_0, values = (var_2605_6, var_2591_6))[name = tensor<string, []>("op_2656")];
+            tensor<fp16, []> var_2657_to_fp16 = const()[name = tensor<string, []>("op_2657_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_253_cast_fp16 = mul(x = var_2656, y = var_2657_to_fp16)[name = tensor<string, []>("w_253_cast_fp16")];
+            tensor<string, []> var_2660_equation_0 = const()[name = tensor<string, []>("op_2660_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2660 = einsum(equation = var_2660_equation_0, values = (var_2605_7, var_2591_7))[name = tensor<string, []>("op_2660")];
+            tensor<fp16, []> var_2661_to_fp16 = const()[name = tensor<string, []>("op_2661_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_255_cast_fp16 = mul(x = var_2660, y = var_2661_to_fp16)[name = tensor<string, []>("w_255_cast_fp16")];
+            tensor<string, []> var_2664_equation_0 = const()[name = tensor<string, []>("op_2664_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2664 = einsum(equation = var_2664_equation_0, values = (var_2605_8, var_2591_8))[name = tensor<string, []>("op_2664")];
+            tensor<fp16, []> var_2665_to_fp16 = const()[name = tensor<string, []>("op_2665_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_257_cast_fp16 = mul(x = var_2664, y = var_2665_to_fp16)[name = tensor<string, []>("w_257_cast_fp16")];
+            tensor<string, []> var_2668_equation_0 = const()[name = tensor<string, []>("op_2668_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2668 = einsum(equation = var_2668_equation_0, values = (var_2605_9, var_2591_9))[name = tensor<string, []>("op_2668")];
+            tensor<fp16, []> var_2669_to_fp16 = const()[name = tensor<string, []>("op_2669_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_259_cast_fp16 = mul(x = var_2668, y = var_2669_to_fp16)[name = tensor<string, []>("w_259_cast_fp16")];
+            tensor<string, []> var_2672_equation_0 = const()[name = tensor<string, []>("op_2672_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2672 = einsum(equation = var_2672_equation_0, values = (var_2605_10, var_2591_10))[name = tensor<string, []>("op_2672")];
+            tensor<fp16, []> var_2673_to_fp16 = const()[name = tensor<string, []>("op_2673_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_261_cast_fp16 = mul(x = var_2672, y = var_2673_to_fp16)[name = tensor<string, []>("w_261_cast_fp16")];
+            tensor<string, []> var_2676_equation_0 = const()[name = tensor<string, []>("op_2676_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2676 = einsum(equation = var_2676_equation_0, values = (var_2605_11, var_2591_11))[name = tensor<string, []>("op_2676")];
+            tensor<fp16, []> var_2677_to_fp16 = const()[name = tensor<string, []>("op_2677_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_263_cast_fp16 = mul(x = var_2676, y = var_2677_to_fp16)[name = tensor<string, []>("w_263_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_323_cast_fp16 = add(x = w_241_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_323_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2680_cast_fp16 = softmax(axis = var_2548, x = input_323_cast_fp16)[name = tensor<string, []>("op_2680_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_325_cast_fp16 = add(x = w_243_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_325_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2682_cast_fp16 = softmax(axis = var_2548, x = input_325_cast_fp16)[name = tensor<string, []>("op_2682_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_327_cast_fp16 = add(x = w_245_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_327_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2684_cast_fp16 = softmax(axis = var_2548, x = input_327_cast_fp16)[name = tensor<string, []>("op_2684_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_329_cast_fp16 = add(x = w_247_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_329_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2686_cast_fp16 = softmax(axis = var_2548, x = input_329_cast_fp16)[name = tensor<string, []>("op_2686_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_331_cast_fp16 = add(x = w_249_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_331_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2688_cast_fp16 = softmax(axis = var_2548, x = input_331_cast_fp16)[name = tensor<string, []>("op_2688_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_333_cast_fp16 = add(x = w_251_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_333_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2690_cast_fp16 = softmax(axis = var_2548, x = input_333_cast_fp16)[name = tensor<string, []>("op_2690_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_335_cast_fp16 = add(x = w_253_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_335_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2692_cast_fp16 = softmax(axis = var_2548, x = input_335_cast_fp16)[name = tensor<string, []>("op_2692_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_337_cast_fp16 = add(x = w_255_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_337_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2694_cast_fp16 = softmax(axis = var_2548, x = input_337_cast_fp16)[name = tensor<string, []>("op_2694_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_339_cast_fp16 = add(x = w_257_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_339_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2696_cast_fp16 = softmax(axis = var_2548, x = input_339_cast_fp16)[name = tensor<string, []>("op_2696_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_341_cast_fp16 = add(x = w_259_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_341_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2698_cast_fp16 = softmax(axis = var_2548, x = input_341_cast_fp16)[name = tensor<string, []>("op_2698_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_343_cast_fp16 = add(x = w_261_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_343_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2700_cast_fp16 = softmax(axis = var_2548, x = input_343_cast_fp16)[name = tensor<string, []>("op_2700_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_345_cast_fp16 = add(x = w_263_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_345_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2702_cast_fp16 = softmax(axis = var_2548, x = input_345_cast_fp16)[name = tensor<string, []>("op_2702_cast_fp16")];
+            tensor<string, []> var_2704_equation_0 = const()[name = tensor<string, []>("op_2704_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2704_cast_fp16 = einsum(equation = var_2704_equation_0, values = (var_2618_0, var_2680_cast_fp16))[name = tensor<string, []>("op_2704_cast_fp16")];
+            tensor<string, []> var_2706_equation_0 = const()[name = tensor<string, []>("op_2706_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2706_cast_fp16 = einsum(equation = var_2706_equation_0, values = (var_2618_1, var_2682_cast_fp16))[name = tensor<string, []>("op_2706_cast_fp16")];
+            tensor<string, []> var_2708_equation_0 = const()[name = tensor<string, []>("op_2708_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2708_cast_fp16 = einsum(equation = var_2708_equation_0, values = (var_2618_2, var_2684_cast_fp16))[name = tensor<string, []>("op_2708_cast_fp16")];
+            tensor<string, []> var_2710_equation_0 = const()[name = tensor<string, []>("op_2710_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2710_cast_fp16 = einsum(equation = var_2710_equation_0, values = (var_2618_3, var_2686_cast_fp16))[name = tensor<string, []>("op_2710_cast_fp16")];
+            tensor<string, []> var_2712_equation_0 = const()[name = tensor<string, []>("op_2712_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2712_cast_fp16 = einsum(equation = var_2712_equation_0, values = (var_2618_4, var_2688_cast_fp16))[name = tensor<string, []>("op_2712_cast_fp16")];
+            tensor<string, []> var_2714_equation_0 = const()[name = tensor<string, []>("op_2714_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2714_cast_fp16 = einsum(equation = var_2714_equation_0, values = (var_2618_5, var_2690_cast_fp16))[name = tensor<string, []>("op_2714_cast_fp16")];
+            tensor<string, []> var_2716_equation_0 = const()[name = tensor<string, []>("op_2716_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2716_cast_fp16 = einsum(equation = var_2716_equation_0, values = (var_2618_6, var_2692_cast_fp16))[name = tensor<string, []>("op_2716_cast_fp16")];
+            tensor<string, []> var_2718_equation_0 = const()[name = tensor<string, []>("op_2718_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2718_cast_fp16 = einsum(equation = var_2718_equation_0, values = (var_2618_7, var_2694_cast_fp16))[name = tensor<string, []>("op_2718_cast_fp16")];
+            tensor<string, []> var_2720_equation_0 = const()[name = tensor<string, []>("op_2720_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2720_cast_fp16 = einsum(equation = var_2720_equation_0, values = (var_2618_8, var_2696_cast_fp16))[name = tensor<string, []>("op_2720_cast_fp16")];
+            tensor<string, []> var_2722_equation_0 = const()[name = tensor<string, []>("op_2722_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2618_9, var_2698_cast_fp16))[name = tensor<string, []>("op_2722_cast_fp16")];
+            tensor<string, []> var_2724_equation_0 = const()[name = tensor<string, []>("op_2724_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2618_10, var_2700_cast_fp16))[name = tensor<string, []>("op_2724_cast_fp16")];
+            tensor<string, []> var_2726_equation_0 = const()[name = tensor<string, []>("op_2726_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2618_11, var_2702_cast_fp16))[name = tensor<string, []>("op_2726_cast_fp16")];
+            tensor<bool, []> var_2728_interleave_0 = const()[name = tensor<string, []>("op_2728_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2728_cast_fp16 = concat(axis = var_2548, interleave = var_2728_interleave_0, values = (var_2704_cast_fp16, var_2706_cast_fp16, var_2708_cast_fp16, var_2710_cast_fp16, var_2712_cast_fp16, var_2714_cast_fp16, var_2716_cast_fp16, var_2718_cast_fp16, var_2720_cast_fp16, var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16))[name = tensor<string, []>("op_2728_cast_fp16")];
+            tensor<int32, [2]> var_2732 = const()[name = tensor<string, []>("op_2732"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2734 = const()[name = tensor<string, []>("op_2734"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2736_pad_type_0 = const()[name = tensor<string, []>("op_2736_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2736_pad_0 = const()[name = tensor<string, []>("op_2736_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2736 = conv(bias = layers_10_attention_o_proj_bias, dilations = var_2734, groups = var_2548, pad = var_2736_pad_0, pad_type = var_2736_pad_type_0, strides = var_2732, weight = layers_10_attention_o_proj_weight, x = var_2728_cast_fp16)[name = tensor<string, []>("op_2736")];
+            tensor<bool, []> var_2738_interleave_0 = const()[name = tensor<string, []>("op_2738_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2738 = concat(axis = var_2549, interleave = var_2738_interleave_0, values = var_2736)[name = tensor<string, []>("op_2738")];
+            tensor<fp16, [1, 384, 1, 512]> x_85 = add(x = var_2542_cast_fp16, y = var_2738)[name = tensor<string, []>("x_85")];
+            tensor<fp16, []> var_2545_promoted = const()[name = tensor<string, []>("op_2545_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2546_promoted = const()[name = tensor<string, []>("op_2546_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_87 = clip(alpha = var_2545_promoted, beta = var_2546_promoted, x = x_85)[name = tensor<string, []>("x_87")];
+            tensor<int32, [1]> var_2743 = const()[name = tensor<string, []>("op_2743"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_43 = reduce_mean(axes = var_2743, keep_dims = var_2550, x = x_87)[name = tensor<string, []>("mean_43")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_43 = sub(x = x_87, y = mean_43)[name = tensor<string, []>("zero_mean_43")];
-            tensor<fp16, []> var_2624_promoted = const()[name = tensor<string, []>("op_2624_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2824 = pow(x = zero_mean_43, y = var_2624_promoted)[name = tensor<string, []>("op_2824")];
-            tensor<int32, [1]> var_2825 = const()[name = tensor<string, []>("op_2825"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2826 = reduce_mean(axes = var_2825, keep_dims = var_2619, x = var_2824)[name = tensor<string, []>("op_2826")];
-            tensor<fp16, []> var_2827_to_fp16 = const()[name = tensor<string, []>("op_2827_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2828_cast_fp16 = add(x = var_2826, y = var_2827_to_fp16)[name = tensor<string, []>("op_2828_cast_fp16")];
+            tensor<fp16, []> var_2547_promoted = const()[name = tensor<string, []>("op_2547_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2746 = pow(x = zero_mean_43, y = var_2547_promoted)[name = tensor<string, []>("op_2746")];
+            tensor<int32, [1]> var_2747 = const()[name = tensor<string, []>("op_2747"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2748 = reduce_mean(axes = var_2747, keep_dims = var_2550, x = var_2746)[name = tensor<string, []>("op_2748")];
+            tensor<fp16, []> var_2749_to_fp16 = const()[name = tensor<string, []>("op_2749_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2750_cast_fp16 = add(x = var_2748, y = var_2749_to_fp16)[name = tensor<string, []>("op_2750_cast_fp16")];
             tensor<fp32, []> denom_43_epsilon_0 = const()[name = tensor<string, []>("denom_43_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0, x = var_2828_cast_fp16)[name = tensor<string, []>("denom_43_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2830_cast_fp16 = mul(x = zero_mean_43, y = denom_43_cast_fp16)[name = tensor<string, []>("op_2830_cast_fp16")];
-            tensor<fp16, [384]> var_2832_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2832_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66823680)))];
-            tensor<fp16, [384]> var_2832_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2832_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66824512)))];
-            tensor<fp16, []> var_2832_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2832_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2832_cast_fp16 = batch_norm(beta = var_2832_beta_0_to_fp16, epsilon = var_2832_epsilon_0_to_fp16, gamma = var_2832_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2830_cast_fp16)[name = tensor<string, []>("op_2832_cast_fp16")];
-            tensor<int32, [2]> var_2838 = const()[name = tensor<string, []>("op_2838"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2840 = const()[name = tensor<string, []>("op_2840"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2842_pad_type_0 = const()[name = tensor<string, []>("op_2842_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2842_pad_0 = const()[name = tensor<string, []>("op_2842_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_2842 = conv(bias = layers_10_mlp_fc1_bias, dilations = var_2840, groups = var_2617, pad = var_2842_pad_0, pad_type = var_2842_pad_type_0, strides = var_2838, weight = layers_10_mlp_fc1_weight, x = var_2832_cast_fp16)[name = tensor<string, []>("op_2842")];
-            tensor<string, []> input_87_mode_0 = const()[name = tensor<string, []>("input_87_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_87 = gelu(mode = input_87_mode_0, x = var_2842)[name = tensor<string, []>("input_87")];
-            tensor<int32, [2]> var_2846 = const()[name = tensor<string, []>("op_2846"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2848 = const()[name = tensor<string, []>("op_2848"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2850_pad_type_0 = const()[name = tensor<string, []>("op_2850_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2850_pad_0 = const()[name = tensor<string, []>("op_2850_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2850 = conv(bias = layers_10_mlp_fc2_bias, dilations = var_2848, groups = var_2617, pad = var_2850_pad_0, pad_type = var_2850_pad_type_0, strides = var_2846, weight = layers_10_mlp_fc2_weight, x = input_87)[name = tensor<string, []>("op_2850")];
-            tensor<fp16, [1, 384, 1, 512]> x_89 = add(x = var_2832_cast_fp16, y = var_2850)[name = tensor<string, []>("x_89")];
-            tensor<fp16, []> var_2615_promoted_1 = const()[name = tensor<string, []>("op_2615_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2616_promoted_1 = const()[name = tensor<string, []>("op_2616_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_91 = clip(alpha = var_2615_promoted_1, beta = var_2616_promoted_1, x = x_89)[name = tensor<string, []>("x_91")];
-            tensor<int32, [1]> var_2855 = const()[name = tensor<string, []>("op_2855"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_45 = reduce_mean(axes = var_2855, keep_dims = var_2619, x = x_91)[name = tensor<string, []>("mean_45")];
+            tensor<fp16, [1, 1, 1, 512]> denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0, x = var_2750_cast_fp16)[name = tensor<string, []>("denom_43_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2752_cast_fp16 = mul(x = zero_mean_43, y = denom_43_cast_fp16)[name = tensor<string, []>("op_2752_cast_fp16")];
+            tensor<fp16, [384]> var_2754_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2754_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66823680)))];
+            tensor<fp16, [384]> var_2754_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2754_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66824512)))];
+            tensor<fp16, []> var_2754_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2754_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2754_cast_fp16 = batch_norm(beta = var_2754_beta_0_to_fp16, epsilon = var_2754_epsilon_0_to_fp16, gamma = var_2754_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2752_cast_fp16)[name = tensor<string, []>("op_2754_cast_fp16")];
+            tensor<int32, [2]> var_2760 = const()[name = tensor<string, []>("op_2760"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2762 = const()[name = tensor<string, []>("op_2762"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2764_pad_type_0 = const()[name = tensor<string, []>("op_2764_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2764_pad_0 = const()[name = tensor<string, []>("op_2764_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_2764 = conv(bias = layers_10_mlp_fc1_bias, dilations = var_2762, groups = var_2548, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2760, weight = layers_10_mlp_fc1_weight, x = var_2754_cast_fp16)[name = tensor<string, []>("op_2764")];
+            tensor<string, []> input_351_mode_0 = const()[name = tensor<string, []>("input_351_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_351 = gelu(mode = input_351_mode_0, x = var_2764)[name = tensor<string, []>("input_351")];
+            tensor<int32, [2]> var_2768 = const()[name = tensor<string, []>("op_2768"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2770 = const()[name = tensor<string, []>("op_2770"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2772_pad_type_0 = const()[name = tensor<string, []>("op_2772_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2772_pad_0 = const()[name = tensor<string, []>("op_2772_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2772 = conv(bias = layers_10_mlp_fc2_bias, dilations = var_2770, groups = var_2548, pad = var_2772_pad_0, pad_type = var_2772_pad_type_0, strides = var_2768, weight = layers_10_mlp_fc2_weight, x = input_351)[name = tensor<string, []>("op_2772")];
+            tensor<fp16, [1, 384, 1, 512]> x_89 = add(x = var_2754_cast_fp16, y = var_2772)[name = tensor<string, []>("x_89")];
+            tensor<fp16, []> var_2545_promoted_1 = const()[name = tensor<string, []>("op_2545_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2546_promoted_1 = const()[name = tensor<string, []>("op_2546_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_91 = clip(alpha = var_2545_promoted_1, beta = var_2546_promoted_1, x = x_89)[name = tensor<string, []>("x_91")];
+            tensor<int32, [1]> var_2777 = const()[name = tensor<string, []>("op_2777"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_45 = reduce_mean(axes = var_2777, keep_dims = var_2550, x = x_91)[name = tensor<string, []>("mean_45")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_45 = sub(x = x_91, y = mean_45)[name = tensor<string, []>("zero_mean_45")];
-            tensor<fp16, []> var_2624_promoted_1 = const()[name = tensor<string, []>("op_2624_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_2858 = pow(x = zero_mean_45, y = var_2624_promoted_1)[name = tensor<string, []>("op_2858")];
-            tensor<int32, [1]> var_2859 = const()[name = tensor<string, []>("op_2859"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_2860 = reduce_mean(axes = var_2859, keep_dims = var_2619, x = var_2858)[name = tensor<string, []>("op_2860")];
-            tensor<fp16, []> var_2861_to_fp16 = const()[name = tensor<string, []>("op_2861_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_2862_cast_fp16 = add(x = var_2860, y = var_2861_to_fp16)[name = tensor<string, []>("op_2862_cast_fp16")];
+            tensor<fp16, []> var_2547_promoted_1 = const()[name = tensor<string, []>("op_2547_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2780 = pow(x = zero_mean_45, y = var_2547_promoted_1)[name = tensor<string, []>("op_2780")];
+            tensor<int32, [1]> var_2781 = const()[name = tensor<string, []>("op_2781"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2782 = reduce_mean(axes = var_2781, keep_dims = var_2550, x = var_2780)[name = tensor<string, []>("op_2782")];
+            tensor<fp16, []> var_2783_to_fp16 = const()[name = tensor<string, []>("op_2783_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2784_cast_fp16 = add(x = var_2782, y = var_2783_to_fp16)[name = tensor<string, []>("op_2784_cast_fp16")];
             tensor<fp32, []> denom_45_epsilon_0 = const()[name = tensor<string, []>("denom_45_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0, x = var_2862_cast_fp16)[name = tensor<string, []>("denom_45_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_2864_cast_fp16 = mul(x = zero_mean_45, y = denom_45_cast_fp16)[name = tensor<string, []>("op_2864_cast_fp16")];
-            tensor<fp16, [384]> var_2866_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2866_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66825344)))];
-            tensor<fp16, [384]> var_2866_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2866_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66826176)))];
-            tensor<fp16, []> var_2866_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2866_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_2866_cast_fp16 = batch_norm(beta = var_2866_beta_0_to_fp16, epsilon = var_2866_epsilon_0_to_fp16, gamma = var_2866_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2864_cast_fp16)[name = tensor<string, []>("op_2866_cast_fp16")];
-            tensor<int32, []> var_2871 = const()[name = tensor<string, []>("op_2871"), val = tensor<int32, []>(1)];
-            tensor<int32, []> var_2872 = const()[name = tensor<string, []>("op_2872"), val = tensor<int32, []>(0)];
-            tensor<bool, []> var_2873 = const()[name = tensor<string, []>("op_2873"), val = tensor<bool, []>(true)];
-            tensor<int32, [2]> var_2898 = const()[name = tensor<string, []>("op_2898"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2900 = const()[name = tensor<string, []>("op_2900"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2902_pad_type_0 = const()[name = tensor<string, []>("op_2902_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2902_pad_0 = const()[name = tensor<string, []>("op_2902_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2902 = conv(bias = layers_11_attention_q_proj_bias, dilations = var_2900, groups = var_2871, pad = var_2902_pad_0, pad_type = var_2902_pad_type_0, strides = var_2898, weight = layers_11_attention_q_proj_weight, x = var_2866_cast_fp16)[name = tensor<string, []>("op_2902")];
-            tensor<int32, [4]> var_2903 = const()[name = tensor<string, []>("op_2903"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2904 = reshape(shape = var_2903, x = var_2902)[name = tensor<string, []>("op_2904")];
-            tensor<int32, [2]> var_2907 = const()[name = tensor<string, []>("op_2907"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2909 = const()[name = tensor<string, []>("op_2909"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2911_pad_type_0 = const()[name = tensor<string, []>("op_2911_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2911_pad_0 = const()[name = tensor<string, []>("op_2911_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2911 = conv(bias = layers_11_attention_k_proj_bias, dilations = var_2909, groups = var_2871, pad = var_2911_pad_0, pad_type = var_2911_pad_type_0, strides = var_2907, weight = layers_11_attention_k_proj_weight, x = var_2866_cast_fp16)[name = tensor<string, []>("op_2911")];
-            tensor<int32, [4]> var_2912 = const()[name = tensor<string, []>("op_2912"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> ks = reshape(shape = var_2912, x = var_2911)[name = tensor<string, []>("ks")];
-            tensor<int32, [2]> var_2916 = const()[name = tensor<string, []>("op_2916"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_2918 = const()[name = tensor<string, []>("op_2918"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_2920_pad_type_0 = const()[name = tensor<string, []>("op_2920_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_2920_pad_0 = const()[name = tensor<string, []>("op_2920_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_2920 = conv(bias = layers_11_attention_v_proj_bias, dilations = var_2918, groups = var_2871, pad = var_2920_pad_0, pad_type = var_2920_pad_type_0, strides = var_2916, weight = layers_11_attention_v_proj_weight, x = var_2866_cast_fp16)[name = tensor<string, []>("op_2920")];
-            tensor<int32, [4]> var_2921 = const()[name = tensor<string, []>("op_2921"), val = tensor<int32, [4]>([1, 32, 12, 512])];
-            tensor<fp16, [1, 32, 12, 512]> var_2922 = reshape(shape = var_2921, x = var_2920)[name = tensor<string, []>("op_2922")];
-            tensor<int32, [12]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2923_axis_0 = const()[name = tensor<string, []>("op_2923_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2923_0, tensor<fp16, [1, 32, 1, 512]> var_2923_1, tensor<fp16, [1, 32, 1, 512]> var_2923_2, tensor<fp16, [1, 32, 1, 512]> var_2923_3, tensor<fp16, [1, 32, 1, 512]> var_2923_4, tensor<fp16, [1, 32, 1, 512]> var_2923_5, tensor<fp16, [1, 32, 1, 512]> var_2923_6, tensor<fp16, [1, 32, 1, 512]> var_2923_7, tensor<fp16, [1, 32, 1, 512]> var_2923_8, tensor<fp16, [1, 32, 1, 512]> var_2923_9, tensor<fp16, [1, 32, 1, 512]> var_2923_10, tensor<fp16, [1, 32, 1, 512]> var_2923_11 = split(axis = var_2923_axis_0, split_sizes = tile_57, x = var_2904)[name = tensor<string, []>("op_2923")];
-            tensor<int32, [4]> var_2936_perm_0 = const()[name = tensor<string, []>("op_2936_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
-            tensor<int32, [12]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2937_axis_0 = const()[name = tensor<string, []>("op_2937_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 512, 12, 32]> transpose_0 = transpose(perm = var_2936_perm_0, x = ks)[name = tensor<string, []>("transpose_0")];
-            tensor<fp16, [1, 512, 1, 32]> var_2937_0, tensor<fp16, [1, 512, 1, 32]> var_2937_1, tensor<fp16, [1, 512, 1, 32]> var_2937_2, tensor<fp16, [1, 512, 1, 32]> var_2937_3, tensor<fp16, [1, 512, 1, 32]> var_2937_4, tensor<fp16, [1, 512, 1, 32]> var_2937_5, tensor<fp16, [1, 512, 1, 32]> var_2937_6, tensor<fp16, [1, 512, 1, 32]> var_2937_7, tensor<fp16, [1, 512, 1, 32]> var_2937_8, tensor<fp16, [1, 512, 1, 32]> var_2937_9, tensor<fp16, [1, 512, 1, 32]> var_2937_10, tensor<fp16, [1, 512, 1, 32]> var_2937_11 = split(axis = var_2937_axis_0, split_sizes = tile_58, x = transpose_0)[name = tensor<string, []>("op_2937")];
-            tensor<int32, [12]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [12]>([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])];
-            tensor<int32, []> var_2950_axis_0 = const()[name = tensor<string, []>("op_2950_axis_0"), val = tensor<int32, []>(2)];
-            tensor<fp16, [1, 32, 1, 512]> var_2950_0, tensor<fp16, [1, 32, 1, 512]> var_2950_1, tensor<fp16, [1, 32, 1, 512]> var_2950_2, tensor<fp16, [1, 32, 1, 512]> var_2950_3, tensor<fp16, [1, 32, 1, 512]> var_2950_4, tensor<fp16, [1, 32, 1, 512]> var_2950_5, tensor<fp16, [1, 32, 1, 512]> var_2950_6, tensor<fp16, [1, 32, 1, 512]> var_2950_7, tensor<fp16, [1, 32, 1, 512]> var_2950_8, tensor<fp16, [1, 32, 1, 512]> var_2950_9, tensor<fp16, [1, 32, 1, 512]> var_2950_10, tensor<fp16, [1, 32, 1, 512]> var_2950_11 = split(axis = var_2950_axis_0, split_sizes = tile_59, x = var_2922)[name = tensor<string, []>("op_2950")];
-            tensor<string, []> var_2964_equation_0 = const()[name = tensor<string, []>("op_2964_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2964 = einsum(equation = var_2964_equation_0, values = (var_2937_0, var_2923_0))[name = tensor<string, []>("op_2964")];
-            tensor<fp16, []> var_2965_to_fp16 = const()[name = tensor<string, []>("op_2965_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_265_cast_fp16 = mul(x = var_2964, y = var_2965_to_fp16)[name = tensor<string, []>("w_265_cast_fp16")];
-            tensor<string, []> var_2968_equation_0 = const()[name = tensor<string, []>("op_2968_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2968 = einsum(equation = var_2968_equation_0, values = (var_2937_1, var_2923_1))[name = tensor<string, []>("op_2968")];
-            tensor<fp16, []> var_2969_to_fp16 = const()[name = tensor<string, []>("op_2969_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_267_cast_fp16 = mul(x = var_2968, y = var_2969_to_fp16)[name = tensor<string, []>("w_267_cast_fp16")];
-            tensor<string, []> var_2972_equation_0 = const()[name = tensor<string, []>("op_2972_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2972 = einsum(equation = var_2972_equation_0, values = (var_2937_2, var_2923_2))[name = tensor<string, []>("op_2972")];
-            tensor<fp16, []> var_2973_to_fp16 = const()[name = tensor<string, []>("op_2973_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_269_cast_fp16 = mul(x = var_2972, y = var_2973_to_fp16)[name = tensor<string, []>("w_269_cast_fp16")];
-            tensor<string, []> var_2976_equation_0 = const()[name = tensor<string, []>("op_2976_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2976 = einsum(equation = var_2976_equation_0, values = (var_2937_3, var_2923_3))[name = tensor<string, []>("op_2976")];
-            tensor<fp16, []> var_2977_to_fp16 = const()[name = tensor<string, []>("op_2977_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_271_cast_fp16 = mul(x = var_2976, y = var_2977_to_fp16)[name = tensor<string, []>("w_271_cast_fp16")];
-            tensor<string, []> var_2980_equation_0 = const()[name = tensor<string, []>("op_2980_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2980 = einsum(equation = var_2980_equation_0, values = (var_2937_4, var_2923_4))[name = tensor<string, []>("op_2980")];
-            tensor<fp16, []> var_2981_to_fp16 = const()[name = tensor<string, []>("op_2981_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_273_cast_fp16 = mul(x = var_2980, y = var_2981_to_fp16)[name = tensor<string, []>("w_273_cast_fp16")];
-            tensor<string, []> var_2984_equation_0 = const()[name = tensor<string, []>("op_2984_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2984 = einsum(equation = var_2984_equation_0, values = (var_2937_5, var_2923_5))[name = tensor<string, []>("op_2984")];
-            tensor<fp16, []> var_2985_to_fp16 = const()[name = tensor<string, []>("op_2985_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_275_cast_fp16 = mul(x = var_2984, y = var_2985_to_fp16)[name = tensor<string, []>("w_275_cast_fp16")];
-            tensor<string, []> var_2988_equation_0 = const()[name = tensor<string, []>("op_2988_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2988 = einsum(equation = var_2988_equation_0, values = (var_2937_6, var_2923_6))[name = tensor<string, []>("op_2988")];
-            tensor<fp16, []> var_2989_to_fp16 = const()[name = tensor<string, []>("op_2989_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_277_cast_fp16 = mul(x = var_2988, y = var_2989_to_fp16)[name = tensor<string, []>("w_277_cast_fp16")];
-            tensor<string, []> var_2992_equation_0 = const()[name = tensor<string, []>("op_2992_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2992 = einsum(equation = var_2992_equation_0, values = (var_2937_7, var_2923_7))[name = tensor<string, []>("op_2992")];
-            tensor<fp16, []> var_2993_to_fp16 = const()[name = tensor<string, []>("op_2993_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_279_cast_fp16 = mul(x = var_2992, y = var_2993_to_fp16)[name = tensor<string, []>("w_279_cast_fp16")];
-            tensor<string, []> var_2996_equation_0 = const()[name = tensor<string, []>("op_2996_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_2996 = einsum(equation = var_2996_equation_0, values = (var_2937_8, var_2923_8))[name = tensor<string, []>("op_2996")];
-            tensor<fp16, []> var_2997_to_fp16 = const()[name = tensor<string, []>("op_2997_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_281_cast_fp16 = mul(x = var_2996, y = var_2997_to_fp16)[name = tensor<string, []>("w_281_cast_fp16")];
-            tensor<string, []> var_3000_equation_0 = const()[name = tensor<string, []>("op_3000_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_3000 = einsum(equation = var_3000_equation_0, values = (var_2937_9, var_2923_9))[name = tensor<string, []>("op_3000")];
-            tensor<fp16, []> var_3001_to_fp16 = const()[name = tensor<string, []>("op_3001_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_283_cast_fp16 = mul(x = var_3000, y = var_3001_to_fp16)[name = tensor<string, []>("w_283_cast_fp16")];
-            tensor<string, []> var_3004_equation_0 = const()[name = tensor<string, []>("op_3004_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_3004 = einsum(equation = var_3004_equation_0, values = (var_2937_10, var_2923_10))[name = tensor<string, []>("op_3004")];
-            tensor<fp16, []> var_3005_to_fp16 = const()[name = tensor<string, []>("op_3005_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_285_cast_fp16 = mul(x = var_3004, y = var_3005_to_fp16)[name = tensor<string, []>("w_285_cast_fp16")];
-            tensor<string, []> var_3008_equation_0 = const()[name = tensor<string, []>("op_3008_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
-            tensor<fp16, [1, 512, 1, 512]> var_3008 = einsum(equation = var_3008_equation_0, values = (var_2937_11, var_2923_11))[name = tensor<string, []>("op_3008")];
-            tensor<fp16, []> var_3009_to_fp16 = const()[name = tensor<string, []>("op_3009_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
-            tensor<fp16, [1, 512, 1, 512]> w_cast_fp16 = mul(x = var_3008, y = var_3009_to_fp16)[name = tensor<string, []>("w_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3011_cast_fp16 = add(x = w_265_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3011_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3012_cast_fp16 = softmax(axis = var_2871, x = var_3011_cast_fp16)[name = tensor<string, []>("op_3012_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3013_cast_fp16 = add(x = w_267_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3013_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3014_cast_fp16 = softmax(axis = var_2871, x = var_3013_cast_fp16)[name = tensor<string, []>("op_3014_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3015_cast_fp16 = add(x = w_269_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3015_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3016_cast_fp16 = softmax(axis = var_2871, x = var_3015_cast_fp16)[name = tensor<string, []>("op_3016_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3017_cast_fp16 = add(x = w_271_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3017_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3018_cast_fp16 = softmax(axis = var_2871, x = var_3017_cast_fp16)[name = tensor<string, []>("op_3018_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3019_cast_fp16 = add(x = w_273_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3019_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3020_cast_fp16 = softmax(axis = var_2871, x = var_3019_cast_fp16)[name = tensor<string, []>("op_3020_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3021_cast_fp16 = add(x = w_275_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3021_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3022_cast_fp16 = softmax(axis = var_2871, x = var_3021_cast_fp16)[name = tensor<string, []>("op_3022_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3023_cast_fp16 = add(x = w_277_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3023_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3024_cast_fp16 = softmax(axis = var_2871, x = var_3023_cast_fp16)[name = tensor<string, []>("op_3024_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3025_cast_fp16 = add(x = w_279_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3025_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3026_cast_fp16 = softmax(axis = var_2871, x = var_3025_cast_fp16)[name = tensor<string, []>("op_3026_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3027_cast_fp16 = add(x = w_281_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3027_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3028_cast_fp16 = softmax(axis = var_2871, x = var_3027_cast_fp16)[name = tensor<string, []>("op_3028_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3029_cast_fp16 = add(x = w_283_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3029_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3030_cast_fp16 = softmax(axis = var_2871, x = var_3029_cast_fp16)[name = tensor<string, []>("op_3030_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3031_cast_fp16 = add(x = w_285_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3031_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3032_cast_fp16 = softmax(axis = var_2871, x = var_3031_cast_fp16)[name = tensor<string, []>("op_3032_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3033_cast_fp16 = add(x = w_cast_fp16, y = var_73_cast_fp16)[name = tensor<string, []>("op_3033_cast_fp16")];
-            tensor<fp16, [1, 512, 1, 512]> var_3034_cast_fp16 = softmax(axis = var_2871, x = var_3033_cast_fp16)[name = tensor<string, []>("op_3034_cast_fp16")];
-            tensor<string, []> var_3036_equation_0 = const()[name = tensor<string, []>("op_3036_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3036_cast_fp16 = einsum(equation = var_3036_equation_0, values = (var_2950_0, var_3012_cast_fp16))[name = tensor<string, []>("op_3036_cast_fp16")];
-            tensor<string, []> var_3038_equation_0 = const()[name = tensor<string, []>("op_3038_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3038_cast_fp16 = einsum(equation = var_3038_equation_0, values = (var_2950_1, var_3014_cast_fp16))[name = tensor<string, []>("op_3038_cast_fp16")];
-            tensor<string, []> var_3040_equation_0 = const()[name = tensor<string, []>("op_3040_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3040_cast_fp16 = einsum(equation = var_3040_equation_0, values = (var_2950_2, var_3016_cast_fp16))[name = tensor<string, []>("op_3040_cast_fp16")];
-            tensor<string, []> var_3042_equation_0 = const()[name = tensor<string, []>("op_3042_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3042_cast_fp16 = einsum(equation = var_3042_equation_0, values = (var_2950_3, var_3018_cast_fp16))[name = tensor<string, []>("op_3042_cast_fp16")];
-            tensor<string, []> var_3044_equation_0 = const()[name = tensor<string, []>("op_3044_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3044_cast_fp16 = einsum(equation = var_3044_equation_0, values = (var_2950_4, var_3020_cast_fp16))[name = tensor<string, []>("op_3044_cast_fp16")];
-            tensor<string, []> var_3046_equation_0 = const()[name = tensor<string, []>("op_3046_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3046_cast_fp16 = einsum(equation = var_3046_equation_0, values = (var_2950_5, var_3022_cast_fp16))[name = tensor<string, []>("op_3046_cast_fp16")];
-            tensor<string, []> var_3048_equation_0 = const()[name = tensor<string, []>("op_3048_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3048_cast_fp16 = einsum(equation = var_3048_equation_0, values = (var_2950_6, var_3024_cast_fp16))[name = tensor<string, []>("op_3048_cast_fp16")];
-            tensor<string, []> var_3050_equation_0 = const()[name = tensor<string, []>("op_3050_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3050_cast_fp16 = einsum(equation = var_3050_equation_0, values = (var_2950_7, var_3026_cast_fp16))[name = tensor<string, []>("op_3050_cast_fp16")];
-            tensor<string, []> var_3052_equation_0 = const()[name = tensor<string, []>("op_3052_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3052_cast_fp16 = einsum(equation = var_3052_equation_0, values = (var_2950_8, var_3028_cast_fp16))[name = tensor<string, []>("op_3052_cast_fp16")];
-            tensor<string, []> var_3054_equation_0 = const()[name = tensor<string, []>("op_3054_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3054_cast_fp16 = einsum(equation = var_3054_equation_0, values = (var_2950_9, var_3030_cast_fp16))[name = tensor<string, []>("op_3054_cast_fp16")];
-            tensor<string, []> var_3056_equation_0 = const()[name = tensor<string, []>("op_3056_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3056_cast_fp16 = einsum(equation = var_3056_equation_0, values = (var_2950_10, var_3032_cast_fp16))[name = tensor<string, []>("op_3056_cast_fp16")];
-            tensor<string, []> var_3058_equation_0 = const()[name = tensor<string, []>("op_3058_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
-            tensor<fp16, [1, 32, 1, 512]> var_3058_cast_fp16 = einsum(equation = var_3058_equation_0, values = (var_2950_11, var_3034_cast_fp16))[name = tensor<string, []>("op_3058_cast_fp16")];
-            tensor<bool, []> var_3060_interleave_0 = const()[name = tensor<string, []>("op_3060_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_3060_cast_fp16 = concat(axis = var_2871, interleave = var_3060_interleave_0, values = (var_3036_cast_fp16, var_3038_cast_fp16, var_3040_cast_fp16, var_3042_cast_fp16, var_3044_cast_fp16, var_3046_cast_fp16, var_3048_cast_fp16, var_3050_cast_fp16, var_3052_cast_fp16, var_3054_cast_fp16, var_3056_cast_fp16, var_3058_cast_fp16))[name = tensor<string, []>("op_3060_cast_fp16")];
-            tensor<int32, [2]> var_3064 = const()[name = tensor<string, []>("op_3064"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_3066 = const()[name = tensor<string, []>("op_3066"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_3068_pad_type_0 = const()[name = tensor<string, []>("op_3068_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_3068_pad_0 = const()[name = tensor<string, []>("op_3068_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_3068 = conv(bias = layers_11_attention_o_proj_bias, dilations = var_3066, groups = var_2871, pad = var_3068_pad_0, pad_type = var_3068_pad_type_0, strides = var_3064, weight = layers_11_attention_o_proj_weight, x = var_3060_cast_fp16)[name = tensor<string, []>("op_3068")];
-            tensor<bool, []> var_3070_interleave_0 = const()[name = tensor<string, []>("op_3070_interleave_0"), val = tensor<bool, []>(false)];
-            tensor<fp16, [1, 384, 1, 512]> var_3070 = concat(axis = var_2872, interleave = var_3070_interleave_0, values = var_3068)[name = tensor<string, []>("op_3070")];
-            tensor<fp16, [1, 384, 1, 512]> x_93 = add(x = var_2866_cast_fp16, y = var_3070)[name = tensor<string, []>("x_93")];
-            tensor<fp16, []> var_2869_promoted = const()[name = tensor<string, []>("op_2869_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2870_promoted = const()[name = tensor<string, []>("op_2870_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x_95 = clip(alpha = var_2869_promoted, beta = var_2870_promoted, x = x_93)[name = tensor<string, []>("x_95")];
-            tensor<int32, [1]> var_3075 = const()[name = tensor<string, []>("op_3075"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean_47 = reduce_mean(axes = var_3075, keep_dims = var_2873, x = x_95)[name = tensor<string, []>("mean_47")];
+            tensor<fp16, [1, 1, 1, 512]> denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0, x = var_2784_cast_fp16)[name = tensor<string, []>("denom_45_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2786_cast_fp16 = mul(x = zero_mean_45, y = denom_45_cast_fp16)[name = tensor<string, []>("op_2786_cast_fp16")];
+            tensor<fp16, [384]> var_2788_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_2788_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66825344)))];
+            tensor<fp16, [384]> var_2788_beta_0_to_fp16 = const()[name = tensor<string, []>("op_2788_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66826176)))];
+            tensor<fp16, []> var_2788_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_2788_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_2788_cast_fp16 = batch_norm(beta = var_2788_beta_0_to_fp16, epsilon = var_2788_epsilon_0_to_fp16, gamma = var_2788_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2786_cast_fp16)[name = tensor<string, []>("op_2788_cast_fp16")];
+            tensor<int32, []> var_2794 = const()[name = tensor<string, []>("op_2794"), val = tensor<int32, []>(1)];
+            tensor<int32, []> var_2795 = const()[name = tensor<string, []>("op_2795"), val = tensor<int32, []>(0)];
+            tensor<bool, []> var_2796 = const()[name = tensor<string, []>("op_2796"), val = tensor<bool, []>(true)];
+            tensor<int32, [2]> var_2818 = const()[name = tensor<string, []>("op_2818"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2820 = const()[name = tensor<string, []>("op_2820"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2822_pad_type_0 = const()[name = tensor<string, []>("op_2822_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2822_pad_0 = const()[name = tensor<string, []>("op_2822_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2822 = conv(bias = layers_11_attention_q_proj_bias, dilations = var_2820, groups = var_2794, pad = var_2822_pad_0, pad_type = var_2822_pad_type_0, strides = var_2818, weight = layers_11_attention_q_proj_weight, x = var_2788_cast_fp16)[name = tensor<string, []>("op_2822")];
+            tensor<int32, [2]> var_2825 = const()[name = tensor<string, []>("op_2825"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2827 = const()[name = tensor<string, []>("op_2827"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> ks_pad_type_0 = const()[name = tensor<string, []>("ks_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> ks_pad_0 = const()[name = tensor<string, []>("ks_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> ks = conv(bias = layers_11_attention_k_proj_bias, dilations = var_2827, groups = var_2794, pad = ks_pad_0, pad_type = ks_pad_type_0, strides = var_2825, weight = layers_11_attention_k_proj_weight, x = var_2788_cast_fp16)[name = tensor<string, []>("ks")];
+            tensor<int32, [2]> var_2832 = const()[name = tensor<string, []>("op_2832"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2834 = const()[name = tensor<string, []>("op_2834"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2836_pad_type_0 = const()[name = tensor<string, []>("op_2836_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2836_pad_0 = const()[name = tensor<string, []>("op_2836_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2836 = conv(bias = layers_11_attention_v_proj_bias, dilations = var_2834, groups = var_2794, pad = var_2836_pad_0, pad_type = var_2836_pad_type_0, strides = var_2832, weight = layers_11_attention_v_proj_weight, x = var_2788_cast_fp16)[name = tensor<string, []>("op_2836")];
+            tensor<int32, [12]> tile_57 = const()[name = tensor<string, []>("tile_57"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2837_axis_0 = const()[name = tensor<string, []>("op_2837_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2837_0, tensor<fp16, [1, 32, 1, 512]> var_2837_1, tensor<fp16, [1, 32, 1, 512]> var_2837_2, tensor<fp16, [1, 32, 1, 512]> var_2837_3, tensor<fp16, [1, 32, 1, 512]> var_2837_4, tensor<fp16, [1, 32, 1, 512]> var_2837_5, tensor<fp16, [1, 32, 1, 512]> var_2837_6, tensor<fp16, [1, 32, 1, 512]> var_2837_7, tensor<fp16, [1, 32, 1, 512]> var_2837_8, tensor<fp16, [1, 32, 1, 512]> var_2837_9, tensor<fp16, [1, 32, 1, 512]> var_2837_10, tensor<fp16, [1, 32, 1, 512]> var_2837_11 = split(axis = var_2837_axis_0, split_sizes = tile_57, x = var_2822)[name = tensor<string, []>("op_2837")];
+            tensor<int32, [4]> var_2850_perm_0 = const()[name = tensor<string, []>("op_2850_perm_0"), val = tensor<int32, [4]>([0, 3, 2, 1])];
+            tensor<int32, [12]> tile_58 = const()[name = tensor<string, []>("tile_58"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2851_axis_0 = const()[name = tensor<string, []>("op_2851_axis_0"), val = tensor<int32, []>(3)];
+            tensor<fp16, [1, 512, 1, 384]> transpose_0 = transpose(perm = var_2850_perm_0, x = ks)[name = tensor<string, []>("transpose_0")];
+            tensor<fp16, [1, 512, 1, 32]> var_2851_0, tensor<fp16, [1, 512, 1, 32]> var_2851_1, tensor<fp16, [1, 512, 1, 32]> var_2851_2, tensor<fp16, [1, 512, 1, 32]> var_2851_3, tensor<fp16, [1, 512, 1, 32]> var_2851_4, tensor<fp16, [1, 512, 1, 32]> var_2851_5, tensor<fp16, [1, 512, 1, 32]> var_2851_6, tensor<fp16, [1, 512, 1, 32]> var_2851_7, tensor<fp16, [1, 512, 1, 32]> var_2851_8, tensor<fp16, [1, 512, 1, 32]> var_2851_9, tensor<fp16, [1, 512, 1, 32]> var_2851_10, tensor<fp16, [1, 512, 1, 32]> var_2851_11 = split(axis = var_2851_axis_0, split_sizes = tile_58, x = transpose_0)[name = tensor<string, []>("op_2851")];
+            tensor<int32, [12]> tile_59 = const()[name = tensor<string, []>("tile_59"), val = tensor<int32, [12]>([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])];
+            tensor<int32, []> var_2864_axis_0 = const()[name = tensor<string, []>("op_2864_axis_0"), val = tensor<int32, []>(1)];
+            tensor<fp16, [1, 32, 1, 512]> var_2864_0, tensor<fp16, [1, 32, 1, 512]> var_2864_1, tensor<fp16, [1, 32, 1, 512]> var_2864_2, tensor<fp16, [1, 32, 1, 512]> var_2864_3, tensor<fp16, [1, 32, 1, 512]> var_2864_4, tensor<fp16, [1, 32, 1, 512]> var_2864_5, tensor<fp16, [1, 32, 1, 512]> var_2864_6, tensor<fp16, [1, 32, 1, 512]> var_2864_7, tensor<fp16, [1, 32, 1, 512]> var_2864_8, tensor<fp16, [1, 32, 1, 512]> var_2864_9, tensor<fp16, [1, 32, 1, 512]> var_2864_10, tensor<fp16, [1, 32, 1, 512]> var_2864_11 = split(axis = var_2864_axis_0, split_sizes = tile_59, x = var_2836)[name = tensor<string, []>("op_2864")];
+            tensor<string, []> var_2878_equation_0 = const()[name = tensor<string, []>("op_2878_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2878 = einsum(equation = var_2878_equation_0, values = (var_2851_0, var_2837_0))[name = tensor<string, []>("op_2878")];
+            tensor<fp16, []> var_2879_to_fp16 = const()[name = tensor<string, []>("op_2879_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_265_cast_fp16 = mul(x = var_2878, y = var_2879_to_fp16)[name = tensor<string, []>("w_265_cast_fp16")];
+            tensor<string, []> var_2882_equation_0 = const()[name = tensor<string, []>("op_2882_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2882 = einsum(equation = var_2882_equation_0, values = (var_2851_1, var_2837_1))[name = tensor<string, []>("op_2882")];
+            tensor<fp16, []> var_2883_to_fp16 = const()[name = tensor<string, []>("op_2883_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_267_cast_fp16 = mul(x = var_2882, y = var_2883_to_fp16)[name = tensor<string, []>("w_267_cast_fp16")];
+            tensor<string, []> var_2886_equation_0 = const()[name = tensor<string, []>("op_2886_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2886 = einsum(equation = var_2886_equation_0, values = (var_2851_2, var_2837_2))[name = tensor<string, []>("op_2886")];
+            tensor<fp16, []> var_2887_to_fp16 = const()[name = tensor<string, []>("op_2887_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_269_cast_fp16 = mul(x = var_2886, y = var_2887_to_fp16)[name = tensor<string, []>("w_269_cast_fp16")];
+            tensor<string, []> var_2890_equation_0 = const()[name = tensor<string, []>("op_2890_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2890 = einsum(equation = var_2890_equation_0, values = (var_2851_3, var_2837_3))[name = tensor<string, []>("op_2890")];
+            tensor<fp16, []> var_2891_to_fp16 = const()[name = tensor<string, []>("op_2891_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_271_cast_fp16 = mul(x = var_2890, y = var_2891_to_fp16)[name = tensor<string, []>("w_271_cast_fp16")];
+            tensor<string, []> var_2894_equation_0 = const()[name = tensor<string, []>("op_2894_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2894 = einsum(equation = var_2894_equation_0, values = (var_2851_4, var_2837_4))[name = tensor<string, []>("op_2894")];
+            tensor<fp16, []> var_2895_to_fp16 = const()[name = tensor<string, []>("op_2895_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_273_cast_fp16 = mul(x = var_2894, y = var_2895_to_fp16)[name = tensor<string, []>("w_273_cast_fp16")];
+            tensor<string, []> var_2898_equation_0 = const()[name = tensor<string, []>("op_2898_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2898 = einsum(equation = var_2898_equation_0, values = (var_2851_5, var_2837_5))[name = tensor<string, []>("op_2898")];
+            tensor<fp16, []> var_2899_to_fp16 = const()[name = tensor<string, []>("op_2899_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_275_cast_fp16 = mul(x = var_2898, y = var_2899_to_fp16)[name = tensor<string, []>("w_275_cast_fp16")];
+            tensor<string, []> var_2902_equation_0 = const()[name = tensor<string, []>("op_2902_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2902 = einsum(equation = var_2902_equation_0, values = (var_2851_6, var_2837_6))[name = tensor<string, []>("op_2902")];
+            tensor<fp16, []> var_2903_to_fp16 = const()[name = tensor<string, []>("op_2903_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_277_cast_fp16 = mul(x = var_2902, y = var_2903_to_fp16)[name = tensor<string, []>("w_277_cast_fp16")];
+            tensor<string, []> var_2906_equation_0 = const()[name = tensor<string, []>("op_2906_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2906 = einsum(equation = var_2906_equation_0, values = (var_2851_7, var_2837_7))[name = tensor<string, []>("op_2906")];
+            tensor<fp16, []> var_2907_to_fp16 = const()[name = tensor<string, []>("op_2907_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_279_cast_fp16 = mul(x = var_2906, y = var_2907_to_fp16)[name = tensor<string, []>("w_279_cast_fp16")];
+            tensor<string, []> var_2910_equation_0 = const()[name = tensor<string, []>("op_2910_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2910 = einsum(equation = var_2910_equation_0, values = (var_2851_8, var_2837_8))[name = tensor<string, []>("op_2910")];
+            tensor<fp16, []> var_2911_to_fp16 = const()[name = tensor<string, []>("op_2911_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_281_cast_fp16 = mul(x = var_2910, y = var_2911_to_fp16)[name = tensor<string, []>("w_281_cast_fp16")];
+            tensor<string, []> var_2914_equation_0 = const()[name = tensor<string, []>("op_2914_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2914 = einsum(equation = var_2914_equation_0, values = (var_2851_9, var_2837_9))[name = tensor<string, []>("op_2914")];
+            tensor<fp16, []> var_2915_to_fp16 = const()[name = tensor<string, []>("op_2915_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_283_cast_fp16 = mul(x = var_2914, y = var_2915_to_fp16)[name = tensor<string, []>("w_283_cast_fp16")];
+            tensor<string, []> var_2918_equation_0 = const()[name = tensor<string, []>("op_2918_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2918 = einsum(equation = var_2918_equation_0, values = (var_2851_10, var_2837_10))[name = tensor<string, []>("op_2918")];
+            tensor<fp16, []> var_2919_to_fp16 = const()[name = tensor<string, []>("op_2919_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_285_cast_fp16 = mul(x = var_2918, y = var_2919_to_fp16)[name = tensor<string, []>("w_285_cast_fp16")];
+            tensor<string, []> var_2922_equation_0 = const()[name = tensor<string, []>("op_2922_equation_0"), val = tensor<string, []>("bkhc,bchq->bkhq")];
+            tensor<fp16, [1, 512, 1, 512]> var_2922 = einsum(equation = var_2922_equation_0, values = (var_2851_11, var_2837_11))[name = tensor<string, []>("op_2922")];
+            tensor<fp16, []> var_2923_to_fp16 = const()[name = tensor<string, []>("op_2923_to_fp16"), val = tensor<fp16, []>(0x1.6ap-3)];
+            tensor<fp16, [1, 512, 1, 512]> w_cast_fp16 = mul(x = var_2922, y = var_2923_to_fp16)[name = tensor<string, []>("w_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_355_cast_fp16 = add(x = w_265_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_355_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2926_cast_fp16 = softmax(axis = var_2794, x = input_355_cast_fp16)[name = tensor<string, []>("op_2926_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_357_cast_fp16 = add(x = w_267_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_357_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2928_cast_fp16 = softmax(axis = var_2794, x = input_357_cast_fp16)[name = tensor<string, []>("op_2928_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_359_cast_fp16 = add(x = w_269_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_359_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2930_cast_fp16 = softmax(axis = var_2794, x = input_359_cast_fp16)[name = tensor<string, []>("op_2930_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_361_cast_fp16 = add(x = w_271_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_361_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2932_cast_fp16 = softmax(axis = var_2794, x = input_361_cast_fp16)[name = tensor<string, []>("op_2932_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_363_cast_fp16 = add(x = w_273_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_363_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2934_cast_fp16 = softmax(axis = var_2794, x = input_363_cast_fp16)[name = tensor<string, []>("op_2934_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_365_cast_fp16 = add(x = w_275_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_365_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2936_cast_fp16 = softmax(axis = var_2794, x = input_365_cast_fp16)[name = tensor<string, []>("op_2936_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_367_cast_fp16 = add(x = w_277_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_367_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2938_cast_fp16 = softmax(axis = var_2794, x = input_367_cast_fp16)[name = tensor<string, []>("op_2938_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_369_cast_fp16 = add(x = w_279_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_369_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2940_cast_fp16 = softmax(axis = var_2794, x = input_369_cast_fp16)[name = tensor<string, []>("op_2940_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_371_cast_fp16 = add(x = w_281_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_371_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2942_cast_fp16 = softmax(axis = var_2794, x = input_371_cast_fp16)[name = tensor<string, []>("op_2942_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_373_cast_fp16 = add(x = w_283_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_373_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2944_cast_fp16 = softmax(axis = var_2794, x = input_373_cast_fp16)[name = tensor<string, []>("op_2944_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_375_cast_fp16 = add(x = w_285_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_375_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2946_cast_fp16 = softmax(axis = var_2794, x = input_375_cast_fp16)[name = tensor<string, []>("op_2946_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> input_377_cast_fp16 = add(x = w_cast_fp16, y = var_83_cast_fp16)[name = tensor<string, []>("input_377_cast_fp16")];
+            tensor<fp16, [1, 512, 1, 512]> var_2948_cast_fp16 = softmax(axis = var_2794, x = input_377_cast_fp16)[name = tensor<string, []>("op_2948_cast_fp16")];
+            tensor<string, []> var_2950_equation_0 = const()[name = tensor<string, []>("op_2950_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2950_cast_fp16 = einsum(equation = var_2950_equation_0, values = (var_2864_0, var_2926_cast_fp16))[name = tensor<string, []>("op_2950_cast_fp16")];
+            tensor<string, []> var_2952_equation_0 = const()[name = tensor<string, []>("op_2952_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2952_cast_fp16 = einsum(equation = var_2952_equation_0, values = (var_2864_1, var_2928_cast_fp16))[name = tensor<string, []>("op_2952_cast_fp16")];
+            tensor<string, []> var_2954_equation_0 = const()[name = tensor<string, []>("op_2954_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2954_cast_fp16 = einsum(equation = var_2954_equation_0, values = (var_2864_2, var_2930_cast_fp16))[name = tensor<string, []>("op_2954_cast_fp16")];
+            tensor<string, []> var_2956_equation_0 = const()[name = tensor<string, []>("op_2956_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2956_cast_fp16 = einsum(equation = var_2956_equation_0, values = (var_2864_3, var_2932_cast_fp16))[name = tensor<string, []>("op_2956_cast_fp16")];
+            tensor<string, []> var_2958_equation_0 = const()[name = tensor<string, []>("op_2958_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2958_cast_fp16 = einsum(equation = var_2958_equation_0, values = (var_2864_4, var_2934_cast_fp16))[name = tensor<string, []>("op_2958_cast_fp16")];
+            tensor<string, []> var_2960_equation_0 = const()[name = tensor<string, []>("op_2960_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2960_cast_fp16 = einsum(equation = var_2960_equation_0, values = (var_2864_5, var_2936_cast_fp16))[name = tensor<string, []>("op_2960_cast_fp16")];
+            tensor<string, []> var_2962_equation_0 = const()[name = tensor<string, []>("op_2962_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2962_cast_fp16 = einsum(equation = var_2962_equation_0, values = (var_2864_6, var_2938_cast_fp16))[name = tensor<string, []>("op_2962_cast_fp16")];
+            tensor<string, []> var_2964_equation_0 = const()[name = tensor<string, []>("op_2964_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2964_cast_fp16 = einsum(equation = var_2964_equation_0, values = (var_2864_7, var_2940_cast_fp16))[name = tensor<string, []>("op_2964_cast_fp16")];
+            tensor<string, []> var_2966_equation_0 = const()[name = tensor<string, []>("op_2966_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2966_cast_fp16 = einsum(equation = var_2966_equation_0, values = (var_2864_8, var_2942_cast_fp16))[name = tensor<string, []>("op_2966_cast_fp16")];
+            tensor<string, []> var_2968_equation_0 = const()[name = tensor<string, []>("op_2968_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2968_cast_fp16 = einsum(equation = var_2968_equation_0, values = (var_2864_9, var_2944_cast_fp16))[name = tensor<string, []>("op_2968_cast_fp16")];
+            tensor<string, []> var_2970_equation_0 = const()[name = tensor<string, []>("op_2970_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2970_cast_fp16 = einsum(equation = var_2970_equation_0, values = (var_2864_10, var_2946_cast_fp16))[name = tensor<string, []>("op_2970_cast_fp16")];
+            tensor<string, []> var_2972_equation_0 = const()[name = tensor<string, []>("op_2972_equation_0"), val = tensor<string, []>("bchk,bkhq->bchq")];
+            tensor<fp16, [1, 32, 1, 512]> var_2972_cast_fp16 = einsum(equation = var_2972_equation_0, values = (var_2864_11, var_2948_cast_fp16))[name = tensor<string, []>("op_2972_cast_fp16")];
+            tensor<bool, []> var_2974_interleave_0 = const()[name = tensor<string, []>("op_2974_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2974_cast_fp16 = concat(axis = var_2794, interleave = var_2974_interleave_0, values = (var_2950_cast_fp16, var_2952_cast_fp16, var_2954_cast_fp16, var_2956_cast_fp16, var_2958_cast_fp16, var_2960_cast_fp16, var_2962_cast_fp16, var_2964_cast_fp16, var_2966_cast_fp16, var_2968_cast_fp16, var_2970_cast_fp16, var_2972_cast_fp16))[name = tensor<string, []>("op_2974_cast_fp16")];
+            tensor<int32, [2]> var_2978 = const()[name = tensor<string, []>("op_2978"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_2980 = const()[name = tensor<string, []>("op_2980"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_2982_pad_type_0 = const()[name = tensor<string, []>("op_2982_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_2982_pad_0 = const()[name = tensor<string, []>("op_2982_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_2982 = conv(bias = layers_11_attention_o_proj_bias, dilations = var_2980, groups = var_2794, pad = var_2982_pad_0, pad_type = var_2982_pad_type_0, strides = var_2978, weight = layers_11_attention_o_proj_weight, x = var_2974_cast_fp16)[name = tensor<string, []>("op_2982")];
+            tensor<bool, []> var_2984_interleave_0 = const()[name = tensor<string, []>("op_2984_interleave_0"), val = tensor<bool, []>(false)];
+            tensor<fp16, [1, 384, 1, 512]> var_2984 = concat(axis = var_2795, interleave = var_2984_interleave_0, values = var_2982)[name = tensor<string, []>("op_2984")];
+            tensor<fp16, [1, 384, 1, 512]> x_93 = add(x = var_2788_cast_fp16, y = var_2984)[name = tensor<string, []>("x_93")];
+            tensor<fp16, []> var_2791_promoted = const()[name = tensor<string, []>("op_2791_promoted"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2792_promoted = const()[name = tensor<string, []>("op_2792_promoted"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x_95 = clip(alpha = var_2791_promoted, beta = var_2792_promoted, x = x_93)[name = tensor<string, []>("x_95")];
+            tensor<int32, [1]> var_2989 = const()[name = tensor<string, []>("op_2989"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean_47 = reduce_mean(axes = var_2989, keep_dims = var_2796, x = x_95)[name = tensor<string, []>("mean_47")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean_47 = sub(x = x_95, y = mean_47)[name = tensor<string, []>("zero_mean_47")];
-            tensor<fp16, []> var_2878_promoted = const()[name = tensor<string, []>("op_2878_promoted"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_3078 = pow(x = zero_mean_47, y = var_2878_promoted)[name = tensor<string, []>("op_3078")];
-            tensor<int32, [1]> var_3079 = const()[name = tensor<string, []>("op_3079"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_3080 = reduce_mean(axes = var_3079, keep_dims = var_2873, x = var_3078)[name = tensor<string, []>("op_3080")];
-            tensor<fp16, []> var_3081_to_fp16 = const()[name = tensor<string, []>("op_3081_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_3082_cast_fp16 = add(x = var_3080, y = var_3081_to_fp16)[name = tensor<string, []>("op_3082_cast_fp16")];
+            tensor<fp16, []> var_2793_promoted = const()[name = tensor<string, []>("op_2793_promoted"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_2992 = pow(x = zero_mean_47, y = var_2793_promoted)[name = tensor<string, []>("op_2992")];
+            tensor<int32, [1]> var_2993 = const()[name = tensor<string, []>("op_2993"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_2994 = reduce_mean(axes = var_2993, keep_dims = var_2796, x = var_2992)[name = tensor<string, []>("op_2994")];
+            tensor<fp16, []> var_2995_to_fp16 = const()[name = tensor<string, []>("op_2995_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_2996_cast_fp16 = add(x = var_2994, y = var_2995_to_fp16)[name = tensor<string, []>("op_2996_cast_fp16")];
             tensor<fp32, []> denom_47_epsilon_0 = const()[name = tensor<string, []>("denom_47_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0, x = var_3082_cast_fp16)[name = tensor<string, []>("denom_47_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_3084_cast_fp16 = mul(x = zero_mean_47, y = denom_47_cast_fp16)[name = tensor<string, []>("op_3084_cast_fp16")];
-            tensor<fp16, [384]> var_3086_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_3086_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66827008)))];
-            tensor<fp16, [384]> var_3086_beta_0_to_fp16 = const()[name = tensor<string, []>("op_3086_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66827840)))];
-            tensor<fp16, []> var_3086_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_3086_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_3086_cast_fp16 = batch_norm(beta = var_3086_beta_0_to_fp16, epsilon = var_3086_epsilon_0_to_fp16, gamma = var_3086_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3084_cast_fp16)[name = tensor<string, []>("op_3086_cast_fp16")];
-            tensor<int32, [2]> var_3092 = const()[name = tensor<string, []>("op_3092"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_3094 = const()[name = tensor<string, []>("op_3094"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_3096_pad_type_0 = const()[name = tensor<string, []>("op_3096_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_3096_pad_0 = const()[name = tensor<string, []>("op_3096_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 1536, 1, 512]> var_3096 = conv(bias = layers_11_mlp_fc1_bias, dilations = var_3094, groups = var_2871, pad = var_3096_pad_0, pad_type = var_3096_pad_type_0, strides = var_3092, weight = layers_11_mlp_fc1_weight, x = var_3086_cast_fp16)[name = tensor<string, []>("op_3096")];
-            tensor<string, []> input_95_mode_0 = const()[name = tensor<string, []>("input_95_mode_0"), val = tensor<string, []>("EXACT")];
-            tensor<fp16, [1, 1536, 1, 512]> input_95 = gelu(mode = input_95_mode_0, x = var_3096)[name = tensor<string, []>("input_95")];
-            tensor<int32, [2]> var_3100 = const()[name = tensor<string, []>("op_3100"), val = tensor<int32, [2]>([1, 1])];
-            tensor<int32, [2]> var_3102 = const()[name = tensor<string, []>("op_3102"), val = tensor<int32, [2]>([1, 1])];
-            tensor<string, []> var_3104_pad_type_0 = const()[name = tensor<string, []>("op_3104_pad_type_0"), val = tensor<string, []>("custom")];
-            tensor<int32, [4]> var_3104_pad_0 = const()[name = tensor<string, []>("op_3104_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<fp16, [1, 384, 1, 512]> var_3104 = conv(bias = layers_11_mlp_fc2_bias, dilations = var_3102, groups = var_2871, pad = var_3104_pad_0, pad_type = var_3104_pad_type_0, strides = var_3100, weight = layers_11_mlp_fc2_weight, x = input_95)[name = tensor<string, []>("op_3104")];
-            tensor<fp16, [1, 384, 1, 512]> x_97 = add(x = var_3086_cast_fp16, y = var_3104)[name = tensor<string, []>("x_97")];
-            tensor<fp16, []> var_2869_promoted_1 = const()[name = tensor<string, []>("op_2869_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
-            tensor<fp16, []> var_2870_promoted_1 = const()[name = tensor<string, []>("op_2870_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
-            tensor<fp16, [1, 384, 1, 512]> x = clip(alpha = var_2869_promoted_1, beta = var_2870_promoted_1, x = x_97)[name = tensor<string, []>("x")];
-            tensor<int32, [1]> var_3109 = const()[name = tensor<string, []>("op_3109"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> mean = reduce_mean(axes = var_3109, keep_dims = var_2873, x = x)[name = tensor<string, []>("mean")];
+            tensor<fp16, [1, 1, 1, 512]> denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0, x = var_2996_cast_fp16)[name = tensor<string, []>("denom_47_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_2998_cast_fp16 = mul(x = zero_mean_47, y = denom_47_cast_fp16)[name = tensor<string, []>("op_2998_cast_fp16")];
+            tensor<fp16, [384]> var_3000_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_3000_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66827008)))];
+            tensor<fp16, [384]> var_3000_beta_0_to_fp16 = const()[name = tensor<string, []>("op_3000_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66827840)))];
+            tensor<fp16, []> var_3000_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_3000_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_3000_cast_fp16 = batch_norm(beta = var_3000_beta_0_to_fp16, epsilon = var_3000_epsilon_0_to_fp16, gamma = var_3000_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2998_cast_fp16)[name = tensor<string, []>("op_3000_cast_fp16")];
+            tensor<int32, [2]> var_3006 = const()[name = tensor<string, []>("op_3006"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3008 = const()[name = tensor<string, []>("op_3008"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_3010_pad_type_0 = const()[name = tensor<string, []>("op_3010_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_3010_pad_0 = const()[name = tensor<string, []>("op_3010_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 1536, 1, 512]> var_3010 = conv(bias = layers_11_mlp_fc1_bias, dilations = var_3008, groups = var_2794, pad = var_3010_pad_0, pad_type = var_3010_pad_type_0, strides = var_3006, weight = layers_11_mlp_fc1_weight, x = var_3000_cast_fp16)[name = tensor<string, []>("op_3010")];
+            tensor<string, []> input_383_mode_0 = const()[name = tensor<string, []>("input_383_mode_0"), val = tensor<string, []>("EXACT")];
+            tensor<fp16, [1, 1536, 1, 512]> input_383 = gelu(mode = input_383_mode_0, x = var_3010)[name = tensor<string, []>("input_383")];
+            tensor<int32, [2]> var_3014 = const()[name = tensor<string, []>("op_3014"), val = tensor<int32, [2]>([1, 1])];
+            tensor<int32, [2]> var_3016 = const()[name = tensor<string, []>("op_3016"), val = tensor<int32, [2]>([1, 1])];
+            tensor<string, []> var_3018_pad_type_0 = const()[name = tensor<string, []>("op_3018_pad_type_0"), val = tensor<string, []>("custom")];
+            tensor<int32, [4]> var_3018_pad_0 = const()[name = tensor<string, []>("op_3018_pad_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<fp16, [1, 384, 1, 512]> var_3018 = conv(bias = layers_11_mlp_fc2_bias, dilations = var_3016, groups = var_2794, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3014, weight = layers_11_mlp_fc2_weight, x = input_383)[name = tensor<string, []>("op_3018")];
+            tensor<fp16, [1, 384, 1, 512]> x_97 = add(x = var_3000_cast_fp16, y = var_3018)[name = tensor<string, []>("x_97")];
+            tensor<fp16, []> var_2791_promoted_1 = const()[name = tensor<string, []>("op_2791_promoted_1"), val = tensor<fp16, []>(-0x1.f4p+7)];
+            tensor<fp16, []> var_2792_promoted_1 = const()[name = tensor<string, []>("op_2792_promoted_1"), val = tensor<fp16, []>(0x1.f4p+7)];
+            tensor<fp16, [1, 384, 1, 512]> x = clip(alpha = var_2791_promoted_1, beta = var_2792_promoted_1, x = x_97)[name = tensor<string, []>("x")];
+            tensor<int32, [1]> var_3023 = const()[name = tensor<string, []>("op_3023"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> mean = reduce_mean(axes = var_3023, keep_dims = var_2796, x = x)[name = tensor<string, []>("mean")];
             tensor<fp16, [1, 384, 1, 512]> zero_mean = sub(x = x, y = mean)[name = tensor<string, []>("zero_mean")];
-            tensor<fp16, []> var_2878_promoted_1 = const()[name = tensor<string, []>("op_2878_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
-            tensor<fp16, [1, 384, 1, 512]> var_3112 = pow(x = zero_mean, y = var_2878_promoted_1)[name = tensor<string, []>("op_3112")];
-            tensor<int32, [1]> var_3113 = const()[name = tensor<string, []>("op_3113"), val = tensor<int32, [1]>([1])];
-            tensor<fp16, [1, 1, 1, 512]> var_3114 = reduce_mean(axes = var_3113, keep_dims = var_2873, x = var_3112)[name = tensor<string, []>("op_3114")];
-            tensor<fp16, []> var_3115_to_fp16 = const()[name = tensor<string, []>("op_3115_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1, 1, 512]> var_3116_cast_fp16 = add(x = var_3114, y = var_3115_to_fp16)[name = tensor<string, []>("op_3116_cast_fp16")];
+            tensor<fp16, []> var_2793_promoted_1 = const()[name = tensor<string, []>("op_2793_promoted_1"), val = tensor<fp16, []>(0x1p+1)];
+            tensor<fp16, [1, 384, 1, 512]> var_3026 = pow(x = zero_mean, y = var_2793_promoted_1)[name = tensor<string, []>("op_3026")];
+            tensor<int32, [1]> var_3027 = const()[name = tensor<string, []>("op_3027"), val = tensor<int32, [1]>([1])];
+            tensor<fp16, [1, 1, 1, 512]> var_3028 = reduce_mean(axes = var_3027, keep_dims = var_2796, x = var_3026)[name = tensor<string, []>("op_3028")];
+            tensor<fp16, []> var_3029_to_fp16 = const()[name = tensor<string, []>("op_3029_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1, 1, 512]> var_3030_cast_fp16 = add(x = var_3028, y = var_3029_to_fp16)[name = tensor<string, []>("op_3030_cast_fp16")];
             tensor<fp32, []> denom_49_epsilon_0 = const()[name = tensor<string, []>("denom_49_epsilon_0"), val = tensor<fp32, []>(0x1.197998p-40)];
-            tensor<fp16, [1, 1, 1, 512]> denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0, x = var_3116_cast_fp16)[name = tensor<string, []>("denom_49_cast_fp16")];
-            tensor<fp16, [1, 384, 1, 512]> var_3118_cast_fp16 = mul(x = zero_mean, y = denom_49_cast_fp16)[name = tensor<string, []>("op_3118_cast_fp16")];
-            tensor<fp16, [384]> var_3120_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_3120_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66828672)))];
-            tensor<fp16, [384]> var_3120_beta_0_to_fp16 = const()[name = tensor<string, []>("op_3120_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66829504)))];
-            tensor<fp16, []> var_3120_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_3120_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
-            tensor<fp16, [1, 384, 1, 512]> var_3120_cast_fp16 = batch_norm(beta = var_3120_beta_0_to_fp16, epsilon = var_3120_epsilon_0_to_fp16, gamma = var_3120_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3118_cast_fp16)[name = tensor<string, []>("op_3120_cast_fp16")];
-            tensor<int32, [4]> var_3134_begin_0 = const()[name = tensor<string, []>("op_3134_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
-            tensor<int32, [4]> var_3134_end_0 = const()[name = tensor<string, []>("op_3134_end_0"), val = tensor<int32, [4]>([1, 384, 1, 512])];
-            tensor<bool, [4]> var_3134_end_mask_0 = const()[name = tensor<string, []>("op_3134_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
-            tensor<bool, [4]> var_3134_squeeze_mask_0 = const()[name = tensor<string, []>("op_3134_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
-            tensor<fp16, [1, 384, 512]> var_3134 = slice_by_index(begin = var_3134_begin_0, end = var_3134_end_0, end_mask = var_3134_end_mask_0, squeeze_mask = var_3134_squeeze_mask_0, x = var_3120_cast_fp16)[name = tensor<string, []>("op_3134")];
-            tensor<int32, [3]> var_3137_begin_0 = const()[name = tensor<string, []>("op_3137_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
-            tensor<int32, [3]> var_3137_end_0 = const()[name = tensor<string, []>("op_3137_end_0"), val = tensor<int32, [3]>([1, 384, 1])];
-            tensor<bool, [3]> var_3137_end_mask_0 = const()[name = tensor<string, []>("op_3137_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
-            tensor<bool, [3]> var_3137_squeeze_mask_0 = const()[name = tensor<string, []>("op_3137_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, true])];
-            tensor<fp16, [1, 384]> var_3137 = slice_by_index(begin = var_3137_begin_0, end = var_3137_end_0, end_mask = var_3137_end_mask_0, squeeze_mask = var_3137_squeeze_mask_0, x = var_3134)[name = tensor<string, []>("op_3137")];
-            tensor<int32, [1]> var_3145 = const()[name = tensor<string, []>("op_3145"), val = tensor<int32, [1]>([1])];
-            tensor<bool, []> var_3146 = const()[name = tensor<string, []>("op_3146"), val = tensor<bool, []>(true)];
-            tensor<fp16, [1, 384]> abs_0_cast_fp16 = abs(x = var_3137)[name = tensor<string, []>("abs_0_cast_fp16")];
+            tensor<fp16, [1, 1, 1, 512]> denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0, x = var_3030_cast_fp16)[name = tensor<string, []>("denom_49_cast_fp16")];
+            tensor<fp16, [1, 384, 1, 512]> var_3032_cast_fp16 = mul(x = zero_mean, y = denom_49_cast_fp16)[name = tensor<string, []>("op_3032_cast_fp16")];
+            tensor<fp16, [384]> var_3034_gamma_0_to_fp16 = const()[name = tensor<string, []>("op_3034_gamma_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66828672)))];
+            tensor<fp16, [384]> var_3034_beta_0_to_fp16 = const()[name = tensor<string, []>("op_3034_beta_0_to_fp16"), val = tensor<fp16, [384]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(66829504)))];
+            tensor<fp16, []> var_3034_epsilon_0_to_fp16 = const()[name = tensor<string, []>("op_3034_epsilon_0_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)];
+            tensor<fp16, [1, 384, 1, 512]> var_3034_cast_fp16 = batch_norm(beta = var_3034_beta_0_to_fp16, epsilon = var_3034_epsilon_0_to_fp16, gamma = var_3034_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3032_cast_fp16)[name = tensor<string, []>("op_3034_cast_fp16")];
+            tensor<int32, [4]> var_3048_begin_0 = const()[name = tensor<string, []>("op_3048_begin_0"), val = tensor<int32, [4]>([0, 0, 0, 0])];
+            tensor<int32, [4]> var_3048_end_0 = const()[name = tensor<string, []>("op_3048_end_0"), val = tensor<int32, [4]>([1, 384, 1, 512])];
+            tensor<bool, [4]> var_3048_end_mask_0 = const()[name = tensor<string, []>("op_3048_end_mask_0"), val = tensor<bool, [4]>([true, true, false, true])];
+            tensor<bool, [4]> var_3048_squeeze_mask_0 = const()[name = tensor<string, []>("op_3048_squeeze_mask_0"), val = tensor<bool, [4]>([false, false, true, false])];
+            tensor<fp16, [1, 384, 512]> var_3048 = slice_by_index(begin = var_3048_begin_0, end = var_3048_end_0, end_mask = var_3048_end_mask_0, squeeze_mask = var_3048_squeeze_mask_0, x = var_3034_cast_fp16)[name = tensor<string, []>("op_3048")];
+            tensor<int32, [3]> var_3051_begin_0 = const()[name = tensor<string, []>("op_3051_begin_0"), val = tensor<int32, [3]>([0, 0, 0])];
+            tensor<int32, [3]> var_3051_end_0 = const()[name = tensor<string, []>("op_3051_end_0"), val = tensor<int32, [3]>([1, 384, 1])];
+            tensor<bool, [3]> var_3051_end_mask_0 = const()[name = tensor<string, []>("op_3051_end_mask_0"), val = tensor<bool, [3]>([true, true, false])];
+            tensor<bool, [3]> var_3051_squeeze_mask_0 = const()[name = tensor<string, []>("op_3051_squeeze_mask_0"), val = tensor<bool, [3]>([false, false, true])];
+            tensor<fp16, [1, 384]> var_3051 = slice_by_index(begin = var_3051_begin_0, end = var_3051_end_0, end_mask = var_3051_end_mask_0, squeeze_mask = var_3051_squeeze_mask_0, x = var_3048)[name = tensor<string, []>("op_3051")];
+            tensor<int32, [1]> var_3059 = const()[name = tensor<string, []>("op_3059"), val = tensor<int32, [1]>([1])];
+            tensor<bool, []> var_3060 = const()[name = tensor<string, []>("op_3060"), val = tensor<bool, []>(true)];
+            tensor<fp16, [1, 384]> abs_0_cast_fp16 = abs(x = var_3051)[name = tensor<string, []>("abs_0_cast_fp16")];
             tensor<fp16, []> const_120_promoted_to_fp16 = const()[name = tensor<string, []>("const_120_promoted_to_fp16"), val = tensor<fp16, []>(0x1p+1)];
             tensor<fp16, [1, 384]> pow_0_cast_fp16 = pow(x = abs_0_cast_fp16, y = const_120_promoted_to_fp16)[name = tensor<string, []>("pow_0_cast_fp16")];
-            tensor<fp16, [1, 1]> reduce_sum_0_cast_fp16 = reduce_sum(axes = var_3145, keep_dims = var_3146, x = pow_0_cast_fp16)[name = tensor<string, []>("reduce_sum_0_cast_fp16")];
-            tensor<fp16, []> var_3147_y_0_to_fp16 = const()[name = tensor<string, []>("op_3147_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-1)];
-            tensor<fp16, [1, 1]> var_3147_cast_fp16 = pow(x = reduce_sum_0_cast_fp16, y = var_3147_y_0_to_fp16)[name = tensor<string, []>("op_3147_cast_fp16")];
-            tensor<fp16, []> var_3148_to_fp16 = const()[name = tensor<string, []>("op_3148_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
-            tensor<fp16, [1, 1]> var_3149_cast_fp16 = maximum(x = var_3147_cast_fp16, y = var_3148_to_fp16)[name = tensor<string, []>("op_3149_cast_fp16")];
+            tensor<fp16, [1, 1]> reduce_sum_0_cast_fp16 = reduce_sum(axes = var_3059, keep_dims = var_3060, x = pow_0_cast_fp16)[name = tensor<string, []>("reduce_sum_0_cast_fp16")];
+            tensor<fp16, []> var_3061_y_0_to_fp16 = const()[name = tensor<string, []>("op_3061_y_0_to_fp16"), val = tensor<fp16, []>(0x1p-1)];
+            tensor<fp16, [1, 1]> var_3061_cast_fp16 = pow(x = reduce_sum_0_cast_fp16, y = var_3061_y_0_to_fp16)[name = tensor<string, []>("op_3061_cast_fp16")];
+            tensor<fp16, []> var_3062_to_fp16 = const()[name = tensor<string, []>("op_3062_to_fp16"), val = tensor<fp16, []>(0x1p-24)];
+            tensor<fp16, [1, 1]> var_3063_cast_fp16 = maximum(x = var_3061_cast_fp16, y = var_3062_to_fp16)[name = tensor<string, []>("op_3063_cast_fp16")];
             tensor<int32, [2]> denom_reps_0 = const()[name = tensor<string, []>("denom_reps_0"), val = tensor<int32, [2]>([1, 384])];
-            tensor<fp16, [1, 384]> denom_cast_fp16 = tile(reps = denom_reps_0, x = var_3149_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
-            tensor<fp16, [1, 384]> outputs = real_div(x = var_3137, y = denom_cast_fp16)[name = tensor<string, []>("op_3151_cast_fp16")];
+            tensor<fp16, [1, 384]> denom_cast_fp16 = tile(reps = denom_reps_0, x = var_3063_cast_fp16)[name = tensor<string, []>("denom_cast_fp16")];
+            tensor<fp16, [1, 384]> outputs = real_div(x = var_3051, y = denom_cast_fp16)[name = tensor<string, []>("op_3065_cast_fp16")];
         } -> (outputs);
 }
\ No newline at end of file