diff --git "a/ane-snowflake-arctic-embed-m/model.mlmodelc/model.mil" "b/ane-snowflake-arctic-embed-m/model.mlmodelc/model.mil" --- "a/ane-snowflake-arctic-embed-m/model.mlmodelc/model.mil" +++ "b/ane-snowflake-arctic-embed-m/model.mlmodelc/model.mil" @@ -154,8 +154,8 @@ program(1.0) tensor inputs_embeds_batch_dims_0 = const()[name = tensor("inputs_embeds_batch_dims_0"), val = tensor(0)]; tensor inputs_embeds_validate_indices_0 = const()[name = tensor("inputs_embeds_validate_indices_0"), val = tensor(false)]; tensor input_ids_to_int16_dtype_0 = const()[name = tensor("input_ids_to_int16_dtype_0"), val = tensor("int16")]; - tensor cast_41 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = tensor("cast_41")]; - tensor inputs_embeds_cast_uint16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = cast_41, validate_indices = inputs_embeds_validate_indices_0, x = embeddings_word_embeddings_weight)[name = tensor("inputs_embeds_cast_uint16")]; + tensor cast_5 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = tensor("cast_5")]; + tensor inputs_embeds_cast_uint16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = cast_5, validate_indices = inputs_embeds_validate_indices_0, x = embeddings_word_embeddings_weight)[name = tensor("inputs_embeds_cast_uint16")]; tensor var_45 = add(x = inputs_embeds_cast_uint16, y = embeddings_token_type_embeddings_weight)[name = tensor("op_45")]; tensor embeddings_1 = add(x = var_45, y = embeddings_position_embeddings_weight)[name = tensor("embeddings_1")]; tensor var_47_perm_0 = const()[name = tensor("op_47_perm_0"), val = tensor([0, 2, 1])]; @@ -183,2340 +183,2268 @@ program(1.0) tensor var_63_beta_0_to_fp16 = const()[name = tensor("op_63_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218504128)))]; tensor var_63_epsilon_0_to_fp16 = const()[name = tensor("op_63_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; tensor var_63_cast_fp16 = batch_norm(beta = var_63_beta_0_to_fp16, epsilon = var_63_epsilon_0_to_fp16, gamma = var_63_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_61_cast_fp16)[name = tensor("op_63_cast_fp16")]; - tensor var_66_promoted_to_fp16 = const()[name = tensor("op_66_promoted_to_fp16"), val = tensor(0x1p+0)]; - tensor var_67_cast_fp16 = sub(x = mask, y = var_66_promoted_to_fp16)[name = tensor("op_67_cast_fp16")]; - tensor var_68_to_fp16 = const()[name = tensor("op_68_to_fp16"), val = tensor(0x1.388p+13)]; - tensor var_69_cast_fp16 = mul(x = var_67_cast_fp16, y = var_68_to_fp16)[name = tensor("op_69_cast_fp16")]; - tensor var_71_axes_0 = const()[name = tensor("op_71_axes_0"), val = tensor([1])]; - tensor var_71_cast_fp16 = expand_dims(axes = var_71_axes_0, x = var_69_cast_fp16)[name = tensor("op_71_cast_fp16")]; - tensor var_73_axes_0 = const()[name = tensor("op_73_axes_0"), val = tensor([2])]; - tensor var_73_cast_fp16 = expand_dims(axes = var_73_axes_0, x = var_71_cast_fp16)[name = tensor("op_73_cast_fp16")]; - tensor var_77 = const()[name = tensor("op_77"), val = tensor(1)]; - tensor var_78 = const()[name = tensor("op_78"), val = tensor(0)]; - tensor var_79 = const()[name = tensor("op_79"), val = tensor(true)]; - tensor var_104 = const()[name = tensor("op_104"), val = tensor([1, 1])]; - tensor var_106 = const()[name = tensor("op_106"), val = tensor([1, 1])]; - tensor var_108_pad_type_0 = const()[name = tensor("op_108_pad_type_0"), val = tensor("custom")]; - tensor var_108_pad_0 = const()[name = tensor("op_108_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_108 = conv(bias = layers_0_attention_q_proj_bias, dilations = var_106, groups = var_77, pad = var_108_pad_0, pad_type = var_108_pad_type_0, strides = var_104, weight = layers_0_attention_q_proj_weight, x = var_63_cast_fp16)[name = tensor("op_108")]; - tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 64, 12, 512])]; - tensor var_110 = reshape(shape = var_109, x = var_108)[name = tensor("op_110")]; - tensor var_113 = const()[name = tensor("op_113"), val = tensor([1, 1])]; - tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; - tensor var_117_pad_type_0 = const()[name = tensor("op_117_pad_type_0"), val = tensor("custom")]; - tensor var_117_pad_0 = const()[name = tensor("op_117_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_117 = conv(bias = layers_0_attention_k_proj_bias, dilations = var_115, groups = var_77, pad = var_117_pad_0, pad_type = var_117_pad_type_0, strides = var_113, weight = layers_0_attention_k_proj_weight, x = var_63_cast_fp16)[name = tensor("op_117")]; - tensor var_118 = const()[name = tensor("op_118"), val = tensor([1, 64, 12, 512])]; - tensor ks_1 = reshape(shape = var_118, x = var_117)[name = tensor("ks_1")]; - tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, 1])]; - tensor var_124 = const()[name = tensor("op_124"), val = tensor([1, 1])]; - tensor var_126_pad_type_0 = const()[name = tensor("op_126_pad_type_0"), val = tensor("custom")]; - tensor var_126_pad_0 = const()[name = tensor("op_126_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_126 = conv(bias = layers_0_attention_v_proj_bias, dilations = var_124, groups = var_77, pad = var_126_pad_0, pad_type = var_126_pad_type_0, strides = var_122, weight = layers_0_attention_v_proj_weight, x = var_63_cast_fp16)[name = tensor("op_126")]; - tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 64, 12, 512])]; - tensor var_128 = reshape(shape = var_127, x = var_126)[name = tensor("op_128")]; - tensor tile_2 = const()[name = tensor("tile_2"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_129_axis_0 = const()[name = tensor("op_129_axis_0"), val = tensor(2)]; - tensor var_129_0, tensor var_129_1, tensor var_129_2, tensor var_129_3, tensor var_129_4, tensor var_129_5, tensor var_129_6, tensor var_129_7, tensor var_129_8, tensor var_129_9, tensor var_129_10, tensor var_129_11 = split(axis = var_129_axis_0, split_sizes = tile_2, x = var_110)[name = tensor("op_129")]; - tensor var_142_perm_0 = const()[name = tensor("op_142_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_3 = const()[name = tensor("tile_3"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_143_axis_0 = const()[name = tensor("op_143_axis_0"), val = tensor(2)]; - tensor transpose_11 = transpose(perm = var_142_perm_0, x = ks_1)[name = tensor("transpose_11")]; - tensor var_143_0, tensor var_143_1, tensor var_143_2, tensor var_143_3, tensor var_143_4, tensor var_143_5, tensor var_143_6, tensor var_143_7, tensor var_143_8, tensor var_143_9, tensor var_143_10, tensor var_143_11 = split(axis = var_143_axis_0, split_sizes = tile_3, x = transpose_11)[name = tensor("op_143")]; - tensor tile_4 = const()[name = tensor("tile_4"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_156_axis_0 = const()[name = tensor("op_156_axis_0"), val = tensor(2)]; - tensor var_156_0, tensor var_156_1, tensor var_156_2, tensor var_156_3, tensor var_156_4, tensor var_156_5, tensor var_156_6, tensor var_156_7, tensor var_156_8, tensor var_156_9, tensor var_156_10, tensor var_156_11 = split(axis = var_156_axis_0, split_sizes = tile_4, x = var_128)[name = tensor("op_156")]; - tensor var_170_equation_0 = const()[name = tensor("op_170_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_170 = einsum(equation = var_170_equation_0, values = (var_143_0, var_129_0))[name = tensor("op_170")]; - tensor var_171_to_fp16 = const()[name = tensor("op_171_to_fp16"), val = tensor(0x1p-3)]; - tensor w_1_cast_fp16 = mul(x = var_170, y = var_171_to_fp16)[name = tensor("w_1_cast_fp16")]; - tensor var_174_equation_0 = const()[name = tensor("op_174_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_174 = einsum(equation = var_174_equation_0, values = (var_143_1, var_129_1))[name = tensor("op_174")]; - tensor var_175_to_fp16 = const()[name = tensor("op_175_to_fp16"), val = tensor(0x1p-3)]; - tensor w_3_cast_fp16 = mul(x = var_174, y = var_175_to_fp16)[name = tensor("w_3_cast_fp16")]; - tensor var_178_equation_0 = const()[name = tensor("op_178_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_178 = einsum(equation = var_178_equation_0, values = (var_143_2, var_129_2))[name = tensor("op_178")]; - tensor var_179_to_fp16 = const()[name = tensor("op_179_to_fp16"), val = tensor(0x1p-3)]; - tensor w_5_cast_fp16 = mul(x = var_178, y = var_179_to_fp16)[name = tensor("w_5_cast_fp16")]; - tensor var_182_equation_0 = const()[name = tensor("op_182_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_182 = einsum(equation = var_182_equation_0, values = (var_143_3, var_129_3))[name = tensor("op_182")]; - tensor var_183_to_fp16 = const()[name = tensor("op_183_to_fp16"), val = tensor(0x1p-3)]; - tensor w_7_cast_fp16 = mul(x = var_182, y = var_183_to_fp16)[name = tensor("w_7_cast_fp16")]; - tensor var_186_equation_0 = const()[name = tensor("op_186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_186 = einsum(equation = var_186_equation_0, values = (var_143_4, var_129_4))[name = tensor("op_186")]; - tensor var_187_to_fp16 = const()[name = tensor("op_187_to_fp16"), val = tensor(0x1p-3)]; - tensor w_9_cast_fp16 = mul(x = var_186, y = var_187_to_fp16)[name = tensor("w_9_cast_fp16")]; - tensor var_190_equation_0 = const()[name = tensor("op_190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_190 = einsum(equation = var_190_equation_0, values = (var_143_5, var_129_5))[name = tensor("op_190")]; - tensor var_191_to_fp16 = const()[name = tensor("op_191_to_fp16"), val = tensor(0x1p-3)]; - tensor w_11_cast_fp16 = mul(x = var_190, y = var_191_to_fp16)[name = tensor("w_11_cast_fp16")]; - tensor var_194_equation_0 = const()[name = tensor("op_194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_194 = einsum(equation = var_194_equation_0, values = (var_143_6, var_129_6))[name = tensor("op_194")]; - tensor var_195_to_fp16 = const()[name = tensor("op_195_to_fp16"), val = tensor(0x1p-3)]; - tensor w_13_cast_fp16 = mul(x = var_194, y = var_195_to_fp16)[name = tensor("w_13_cast_fp16")]; - tensor var_198_equation_0 = const()[name = tensor("op_198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_198 = einsum(equation = var_198_equation_0, values = (var_143_7, var_129_7))[name = tensor("op_198")]; - tensor var_199_to_fp16 = const()[name = tensor("op_199_to_fp16"), val = tensor(0x1p-3)]; - tensor w_15_cast_fp16 = mul(x = var_198, y = var_199_to_fp16)[name = tensor("w_15_cast_fp16")]; - tensor var_202_equation_0 = const()[name = tensor("op_202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_202 = einsum(equation = var_202_equation_0, values = (var_143_8, var_129_8))[name = tensor("op_202")]; - tensor var_203_to_fp16 = const()[name = tensor("op_203_to_fp16"), val = tensor(0x1p-3)]; - tensor w_17_cast_fp16 = mul(x = var_202, y = var_203_to_fp16)[name = tensor("w_17_cast_fp16")]; - tensor var_206_equation_0 = const()[name = tensor("op_206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_206 = einsum(equation = var_206_equation_0, values = (var_143_9, var_129_9))[name = tensor("op_206")]; - tensor var_207_to_fp16 = const()[name = tensor("op_207_to_fp16"), val = tensor(0x1p-3)]; - tensor w_19_cast_fp16 = mul(x = var_206, y = var_207_to_fp16)[name = tensor("w_19_cast_fp16")]; - tensor var_210_equation_0 = const()[name = tensor("op_210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_210 = einsum(equation = var_210_equation_0, values = (var_143_10, var_129_10))[name = tensor("op_210")]; - tensor var_211_to_fp16 = const()[name = tensor("op_211_to_fp16"), val = tensor(0x1p-3)]; - tensor w_21_cast_fp16 = mul(x = var_210, y = var_211_to_fp16)[name = tensor("w_21_cast_fp16")]; - tensor var_214_equation_0 = const()[name = tensor("op_214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_214 = einsum(equation = var_214_equation_0, values = (var_143_11, var_129_11))[name = tensor("op_214")]; - tensor var_215_to_fp16 = const()[name = tensor("op_215_to_fp16"), val = tensor(0x1p-3)]; - tensor w_23_cast_fp16 = mul(x = var_214, y = var_215_to_fp16)[name = tensor("w_23_cast_fp16")]; - tensor var_217_cast_fp16 = add(x = w_1_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_217_cast_fp16")]; - tensor var_218_cast_fp16 = softmax(axis = var_77, x = var_217_cast_fp16)[name = tensor("op_218_cast_fp16")]; - tensor var_219_cast_fp16 = add(x = w_3_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_219_cast_fp16")]; - tensor var_220_cast_fp16 = softmax(axis = var_77, x = var_219_cast_fp16)[name = tensor("op_220_cast_fp16")]; - tensor var_221_cast_fp16 = add(x = w_5_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_221_cast_fp16")]; - tensor var_222_cast_fp16 = softmax(axis = var_77, x = var_221_cast_fp16)[name = tensor("op_222_cast_fp16")]; - tensor var_223_cast_fp16 = add(x = w_7_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_223_cast_fp16")]; - tensor var_224_cast_fp16 = softmax(axis = var_77, x = var_223_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor var_225_cast_fp16 = add(x = w_9_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_225_cast_fp16")]; - tensor var_226_cast_fp16 = softmax(axis = var_77, x = var_225_cast_fp16)[name = tensor("op_226_cast_fp16")]; - tensor var_227_cast_fp16 = add(x = w_11_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_227_cast_fp16")]; - tensor var_228_cast_fp16 = softmax(axis = var_77, x = var_227_cast_fp16)[name = tensor("op_228_cast_fp16")]; - tensor var_229_cast_fp16 = add(x = w_13_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_229_cast_fp16")]; - tensor var_230_cast_fp16 = softmax(axis = var_77, x = var_229_cast_fp16)[name = tensor("op_230_cast_fp16")]; - tensor var_231_cast_fp16 = add(x = w_15_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_231_cast_fp16")]; - tensor var_232_cast_fp16 = softmax(axis = var_77, x = var_231_cast_fp16)[name = tensor("op_232_cast_fp16")]; - tensor var_233_cast_fp16 = add(x = w_17_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_233_cast_fp16")]; - tensor var_234_cast_fp16 = softmax(axis = var_77, x = var_233_cast_fp16)[name = tensor("op_234_cast_fp16")]; - tensor var_235_cast_fp16 = add(x = w_19_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_235_cast_fp16")]; - tensor var_236_cast_fp16 = softmax(axis = var_77, x = var_235_cast_fp16)[name = tensor("op_236_cast_fp16")]; - tensor var_237_cast_fp16 = add(x = w_21_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_237_cast_fp16")]; - tensor var_238_cast_fp16 = softmax(axis = var_77, x = var_237_cast_fp16)[name = tensor("op_238_cast_fp16")]; - tensor var_239_cast_fp16 = add(x = w_23_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_239_cast_fp16")]; - tensor var_240_cast_fp16 = softmax(axis = var_77, x = var_239_cast_fp16)[name = tensor("op_240_cast_fp16")]; - tensor var_242_equation_0 = const()[name = tensor("op_242_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_242_cast_fp16 = einsum(equation = var_242_equation_0, values = (var_156_0, var_218_cast_fp16))[name = tensor("op_242_cast_fp16")]; + tensor var_76_axes_0 = const()[name = tensor("op_76_axes_0"), val = tensor([2])]; + tensor var_76_cast_fp16 = expand_dims(axes = var_76_axes_0, x = mask)[name = tensor("op_76_cast_fp16")]; + tensor var_78_axes_0 = const()[name = tensor("op_78_axes_0"), val = tensor([3])]; + tensor var_78_cast_fp16 = expand_dims(axes = var_78_axes_0, x = var_76_cast_fp16)[name = tensor("op_78_cast_fp16")]; + tensor var_80_to_fp16 = const()[name = tensor("op_80_to_fp16"), val = tensor(0x1p+0)]; + tensor var_81_cast_fp16 = sub(x = var_78_cast_fp16, y = var_80_to_fp16)[name = tensor("op_81_cast_fp16")]; + tensor var_82_to_fp16 = const()[name = tensor("op_82_to_fp16"), val = tensor(0x1.388p+13)]; + tensor var_83_cast_fp16 = mul(x = var_81_cast_fp16, y = var_82_to_fp16)[name = tensor("op_83_cast_fp16")]; + tensor var_88 = const()[name = tensor("op_88"), val = tensor(1)]; + tensor var_89 = const()[name = tensor("op_89"), val = tensor(0)]; + tensor var_90 = const()[name = tensor("op_90"), val = tensor(true)]; + tensor var_112 = const()[name = tensor("op_112"), val = tensor([1, 1])]; + tensor var_114 = const()[name = tensor("op_114"), val = tensor([1, 1])]; + tensor var_116_pad_type_0 = const()[name = tensor("op_116_pad_type_0"), val = tensor("custom")]; + tensor var_116_pad_0 = const()[name = tensor("op_116_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_116 = conv(bias = layers_0_attention_q_proj_bias, dilations = var_114, groups = var_88, pad = var_116_pad_0, pad_type = var_116_pad_type_0, strides = var_112, weight = layers_0_attention_q_proj_weight, x = var_63_cast_fp16)[name = tensor("op_116")]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121 = const()[name = tensor("op_121"), val = tensor([1, 1])]; + tensor ks_1_pad_type_0 = const()[name = tensor("ks_1_pad_type_0"), val = tensor("custom")]; + tensor ks_1_pad_0 = const()[name = tensor("ks_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_1 = conv(bias = layers_0_attention_k_proj_bias, dilations = var_121, groups = var_88, pad = ks_1_pad_0, pad_type = ks_1_pad_type_0, strides = var_119, weight = layers_0_attention_k_proj_weight, x = var_63_cast_fp16)[name = tensor("ks_1")]; + tensor var_126 = const()[name = tensor("op_126"), val = tensor([1, 1])]; + tensor var_128 = const()[name = tensor("op_128"), val = tensor([1, 1])]; + tensor var_130_pad_type_0 = const()[name = tensor("op_130_pad_type_0"), val = tensor("custom")]; + tensor var_130_pad_0 = const()[name = tensor("op_130_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_130 = conv(bias = layers_0_attention_v_proj_bias, dilations = var_128, groups = var_88, pad = var_130_pad_0, pad_type = var_130_pad_type_0, strides = var_126, weight = layers_0_attention_v_proj_weight, x = var_63_cast_fp16)[name = tensor("op_130")]; + tensor tile_2 = const()[name = tensor("tile_2"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_131_axis_0 = const()[name = tensor("op_131_axis_0"), val = tensor(1)]; + tensor var_131_0, tensor var_131_1, tensor var_131_2, tensor var_131_3, tensor var_131_4, tensor var_131_5, tensor var_131_6, tensor var_131_7, tensor var_131_8, tensor var_131_9, tensor var_131_10, tensor var_131_11 = split(axis = var_131_axis_0, split_sizes = tile_2, x = var_116)[name = tensor("op_131")]; + tensor var_144_perm_0 = const()[name = tensor("op_144_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_3 = const()[name = tensor("tile_3"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_145_axis_0 = const()[name = tensor("op_145_axis_0"), val = tensor(3)]; + tensor transpose_11 = transpose(perm = var_144_perm_0, x = ks_1)[name = tensor("transpose_11")]; + tensor var_145_0, tensor var_145_1, tensor var_145_2, tensor var_145_3, tensor var_145_4, tensor var_145_5, tensor var_145_6, tensor var_145_7, tensor var_145_8, tensor var_145_9, tensor var_145_10, tensor var_145_11 = split(axis = var_145_axis_0, split_sizes = tile_3, x = transpose_11)[name = tensor("op_145")]; + tensor tile_4 = const()[name = tensor("tile_4"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_158_axis_0 = const()[name = tensor("op_158_axis_0"), val = tensor(1)]; + tensor var_158_0, tensor var_158_1, tensor var_158_2, tensor var_158_3, tensor var_158_4, tensor var_158_5, tensor var_158_6, tensor var_158_7, tensor var_158_8, tensor var_158_9, tensor var_158_10, tensor var_158_11 = split(axis = var_158_axis_0, split_sizes = tile_4, x = var_130)[name = tensor("op_158")]; + tensor var_172_equation_0 = const()[name = tensor("op_172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_172 = einsum(equation = var_172_equation_0, values = (var_145_0, var_131_0))[name = tensor("op_172")]; + tensor var_173_to_fp16 = const()[name = tensor("op_173_to_fp16"), val = tensor(0x1p-3)]; + tensor w_1_cast_fp16 = mul(x = var_172, y = var_173_to_fp16)[name = tensor("w_1_cast_fp16")]; + tensor var_176_equation_0 = const()[name = tensor("op_176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_176 = einsum(equation = var_176_equation_0, values = (var_145_1, var_131_1))[name = tensor("op_176")]; + tensor var_177_to_fp16 = const()[name = tensor("op_177_to_fp16"), val = tensor(0x1p-3)]; + tensor w_3_cast_fp16 = mul(x = var_176, y = var_177_to_fp16)[name = tensor("w_3_cast_fp16")]; + tensor var_180_equation_0 = const()[name = tensor("op_180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_180 = einsum(equation = var_180_equation_0, values = (var_145_2, var_131_2))[name = tensor("op_180")]; + tensor var_181_to_fp16 = const()[name = tensor("op_181_to_fp16"), val = tensor(0x1p-3)]; + tensor w_5_cast_fp16 = mul(x = var_180, y = var_181_to_fp16)[name = tensor("w_5_cast_fp16")]; + tensor var_184_equation_0 = const()[name = tensor("op_184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_184 = einsum(equation = var_184_equation_0, values = (var_145_3, var_131_3))[name = tensor("op_184")]; + tensor var_185_to_fp16 = const()[name = tensor("op_185_to_fp16"), val = tensor(0x1p-3)]; + tensor w_7_cast_fp16 = mul(x = var_184, y = var_185_to_fp16)[name = tensor("w_7_cast_fp16")]; + tensor var_188_equation_0 = const()[name = tensor("op_188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_188 = einsum(equation = var_188_equation_0, values = (var_145_4, var_131_4))[name = tensor("op_188")]; + tensor var_189_to_fp16 = const()[name = tensor("op_189_to_fp16"), val = tensor(0x1p-3)]; + tensor w_9_cast_fp16 = mul(x = var_188, y = var_189_to_fp16)[name = tensor("w_9_cast_fp16")]; + tensor var_192_equation_0 = const()[name = tensor("op_192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_192 = einsum(equation = var_192_equation_0, values = (var_145_5, var_131_5))[name = tensor("op_192")]; + tensor var_193_to_fp16 = const()[name = tensor("op_193_to_fp16"), val = tensor(0x1p-3)]; + tensor w_11_cast_fp16 = mul(x = var_192, y = var_193_to_fp16)[name = tensor("w_11_cast_fp16")]; + tensor var_196_equation_0 = const()[name = tensor("op_196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_196 = einsum(equation = var_196_equation_0, values = (var_145_6, var_131_6))[name = tensor("op_196")]; + tensor var_197_to_fp16 = const()[name = tensor("op_197_to_fp16"), val = tensor(0x1p-3)]; + tensor w_13_cast_fp16 = mul(x = var_196, y = var_197_to_fp16)[name = tensor("w_13_cast_fp16")]; + tensor var_200_equation_0 = const()[name = tensor("op_200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_200 = einsum(equation = var_200_equation_0, values = (var_145_7, var_131_7))[name = tensor("op_200")]; + tensor var_201_to_fp16 = const()[name = tensor("op_201_to_fp16"), val = tensor(0x1p-3)]; + tensor w_15_cast_fp16 = mul(x = var_200, y = var_201_to_fp16)[name = tensor("w_15_cast_fp16")]; + tensor var_204_equation_0 = const()[name = tensor("op_204_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_204 = einsum(equation = var_204_equation_0, values = (var_145_8, var_131_8))[name = tensor("op_204")]; + tensor var_205_to_fp16 = const()[name = tensor("op_205_to_fp16"), val = tensor(0x1p-3)]; + tensor w_17_cast_fp16 = mul(x = var_204, y = var_205_to_fp16)[name = tensor("w_17_cast_fp16")]; + tensor var_208_equation_0 = const()[name = tensor("op_208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_208 = einsum(equation = var_208_equation_0, values = (var_145_9, var_131_9))[name = tensor("op_208")]; + tensor var_209_to_fp16 = const()[name = tensor("op_209_to_fp16"), val = tensor(0x1p-3)]; + tensor w_19_cast_fp16 = mul(x = var_208, y = var_209_to_fp16)[name = tensor("w_19_cast_fp16")]; + tensor var_212_equation_0 = const()[name = tensor("op_212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_212 = einsum(equation = var_212_equation_0, values = (var_145_10, var_131_10))[name = tensor("op_212")]; + tensor var_213_to_fp16 = const()[name = tensor("op_213_to_fp16"), val = tensor(0x1p-3)]; + tensor w_21_cast_fp16 = mul(x = var_212, y = var_213_to_fp16)[name = tensor("w_21_cast_fp16")]; + tensor var_216_equation_0 = const()[name = tensor("op_216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_216 = einsum(equation = var_216_equation_0, values = (var_145_11, var_131_11))[name = tensor("op_216")]; + tensor var_217_to_fp16 = const()[name = tensor("op_217_to_fp16"), val = tensor(0x1p-3)]; + tensor w_23_cast_fp16 = mul(x = var_216, y = var_217_to_fp16)[name = tensor("w_23_cast_fp16")]; + tensor input_3_cast_fp16 = add(x = w_1_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_220_cast_fp16 = softmax(axis = var_88, x = input_3_cast_fp16)[name = tensor("op_220_cast_fp16")]; + tensor input_5_cast_fp16 = add(x = w_3_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_222_cast_fp16 = softmax(axis = var_88, x = input_5_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor input_7_cast_fp16 = add(x = w_5_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_224_cast_fp16 = softmax(axis = var_88, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; + tensor input_9_cast_fp16 = add(x = w_7_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_226_cast_fp16 = softmax(axis = var_88, x = input_9_cast_fp16)[name = tensor("op_226_cast_fp16")]; + tensor input_11_cast_fp16 = add(x = w_9_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_228_cast_fp16 = softmax(axis = var_88, x = input_11_cast_fp16)[name = tensor("op_228_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = w_11_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_230_cast_fp16 = softmax(axis = var_88, x = input_13_cast_fp16)[name = tensor("op_230_cast_fp16")]; + tensor input_15_cast_fp16 = add(x = w_13_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_232_cast_fp16 = softmax(axis = var_88, x = input_15_cast_fp16)[name = tensor("op_232_cast_fp16")]; + tensor input_17_cast_fp16 = add(x = w_15_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_234_cast_fp16 = softmax(axis = var_88, x = input_17_cast_fp16)[name = tensor("op_234_cast_fp16")]; + tensor input_19_cast_fp16 = add(x = w_17_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_236_cast_fp16 = softmax(axis = var_88, x = input_19_cast_fp16)[name = tensor("op_236_cast_fp16")]; + tensor input_21_cast_fp16 = add(x = w_19_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_238_cast_fp16 = softmax(axis = var_88, x = input_21_cast_fp16)[name = tensor("op_238_cast_fp16")]; + tensor input_23_cast_fp16 = add(x = w_21_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_240_cast_fp16 = softmax(axis = var_88, x = input_23_cast_fp16)[name = tensor("op_240_cast_fp16")]; + tensor input_25_cast_fp16 = add(x = w_23_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_242_cast_fp16 = softmax(axis = var_88, x = input_25_cast_fp16)[name = tensor("op_242_cast_fp16")]; tensor var_244_equation_0 = const()[name = tensor("op_244_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_244_cast_fp16 = einsum(equation = var_244_equation_0, values = (var_156_1, var_220_cast_fp16))[name = tensor("op_244_cast_fp16")]; + tensor var_244_cast_fp16 = einsum(equation = var_244_equation_0, values = (var_158_0, var_220_cast_fp16))[name = tensor("op_244_cast_fp16")]; tensor var_246_equation_0 = const()[name = tensor("op_246_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_246_cast_fp16 = einsum(equation = var_246_equation_0, values = (var_156_2, var_222_cast_fp16))[name = tensor("op_246_cast_fp16")]; + tensor var_246_cast_fp16 = einsum(equation = var_246_equation_0, values = (var_158_1, var_222_cast_fp16))[name = tensor("op_246_cast_fp16")]; tensor var_248_equation_0 = const()[name = tensor("op_248_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_248_cast_fp16 = einsum(equation = var_248_equation_0, values = (var_156_3, var_224_cast_fp16))[name = tensor("op_248_cast_fp16")]; + tensor var_248_cast_fp16 = einsum(equation = var_248_equation_0, values = (var_158_2, var_224_cast_fp16))[name = tensor("op_248_cast_fp16")]; tensor var_250_equation_0 = const()[name = tensor("op_250_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_250_cast_fp16 = einsum(equation = var_250_equation_0, values = (var_156_4, var_226_cast_fp16))[name = tensor("op_250_cast_fp16")]; + tensor var_250_cast_fp16 = einsum(equation = var_250_equation_0, values = (var_158_3, var_226_cast_fp16))[name = tensor("op_250_cast_fp16")]; tensor var_252_equation_0 = const()[name = tensor("op_252_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_252_cast_fp16 = einsum(equation = var_252_equation_0, values = (var_156_5, var_228_cast_fp16))[name = tensor("op_252_cast_fp16")]; + tensor var_252_cast_fp16 = einsum(equation = var_252_equation_0, values = (var_158_4, var_228_cast_fp16))[name = tensor("op_252_cast_fp16")]; tensor var_254_equation_0 = const()[name = tensor("op_254_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_254_cast_fp16 = einsum(equation = var_254_equation_0, values = (var_156_6, var_230_cast_fp16))[name = tensor("op_254_cast_fp16")]; + tensor var_254_cast_fp16 = einsum(equation = var_254_equation_0, values = (var_158_5, var_230_cast_fp16))[name = tensor("op_254_cast_fp16")]; tensor var_256_equation_0 = const()[name = tensor("op_256_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_256_cast_fp16 = einsum(equation = var_256_equation_0, values = (var_156_7, var_232_cast_fp16))[name = tensor("op_256_cast_fp16")]; + tensor var_256_cast_fp16 = einsum(equation = var_256_equation_0, values = (var_158_6, var_232_cast_fp16))[name = tensor("op_256_cast_fp16")]; tensor var_258_equation_0 = const()[name = tensor("op_258_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_258_cast_fp16 = einsum(equation = var_258_equation_0, values = (var_156_8, var_234_cast_fp16))[name = tensor("op_258_cast_fp16")]; + tensor var_258_cast_fp16 = einsum(equation = var_258_equation_0, values = (var_158_7, var_234_cast_fp16))[name = tensor("op_258_cast_fp16")]; tensor var_260_equation_0 = const()[name = tensor("op_260_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_260_cast_fp16 = einsum(equation = var_260_equation_0, values = (var_156_9, var_236_cast_fp16))[name = tensor("op_260_cast_fp16")]; + tensor var_260_cast_fp16 = einsum(equation = var_260_equation_0, values = (var_158_8, var_236_cast_fp16))[name = tensor("op_260_cast_fp16")]; tensor var_262_equation_0 = const()[name = tensor("op_262_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_262_cast_fp16 = einsum(equation = var_262_equation_0, values = (var_156_10, var_238_cast_fp16))[name = tensor("op_262_cast_fp16")]; + tensor var_262_cast_fp16 = einsum(equation = var_262_equation_0, values = (var_158_9, var_238_cast_fp16))[name = tensor("op_262_cast_fp16")]; tensor var_264_equation_0 = const()[name = tensor("op_264_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_264_cast_fp16 = einsum(equation = var_264_equation_0, values = (var_156_11, var_240_cast_fp16))[name = tensor("op_264_cast_fp16")]; - tensor var_266_interleave_0 = const()[name = tensor("op_266_interleave_0"), val = tensor(false)]; - tensor var_266_cast_fp16 = concat(axis = var_77, interleave = var_266_interleave_0, values = (var_242_cast_fp16, var_244_cast_fp16, var_246_cast_fp16, var_248_cast_fp16, var_250_cast_fp16, var_252_cast_fp16, var_254_cast_fp16, var_256_cast_fp16, var_258_cast_fp16, var_260_cast_fp16, var_262_cast_fp16, var_264_cast_fp16))[name = tensor("op_266_cast_fp16")]; - tensor var_270 = const()[name = tensor("op_270"), val = tensor([1, 1])]; + tensor var_264_cast_fp16 = einsum(equation = var_264_equation_0, values = (var_158_10, var_240_cast_fp16))[name = tensor("op_264_cast_fp16")]; + tensor var_266_equation_0 = const()[name = tensor("op_266_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_266_cast_fp16 = einsum(equation = var_266_equation_0, values = (var_158_11, var_242_cast_fp16))[name = tensor("op_266_cast_fp16")]; + tensor var_268_interleave_0 = const()[name = tensor("op_268_interleave_0"), val = tensor(false)]; + tensor var_268_cast_fp16 = concat(axis = var_88, interleave = var_268_interleave_0, values = (var_244_cast_fp16, var_246_cast_fp16, var_248_cast_fp16, var_250_cast_fp16, var_252_cast_fp16, var_254_cast_fp16, var_256_cast_fp16, var_258_cast_fp16, var_260_cast_fp16, var_262_cast_fp16, var_264_cast_fp16, var_266_cast_fp16))[name = tensor("op_268_cast_fp16")]; tensor var_272 = const()[name = tensor("op_272"), val = tensor([1, 1])]; - tensor var_274_pad_type_0 = const()[name = tensor("op_274_pad_type_0"), val = tensor("custom")]; - tensor var_274_pad_0 = const()[name = tensor("op_274_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_274 = conv(bias = layers_0_attention_o_proj_bias, dilations = var_272, groups = var_77, pad = var_274_pad_0, pad_type = var_274_pad_type_0, strides = var_270, weight = layers_0_attention_o_proj_weight, x = var_266_cast_fp16)[name = tensor("op_274")]; - tensor var_276_interleave_0 = const()[name = tensor("op_276_interleave_0"), val = tensor(false)]; - tensor var_276 = concat(axis = var_78, interleave = var_276_interleave_0, values = var_274)[name = tensor("op_276")]; - tensor x_5 = add(x = var_63_cast_fp16, y = var_276)[name = tensor("x_5")]; - tensor var_75_promoted = const()[name = tensor("op_75_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_76_promoted = const()[name = tensor("op_76_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_7 = clip(alpha = var_75_promoted, beta = var_76_promoted, x = x_5)[name = tensor("x_7")]; - tensor var_281 = const()[name = tensor("op_281"), val = tensor([1])]; - tensor mean_3 = reduce_mean(axes = var_281, keep_dims = var_79, x = x_7)[name = tensor("mean_3")]; + tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; + tensor var_276_pad_type_0 = const()[name = tensor("op_276_pad_type_0"), val = tensor("custom")]; + tensor var_276_pad_0 = const()[name = tensor("op_276_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_276 = conv(bias = layers_0_attention_o_proj_bias, dilations = var_274, groups = var_88, pad = var_276_pad_0, pad_type = var_276_pad_type_0, strides = var_272, weight = layers_0_attention_o_proj_weight, x = var_268_cast_fp16)[name = tensor("op_276")]; + tensor var_278_interleave_0 = const()[name = tensor("op_278_interleave_0"), val = tensor(false)]; + tensor var_278 = concat(axis = var_89, interleave = var_278_interleave_0, values = var_276)[name = tensor("op_278")]; + tensor x_5 = add(x = var_63_cast_fp16, y = var_278)[name = tensor("x_5")]; + tensor var_85_promoted = const()[name = tensor("op_85_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_86_promoted = const()[name = tensor("op_86_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_7 = clip(alpha = var_85_promoted, beta = var_86_promoted, x = x_5)[name = tensor("x_7")]; + tensor var_283 = const()[name = tensor("op_283"), val = tensor([1])]; + tensor mean_3 = reduce_mean(axes = var_283, keep_dims = var_90, x = x_7)[name = tensor("mean_3")]; tensor zero_mean_3 = sub(x = x_7, y = mean_3)[name = tensor("zero_mean_3")]; - tensor var_84_promoted = const()[name = tensor("op_84_promoted"), val = tensor(0x1p+1)]; - tensor var_284 = pow(x = zero_mean_3, y = var_84_promoted)[name = tensor("op_284")]; - tensor var_285 = const()[name = tensor("op_285"), val = tensor([1])]; - tensor var_286 = reduce_mean(axes = var_285, keep_dims = var_79, x = var_284)[name = tensor("op_286")]; - tensor var_287_to_fp16 = const()[name = tensor("op_287_to_fp16"), val = tensor(0x1p-24)]; - tensor var_288_cast_fp16 = add(x = var_286, y = var_287_to_fp16)[name = tensor("op_288_cast_fp16")]; + tensor var_87_promoted = const()[name = tensor("op_87_promoted"), val = tensor(0x1p+1)]; + tensor var_286 = pow(x = zero_mean_3, y = var_87_promoted)[name = tensor("op_286")]; + tensor var_287 = const()[name = tensor("op_287"), val = tensor([1])]; + tensor var_288 = reduce_mean(axes = var_287, keep_dims = var_90, x = var_286)[name = tensor("op_288")]; + tensor var_289_to_fp16 = const()[name = tensor("op_289_to_fp16"), val = tensor(0x1p-24)]; + tensor var_290_cast_fp16 = add(x = var_288, y = var_289_to_fp16)[name = tensor("op_290_cast_fp16")]; tensor denom_3_epsilon_0 = const()[name = tensor("denom_3_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0, x = var_288_cast_fp16)[name = tensor("denom_3_cast_fp16")]; - tensor var_290_cast_fp16 = mul(x = zero_mean_3, y = denom_3_cast_fp16)[name = tensor("op_290_cast_fp16")]; - tensor var_292_gamma_0_to_fp16 = const()[name = tensor("op_292_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218505728)))]; - tensor var_292_beta_0_to_fp16 = const()[name = tensor("op_292_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218507328)))]; - tensor var_292_epsilon_0_to_fp16 = const()[name = tensor("op_292_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_292_cast_fp16 = batch_norm(beta = var_292_beta_0_to_fp16, epsilon = var_292_epsilon_0_to_fp16, gamma = var_292_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_290_cast_fp16)[name = tensor("op_292_cast_fp16")]; - tensor var_298 = const()[name = tensor("op_298"), val = tensor([1, 1])]; + tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0, x = var_290_cast_fp16)[name = tensor("denom_3_cast_fp16")]; + tensor var_292_cast_fp16 = mul(x = zero_mean_3, y = denom_3_cast_fp16)[name = tensor("op_292_cast_fp16")]; + tensor var_294_gamma_0_to_fp16 = const()[name = tensor("op_294_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218505728)))]; + tensor var_294_beta_0_to_fp16 = const()[name = tensor("op_294_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218507328)))]; + tensor var_294_epsilon_0_to_fp16 = const()[name = tensor("op_294_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_294_cast_fp16 = batch_norm(beta = var_294_beta_0_to_fp16, epsilon = var_294_epsilon_0_to_fp16, gamma = var_294_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_292_cast_fp16)[name = tensor("op_294_cast_fp16")]; tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 1])]; - tensor var_302_pad_type_0 = const()[name = tensor("op_302_pad_type_0"), val = tensor("custom")]; - tensor var_302_pad_0 = const()[name = tensor("op_302_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_302 = conv(bias = layers_0_mlp_fc1_bias, dilations = var_300, groups = var_77, pad = var_302_pad_0, pad_type = var_302_pad_type_0, strides = var_298, weight = layers_0_mlp_fc1_weight, x = var_292_cast_fp16)[name = tensor("op_302")]; - tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; - tensor input_7 = gelu(mode = input_7_mode_0, x = var_302)[name = tensor("input_7")]; - tensor var_306 = const()[name = tensor("op_306"), val = tensor([1, 1])]; + tensor var_302 = const()[name = tensor("op_302"), val = tensor([1, 1])]; + tensor var_304_pad_type_0 = const()[name = tensor("op_304_pad_type_0"), val = tensor("custom")]; + tensor var_304_pad_0 = const()[name = tensor("op_304_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_304 = conv(bias = layers_0_mlp_fc1_bias, dilations = var_302, groups = var_88, pad = var_304_pad_0, pad_type = var_304_pad_type_0, strides = var_300, weight = layers_0_mlp_fc1_weight, x = var_294_cast_fp16)[name = tensor("op_304")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31 = gelu(mode = input_31_mode_0, x = var_304)[name = tensor("input_31")]; tensor var_308 = const()[name = tensor("op_308"), val = tensor([1, 1])]; - tensor var_310_pad_type_0 = const()[name = tensor("op_310_pad_type_0"), val = tensor("custom")]; - tensor var_310_pad_0 = const()[name = tensor("op_310_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_310 = conv(bias = layers_0_mlp_fc2_bias, dilations = var_308, groups = var_77, pad = var_310_pad_0, pad_type = var_310_pad_type_0, strides = var_306, weight = layers_0_mlp_fc2_weight, x = input_7)[name = tensor("op_310")]; - tensor x_9 = add(x = var_292_cast_fp16, y = var_310)[name = tensor("x_9")]; - tensor var_75_promoted_1 = const()[name = tensor("op_75_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_76_promoted_1 = const()[name = tensor("op_76_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_11 = clip(alpha = var_75_promoted_1, beta = var_76_promoted_1, x = x_9)[name = tensor("x_11")]; - tensor var_315 = const()[name = tensor("op_315"), val = tensor([1])]; - tensor mean_5 = reduce_mean(axes = var_315, keep_dims = var_79, x = x_11)[name = tensor("mean_5")]; + tensor var_310 = const()[name = tensor("op_310"), val = tensor([1, 1])]; + tensor var_312_pad_type_0 = const()[name = tensor("op_312_pad_type_0"), val = tensor("custom")]; + tensor var_312_pad_0 = const()[name = tensor("op_312_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_312 = conv(bias = layers_0_mlp_fc2_bias, dilations = var_310, groups = var_88, pad = var_312_pad_0, pad_type = var_312_pad_type_0, strides = var_308, weight = layers_0_mlp_fc2_weight, x = input_31)[name = tensor("op_312")]; + tensor x_9 = add(x = var_294_cast_fp16, y = var_312)[name = tensor("x_9")]; + tensor var_85_promoted_1 = const()[name = tensor("op_85_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_86_promoted_1 = const()[name = tensor("op_86_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_11 = clip(alpha = var_85_promoted_1, beta = var_86_promoted_1, x = x_9)[name = tensor("x_11")]; + tensor var_317 = const()[name = tensor("op_317"), val = tensor([1])]; + tensor mean_5 = reduce_mean(axes = var_317, keep_dims = var_90, x = x_11)[name = tensor("mean_5")]; tensor zero_mean_5 = sub(x = x_11, y = mean_5)[name = tensor("zero_mean_5")]; - tensor var_84_promoted_1 = const()[name = tensor("op_84_promoted_1"), val = tensor(0x1p+1)]; - tensor var_318 = pow(x = zero_mean_5, y = var_84_promoted_1)[name = tensor("op_318")]; - tensor var_319 = const()[name = tensor("op_319"), val = tensor([1])]; - tensor var_320 = reduce_mean(axes = var_319, keep_dims = var_79, x = var_318)[name = tensor("op_320")]; - tensor var_321_to_fp16 = const()[name = tensor("op_321_to_fp16"), val = tensor(0x1p-24)]; - tensor var_322_cast_fp16 = add(x = var_320, y = var_321_to_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_87_promoted_1 = const()[name = tensor("op_87_promoted_1"), val = tensor(0x1p+1)]; + tensor var_320 = pow(x = zero_mean_5, y = var_87_promoted_1)[name = tensor("op_320")]; + tensor var_321 = const()[name = tensor("op_321"), val = tensor([1])]; + tensor var_322 = reduce_mean(axes = var_321, keep_dims = var_90, x = var_320)[name = tensor("op_322")]; + tensor var_323_to_fp16 = const()[name = tensor("op_323_to_fp16"), val = tensor(0x1p-24)]; + tensor var_324_cast_fp16 = add(x = var_322, y = var_323_to_fp16)[name = tensor("op_324_cast_fp16")]; tensor denom_5_epsilon_0 = const()[name = tensor("denom_5_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0, x = var_322_cast_fp16)[name = tensor("denom_5_cast_fp16")]; - tensor var_324_cast_fp16 = mul(x = zero_mean_5, y = denom_5_cast_fp16)[name = tensor("op_324_cast_fp16")]; - tensor var_326_gamma_0_to_fp16 = const()[name = tensor("op_326_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218508928)))]; - tensor var_326_beta_0_to_fp16 = const()[name = tensor("op_326_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218510528)))]; - tensor var_326_epsilon_0_to_fp16 = const()[name = tensor("op_326_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_326_cast_fp16 = batch_norm(beta = var_326_beta_0_to_fp16, epsilon = var_326_epsilon_0_to_fp16, gamma = var_326_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_324_cast_fp16)[name = tensor("op_326_cast_fp16")]; - tensor var_331 = const()[name = tensor("op_331"), val = tensor(1)]; - tensor var_332 = const()[name = tensor("op_332"), val = tensor(0)]; - tensor var_333 = const()[name = tensor("op_333"), val = tensor(true)]; + tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0, x = var_324_cast_fp16)[name = tensor("denom_5_cast_fp16")]; + tensor var_326_cast_fp16 = mul(x = zero_mean_5, y = denom_5_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_328_gamma_0_to_fp16 = const()[name = tensor("op_328_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218508928)))]; + tensor var_328_beta_0_to_fp16 = const()[name = tensor("op_328_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218510528)))]; + tensor var_328_epsilon_0_to_fp16 = const()[name = tensor("op_328_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_328_cast_fp16 = batch_norm(beta = var_328_beta_0_to_fp16, epsilon = var_328_epsilon_0_to_fp16, gamma = var_328_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_326_cast_fp16)[name = tensor("op_328_cast_fp16")]; + tensor var_334 = const()[name = tensor("op_334"), val = tensor(1)]; + tensor var_335 = const()[name = tensor("op_335"), val = tensor(0)]; + tensor var_336 = const()[name = tensor("op_336"), val = tensor(true)]; tensor var_358 = const()[name = tensor("op_358"), val = tensor([1, 1])]; tensor var_360 = const()[name = tensor("op_360"), val = tensor([1, 1])]; tensor var_362_pad_type_0 = const()[name = tensor("op_362_pad_type_0"), val = tensor("custom")]; tensor var_362_pad_0 = const()[name = tensor("op_362_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_362 = conv(bias = layers_1_attention_q_proj_bias, dilations = var_360, groups = var_331, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_358, weight = layers_1_attention_q_proj_weight, x = var_326_cast_fp16)[name = tensor("op_362")]; - tensor var_363 = const()[name = tensor("op_363"), val = tensor([1, 64, 12, 512])]; - tensor var_364 = reshape(shape = var_363, x = var_362)[name = tensor("op_364")]; + tensor var_362 = conv(bias = layers_1_attention_q_proj_bias, dilations = var_360, groups = var_334, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_358, weight = layers_1_attention_q_proj_weight, x = var_328_cast_fp16)[name = tensor("op_362")]; + tensor var_365 = const()[name = tensor("op_365"), val = tensor([1, 1])]; tensor var_367 = const()[name = tensor("op_367"), val = tensor([1, 1])]; - tensor var_369 = const()[name = tensor("op_369"), val = tensor([1, 1])]; - tensor var_371_pad_type_0 = const()[name = tensor("op_371_pad_type_0"), val = tensor("custom")]; - tensor var_371_pad_0 = const()[name = tensor("op_371_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_371 = conv(bias = layers_1_attention_k_proj_bias, dilations = var_369, groups = var_331, pad = var_371_pad_0, pad_type = var_371_pad_type_0, strides = var_367, weight = layers_1_attention_k_proj_weight, x = var_326_cast_fp16)[name = tensor("op_371")]; - tensor var_372 = const()[name = tensor("op_372"), val = tensor([1, 64, 12, 512])]; - tensor ks_3 = reshape(shape = var_372, x = var_371)[name = tensor("ks_3")]; - tensor var_376 = const()[name = tensor("op_376"), val = tensor([1, 1])]; - tensor var_378 = const()[name = tensor("op_378"), val = tensor([1, 1])]; - tensor var_380_pad_type_0 = const()[name = tensor("op_380_pad_type_0"), val = tensor("custom")]; - tensor var_380_pad_0 = const()[name = tensor("op_380_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_380 = conv(bias = layers_1_attention_v_proj_bias, dilations = var_378, groups = var_331, pad = var_380_pad_0, pad_type = var_380_pad_type_0, strides = var_376, weight = layers_1_attention_v_proj_weight, x = var_326_cast_fp16)[name = tensor("op_380")]; - tensor var_381 = const()[name = tensor("op_381"), val = tensor([1, 64, 12, 512])]; - tensor var_382 = reshape(shape = var_381, x = var_380)[name = tensor("op_382")]; - tensor tile_7 = const()[name = tensor("tile_7"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_383_axis_0 = const()[name = tensor("op_383_axis_0"), val = tensor(2)]; - tensor var_383_0, tensor var_383_1, tensor var_383_2, tensor var_383_3, tensor var_383_4, tensor var_383_5, tensor var_383_6, tensor var_383_7, tensor var_383_8, tensor var_383_9, tensor var_383_10, tensor var_383_11 = split(axis = var_383_axis_0, split_sizes = tile_7, x = var_364)[name = tensor("op_383")]; - tensor var_396_perm_0 = const()[name = tensor("op_396_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_8 = const()[name = tensor("tile_8"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_397_axis_0 = const()[name = tensor("op_397_axis_0"), val = tensor(2)]; - tensor transpose_10 = transpose(perm = var_396_perm_0, x = ks_3)[name = tensor("transpose_10")]; - tensor var_397_0, tensor var_397_1, tensor var_397_2, tensor var_397_3, tensor var_397_4, tensor var_397_5, tensor var_397_6, tensor var_397_7, tensor var_397_8, tensor var_397_9, tensor var_397_10, tensor var_397_11 = split(axis = var_397_axis_0, split_sizes = tile_8, x = transpose_10)[name = tensor("op_397")]; - tensor tile_9 = const()[name = tensor("tile_9"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_410_axis_0 = const()[name = tensor("op_410_axis_0"), val = tensor(2)]; - tensor var_410_0, tensor var_410_1, tensor var_410_2, tensor var_410_3, tensor var_410_4, tensor var_410_5, tensor var_410_6, tensor var_410_7, tensor var_410_8, tensor var_410_9, tensor var_410_10, tensor var_410_11 = split(axis = var_410_axis_0, split_sizes = tile_9, x = var_382)[name = tensor("op_410")]; - tensor var_424_equation_0 = const()[name = tensor("op_424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_424 = einsum(equation = var_424_equation_0, values = (var_397_0, var_383_0))[name = tensor("op_424")]; - tensor var_425_to_fp16 = const()[name = tensor("op_425_to_fp16"), val = tensor(0x1p-3)]; - tensor w_25_cast_fp16 = mul(x = var_424, y = var_425_to_fp16)[name = tensor("w_25_cast_fp16")]; - tensor var_428_equation_0 = const()[name = tensor("op_428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_428 = einsum(equation = var_428_equation_0, values = (var_397_1, var_383_1))[name = tensor("op_428")]; - tensor var_429_to_fp16 = const()[name = tensor("op_429_to_fp16"), val = tensor(0x1p-3)]; - tensor w_27_cast_fp16 = mul(x = var_428, y = var_429_to_fp16)[name = tensor("w_27_cast_fp16")]; - tensor var_432_equation_0 = const()[name = tensor("op_432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_432 = einsum(equation = var_432_equation_0, values = (var_397_2, var_383_2))[name = tensor("op_432")]; - tensor var_433_to_fp16 = const()[name = tensor("op_433_to_fp16"), val = tensor(0x1p-3)]; - tensor w_29_cast_fp16 = mul(x = var_432, y = var_433_to_fp16)[name = tensor("w_29_cast_fp16")]; - tensor var_436_equation_0 = const()[name = tensor("op_436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_436 = einsum(equation = var_436_equation_0, values = (var_397_3, var_383_3))[name = tensor("op_436")]; - tensor var_437_to_fp16 = const()[name = tensor("op_437_to_fp16"), val = tensor(0x1p-3)]; - tensor w_31_cast_fp16 = mul(x = var_436, y = var_437_to_fp16)[name = tensor("w_31_cast_fp16")]; - tensor var_440_equation_0 = const()[name = tensor("op_440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_440 = einsum(equation = var_440_equation_0, values = (var_397_4, var_383_4))[name = tensor("op_440")]; - tensor var_441_to_fp16 = const()[name = tensor("op_441_to_fp16"), val = tensor(0x1p-3)]; - tensor w_33_cast_fp16 = mul(x = var_440, y = var_441_to_fp16)[name = tensor("w_33_cast_fp16")]; - tensor var_444_equation_0 = const()[name = tensor("op_444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_444 = einsum(equation = var_444_equation_0, values = (var_397_5, var_383_5))[name = tensor("op_444")]; - tensor var_445_to_fp16 = const()[name = tensor("op_445_to_fp16"), val = tensor(0x1p-3)]; - tensor w_35_cast_fp16 = mul(x = var_444, y = var_445_to_fp16)[name = tensor("w_35_cast_fp16")]; - tensor var_448_equation_0 = const()[name = tensor("op_448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_448 = einsum(equation = var_448_equation_0, values = (var_397_6, var_383_6))[name = tensor("op_448")]; - tensor var_449_to_fp16 = const()[name = tensor("op_449_to_fp16"), val = tensor(0x1p-3)]; - tensor w_37_cast_fp16 = mul(x = var_448, y = var_449_to_fp16)[name = tensor("w_37_cast_fp16")]; - tensor var_452_equation_0 = const()[name = tensor("op_452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_452 = einsum(equation = var_452_equation_0, values = (var_397_7, var_383_7))[name = tensor("op_452")]; - tensor var_453_to_fp16 = const()[name = tensor("op_453_to_fp16"), val = tensor(0x1p-3)]; - tensor w_39_cast_fp16 = mul(x = var_452, y = var_453_to_fp16)[name = tensor("w_39_cast_fp16")]; - tensor var_456_equation_0 = const()[name = tensor("op_456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_456 = einsum(equation = var_456_equation_0, values = (var_397_8, var_383_8))[name = tensor("op_456")]; - tensor var_457_to_fp16 = const()[name = tensor("op_457_to_fp16"), val = tensor(0x1p-3)]; - tensor w_41_cast_fp16 = mul(x = var_456, y = var_457_to_fp16)[name = tensor("w_41_cast_fp16")]; - tensor var_460_equation_0 = const()[name = tensor("op_460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_460 = einsum(equation = var_460_equation_0, values = (var_397_9, var_383_9))[name = tensor("op_460")]; - tensor var_461_to_fp16 = const()[name = tensor("op_461_to_fp16"), val = tensor(0x1p-3)]; - tensor w_43_cast_fp16 = mul(x = var_460, y = var_461_to_fp16)[name = tensor("w_43_cast_fp16")]; - tensor var_464_equation_0 = const()[name = tensor("op_464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_464 = einsum(equation = var_464_equation_0, values = (var_397_10, var_383_10))[name = tensor("op_464")]; - tensor var_465_to_fp16 = const()[name = tensor("op_465_to_fp16"), val = tensor(0x1p-3)]; - tensor w_45_cast_fp16 = mul(x = var_464, y = var_465_to_fp16)[name = tensor("w_45_cast_fp16")]; - tensor var_468_equation_0 = const()[name = tensor("op_468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_468 = einsum(equation = var_468_equation_0, values = (var_397_11, var_383_11))[name = tensor("op_468")]; - tensor var_469_to_fp16 = const()[name = tensor("op_469_to_fp16"), val = tensor(0x1p-3)]; - tensor w_47_cast_fp16 = mul(x = var_468, y = var_469_to_fp16)[name = tensor("w_47_cast_fp16")]; - tensor var_471_cast_fp16 = add(x = w_25_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_471_cast_fp16")]; - tensor var_472_cast_fp16 = softmax(axis = var_331, x = var_471_cast_fp16)[name = tensor("op_472_cast_fp16")]; - tensor var_473_cast_fp16 = add(x = w_27_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_473_cast_fp16")]; - tensor var_474_cast_fp16 = softmax(axis = var_331, x = var_473_cast_fp16)[name = tensor("op_474_cast_fp16")]; - tensor var_475_cast_fp16 = add(x = w_29_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_475_cast_fp16")]; - tensor var_476_cast_fp16 = softmax(axis = var_331, x = var_475_cast_fp16)[name = tensor("op_476_cast_fp16")]; - tensor var_477_cast_fp16 = add(x = w_31_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_477_cast_fp16")]; - tensor var_478_cast_fp16 = softmax(axis = var_331, x = var_477_cast_fp16)[name = tensor("op_478_cast_fp16")]; - tensor var_479_cast_fp16 = add(x = w_33_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_479_cast_fp16")]; - tensor var_480_cast_fp16 = softmax(axis = var_331, x = var_479_cast_fp16)[name = tensor("op_480_cast_fp16")]; - tensor var_481_cast_fp16 = add(x = w_35_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_481_cast_fp16")]; - tensor var_482_cast_fp16 = softmax(axis = var_331, x = var_481_cast_fp16)[name = tensor("op_482_cast_fp16")]; - tensor var_483_cast_fp16 = add(x = w_37_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_483_cast_fp16")]; - tensor var_484_cast_fp16 = softmax(axis = var_331, x = var_483_cast_fp16)[name = tensor("op_484_cast_fp16")]; - tensor var_485_cast_fp16 = add(x = w_39_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_485_cast_fp16")]; - tensor var_486_cast_fp16 = softmax(axis = var_331, x = var_485_cast_fp16)[name = tensor("op_486_cast_fp16")]; - tensor var_487_cast_fp16 = add(x = w_41_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_487_cast_fp16")]; - tensor var_488_cast_fp16 = softmax(axis = var_331, x = var_487_cast_fp16)[name = tensor("op_488_cast_fp16")]; - tensor var_489_cast_fp16 = add(x = w_43_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_489_cast_fp16")]; - tensor var_490_cast_fp16 = softmax(axis = var_331, x = var_489_cast_fp16)[name = tensor("op_490_cast_fp16")]; - tensor var_491_cast_fp16 = add(x = w_45_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_491_cast_fp16")]; - tensor var_492_cast_fp16 = softmax(axis = var_331, x = var_491_cast_fp16)[name = tensor("op_492_cast_fp16")]; - tensor var_493_cast_fp16 = add(x = w_47_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_493_cast_fp16")]; - tensor var_494_cast_fp16 = softmax(axis = var_331, x = var_493_cast_fp16)[name = tensor("op_494_cast_fp16")]; + tensor ks_3_pad_type_0 = const()[name = tensor("ks_3_pad_type_0"), val = tensor("custom")]; + tensor ks_3_pad_0 = const()[name = tensor("ks_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_3 = conv(bias = layers_1_attention_k_proj_bias, dilations = var_367, groups = var_334, pad = ks_3_pad_0, pad_type = ks_3_pad_type_0, strides = var_365, weight = layers_1_attention_k_proj_weight, x = var_328_cast_fp16)[name = tensor("ks_3")]; + tensor var_372 = const()[name = tensor("op_372"), val = tensor([1, 1])]; + tensor var_374 = const()[name = tensor("op_374"), val = tensor([1, 1])]; + tensor var_376_pad_type_0 = const()[name = tensor("op_376_pad_type_0"), val = tensor("custom")]; + tensor var_376_pad_0 = const()[name = tensor("op_376_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_376 = conv(bias = layers_1_attention_v_proj_bias, dilations = var_374, groups = var_334, pad = var_376_pad_0, pad_type = var_376_pad_type_0, strides = var_372, weight = layers_1_attention_v_proj_weight, x = var_328_cast_fp16)[name = tensor("op_376")]; + tensor tile_7 = const()[name = tensor("tile_7"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_377_axis_0 = const()[name = tensor("op_377_axis_0"), val = tensor(1)]; + tensor var_377_0, tensor var_377_1, tensor var_377_2, tensor var_377_3, tensor var_377_4, tensor var_377_5, tensor var_377_6, tensor var_377_7, tensor var_377_8, tensor var_377_9, tensor var_377_10, tensor var_377_11 = split(axis = var_377_axis_0, split_sizes = tile_7, x = var_362)[name = tensor("op_377")]; + tensor var_390_perm_0 = const()[name = tensor("op_390_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_8 = const()[name = tensor("tile_8"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_391_axis_0 = const()[name = tensor("op_391_axis_0"), val = tensor(3)]; + tensor transpose_10 = transpose(perm = var_390_perm_0, x = ks_3)[name = tensor("transpose_10")]; + tensor var_391_0, tensor var_391_1, tensor var_391_2, tensor var_391_3, tensor var_391_4, tensor var_391_5, tensor var_391_6, tensor var_391_7, tensor var_391_8, tensor var_391_9, tensor var_391_10, tensor var_391_11 = split(axis = var_391_axis_0, split_sizes = tile_8, x = transpose_10)[name = tensor("op_391")]; + tensor tile_9 = const()[name = tensor("tile_9"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_404_axis_0 = const()[name = tensor("op_404_axis_0"), val = tensor(1)]; + tensor var_404_0, tensor var_404_1, tensor var_404_2, tensor var_404_3, tensor var_404_4, tensor var_404_5, tensor var_404_6, tensor var_404_7, tensor var_404_8, tensor var_404_9, tensor var_404_10, tensor var_404_11 = split(axis = var_404_axis_0, split_sizes = tile_9, x = var_376)[name = tensor("op_404")]; + tensor var_418_equation_0 = const()[name = tensor("op_418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_418 = einsum(equation = var_418_equation_0, values = (var_391_0, var_377_0))[name = tensor("op_418")]; + tensor var_419_to_fp16 = const()[name = tensor("op_419_to_fp16"), val = tensor(0x1p-3)]; + tensor w_25_cast_fp16 = mul(x = var_418, y = var_419_to_fp16)[name = tensor("w_25_cast_fp16")]; + tensor var_422_equation_0 = const()[name = tensor("op_422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_422 = einsum(equation = var_422_equation_0, values = (var_391_1, var_377_1))[name = tensor("op_422")]; + tensor var_423_to_fp16 = const()[name = tensor("op_423_to_fp16"), val = tensor(0x1p-3)]; + tensor w_27_cast_fp16 = mul(x = var_422, y = var_423_to_fp16)[name = tensor("w_27_cast_fp16")]; + tensor var_426_equation_0 = const()[name = tensor("op_426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_426 = einsum(equation = var_426_equation_0, values = (var_391_2, var_377_2))[name = tensor("op_426")]; + tensor var_427_to_fp16 = const()[name = tensor("op_427_to_fp16"), val = tensor(0x1p-3)]; + tensor w_29_cast_fp16 = mul(x = var_426, y = var_427_to_fp16)[name = tensor("w_29_cast_fp16")]; + tensor var_430_equation_0 = const()[name = tensor("op_430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_430 = einsum(equation = var_430_equation_0, values = (var_391_3, var_377_3))[name = tensor("op_430")]; + tensor var_431_to_fp16 = const()[name = tensor("op_431_to_fp16"), val = tensor(0x1p-3)]; + tensor w_31_cast_fp16 = mul(x = var_430, y = var_431_to_fp16)[name = tensor("w_31_cast_fp16")]; + tensor var_434_equation_0 = const()[name = tensor("op_434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_434 = einsum(equation = var_434_equation_0, values = (var_391_4, var_377_4))[name = tensor("op_434")]; + tensor var_435_to_fp16 = const()[name = tensor("op_435_to_fp16"), val = tensor(0x1p-3)]; + tensor w_33_cast_fp16 = mul(x = var_434, y = var_435_to_fp16)[name = tensor("w_33_cast_fp16")]; + tensor var_438_equation_0 = const()[name = tensor("op_438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_438 = einsum(equation = var_438_equation_0, values = (var_391_5, var_377_5))[name = tensor("op_438")]; + tensor var_439_to_fp16 = const()[name = tensor("op_439_to_fp16"), val = tensor(0x1p-3)]; + tensor w_35_cast_fp16 = mul(x = var_438, y = var_439_to_fp16)[name = tensor("w_35_cast_fp16")]; + tensor var_442_equation_0 = const()[name = tensor("op_442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_442 = einsum(equation = var_442_equation_0, values = (var_391_6, var_377_6))[name = tensor("op_442")]; + tensor var_443_to_fp16 = const()[name = tensor("op_443_to_fp16"), val = tensor(0x1p-3)]; + tensor w_37_cast_fp16 = mul(x = var_442, y = var_443_to_fp16)[name = tensor("w_37_cast_fp16")]; + tensor var_446_equation_0 = const()[name = tensor("op_446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_446 = einsum(equation = var_446_equation_0, values = (var_391_7, var_377_7))[name = tensor("op_446")]; + tensor var_447_to_fp16 = const()[name = tensor("op_447_to_fp16"), val = tensor(0x1p-3)]; + tensor w_39_cast_fp16 = mul(x = var_446, y = var_447_to_fp16)[name = tensor("w_39_cast_fp16")]; + tensor var_450_equation_0 = const()[name = tensor("op_450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_450 = einsum(equation = var_450_equation_0, values = (var_391_8, var_377_8))[name = tensor("op_450")]; + tensor var_451_to_fp16 = const()[name = tensor("op_451_to_fp16"), val = tensor(0x1p-3)]; + tensor w_41_cast_fp16 = mul(x = var_450, y = var_451_to_fp16)[name = tensor("w_41_cast_fp16")]; + tensor var_454_equation_0 = const()[name = tensor("op_454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_454 = einsum(equation = var_454_equation_0, values = (var_391_9, var_377_9))[name = tensor("op_454")]; + tensor var_455_to_fp16 = const()[name = tensor("op_455_to_fp16"), val = tensor(0x1p-3)]; + tensor w_43_cast_fp16 = mul(x = var_454, y = var_455_to_fp16)[name = tensor("w_43_cast_fp16")]; + tensor var_458_equation_0 = const()[name = tensor("op_458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_458 = einsum(equation = var_458_equation_0, values = (var_391_10, var_377_10))[name = tensor("op_458")]; + tensor var_459_to_fp16 = const()[name = tensor("op_459_to_fp16"), val = tensor(0x1p-3)]; + tensor w_45_cast_fp16 = mul(x = var_458, y = var_459_to_fp16)[name = tensor("w_45_cast_fp16")]; + tensor var_462_equation_0 = const()[name = tensor("op_462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_462 = einsum(equation = var_462_equation_0, values = (var_391_11, var_377_11))[name = tensor("op_462")]; + tensor var_463_to_fp16 = const()[name = tensor("op_463_to_fp16"), val = tensor(0x1p-3)]; + tensor w_47_cast_fp16 = mul(x = var_462, y = var_463_to_fp16)[name = tensor("w_47_cast_fp16")]; + tensor input_35_cast_fp16 = add(x = w_25_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_466_cast_fp16 = softmax(axis = var_334, x = input_35_cast_fp16)[name = tensor("op_466_cast_fp16")]; + tensor input_37_cast_fp16 = add(x = w_27_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor var_468_cast_fp16 = softmax(axis = var_334, x = input_37_cast_fp16)[name = tensor("op_468_cast_fp16")]; + tensor input_39_cast_fp16 = add(x = w_29_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_470_cast_fp16 = softmax(axis = var_334, x = input_39_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor input_41_cast_fp16 = add(x = w_31_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_472_cast_fp16 = softmax(axis = var_334, x = input_41_cast_fp16)[name = tensor("op_472_cast_fp16")]; + tensor input_43_cast_fp16 = add(x = w_33_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_474_cast_fp16 = softmax(axis = var_334, x = input_43_cast_fp16)[name = tensor("op_474_cast_fp16")]; + tensor input_45_cast_fp16 = add(x = w_35_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_476_cast_fp16 = softmax(axis = var_334, x = input_45_cast_fp16)[name = tensor("op_476_cast_fp16")]; + tensor input_47_cast_fp16 = add(x = w_37_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_478_cast_fp16 = softmax(axis = var_334, x = input_47_cast_fp16)[name = tensor("op_478_cast_fp16")]; + tensor input_49_cast_fp16 = add(x = w_39_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_480_cast_fp16 = softmax(axis = var_334, x = input_49_cast_fp16)[name = tensor("op_480_cast_fp16")]; + tensor input_51_cast_fp16 = add(x = w_41_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_482_cast_fp16 = softmax(axis = var_334, x = input_51_cast_fp16)[name = tensor("op_482_cast_fp16")]; + tensor input_53_cast_fp16 = add(x = w_43_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_484_cast_fp16 = softmax(axis = var_334, x = input_53_cast_fp16)[name = tensor("op_484_cast_fp16")]; + tensor input_55_cast_fp16 = add(x = w_45_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_486_cast_fp16 = softmax(axis = var_334, x = input_55_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor input_57_cast_fp16 = add(x = w_47_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor var_488_cast_fp16 = softmax(axis = var_334, x = input_57_cast_fp16)[name = tensor("op_488_cast_fp16")]; + tensor var_490_equation_0 = const()[name = tensor("op_490_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_490_cast_fp16 = einsum(equation = var_490_equation_0, values = (var_404_0, var_466_cast_fp16))[name = tensor("op_490_cast_fp16")]; + tensor var_492_equation_0 = const()[name = tensor("op_492_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_492_cast_fp16 = einsum(equation = var_492_equation_0, values = (var_404_1, var_468_cast_fp16))[name = tensor("op_492_cast_fp16")]; + tensor var_494_equation_0 = const()[name = tensor("op_494_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_494_cast_fp16 = einsum(equation = var_494_equation_0, values = (var_404_2, var_470_cast_fp16))[name = tensor("op_494_cast_fp16")]; tensor var_496_equation_0 = const()[name = tensor("op_496_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_410_0, var_472_cast_fp16))[name = tensor("op_496_cast_fp16")]; + tensor var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_404_3, var_472_cast_fp16))[name = tensor("op_496_cast_fp16")]; tensor var_498_equation_0 = const()[name = tensor("op_498_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_410_1, var_474_cast_fp16))[name = tensor("op_498_cast_fp16")]; + tensor var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_404_4, var_474_cast_fp16))[name = tensor("op_498_cast_fp16")]; tensor var_500_equation_0 = const()[name = tensor("op_500_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_410_2, var_476_cast_fp16))[name = tensor("op_500_cast_fp16")]; + tensor var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_404_5, var_476_cast_fp16))[name = tensor("op_500_cast_fp16")]; tensor var_502_equation_0 = const()[name = tensor("op_502_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_410_3, var_478_cast_fp16))[name = tensor("op_502_cast_fp16")]; + tensor var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_404_6, var_478_cast_fp16))[name = tensor("op_502_cast_fp16")]; tensor var_504_equation_0 = const()[name = tensor("op_504_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_410_4, var_480_cast_fp16))[name = tensor("op_504_cast_fp16")]; + tensor var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_404_7, var_480_cast_fp16))[name = tensor("op_504_cast_fp16")]; tensor var_506_equation_0 = const()[name = tensor("op_506_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_410_5, var_482_cast_fp16))[name = tensor("op_506_cast_fp16")]; + tensor var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_404_8, var_482_cast_fp16))[name = tensor("op_506_cast_fp16")]; tensor var_508_equation_0 = const()[name = tensor("op_508_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_410_6, var_484_cast_fp16))[name = tensor("op_508_cast_fp16")]; + tensor var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_404_9, var_484_cast_fp16))[name = tensor("op_508_cast_fp16")]; tensor var_510_equation_0 = const()[name = tensor("op_510_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_410_7, var_486_cast_fp16))[name = tensor("op_510_cast_fp16")]; + tensor var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_404_10, var_486_cast_fp16))[name = tensor("op_510_cast_fp16")]; tensor var_512_equation_0 = const()[name = tensor("op_512_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_410_8, var_488_cast_fp16))[name = tensor("op_512_cast_fp16")]; - tensor var_514_equation_0 = const()[name = tensor("op_514_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_514_cast_fp16 = einsum(equation = var_514_equation_0, values = (var_410_9, var_490_cast_fp16))[name = tensor("op_514_cast_fp16")]; - tensor var_516_equation_0 = const()[name = tensor("op_516_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_516_cast_fp16 = einsum(equation = var_516_equation_0, values = (var_410_10, var_492_cast_fp16))[name = tensor("op_516_cast_fp16")]; - tensor var_518_equation_0 = const()[name = tensor("op_518_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_518_cast_fp16 = einsum(equation = var_518_equation_0, values = (var_410_11, var_494_cast_fp16))[name = tensor("op_518_cast_fp16")]; - tensor var_520_interleave_0 = const()[name = tensor("op_520_interleave_0"), val = tensor(false)]; - tensor var_520_cast_fp16 = concat(axis = var_331, interleave = var_520_interleave_0, values = (var_496_cast_fp16, var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16, var_514_cast_fp16, var_516_cast_fp16, var_518_cast_fp16))[name = tensor("op_520_cast_fp16")]; - tensor var_524 = const()[name = tensor("op_524"), val = tensor([1, 1])]; - tensor var_526 = const()[name = tensor("op_526"), val = tensor([1, 1])]; - tensor var_528_pad_type_0 = const()[name = tensor("op_528_pad_type_0"), val = tensor("custom")]; - tensor var_528_pad_0 = const()[name = tensor("op_528_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_528 = conv(bias = layers_1_attention_o_proj_bias, dilations = var_526, groups = var_331, pad = var_528_pad_0, pad_type = var_528_pad_type_0, strides = var_524, weight = layers_1_attention_o_proj_weight, x = var_520_cast_fp16)[name = tensor("op_528")]; - tensor var_530_interleave_0 = const()[name = tensor("op_530_interleave_0"), val = tensor(false)]; - tensor var_530 = concat(axis = var_332, interleave = var_530_interleave_0, values = var_528)[name = tensor("op_530")]; - tensor x_13 = add(x = var_326_cast_fp16, y = var_530)[name = tensor("x_13")]; - tensor var_329_promoted = const()[name = tensor("op_329_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_330_promoted = const()[name = tensor("op_330_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_15 = clip(alpha = var_329_promoted, beta = var_330_promoted, x = x_13)[name = tensor("x_15")]; - tensor var_535 = const()[name = tensor("op_535"), val = tensor([1])]; - tensor mean_7 = reduce_mean(axes = var_535, keep_dims = var_333, x = x_15)[name = tensor("mean_7")]; + tensor var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_404_11, var_488_cast_fp16))[name = tensor("op_512_cast_fp16")]; + tensor var_514_interleave_0 = const()[name = tensor("op_514_interleave_0"), val = tensor(false)]; + tensor var_514_cast_fp16 = concat(axis = var_334, interleave = var_514_interleave_0, values = (var_490_cast_fp16, var_492_cast_fp16, var_494_cast_fp16, var_496_cast_fp16, var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16))[name = tensor("op_514_cast_fp16")]; + tensor var_518 = const()[name = tensor("op_518"), val = tensor([1, 1])]; + tensor var_520 = const()[name = tensor("op_520"), val = tensor([1, 1])]; + tensor var_522_pad_type_0 = const()[name = tensor("op_522_pad_type_0"), val = tensor("custom")]; + tensor var_522_pad_0 = const()[name = tensor("op_522_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_522 = conv(bias = layers_1_attention_o_proj_bias, dilations = var_520, groups = var_334, pad = var_522_pad_0, pad_type = var_522_pad_type_0, strides = var_518, weight = layers_1_attention_o_proj_weight, x = var_514_cast_fp16)[name = tensor("op_522")]; + tensor var_524_interleave_0 = const()[name = tensor("op_524_interleave_0"), val = tensor(false)]; + tensor var_524 = concat(axis = var_335, interleave = var_524_interleave_0, values = var_522)[name = tensor("op_524")]; + tensor x_13 = add(x = var_328_cast_fp16, y = var_524)[name = tensor("x_13")]; + tensor var_331_promoted = const()[name = tensor("op_331_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_332_promoted = const()[name = tensor("op_332_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_15 = clip(alpha = var_331_promoted, beta = var_332_promoted, x = x_13)[name = tensor("x_15")]; + tensor var_529 = const()[name = tensor("op_529"), val = tensor([1])]; + tensor mean_7 = reduce_mean(axes = var_529, keep_dims = var_336, x = x_15)[name = tensor("mean_7")]; tensor zero_mean_7 = sub(x = x_15, y = mean_7)[name = tensor("zero_mean_7")]; - tensor var_338_promoted = const()[name = tensor("op_338_promoted"), val = tensor(0x1p+1)]; - tensor var_538 = pow(x = zero_mean_7, y = var_338_promoted)[name = tensor("op_538")]; - tensor var_539 = const()[name = tensor("op_539"), val = tensor([1])]; - tensor var_540 = reduce_mean(axes = var_539, keep_dims = var_333, x = var_538)[name = tensor("op_540")]; - tensor var_541_to_fp16 = const()[name = tensor("op_541_to_fp16"), val = tensor(0x1p-24)]; - tensor var_542_cast_fp16 = add(x = var_540, y = var_541_to_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_333_promoted = const()[name = tensor("op_333_promoted"), val = tensor(0x1p+1)]; + tensor var_532 = pow(x = zero_mean_7, y = var_333_promoted)[name = tensor("op_532")]; + tensor var_533 = const()[name = tensor("op_533"), val = tensor([1])]; + tensor var_534 = reduce_mean(axes = var_533, keep_dims = var_336, x = var_532)[name = tensor("op_534")]; + tensor var_535_to_fp16 = const()[name = tensor("op_535_to_fp16"), val = tensor(0x1p-24)]; + tensor var_536_cast_fp16 = add(x = var_534, y = var_535_to_fp16)[name = tensor("op_536_cast_fp16")]; tensor denom_7_epsilon_0 = const()[name = tensor("denom_7_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0, x = var_542_cast_fp16)[name = tensor("denom_7_cast_fp16")]; - tensor var_544_cast_fp16 = mul(x = zero_mean_7, y = denom_7_cast_fp16)[name = tensor("op_544_cast_fp16")]; - tensor var_546_gamma_0_to_fp16 = const()[name = tensor("op_546_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218512128)))]; - tensor var_546_beta_0_to_fp16 = const()[name = tensor("op_546_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218513728)))]; - tensor var_546_epsilon_0_to_fp16 = const()[name = tensor("op_546_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_546_cast_fp16 = batch_norm(beta = var_546_beta_0_to_fp16, epsilon = var_546_epsilon_0_to_fp16, gamma = var_546_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_544_cast_fp16)[name = tensor("op_546_cast_fp16")]; - tensor var_552 = const()[name = tensor("op_552"), val = tensor([1, 1])]; + tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0, x = var_536_cast_fp16)[name = tensor("denom_7_cast_fp16")]; + tensor var_538_cast_fp16 = mul(x = zero_mean_7, y = denom_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; + tensor var_540_gamma_0_to_fp16 = const()[name = tensor("op_540_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218512128)))]; + tensor var_540_beta_0_to_fp16 = const()[name = tensor("op_540_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218513728)))]; + tensor var_540_epsilon_0_to_fp16 = const()[name = tensor("op_540_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_540_cast_fp16 = batch_norm(beta = var_540_beta_0_to_fp16, epsilon = var_540_epsilon_0_to_fp16, gamma = var_540_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_538_cast_fp16)[name = tensor("op_540_cast_fp16")]; + tensor var_546 = const()[name = tensor("op_546"), val = tensor([1, 1])]; + tensor var_548 = const()[name = tensor("op_548"), val = tensor([1, 1])]; + tensor var_550_pad_type_0 = const()[name = tensor("op_550_pad_type_0"), val = tensor("custom")]; + tensor var_550_pad_0 = const()[name = tensor("op_550_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_550 = conv(bias = layers_1_mlp_fc1_bias, dilations = var_548, groups = var_334, pad = var_550_pad_0, pad_type = var_550_pad_type_0, strides = var_546, weight = layers_1_mlp_fc1_weight, x = var_540_cast_fp16)[name = tensor("op_550")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63 = gelu(mode = input_63_mode_0, x = var_550)[name = tensor("input_63")]; tensor var_554 = const()[name = tensor("op_554"), val = tensor([1, 1])]; - tensor var_556_pad_type_0 = const()[name = tensor("op_556_pad_type_0"), val = tensor("custom")]; - tensor var_556_pad_0 = const()[name = tensor("op_556_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_556 = conv(bias = layers_1_mlp_fc1_bias, dilations = var_554, groups = var_331, pad = var_556_pad_0, pad_type = var_556_pad_type_0, strides = var_552, weight = layers_1_mlp_fc1_weight, x = var_546_cast_fp16)[name = tensor("op_556")]; - tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; - tensor input_15 = gelu(mode = input_15_mode_0, x = var_556)[name = tensor("input_15")]; - tensor var_560 = const()[name = tensor("op_560"), val = tensor([1, 1])]; - tensor var_562 = const()[name = tensor("op_562"), val = tensor([1, 1])]; - tensor var_564_pad_type_0 = const()[name = tensor("op_564_pad_type_0"), val = tensor("custom")]; - tensor var_564_pad_0 = const()[name = tensor("op_564_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_564 = conv(bias = layers_1_mlp_fc2_bias, dilations = var_562, groups = var_331, pad = var_564_pad_0, pad_type = var_564_pad_type_0, strides = var_560, weight = layers_1_mlp_fc2_weight, x = input_15)[name = tensor("op_564")]; - tensor x_17 = add(x = var_546_cast_fp16, y = var_564)[name = tensor("x_17")]; - tensor var_329_promoted_1 = const()[name = tensor("op_329_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_330_promoted_1 = const()[name = tensor("op_330_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_19 = clip(alpha = var_329_promoted_1, beta = var_330_promoted_1, x = x_17)[name = tensor("x_19")]; - tensor var_569 = const()[name = tensor("op_569"), val = tensor([1])]; - tensor mean_9 = reduce_mean(axes = var_569, keep_dims = var_333, x = x_19)[name = tensor("mean_9")]; + tensor var_556 = const()[name = tensor("op_556"), val = tensor([1, 1])]; + tensor var_558_pad_type_0 = const()[name = tensor("op_558_pad_type_0"), val = tensor("custom")]; + tensor var_558_pad_0 = const()[name = tensor("op_558_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_558 = conv(bias = layers_1_mlp_fc2_bias, dilations = var_556, groups = var_334, pad = var_558_pad_0, pad_type = var_558_pad_type_0, strides = var_554, weight = layers_1_mlp_fc2_weight, x = input_63)[name = tensor("op_558")]; + tensor x_17 = add(x = var_540_cast_fp16, y = var_558)[name = tensor("x_17")]; + tensor var_331_promoted_1 = const()[name = tensor("op_331_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_332_promoted_1 = const()[name = tensor("op_332_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_19 = clip(alpha = var_331_promoted_1, beta = var_332_promoted_1, x = x_17)[name = tensor("x_19")]; + tensor var_563 = const()[name = tensor("op_563"), val = tensor([1])]; + tensor mean_9 = reduce_mean(axes = var_563, keep_dims = var_336, x = x_19)[name = tensor("mean_9")]; tensor zero_mean_9 = sub(x = x_19, y = mean_9)[name = tensor("zero_mean_9")]; - tensor var_338_promoted_1 = const()[name = tensor("op_338_promoted_1"), val = tensor(0x1p+1)]; - tensor var_572 = pow(x = zero_mean_9, y = var_338_promoted_1)[name = tensor("op_572")]; - tensor var_573 = const()[name = tensor("op_573"), val = tensor([1])]; - tensor var_574 = reduce_mean(axes = var_573, keep_dims = var_333, x = var_572)[name = tensor("op_574")]; - tensor var_575_to_fp16 = const()[name = tensor("op_575_to_fp16"), val = tensor(0x1p-24)]; - tensor var_576_cast_fp16 = add(x = var_574, y = var_575_to_fp16)[name = tensor("op_576_cast_fp16")]; + tensor var_333_promoted_1 = const()[name = tensor("op_333_promoted_1"), val = tensor(0x1p+1)]; + tensor var_566 = pow(x = zero_mean_9, y = var_333_promoted_1)[name = tensor("op_566")]; + tensor var_567 = const()[name = tensor("op_567"), val = tensor([1])]; + tensor var_568 = reduce_mean(axes = var_567, keep_dims = var_336, x = var_566)[name = tensor("op_568")]; + tensor var_569_to_fp16 = const()[name = tensor("op_569_to_fp16"), val = tensor(0x1p-24)]; + tensor var_570_cast_fp16 = add(x = var_568, y = var_569_to_fp16)[name = tensor("op_570_cast_fp16")]; tensor denom_9_epsilon_0 = const()[name = tensor("denom_9_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0, x = var_576_cast_fp16)[name = tensor("denom_9_cast_fp16")]; - tensor var_578_cast_fp16 = mul(x = zero_mean_9, y = denom_9_cast_fp16)[name = tensor("op_578_cast_fp16")]; - tensor var_580_gamma_0_to_fp16 = const()[name = tensor("op_580_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218515328)))]; - tensor var_580_beta_0_to_fp16 = const()[name = tensor("op_580_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218516928)))]; - tensor var_580_epsilon_0_to_fp16 = const()[name = tensor("op_580_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_580_cast_fp16 = batch_norm(beta = var_580_beta_0_to_fp16, epsilon = var_580_epsilon_0_to_fp16, gamma = var_580_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_578_cast_fp16)[name = tensor("op_580_cast_fp16")]; - tensor var_585 = const()[name = tensor("op_585"), val = tensor(1)]; - tensor var_586 = const()[name = tensor("op_586"), val = tensor(0)]; - tensor var_587 = const()[name = tensor("op_587"), val = tensor(true)]; - tensor var_612 = const()[name = tensor("op_612"), val = tensor([1, 1])]; - tensor var_614 = const()[name = tensor("op_614"), val = tensor([1, 1])]; - tensor var_616_pad_type_0 = const()[name = tensor("op_616_pad_type_0"), val = tensor("custom")]; - tensor var_616_pad_0 = const()[name = tensor("op_616_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_616 = conv(bias = layers_2_attention_q_proj_bias, dilations = var_614, groups = var_585, pad = var_616_pad_0, pad_type = var_616_pad_type_0, strides = var_612, weight = layers_2_attention_q_proj_weight, x = var_580_cast_fp16)[name = tensor("op_616")]; - tensor var_617 = const()[name = tensor("op_617"), val = tensor([1, 64, 12, 512])]; - tensor var_618 = reshape(shape = var_617, x = var_616)[name = tensor("op_618")]; - tensor var_621 = const()[name = tensor("op_621"), val = tensor([1, 1])]; - tensor var_623 = const()[name = tensor("op_623"), val = tensor([1, 1])]; - tensor var_625_pad_type_0 = const()[name = tensor("op_625_pad_type_0"), val = tensor("custom")]; - tensor var_625_pad_0 = const()[name = tensor("op_625_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_625 = conv(bias = layers_2_attention_k_proj_bias, dilations = var_623, groups = var_585, pad = var_625_pad_0, pad_type = var_625_pad_type_0, strides = var_621, weight = layers_2_attention_k_proj_weight, x = var_580_cast_fp16)[name = tensor("op_625")]; - tensor var_626 = const()[name = tensor("op_626"), val = tensor([1, 64, 12, 512])]; - tensor ks_5 = reshape(shape = var_626, x = var_625)[name = tensor("ks_5")]; - tensor var_630 = const()[name = tensor("op_630"), val = tensor([1, 1])]; - tensor var_632 = const()[name = tensor("op_632"), val = tensor([1, 1])]; - tensor var_634_pad_type_0 = const()[name = tensor("op_634_pad_type_0"), val = tensor("custom")]; - tensor var_634_pad_0 = const()[name = tensor("op_634_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_634 = conv(bias = layers_2_attention_v_proj_bias, dilations = var_632, groups = var_585, pad = var_634_pad_0, pad_type = var_634_pad_type_0, strides = var_630, weight = layers_2_attention_v_proj_weight, x = var_580_cast_fp16)[name = tensor("op_634")]; - tensor var_635 = const()[name = tensor("op_635"), val = tensor([1, 64, 12, 512])]; - tensor var_636 = reshape(shape = var_635, x = var_634)[name = tensor("op_636")]; - tensor tile_12 = const()[name = tensor("tile_12"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_637_axis_0 = const()[name = tensor("op_637_axis_0"), val = tensor(2)]; - tensor var_637_0, tensor var_637_1, tensor var_637_2, tensor var_637_3, tensor var_637_4, tensor var_637_5, tensor var_637_6, tensor var_637_7, tensor var_637_8, tensor var_637_9, tensor var_637_10, tensor var_637_11 = split(axis = var_637_axis_0, split_sizes = tile_12, x = var_618)[name = tensor("op_637")]; - tensor var_650_perm_0 = const()[name = tensor("op_650_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_13 = const()[name = tensor("tile_13"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_651_axis_0 = const()[name = tensor("op_651_axis_0"), val = tensor(2)]; - tensor transpose_9 = transpose(perm = var_650_perm_0, x = ks_5)[name = tensor("transpose_9")]; - tensor var_651_0, tensor var_651_1, tensor var_651_2, tensor var_651_3, tensor var_651_4, tensor var_651_5, tensor var_651_6, tensor var_651_7, tensor var_651_8, tensor var_651_9, tensor var_651_10, tensor var_651_11 = split(axis = var_651_axis_0, split_sizes = tile_13, x = transpose_9)[name = tensor("op_651")]; - tensor tile_14 = const()[name = tensor("tile_14"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_664_axis_0 = const()[name = tensor("op_664_axis_0"), val = tensor(2)]; - tensor var_664_0, tensor var_664_1, tensor var_664_2, tensor var_664_3, tensor var_664_4, tensor var_664_5, tensor var_664_6, tensor var_664_7, tensor var_664_8, tensor var_664_9, tensor var_664_10, tensor var_664_11 = split(axis = var_664_axis_0, split_sizes = tile_14, x = var_636)[name = tensor("op_664")]; - tensor var_678_equation_0 = const()[name = tensor("op_678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_678 = einsum(equation = var_678_equation_0, values = (var_651_0, var_637_0))[name = tensor("op_678")]; - tensor var_679_to_fp16 = const()[name = tensor("op_679_to_fp16"), val = tensor(0x1p-3)]; - tensor w_49_cast_fp16 = mul(x = var_678, y = var_679_to_fp16)[name = tensor("w_49_cast_fp16")]; - tensor var_682_equation_0 = const()[name = tensor("op_682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_682 = einsum(equation = var_682_equation_0, values = (var_651_1, var_637_1))[name = tensor("op_682")]; - tensor var_683_to_fp16 = const()[name = tensor("op_683_to_fp16"), val = tensor(0x1p-3)]; - tensor w_51_cast_fp16 = mul(x = var_682, y = var_683_to_fp16)[name = tensor("w_51_cast_fp16")]; - tensor var_686_equation_0 = const()[name = tensor("op_686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_686 = einsum(equation = var_686_equation_0, values = (var_651_2, var_637_2))[name = tensor("op_686")]; - tensor var_687_to_fp16 = const()[name = tensor("op_687_to_fp16"), val = tensor(0x1p-3)]; - tensor w_53_cast_fp16 = mul(x = var_686, y = var_687_to_fp16)[name = tensor("w_53_cast_fp16")]; - tensor var_690_equation_0 = const()[name = tensor("op_690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_690 = einsum(equation = var_690_equation_0, values = (var_651_3, var_637_3))[name = tensor("op_690")]; - tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(0x1p-3)]; - tensor w_55_cast_fp16 = mul(x = var_690, y = var_691_to_fp16)[name = tensor("w_55_cast_fp16")]; - tensor var_694_equation_0 = const()[name = tensor("op_694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_694 = einsum(equation = var_694_equation_0, values = (var_651_4, var_637_4))[name = tensor("op_694")]; - tensor var_695_to_fp16 = const()[name = tensor("op_695_to_fp16"), val = tensor(0x1p-3)]; - tensor w_57_cast_fp16 = mul(x = var_694, y = var_695_to_fp16)[name = tensor("w_57_cast_fp16")]; - tensor var_698_equation_0 = const()[name = tensor("op_698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_698 = einsum(equation = var_698_equation_0, values = (var_651_5, var_637_5))[name = tensor("op_698")]; - tensor var_699_to_fp16 = const()[name = tensor("op_699_to_fp16"), val = tensor(0x1p-3)]; - tensor w_59_cast_fp16 = mul(x = var_698, y = var_699_to_fp16)[name = tensor("w_59_cast_fp16")]; - tensor var_702_equation_0 = const()[name = tensor("op_702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_702 = einsum(equation = var_702_equation_0, values = (var_651_6, var_637_6))[name = tensor("op_702")]; - tensor var_703_to_fp16 = const()[name = tensor("op_703_to_fp16"), val = tensor(0x1p-3)]; - tensor w_61_cast_fp16 = mul(x = var_702, y = var_703_to_fp16)[name = tensor("w_61_cast_fp16")]; - tensor var_706_equation_0 = const()[name = tensor("op_706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_706 = einsum(equation = var_706_equation_0, values = (var_651_7, var_637_7))[name = tensor("op_706")]; - tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(0x1p-3)]; - tensor w_63_cast_fp16 = mul(x = var_706, y = var_707_to_fp16)[name = tensor("w_63_cast_fp16")]; - tensor var_710_equation_0 = const()[name = tensor("op_710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_710 = einsum(equation = var_710_equation_0, values = (var_651_8, var_637_8))[name = tensor("op_710")]; - tensor var_711_to_fp16 = const()[name = tensor("op_711_to_fp16"), val = tensor(0x1p-3)]; - tensor w_65_cast_fp16 = mul(x = var_710, y = var_711_to_fp16)[name = tensor("w_65_cast_fp16")]; - tensor var_714_equation_0 = const()[name = tensor("op_714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_714 = einsum(equation = var_714_equation_0, values = (var_651_9, var_637_9))[name = tensor("op_714")]; - tensor var_715_to_fp16 = const()[name = tensor("op_715_to_fp16"), val = tensor(0x1p-3)]; - tensor w_67_cast_fp16 = mul(x = var_714, y = var_715_to_fp16)[name = tensor("w_67_cast_fp16")]; - tensor var_718_equation_0 = const()[name = tensor("op_718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_718 = einsum(equation = var_718_equation_0, values = (var_651_10, var_637_10))[name = tensor("op_718")]; - tensor var_719_to_fp16 = const()[name = tensor("op_719_to_fp16"), val = tensor(0x1p-3)]; - tensor w_69_cast_fp16 = mul(x = var_718, y = var_719_to_fp16)[name = tensor("w_69_cast_fp16")]; - tensor var_722_equation_0 = const()[name = tensor("op_722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_722 = einsum(equation = var_722_equation_0, values = (var_651_11, var_637_11))[name = tensor("op_722")]; - tensor var_723_to_fp16 = const()[name = tensor("op_723_to_fp16"), val = tensor(0x1p-3)]; - tensor w_71_cast_fp16 = mul(x = var_722, y = var_723_to_fp16)[name = tensor("w_71_cast_fp16")]; - tensor var_725_cast_fp16 = add(x = w_49_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_725_cast_fp16")]; - tensor var_726_cast_fp16 = softmax(axis = var_585, x = var_725_cast_fp16)[name = tensor("op_726_cast_fp16")]; - tensor var_727_cast_fp16 = add(x = w_51_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_727_cast_fp16")]; - tensor var_728_cast_fp16 = softmax(axis = var_585, x = var_727_cast_fp16)[name = tensor("op_728_cast_fp16")]; - tensor var_729_cast_fp16 = add(x = w_53_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_729_cast_fp16")]; - tensor var_730_cast_fp16 = softmax(axis = var_585, x = var_729_cast_fp16)[name = tensor("op_730_cast_fp16")]; - tensor var_731_cast_fp16 = add(x = w_55_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_731_cast_fp16")]; - tensor var_732_cast_fp16 = softmax(axis = var_585, x = var_731_cast_fp16)[name = tensor("op_732_cast_fp16")]; - tensor var_733_cast_fp16 = add(x = w_57_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_733_cast_fp16")]; - tensor var_734_cast_fp16 = softmax(axis = var_585, x = var_733_cast_fp16)[name = tensor("op_734_cast_fp16")]; - tensor var_735_cast_fp16 = add(x = w_59_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_735_cast_fp16")]; - tensor var_736_cast_fp16 = softmax(axis = var_585, x = var_735_cast_fp16)[name = tensor("op_736_cast_fp16")]; - tensor var_737_cast_fp16 = add(x = w_61_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_737_cast_fp16")]; - tensor var_738_cast_fp16 = softmax(axis = var_585, x = var_737_cast_fp16)[name = tensor("op_738_cast_fp16")]; - tensor var_739_cast_fp16 = add(x = w_63_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_739_cast_fp16")]; - tensor var_740_cast_fp16 = softmax(axis = var_585, x = var_739_cast_fp16)[name = tensor("op_740_cast_fp16")]; - tensor var_741_cast_fp16 = add(x = w_65_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_741_cast_fp16")]; - tensor var_742_cast_fp16 = softmax(axis = var_585, x = var_741_cast_fp16)[name = tensor("op_742_cast_fp16")]; - tensor var_743_cast_fp16 = add(x = w_67_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_743_cast_fp16")]; - tensor var_744_cast_fp16 = softmax(axis = var_585, x = var_743_cast_fp16)[name = tensor("op_744_cast_fp16")]; - tensor var_745_cast_fp16 = add(x = w_69_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_745_cast_fp16")]; - tensor var_746_cast_fp16 = softmax(axis = var_585, x = var_745_cast_fp16)[name = tensor("op_746_cast_fp16")]; - tensor var_747_cast_fp16 = add(x = w_71_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_747_cast_fp16")]; - tensor var_748_cast_fp16 = softmax(axis = var_585, x = var_747_cast_fp16)[name = tensor("op_748_cast_fp16")]; + tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0, x = var_570_cast_fp16)[name = tensor("denom_9_cast_fp16")]; + tensor var_572_cast_fp16 = mul(x = zero_mean_9, y = denom_9_cast_fp16)[name = tensor("op_572_cast_fp16")]; + tensor var_574_gamma_0_to_fp16 = const()[name = tensor("op_574_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218515328)))]; + tensor var_574_beta_0_to_fp16 = const()[name = tensor("op_574_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218516928)))]; + tensor var_574_epsilon_0_to_fp16 = const()[name = tensor("op_574_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_574_cast_fp16 = batch_norm(beta = var_574_beta_0_to_fp16, epsilon = var_574_epsilon_0_to_fp16, gamma = var_574_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_572_cast_fp16)[name = tensor("op_574_cast_fp16")]; + tensor var_580 = const()[name = tensor("op_580"), val = tensor(1)]; + tensor var_581 = const()[name = tensor("op_581"), val = tensor(0)]; + tensor var_582 = const()[name = tensor("op_582"), val = tensor(true)]; + tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; + tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; + tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; + tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_608 = conv(bias = layers_2_attention_q_proj_bias, dilations = var_606, groups = var_580, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = layers_2_attention_q_proj_weight, x = var_574_cast_fp16)[name = tensor("op_608")]; + tensor var_611 = const()[name = tensor("op_611"), val = tensor([1, 1])]; + tensor var_613 = const()[name = tensor("op_613"), val = tensor([1, 1])]; + tensor ks_5_pad_type_0 = const()[name = tensor("ks_5_pad_type_0"), val = tensor("custom")]; + tensor ks_5_pad_0 = const()[name = tensor("ks_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_5 = conv(bias = layers_2_attention_k_proj_bias, dilations = var_613, groups = var_580, pad = ks_5_pad_0, pad_type = ks_5_pad_type_0, strides = var_611, weight = layers_2_attention_k_proj_weight, x = var_574_cast_fp16)[name = tensor("ks_5")]; + tensor var_618 = const()[name = tensor("op_618"), val = tensor([1, 1])]; + tensor var_620 = const()[name = tensor("op_620"), val = tensor([1, 1])]; + tensor var_622_pad_type_0 = const()[name = tensor("op_622_pad_type_0"), val = tensor("custom")]; + tensor var_622_pad_0 = const()[name = tensor("op_622_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_622 = conv(bias = layers_2_attention_v_proj_bias, dilations = var_620, groups = var_580, pad = var_622_pad_0, pad_type = var_622_pad_type_0, strides = var_618, weight = layers_2_attention_v_proj_weight, x = var_574_cast_fp16)[name = tensor("op_622")]; + tensor tile_12 = const()[name = tensor("tile_12"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_623_axis_0 = const()[name = tensor("op_623_axis_0"), val = tensor(1)]; + tensor var_623_0, tensor var_623_1, tensor var_623_2, tensor var_623_3, tensor var_623_4, tensor var_623_5, tensor var_623_6, tensor var_623_7, tensor var_623_8, tensor var_623_9, tensor var_623_10, tensor var_623_11 = split(axis = var_623_axis_0, split_sizes = tile_12, x = var_608)[name = tensor("op_623")]; + tensor var_636_perm_0 = const()[name = tensor("op_636_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_13 = const()[name = tensor("tile_13"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_637_axis_0 = const()[name = tensor("op_637_axis_0"), val = tensor(3)]; + tensor transpose_9 = transpose(perm = var_636_perm_0, x = ks_5)[name = tensor("transpose_9")]; + tensor var_637_0, tensor var_637_1, tensor var_637_2, tensor var_637_3, tensor var_637_4, tensor var_637_5, tensor var_637_6, tensor var_637_7, tensor var_637_8, tensor var_637_9, tensor var_637_10, tensor var_637_11 = split(axis = var_637_axis_0, split_sizes = tile_13, x = transpose_9)[name = tensor("op_637")]; + tensor tile_14 = const()[name = tensor("tile_14"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_650_axis_0 = const()[name = tensor("op_650_axis_0"), val = tensor(1)]; + tensor var_650_0, tensor var_650_1, tensor var_650_2, tensor var_650_3, tensor var_650_4, tensor var_650_5, tensor var_650_6, tensor var_650_7, tensor var_650_8, tensor var_650_9, tensor var_650_10, tensor var_650_11 = split(axis = var_650_axis_0, split_sizes = tile_14, x = var_622)[name = tensor("op_650")]; + tensor var_664_equation_0 = const()[name = tensor("op_664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_664 = einsum(equation = var_664_equation_0, values = (var_637_0, var_623_0))[name = tensor("op_664")]; + tensor var_665_to_fp16 = const()[name = tensor("op_665_to_fp16"), val = tensor(0x1p-3)]; + tensor w_49_cast_fp16 = mul(x = var_664, y = var_665_to_fp16)[name = tensor("w_49_cast_fp16")]; + tensor var_668_equation_0 = const()[name = tensor("op_668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_668 = einsum(equation = var_668_equation_0, values = (var_637_1, var_623_1))[name = tensor("op_668")]; + tensor var_669_to_fp16 = const()[name = tensor("op_669_to_fp16"), val = tensor(0x1p-3)]; + tensor w_51_cast_fp16 = mul(x = var_668, y = var_669_to_fp16)[name = tensor("w_51_cast_fp16")]; + tensor var_672_equation_0 = const()[name = tensor("op_672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_672 = einsum(equation = var_672_equation_0, values = (var_637_2, var_623_2))[name = tensor("op_672")]; + tensor var_673_to_fp16 = const()[name = tensor("op_673_to_fp16"), val = tensor(0x1p-3)]; + tensor w_53_cast_fp16 = mul(x = var_672, y = var_673_to_fp16)[name = tensor("w_53_cast_fp16")]; + tensor var_676_equation_0 = const()[name = tensor("op_676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_676 = einsum(equation = var_676_equation_0, values = (var_637_3, var_623_3))[name = tensor("op_676")]; + tensor var_677_to_fp16 = const()[name = tensor("op_677_to_fp16"), val = tensor(0x1p-3)]; + tensor w_55_cast_fp16 = mul(x = var_676, y = var_677_to_fp16)[name = tensor("w_55_cast_fp16")]; + tensor var_680_equation_0 = const()[name = tensor("op_680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_680 = einsum(equation = var_680_equation_0, values = (var_637_4, var_623_4))[name = tensor("op_680")]; + tensor var_681_to_fp16 = const()[name = tensor("op_681_to_fp16"), val = tensor(0x1p-3)]; + tensor w_57_cast_fp16 = mul(x = var_680, y = var_681_to_fp16)[name = tensor("w_57_cast_fp16")]; + tensor var_684_equation_0 = const()[name = tensor("op_684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_684 = einsum(equation = var_684_equation_0, values = (var_637_5, var_623_5))[name = tensor("op_684")]; + tensor var_685_to_fp16 = const()[name = tensor("op_685_to_fp16"), val = tensor(0x1p-3)]; + tensor w_59_cast_fp16 = mul(x = var_684, y = var_685_to_fp16)[name = tensor("w_59_cast_fp16")]; + tensor var_688_equation_0 = const()[name = tensor("op_688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_688 = einsum(equation = var_688_equation_0, values = (var_637_6, var_623_6))[name = tensor("op_688")]; + tensor var_689_to_fp16 = const()[name = tensor("op_689_to_fp16"), val = tensor(0x1p-3)]; + tensor w_61_cast_fp16 = mul(x = var_688, y = var_689_to_fp16)[name = tensor("w_61_cast_fp16")]; + tensor var_692_equation_0 = const()[name = tensor("op_692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_692 = einsum(equation = var_692_equation_0, values = (var_637_7, var_623_7))[name = tensor("op_692")]; + tensor var_693_to_fp16 = const()[name = tensor("op_693_to_fp16"), val = tensor(0x1p-3)]; + tensor w_63_cast_fp16 = mul(x = var_692, y = var_693_to_fp16)[name = tensor("w_63_cast_fp16")]; + tensor var_696_equation_0 = const()[name = tensor("op_696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_696 = einsum(equation = var_696_equation_0, values = (var_637_8, var_623_8))[name = tensor("op_696")]; + tensor var_697_to_fp16 = const()[name = tensor("op_697_to_fp16"), val = tensor(0x1p-3)]; + tensor w_65_cast_fp16 = mul(x = var_696, y = var_697_to_fp16)[name = tensor("w_65_cast_fp16")]; + tensor var_700_equation_0 = const()[name = tensor("op_700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_700 = einsum(equation = var_700_equation_0, values = (var_637_9, var_623_9))[name = tensor("op_700")]; + tensor var_701_to_fp16 = const()[name = tensor("op_701_to_fp16"), val = tensor(0x1p-3)]; + tensor w_67_cast_fp16 = mul(x = var_700, y = var_701_to_fp16)[name = tensor("w_67_cast_fp16")]; + tensor var_704_equation_0 = const()[name = tensor("op_704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_704 = einsum(equation = var_704_equation_0, values = (var_637_10, var_623_10))[name = tensor("op_704")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1p-3)]; + tensor w_69_cast_fp16 = mul(x = var_704, y = var_705_to_fp16)[name = tensor("w_69_cast_fp16")]; + tensor var_708_equation_0 = const()[name = tensor("op_708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_708 = einsum(equation = var_708_equation_0, values = (var_637_11, var_623_11))[name = tensor("op_708")]; + tensor var_709_to_fp16 = const()[name = tensor("op_709_to_fp16"), val = tensor(0x1p-3)]; + tensor w_71_cast_fp16 = mul(x = var_708, y = var_709_to_fp16)[name = tensor("w_71_cast_fp16")]; + tensor input_67_cast_fp16 = add(x = w_49_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_712_cast_fp16 = softmax(axis = var_580, x = input_67_cast_fp16)[name = tensor("op_712_cast_fp16")]; + tensor input_69_cast_fp16 = add(x = w_51_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor var_714_cast_fp16 = softmax(axis = var_580, x = input_69_cast_fp16)[name = tensor("op_714_cast_fp16")]; + tensor input_71_cast_fp16 = add(x = w_53_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_716_cast_fp16 = softmax(axis = var_580, x = input_71_cast_fp16)[name = tensor("op_716_cast_fp16")]; + tensor input_73_cast_fp16 = add(x = w_55_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor var_718_cast_fp16 = softmax(axis = var_580, x = input_73_cast_fp16)[name = tensor("op_718_cast_fp16")]; + tensor input_75_cast_fp16 = add(x = w_57_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_720_cast_fp16 = softmax(axis = var_580, x = input_75_cast_fp16)[name = tensor("op_720_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = w_59_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor var_722_cast_fp16 = softmax(axis = var_580, x = input_77_cast_fp16)[name = tensor("op_722_cast_fp16")]; + tensor input_79_cast_fp16 = add(x = w_61_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_724_cast_fp16 = softmax(axis = var_580, x = input_79_cast_fp16)[name = tensor("op_724_cast_fp16")]; + tensor input_81_cast_fp16 = add(x = w_63_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_726_cast_fp16 = softmax(axis = var_580, x = input_81_cast_fp16)[name = tensor("op_726_cast_fp16")]; + tensor input_83_cast_fp16 = add(x = w_65_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_728_cast_fp16 = softmax(axis = var_580, x = input_83_cast_fp16)[name = tensor("op_728_cast_fp16")]; + tensor input_85_cast_fp16 = add(x = w_67_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor var_730_cast_fp16 = softmax(axis = var_580, x = input_85_cast_fp16)[name = tensor("op_730_cast_fp16")]; + tensor input_87_cast_fp16 = add(x = w_69_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_732_cast_fp16 = softmax(axis = var_580, x = input_87_cast_fp16)[name = tensor("op_732_cast_fp16")]; + tensor input_89_cast_fp16 = add(x = w_71_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_734_cast_fp16 = softmax(axis = var_580, x = input_89_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor var_736_equation_0 = const()[name = tensor("op_736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_736_cast_fp16 = einsum(equation = var_736_equation_0, values = (var_650_0, var_712_cast_fp16))[name = tensor("op_736_cast_fp16")]; + tensor var_738_equation_0 = const()[name = tensor("op_738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_738_cast_fp16 = einsum(equation = var_738_equation_0, values = (var_650_1, var_714_cast_fp16))[name = tensor("op_738_cast_fp16")]; + tensor var_740_equation_0 = const()[name = tensor("op_740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_740_cast_fp16 = einsum(equation = var_740_equation_0, values = (var_650_2, var_716_cast_fp16))[name = tensor("op_740_cast_fp16")]; + tensor var_742_equation_0 = const()[name = tensor("op_742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_742_cast_fp16 = einsum(equation = var_742_equation_0, values = (var_650_3, var_718_cast_fp16))[name = tensor("op_742_cast_fp16")]; + tensor var_744_equation_0 = const()[name = tensor("op_744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_744_cast_fp16 = einsum(equation = var_744_equation_0, values = (var_650_4, var_720_cast_fp16))[name = tensor("op_744_cast_fp16")]; + tensor var_746_equation_0 = const()[name = tensor("op_746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_746_cast_fp16 = einsum(equation = var_746_equation_0, values = (var_650_5, var_722_cast_fp16))[name = tensor("op_746_cast_fp16")]; + tensor var_748_equation_0 = const()[name = tensor("op_748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_748_cast_fp16 = einsum(equation = var_748_equation_0, values = (var_650_6, var_724_cast_fp16))[name = tensor("op_748_cast_fp16")]; tensor var_750_equation_0 = const()[name = tensor("op_750_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_750_cast_fp16 = einsum(equation = var_750_equation_0, values = (var_664_0, var_726_cast_fp16))[name = tensor("op_750_cast_fp16")]; + tensor var_750_cast_fp16 = einsum(equation = var_750_equation_0, values = (var_650_7, var_726_cast_fp16))[name = tensor("op_750_cast_fp16")]; tensor var_752_equation_0 = const()[name = tensor("op_752_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_752_cast_fp16 = einsum(equation = var_752_equation_0, values = (var_664_1, var_728_cast_fp16))[name = tensor("op_752_cast_fp16")]; + tensor var_752_cast_fp16 = einsum(equation = var_752_equation_0, values = (var_650_8, var_728_cast_fp16))[name = tensor("op_752_cast_fp16")]; tensor var_754_equation_0 = const()[name = tensor("op_754_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_754_cast_fp16 = einsum(equation = var_754_equation_0, values = (var_664_2, var_730_cast_fp16))[name = tensor("op_754_cast_fp16")]; + tensor var_754_cast_fp16 = einsum(equation = var_754_equation_0, values = (var_650_9, var_730_cast_fp16))[name = tensor("op_754_cast_fp16")]; tensor var_756_equation_0 = const()[name = tensor("op_756_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_756_cast_fp16 = einsum(equation = var_756_equation_0, values = (var_664_3, var_732_cast_fp16))[name = tensor("op_756_cast_fp16")]; + tensor var_756_cast_fp16 = einsum(equation = var_756_equation_0, values = (var_650_10, var_732_cast_fp16))[name = tensor("op_756_cast_fp16")]; tensor var_758_equation_0 = const()[name = tensor("op_758_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_758_cast_fp16 = einsum(equation = var_758_equation_0, values = (var_664_4, var_734_cast_fp16))[name = tensor("op_758_cast_fp16")]; - tensor var_760_equation_0 = const()[name = tensor("op_760_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_760_cast_fp16 = einsum(equation = var_760_equation_0, values = (var_664_5, var_736_cast_fp16))[name = tensor("op_760_cast_fp16")]; - tensor var_762_equation_0 = const()[name = tensor("op_762_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_762_cast_fp16 = einsum(equation = var_762_equation_0, values = (var_664_6, var_738_cast_fp16))[name = tensor("op_762_cast_fp16")]; - tensor var_764_equation_0 = const()[name = tensor("op_764_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_764_cast_fp16 = einsum(equation = var_764_equation_0, values = (var_664_7, var_740_cast_fp16))[name = tensor("op_764_cast_fp16")]; - tensor var_766_equation_0 = const()[name = tensor("op_766_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_766_cast_fp16 = einsum(equation = var_766_equation_0, values = (var_664_8, var_742_cast_fp16))[name = tensor("op_766_cast_fp16")]; - tensor var_768_equation_0 = const()[name = tensor("op_768_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_768_cast_fp16 = einsum(equation = var_768_equation_0, values = (var_664_9, var_744_cast_fp16))[name = tensor("op_768_cast_fp16")]; - tensor var_770_equation_0 = const()[name = tensor("op_770_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_770_cast_fp16 = einsum(equation = var_770_equation_0, values = (var_664_10, var_746_cast_fp16))[name = tensor("op_770_cast_fp16")]; - tensor var_772_equation_0 = const()[name = tensor("op_772_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_772_cast_fp16 = einsum(equation = var_772_equation_0, values = (var_664_11, var_748_cast_fp16))[name = tensor("op_772_cast_fp16")]; - tensor var_774_interleave_0 = const()[name = tensor("op_774_interleave_0"), val = tensor(false)]; - tensor var_774_cast_fp16 = concat(axis = var_585, interleave = var_774_interleave_0, values = (var_750_cast_fp16, var_752_cast_fp16, var_754_cast_fp16, var_756_cast_fp16, var_758_cast_fp16, var_760_cast_fp16, var_762_cast_fp16, var_764_cast_fp16, var_766_cast_fp16, var_768_cast_fp16, var_770_cast_fp16, var_772_cast_fp16))[name = tensor("op_774_cast_fp16")]; - tensor var_778 = const()[name = tensor("op_778"), val = tensor([1, 1])]; - tensor var_780 = const()[name = tensor("op_780"), val = tensor([1, 1])]; - tensor var_782_pad_type_0 = const()[name = tensor("op_782_pad_type_0"), val = tensor("custom")]; - tensor var_782_pad_0 = const()[name = tensor("op_782_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_782 = conv(bias = layers_2_attention_o_proj_bias, dilations = var_780, groups = var_585, pad = var_782_pad_0, pad_type = var_782_pad_type_0, strides = var_778, weight = layers_2_attention_o_proj_weight, x = var_774_cast_fp16)[name = tensor("op_782")]; - tensor var_784_interleave_0 = const()[name = tensor("op_784_interleave_0"), val = tensor(false)]; - tensor var_784 = concat(axis = var_586, interleave = var_784_interleave_0, values = var_782)[name = tensor("op_784")]; - tensor x_21 = add(x = var_580_cast_fp16, y = var_784)[name = tensor("x_21")]; - tensor var_583_promoted = const()[name = tensor("op_583_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_584_promoted = const()[name = tensor("op_584_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_23 = clip(alpha = var_583_promoted, beta = var_584_promoted, x = x_21)[name = tensor("x_23")]; - tensor var_789 = const()[name = tensor("op_789"), val = tensor([1])]; - tensor mean_11 = reduce_mean(axes = var_789, keep_dims = var_587, x = x_23)[name = tensor("mean_11")]; + tensor var_758_cast_fp16 = einsum(equation = var_758_equation_0, values = (var_650_11, var_734_cast_fp16))[name = tensor("op_758_cast_fp16")]; + tensor var_760_interleave_0 = const()[name = tensor("op_760_interleave_0"), val = tensor(false)]; + tensor var_760_cast_fp16 = concat(axis = var_580, interleave = var_760_interleave_0, values = (var_736_cast_fp16, var_738_cast_fp16, var_740_cast_fp16, var_742_cast_fp16, var_744_cast_fp16, var_746_cast_fp16, var_748_cast_fp16, var_750_cast_fp16, var_752_cast_fp16, var_754_cast_fp16, var_756_cast_fp16, var_758_cast_fp16))[name = tensor("op_760_cast_fp16")]; + tensor var_764 = const()[name = tensor("op_764"), val = tensor([1, 1])]; + tensor var_766 = const()[name = tensor("op_766"), val = tensor([1, 1])]; + tensor var_768_pad_type_0 = const()[name = tensor("op_768_pad_type_0"), val = tensor("custom")]; + tensor var_768_pad_0 = const()[name = tensor("op_768_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_768 = conv(bias = layers_2_attention_o_proj_bias, dilations = var_766, groups = var_580, pad = var_768_pad_0, pad_type = var_768_pad_type_0, strides = var_764, weight = layers_2_attention_o_proj_weight, x = var_760_cast_fp16)[name = tensor("op_768")]; + tensor var_770_interleave_0 = const()[name = tensor("op_770_interleave_0"), val = tensor(false)]; + tensor var_770 = concat(axis = var_581, interleave = var_770_interleave_0, values = var_768)[name = tensor("op_770")]; + tensor x_21 = add(x = var_574_cast_fp16, y = var_770)[name = tensor("x_21")]; + tensor var_577_promoted = const()[name = tensor("op_577_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_578_promoted = const()[name = tensor("op_578_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_23 = clip(alpha = var_577_promoted, beta = var_578_promoted, x = x_21)[name = tensor("x_23")]; + tensor var_775 = const()[name = tensor("op_775"), val = tensor([1])]; + tensor mean_11 = reduce_mean(axes = var_775, keep_dims = var_582, x = x_23)[name = tensor("mean_11")]; tensor zero_mean_11 = sub(x = x_23, y = mean_11)[name = tensor("zero_mean_11")]; - tensor var_592_promoted = const()[name = tensor("op_592_promoted"), val = tensor(0x1p+1)]; - tensor var_792 = pow(x = zero_mean_11, y = var_592_promoted)[name = tensor("op_792")]; - tensor var_793 = const()[name = tensor("op_793"), val = tensor([1])]; - tensor var_794 = reduce_mean(axes = var_793, keep_dims = var_587, x = var_792)[name = tensor("op_794")]; - tensor var_795_to_fp16 = const()[name = tensor("op_795_to_fp16"), val = tensor(0x1p-24)]; - tensor var_796_cast_fp16 = add(x = var_794, y = var_795_to_fp16)[name = tensor("op_796_cast_fp16")]; + tensor var_579_promoted = const()[name = tensor("op_579_promoted"), val = tensor(0x1p+1)]; + tensor var_778 = pow(x = zero_mean_11, y = var_579_promoted)[name = tensor("op_778")]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor([1])]; + tensor var_780 = reduce_mean(axes = var_779, keep_dims = var_582, x = var_778)[name = tensor("op_780")]; + tensor var_781_to_fp16 = const()[name = tensor("op_781_to_fp16"), val = tensor(0x1p-24)]; + tensor var_782_cast_fp16 = add(x = var_780, y = var_781_to_fp16)[name = tensor("op_782_cast_fp16")]; tensor denom_11_epsilon_0 = const()[name = tensor("denom_11_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0, x = var_796_cast_fp16)[name = tensor("denom_11_cast_fp16")]; - tensor var_798_cast_fp16 = mul(x = zero_mean_11, y = denom_11_cast_fp16)[name = tensor("op_798_cast_fp16")]; - tensor var_800_gamma_0_to_fp16 = const()[name = tensor("op_800_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218518528)))]; - tensor var_800_beta_0_to_fp16 = const()[name = tensor("op_800_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218520128)))]; - tensor var_800_epsilon_0_to_fp16 = const()[name = tensor("op_800_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_800_cast_fp16 = batch_norm(beta = var_800_beta_0_to_fp16, epsilon = var_800_epsilon_0_to_fp16, gamma = var_800_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_798_cast_fp16)[name = tensor("op_800_cast_fp16")]; - tensor var_806 = const()[name = tensor("op_806"), val = tensor([1, 1])]; - tensor var_808 = const()[name = tensor("op_808"), val = tensor([1, 1])]; - tensor var_810_pad_type_0 = const()[name = tensor("op_810_pad_type_0"), val = tensor("custom")]; - tensor var_810_pad_0 = const()[name = tensor("op_810_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_810 = conv(bias = layers_2_mlp_fc1_bias, dilations = var_808, groups = var_585, pad = var_810_pad_0, pad_type = var_810_pad_type_0, strides = var_806, weight = layers_2_mlp_fc1_weight, x = var_800_cast_fp16)[name = tensor("op_810")]; - tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; - tensor input_23 = gelu(mode = input_23_mode_0, x = var_810)[name = tensor("input_23")]; - tensor var_814 = const()[name = tensor("op_814"), val = tensor([1, 1])]; - tensor var_816 = const()[name = tensor("op_816"), val = tensor([1, 1])]; - tensor var_818_pad_type_0 = const()[name = tensor("op_818_pad_type_0"), val = tensor("custom")]; - tensor var_818_pad_0 = const()[name = tensor("op_818_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_818 = conv(bias = layers_2_mlp_fc2_bias, dilations = var_816, groups = var_585, pad = var_818_pad_0, pad_type = var_818_pad_type_0, strides = var_814, weight = layers_2_mlp_fc2_weight, x = input_23)[name = tensor("op_818")]; - tensor x_25 = add(x = var_800_cast_fp16, y = var_818)[name = tensor("x_25")]; - tensor var_583_promoted_1 = const()[name = tensor("op_583_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_584_promoted_1 = const()[name = tensor("op_584_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_27 = clip(alpha = var_583_promoted_1, beta = var_584_promoted_1, x = x_25)[name = tensor("x_27")]; - tensor var_823 = const()[name = tensor("op_823"), val = tensor([1])]; - tensor mean_13 = reduce_mean(axes = var_823, keep_dims = var_587, x = x_27)[name = tensor("mean_13")]; + tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0, x = var_782_cast_fp16)[name = tensor("denom_11_cast_fp16")]; + tensor var_784_cast_fp16 = mul(x = zero_mean_11, y = denom_11_cast_fp16)[name = tensor("op_784_cast_fp16")]; + tensor var_786_gamma_0_to_fp16 = const()[name = tensor("op_786_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218518528)))]; + tensor var_786_beta_0_to_fp16 = const()[name = tensor("op_786_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218520128)))]; + tensor var_786_epsilon_0_to_fp16 = const()[name = tensor("op_786_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_786_cast_fp16 = batch_norm(beta = var_786_beta_0_to_fp16, epsilon = var_786_epsilon_0_to_fp16, gamma = var_786_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_784_cast_fp16)[name = tensor("op_786_cast_fp16")]; + tensor var_792 = const()[name = tensor("op_792"), val = tensor([1, 1])]; + tensor var_794 = const()[name = tensor("op_794"), val = tensor([1, 1])]; + tensor var_796_pad_type_0 = const()[name = tensor("op_796_pad_type_0"), val = tensor("custom")]; + tensor var_796_pad_0 = const()[name = tensor("op_796_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_796 = conv(bias = layers_2_mlp_fc1_bias, dilations = var_794, groups = var_580, pad = var_796_pad_0, pad_type = var_796_pad_type_0, strides = var_792, weight = layers_2_mlp_fc1_weight, x = var_786_cast_fp16)[name = tensor("op_796")]; + tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; + tensor input_95 = gelu(mode = input_95_mode_0, x = var_796)[name = tensor("input_95")]; + tensor var_800 = const()[name = tensor("op_800"), val = tensor([1, 1])]; + tensor var_802 = const()[name = tensor("op_802"), val = tensor([1, 1])]; + tensor var_804_pad_type_0 = const()[name = tensor("op_804_pad_type_0"), val = tensor("custom")]; + tensor var_804_pad_0 = const()[name = tensor("op_804_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_804 = conv(bias = layers_2_mlp_fc2_bias, dilations = var_802, groups = var_580, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_800, weight = layers_2_mlp_fc2_weight, x = input_95)[name = tensor("op_804")]; + tensor x_25 = add(x = var_786_cast_fp16, y = var_804)[name = tensor("x_25")]; + tensor var_577_promoted_1 = const()[name = tensor("op_577_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_578_promoted_1 = const()[name = tensor("op_578_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_27 = clip(alpha = var_577_promoted_1, beta = var_578_promoted_1, x = x_25)[name = tensor("x_27")]; + tensor var_809 = const()[name = tensor("op_809"), val = tensor([1])]; + tensor mean_13 = reduce_mean(axes = var_809, keep_dims = var_582, x = x_27)[name = tensor("mean_13")]; tensor zero_mean_13 = sub(x = x_27, y = mean_13)[name = tensor("zero_mean_13")]; - tensor var_592_promoted_1 = const()[name = tensor("op_592_promoted_1"), val = tensor(0x1p+1)]; - tensor var_826 = pow(x = zero_mean_13, y = var_592_promoted_1)[name = tensor("op_826")]; - tensor var_827 = const()[name = tensor("op_827"), val = tensor([1])]; - tensor var_828 = reduce_mean(axes = var_827, keep_dims = var_587, x = var_826)[name = tensor("op_828")]; - tensor var_829_to_fp16 = const()[name = tensor("op_829_to_fp16"), val = tensor(0x1p-24)]; - tensor var_830_cast_fp16 = add(x = var_828, y = var_829_to_fp16)[name = tensor("op_830_cast_fp16")]; + tensor var_579_promoted_1 = const()[name = tensor("op_579_promoted_1"), val = tensor(0x1p+1)]; + tensor var_812 = pow(x = zero_mean_13, y = var_579_promoted_1)[name = tensor("op_812")]; + tensor var_813 = const()[name = tensor("op_813"), val = tensor([1])]; + tensor var_814 = reduce_mean(axes = var_813, keep_dims = var_582, x = var_812)[name = tensor("op_814")]; + tensor var_815_to_fp16 = const()[name = tensor("op_815_to_fp16"), val = tensor(0x1p-24)]; + tensor var_816_cast_fp16 = add(x = var_814, y = var_815_to_fp16)[name = tensor("op_816_cast_fp16")]; tensor denom_13_epsilon_0 = const()[name = tensor("denom_13_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0, x = var_830_cast_fp16)[name = tensor("denom_13_cast_fp16")]; - tensor var_832_cast_fp16 = mul(x = zero_mean_13, y = denom_13_cast_fp16)[name = tensor("op_832_cast_fp16")]; - tensor var_834_gamma_0_to_fp16 = const()[name = tensor("op_834_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218521728)))]; - tensor var_834_beta_0_to_fp16 = const()[name = tensor("op_834_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218523328)))]; - tensor var_834_epsilon_0_to_fp16 = const()[name = tensor("op_834_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_834_cast_fp16 = batch_norm(beta = var_834_beta_0_to_fp16, epsilon = var_834_epsilon_0_to_fp16, gamma = var_834_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_832_cast_fp16)[name = tensor("op_834_cast_fp16")]; - tensor var_839 = const()[name = tensor("op_839"), val = tensor(1)]; - tensor var_840 = const()[name = tensor("op_840"), val = tensor(0)]; - tensor var_841 = const()[name = tensor("op_841"), val = tensor(true)]; + tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0, x = var_816_cast_fp16)[name = tensor("denom_13_cast_fp16")]; + tensor var_818_cast_fp16 = mul(x = zero_mean_13, y = denom_13_cast_fp16)[name = tensor("op_818_cast_fp16")]; + tensor var_820_gamma_0_to_fp16 = const()[name = tensor("op_820_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218521728)))]; + tensor var_820_beta_0_to_fp16 = const()[name = tensor("op_820_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218523328)))]; + tensor var_820_epsilon_0_to_fp16 = const()[name = tensor("op_820_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_820_cast_fp16 = batch_norm(beta = var_820_beta_0_to_fp16, epsilon = var_820_epsilon_0_to_fp16, gamma = var_820_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_818_cast_fp16)[name = tensor("op_820_cast_fp16")]; + tensor var_826 = const()[name = tensor("op_826"), val = tensor(1)]; + tensor var_827 = const()[name = tensor("op_827"), val = tensor(0)]; + tensor var_828 = const()[name = tensor("op_828"), val = tensor(true)]; + tensor var_850 = const()[name = tensor("op_850"), val = tensor([1, 1])]; + tensor var_852 = const()[name = tensor("op_852"), val = tensor([1, 1])]; + tensor var_854_pad_type_0 = const()[name = tensor("op_854_pad_type_0"), val = tensor("custom")]; + tensor var_854_pad_0 = const()[name = tensor("op_854_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_854 = conv(bias = layers_3_attention_q_proj_bias, dilations = var_852, groups = var_826, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_850, weight = layers_3_attention_q_proj_weight, x = var_820_cast_fp16)[name = tensor("op_854")]; + tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; + tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor ks_7_pad_type_0 = const()[name = tensor("ks_7_pad_type_0"), val = tensor("custom")]; + tensor ks_7_pad_0 = const()[name = tensor("ks_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_7 = conv(bias = layers_3_attention_k_proj_bias, dilations = var_859, groups = var_826, pad = ks_7_pad_0, pad_type = ks_7_pad_type_0, strides = var_857, weight = layers_3_attention_k_proj_weight, x = var_820_cast_fp16)[name = tensor("ks_7")]; + tensor var_864 = const()[name = tensor("op_864"), val = tensor([1, 1])]; tensor var_866 = const()[name = tensor("op_866"), val = tensor([1, 1])]; - tensor var_868 = const()[name = tensor("op_868"), val = tensor([1, 1])]; - tensor var_870_pad_type_0 = const()[name = tensor("op_870_pad_type_0"), val = tensor("custom")]; - tensor var_870_pad_0 = const()[name = tensor("op_870_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_870 = conv(bias = layers_3_attention_q_proj_bias, dilations = var_868, groups = var_839, pad = var_870_pad_0, pad_type = var_870_pad_type_0, strides = var_866, weight = layers_3_attention_q_proj_weight, x = var_834_cast_fp16)[name = tensor("op_870")]; - tensor var_871 = const()[name = tensor("op_871"), val = tensor([1, 64, 12, 512])]; - tensor var_872 = reshape(shape = var_871, x = var_870)[name = tensor("op_872")]; - tensor var_875 = const()[name = tensor("op_875"), val = tensor([1, 1])]; - tensor var_877 = const()[name = tensor("op_877"), val = tensor([1, 1])]; - tensor var_879_pad_type_0 = const()[name = tensor("op_879_pad_type_0"), val = tensor("custom")]; - tensor var_879_pad_0 = const()[name = tensor("op_879_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_879 = conv(bias = layers_3_attention_k_proj_bias, dilations = var_877, groups = var_839, pad = var_879_pad_0, pad_type = var_879_pad_type_0, strides = var_875, weight = layers_3_attention_k_proj_weight, x = var_834_cast_fp16)[name = tensor("op_879")]; - tensor var_880 = const()[name = tensor("op_880"), val = tensor([1, 64, 12, 512])]; - tensor ks_7 = reshape(shape = var_880, x = var_879)[name = tensor("ks_7")]; - tensor var_884 = const()[name = tensor("op_884"), val = tensor([1, 1])]; - tensor var_886 = const()[name = tensor("op_886"), val = tensor([1, 1])]; - tensor var_888_pad_type_0 = const()[name = tensor("op_888_pad_type_0"), val = tensor("custom")]; - tensor var_888_pad_0 = const()[name = tensor("op_888_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_888 = conv(bias = layers_3_attention_v_proj_bias, dilations = var_886, groups = var_839, pad = var_888_pad_0, pad_type = var_888_pad_type_0, strides = var_884, weight = layers_3_attention_v_proj_weight, x = var_834_cast_fp16)[name = tensor("op_888")]; - tensor var_889 = const()[name = tensor("op_889"), val = tensor([1, 64, 12, 512])]; - tensor var_890 = reshape(shape = var_889, x = var_888)[name = tensor("op_890")]; - tensor tile_17 = const()[name = tensor("tile_17"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_891_axis_0 = const()[name = tensor("op_891_axis_0"), val = tensor(2)]; - tensor var_891_0, tensor var_891_1, tensor var_891_2, tensor var_891_3, tensor var_891_4, tensor var_891_5, tensor var_891_6, tensor var_891_7, tensor var_891_8, tensor var_891_9, tensor var_891_10, tensor var_891_11 = split(axis = var_891_axis_0, split_sizes = tile_17, x = var_872)[name = tensor("op_891")]; - tensor var_904_perm_0 = const()[name = tensor("op_904_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_18 = const()[name = tensor("tile_18"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_905_axis_0 = const()[name = tensor("op_905_axis_0"), val = tensor(2)]; - tensor transpose_8 = transpose(perm = var_904_perm_0, x = ks_7)[name = tensor("transpose_8")]; - tensor var_905_0, tensor var_905_1, tensor var_905_2, tensor var_905_3, tensor var_905_4, tensor var_905_5, tensor var_905_6, tensor var_905_7, tensor var_905_8, tensor var_905_9, tensor var_905_10, tensor var_905_11 = split(axis = var_905_axis_0, split_sizes = tile_18, x = transpose_8)[name = tensor("op_905")]; - tensor tile_19 = const()[name = tensor("tile_19"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_918_axis_0 = const()[name = tensor("op_918_axis_0"), val = tensor(2)]; - tensor var_918_0, tensor var_918_1, tensor var_918_2, tensor var_918_3, tensor var_918_4, tensor var_918_5, tensor var_918_6, tensor var_918_7, tensor var_918_8, tensor var_918_9, tensor var_918_10, tensor var_918_11 = split(axis = var_918_axis_0, split_sizes = tile_19, x = var_890)[name = tensor("op_918")]; - tensor var_932_equation_0 = const()[name = tensor("op_932_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_932 = einsum(equation = var_932_equation_0, values = (var_905_0, var_891_0))[name = tensor("op_932")]; - tensor var_933_to_fp16 = const()[name = tensor("op_933_to_fp16"), val = tensor(0x1p-3)]; - tensor w_73_cast_fp16 = mul(x = var_932, y = var_933_to_fp16)[name = tensor("w_73_cast_fp16")]; - tensor var_936_equation_0 = const()[name = tensor("op_936_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_936 = einsum(equation = var_936_equation_0, values = (var_905_1, var_891_1))[name = tensor("op_936")]; - tensor var_937_to_fp16 = const()[name = tensor("op_937_to_fp16"), val = tensor(0x1p-3)]; - tensor w_75_cast_fp16 = mul(x = var_936, y = var_937_to_fp16)[name = tensor("w_75_cast_fp16")]; - tensor var_940_equation_0 = const()[name = tensor("op_940_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_940 = einsum(equation = var_940_equation_0, values = (var_905_2, var_891_2))[name = tensor("op_940")]; - tensor var_941_to_fp16 = const()[name = tensor("op_941_to_fp16"), val = tensor(0x1p-3)]; - tensor w_77_cast_fp16 = mul(x = var_940, y = var_941_to_fp16)[name = tensor("w_77_cast_fp16")]; - tensor var_944_equation_0 = const()[name = tensor("op_944_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_944 = einsum(equation = var_944_equation_0, values = (var_905_3, var_891_3))[name = tensor("op_944")]; - tensor var_945_to_fp16 = const()[name = tensor("op_945_to_fp16"), val = tensor(0x1p-3)]; - tensor w_79_cast_fp16 = mul(x = var_944, y = var_945_to_fp16)[name = tensor("w_79_cast_fp16")]; - tensor var_948_equation_0 = const()[name = tensor("op_948_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_948 = einsum(equation = var_948_equation_0, values = (var_905_4, var_891_4))[name = tensor("op_948")]; - tensor var_949_to_fp16 = const()[name = tensor("op_949_to_fp16"), val = tensor(0x1p-3)]; - tensor w_81_cast_fp16 = mul(x = var_948, y = var_949_to_fp16)[name = tensor("w_81_cast_fp16")]; - tensor var_952_equation_0 = const()[name = tensor("op_952_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_952 = einsum(equation = var_952_equation_0, values = (var_905_5, var_891_5))[name = tensor("op_952")]; - tensor var_953_to_fp16 = const()[name = tensor("op_953_to_fp16"), val = tensor(0x1p-3)]; - tensor w_83_cast_fp16 = mul(x = var_952, y = var_953_to_fp16)[name = tensor("w_83_cast_fp16")]; - tensor var_956_equation_0 = const()[name = tensor("op_956_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_956 = einsum(equation = var_956_equation_0, values = (var_905_6, var_891_6))[name = tensor("op_956")]; - tensor var_957_to_fp16 = const()[name = tensor("op_957_to_fp16"), val = tensor(0x1p-3)]; - tensor w_85_cast_fp16 = mul(x = var_956, y = var_957_to_fp16)[name = tensor("w_85_cast_fp16")]; - tensor var_960_equation_0 = const()[name = tensor("op_960_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_960 = einsum(equation = var_960_equation_0, values = (var_905_7, var_891_7))[name = tensor("op_960")]; - tensor var_961_to_fp16 = const()[name = tensor("op_961_to_fp16"), val = tensor(0x1p-3)]; - tensor w_87_cast_fp16 = mul(x = var_960, y = var_961_to_fp16)[name = tensor("w_87_cast_fp16")]; - tensor var_964_equation_0 = const()[name = tensor("op_964_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_964 = einsum(equation = var_964_equation_0, values = (var_905_8, var_891_8))[name = tensor("op_964")]; - tensor var_965_to_fp16 = const()[name = tensor("op_965_to_fp16"), val = tensor(0x1p-3)]; - tensor w_89_cast_fp16 = mul(x = var_964, y = var_965_to_fp16)[name = tensor("w_89_cast_fp16")]; - tensor var_968_equation_0 = const()[name = tensor("op_968_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_968 = einsum(equation = var_968_equation_0, values = (var_905_9, var_891_9))[name = tensor("op_968")]; - tensor var_969_to_fp16 = const()[name = tensor("op_969_to_fp16"), val = tensor(0x1p-3)]; - tensor w_91_cast_fp16 = mul(x = var_968, y = var_969_to_fp16)[name = tensor("w_91_cast_fp16")]; - tensor var_972_equation_0 = const()[name = tensor("op_972_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_972 = einsum(equation = var_972_equation_0, values = (var_905_10, var_891_10))[name = tensor("op_972")]; - tensor var_973_to_fp16 = const()[name = tensor("op_973_to_fp16"), val = tensor(0x1p-3)]; - tensor w_93_cast_fp16 = mul(x = var_972, y = var_973_to_fp16)[name = tensor("w_93_cast_fp16")]; - tensor var_976_equation_0 = const()[name = tensor("op_976_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_976 = einsum(equation = var_976_equation_0, values = (var_905_11, var_891_11))[name = tensor("op_976")]; - tensor var_977_to_fp16 = const()[name = tensor("op_977_to_fp16"), val = tensor(0x1p-3)]; - tensor w_95_cast_fp16 = mul(x = var_976, y = var_977_to_fp16)[name = tensor("w_95_cast_fp16")]; - tensor var_979_cast_fp16 = add(x = w_73_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_979_cast_fp16")]; - tensor var_980_cast_fp16 = softmax(axis = var_839, x = var_979_cast_fp16)[name = tensor("op_980_cast_fp16")]; - tensor var_981_cast_fp16 = add(x = w_75_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_981_cast_fp16")]; - tensor var_982_cast_fp16 = softmax(axis = var_839, x = var_981_cast_fp16)[name = tensor("op_982_cast_fp16")]; - tensor var_983_cast_fp16 = add(x = w_77_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_983_cast_fp16")]; - tensor var_984_cast_fp16 = softmax(axis = var_839, x = var_983_cast_fp16)[name = tensor("op_984_cast_fp16")]; - tensor var_985_cast_fp16 = add(x = w_79_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_985_cast_fp16")]; - tensor var_986_cast_fp16 = softmax(axis = var_839, x = var_985_cast_fp16)[name = tensor("op_986_cast_fp16")]; - tensor var_987_cast_fp16 = add(x = w_81_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_987_cast_fp16")]; - tensor var_988_cast_fp16 = softmax(axis = var_839, x = var_987_cast_fp16)[name = tensor("op_988_cast_fp16")]; - tensor var_989_cast_fp16 = add(x = w_83_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_989_cast_fp16")]; - tensor var_990_cast_fp16 = softmax(axis = var_839, x = var_989_cast_fp16)[name = tensor("op_990_cast_fp16")]; - tensor var_991_cast_fp16 = add(x = w_85_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_991_cast_fp16")]; - tensor var_992_cast_fp16 = softmax(axis = var_839, x = var_991_cast_fp16)[name = tensor("op_992_cast_fp16")]; - tensor var_993_cast_fp16 = add(x = w_87_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_993_cast_fp16")]; - tensor var_994_cast_fp16 = softmax(axis = var_839, x = var_993_cast_fp16)[name = tensor("op_994_cast_fp16")]; - tensor var_995_cast_fp16 = add(x = w_89_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_995_cast_fp16")]; - tensor var_996_cast_fp16 = softmax(axis = var_839, x = var_995_cast_fp16)[name = tensor("op_996_cast_fp16")]; - tensor var_997_cast_fp16 = add(x = w_91_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_997_cast_fp16")]; - tensor var_998_cast_fp16 = softmax(axis = var_839, x = var_997_cast_fp16)[name = tensor("op_998_cast_fp16")]; - tensor var_999_cast_fp16 = add(x = w_93_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_999_cast_fp16")]; - tensor var_1000_cast_fp16 = softmax(axis = var_839, x = var_999_cast_fp16)[name = tensor("op_1000_cast_fp16")]; - tensor var_1001_cast_fp16 = add(x = w_95_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1001_cast_fp16")]; - tensor var_1002_cast_fp16 = softmax(axis = var_839, x = var_1001_cast_fp16)[name = tensor("op_1002_cast_fp16")]; + tensor var_868_pad_type_0 = const()[name = tensor("op_868_pad_type_0"), val = tensor("custom")]; + tensor var_868_pad_0 = const()[name = tensor("op_868_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_868 = conv(bias = layers_3_attention_v_proj_bias, dilations = var_866, groups = var_826, pad = var_868_pad_0, pad_type = var_868_pad_type_0, strides = var_864, weight = layers_3_attention_v_proj_weight, x = var_820_cast_fp16)[name = tensor("op_868")]; + tensor tile_17 = const()[name = tensor("tile_17"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_869_axis_0 = const()[name = tensor("op_869_axis_0"), val = tensor(1)]; + tensor var_869_0, tensor var_869_1, tensor var_869_2, tensor var_869_3, tensor var_869_4, tensor var_869_5, tensor var_869_6, tensor var_869_7, tensor var_869_8, tensor var_869_9, tensor var_869_10, tensor var_869_11 = split(axis = var_869_axis_0, split_sizes = tile_17, x = var_854)[name = tensor("op_869")]; + tensor var_882_perm_0 = const()[name = tensor("op_882_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_18 = const()[name = tensor("tile_18"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_883_axis_0 = const()[name = tensor("op_883_axis_0"), val = tensor(3)]; + tensor transpose_8 = transpose(perm = var_882_perm_0, x = ks_7)[name = tensor("transpose_8")]; + tensor var_883_0, tensor var_883_1, tensor var_883_2, tensor var_883_3, tensor var_883_4, tensor var_883_5, tensor var_883_6, tensor var_883_7, tensor var_883_8, tensor var_883_9, tensor var_883_10, tensor var_883_11 = split(axis = var_883_axis_0, split_sizes = tile_18, x = transpose_8)[name = tensor("op_883")]; + tensor tile_19 = const()[name = tensor("tile_19"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_896_axis_0 = const()[name = tensor("op_896_axis_0"), val = tensor(1)]; + tensor var_896_0, tensor var_896_1, tensor var_896_2, tensor var_896_3, tensor var_896_4, tensor var_896_5, tensor var_896_6, tensor var_896_7, tensor var_896_8, tensor var_896_9, tensor var_896_10, tensor var_896_11 = split(axis = var_896_axis_0, split_sizes = tile_19, x = var_868)[name = tensor("op_896")]; + tensor var_910_equation_0 = const()[name = tensor("op_910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_910 = einsum(equation = var_910_equation_0, values = (var_883_0, var_869_0))[name = tensor("op_910")]; + tensor var_911_to_fp16 = const()[name = tensor("op_911_to_fp16"), val = tensor(0x1p-3)]; + tensor w_73_cast_fp16 = mul(x = var_910, y = var_911_to_fp16)[name = tensor("w_73_cast_fp16")]; + tensor var_914_equation_0 = const()[name = tensor("op_914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_914 = einsum(equation = var_914_equation_0, values = (var_883_1, var_869_1))[name = tensor("op_914")]; + tensor var_915_to_fp16 = const()[name = tensor("op_915_to_fp16"), val = tensor(0x1p-3)]; + tensor w_75_cast_fp16 = mul(x = var_914, y = var_915_to_fp16)[name = tensor("w_75_cast_fp16")]; + tensor var_918_equation_0 = const()[name = tensor("op_918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_918 = einsum(equation = var_918_equation_0, values = (var_883_2, var_869_2))[name = tensor("op_918")]; + tensor var_919_to_fp16 = const()[name = tensor("op_919_to_fp16"), val = tensor(0x1p-3)]; + tensor w_77_cast_fp16 = mul(x = var_918, y = var_919_to_fp16)[name = tensor("w_77_cast_fp16")]; + tensor var_922_equation_0 = const()[name = tensor("op_922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_922 = einsum(equation = var_922_equation_0, values = (var_883_3, var_869_3))[name = tensor("op_922")]; + tensor var_923_to_fp16 = const()[name = tensor("op_923_to_fp16"), val = tensor(0x1p-3)]; + tensor w_79_cast_fp16 = mul(x = var_922, y = var_923_to_fp16)[name = tensor("w_79_cast_fp16")]; + tensor var_926_equation_0 = const()[name = tensor("op_926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_926 = einsum(equation = var_926_equation_0, values = (var_883_4, var_869_4))[name = tensor("op_926")]; + tensor var_927_to_fp16 = const()[name = tensor("op_927_to_fp16"), val = tensor(0x1p-3)]; + tensor w_81_cast_fp16 = mul(x = var_926, y = var_927_to_fp16)[name = tensor("w_81_cast_fp16")]; + tensor var_930_equation_0 = const()[name = tensor("op_930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_930 = einsum(equation = var_930_equation_0, values = (var_883_5, var_869_5))[name = tensor("op_930")]; + tensor var_931_to_fp16 = const()[name = tensor("op_931_to_fp16"), val = tensor(0x1p-3)]; + tensor w_83_cast_fp16 = mul(x = var_930, y = var_931_to_fp16)[name = tensor("w_83_cast_fp16")]; + tensor var_934_equation_0 = const()[name = tensor("op_934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_934 = einsum(equation = var_934_equation_0, values = (var_883_6, var_869_6))[name = tensor("op_934")]; + tensor var_935_to_fp16 = const()[name = tensor("op_935_to_fp16"), val = tensor(0x1p-3)]; + tensor w_85_cast_fp16 = mul(x = var_934, y = var_935_to_fp16)[name = tensor("w_85_cast_fp16")]; + tensor var_938_equation_0 = const()[name = tensor("op_938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_938 = einsum(equation = var_938_equation_0, values = (var_883_7, var_869_7))[name = tensor("op_938")]; + tensor var_939_to_fp16 = const()[name = tensor("op_939_to_fp16"), val = tensor(0x1p-3)]; + tensor w_87_cast_fp16 = mul(x = var_938, y = var_939_to_fp16)[name = tensor("w_87_cast_fp16")]; + tensor var_942_equation_0 = const()[name = tensor("op_942_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_942 = einsum(equation = var_942_equation_0, values = (var_883_8, var_869_8))[name = tensor("op_942")]; + tensor var_943_to_fp16 = const()[name = tensor("op_943_to_fp16"), val = tensor(0x1p-3)]; + tensor w_89_cast_fp16 = mul(x = var_942, y = var_943_to_fp16)[name = tensor("w_89_cast_fp16")]; + tensor var_946_equation_0 = const()[name = tensor("op_946_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_946 = einsum(equation = var_946_equation_0, values = (var_883_9, var_869_9))[name = tensor("op_946")]; + tensor var_947_to_fp16 = const()[name = tensor("op_947_to_fp16"), val = tensor(0x1p-3)]; + tensor w_91_cast_fp16 = mul(x = var_946, y = var_947_to_fp16)[name = tensor("w_91_cast_fp16")]; + tensor var_950_equation_0 = const()[name = tensor("op_950_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_950 = einsum(equation = var_950_equation_0, values = (var_883_10, var_869_10))[name = tensor("op_950")]; + tensor var_951_to_fp16 = const()[name = tensor("op_951_to_fp16"), val = tensor(0x1p-3)]; + tensor w_93_cast_fp16 = mul(x = var_950, y = var_951_to_fp16)[name = tensor("w_93_cast_fp16")]; + tensor var_954_equation_0 = const()[name = tensor("op_954_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_954 = einsum(equation = var_954_equation_0, values = (var_883_11, var_869_11))[name = tensor("op_954")]; + tensor var_955_to_fp16 = const()[name = tensor("op_955_to_fp16"), val = tensor(0x1p-3)]; + tensor w_95_cast_fp16 = mul(x = var_954, y = var_955_to_fp16)[name = tensor("w_95_cast_fp16")]; + tensor input_99_cast_fp16 = add(x = w_73_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_958_cast_fp16 = softmax(axis = var_826, x = input_99_cast_fp16)[name = tensor("op_958_cast_fp16")]; + tensor input_101_cast_fp16 = add(x = w_75_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor var_960_cast_fp16 = softmax(axis = var_826, x = input_101_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor input_103_cast_fp16 = add(x = w_77_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_962_cast_fp16 = softmax(axis = var_826, x = input_103_cast_fp16)[name = tensor("op_962_cast_fp16")]; + tensor input_105_cast_fp16 = add(x = w_79_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_105_cast_fp16")]; + tensor var_964_cast_fp16 = softmax(axis = var_826, x = input_105_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor input_107_cast_fp16 = add(x = w_81_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor var_966_cast_fp16 = softmax(axis = var_826, x = input_107_cast_fp16)[name = tensor("op_966_cast_fp16")]; + tensor input_109_cast_fp16 = add(x = w_83_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor var_968_cast_fp16 = softmax(axis = var_826, x = input_109_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor input_111_cast_fp16 = add(x = w_85_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_970_cast_fp16 = softmax(axis = var_826, x = input_111_cast_fp16)[name = tensor("op_970_cast_fp16")]; + tensor input_113_cast_fp16 = add(x = w_87_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_972_cast_fp16 = softmax(axis = var_826, x = input_113_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor input_115_cast_fp16 = add(x = w_89_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_974_cast_fp16 = softmax(axis = var_826, x = input_115_cast_fp16)[name = tensor("op_974_cast_fp16")]; + tensor input_117_cast_fp16 = add(x = w_91_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor var_976_cast_fp16 = softmax(axis = var_826, x = input_117_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor input_119_cast_fp16 = add(x = w_93_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor var_978_cast_fp16 = softmax(axis = var_826, x = input_119_cast_fp16)[name = tensor("op_978_cast_fp16")]; + tensor input_121_cast_fp16 = add(x = w_95_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor var_980_cast_fp16 = softmax(axis = var_826, x = input_121_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_982_equation_0 = const()[name = tensor("op_982_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_896_0, var_958_cast_fp16))[name = tensor("op_982_cast_fp16")]; + tensor var_984_equation_0 = const()[name = tensor("op_984_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_984_cast_fp16 = einsum(equation = var_984_equation_0, values = (var_896_1, var_960_cast_fp16))[name = tensor("op_984_cast_fp16")]; + tensor var_986_equation_0 = const()[name = tensor("op_986_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_896_2, var_962_cast_fp16))[name = tensor("op_986_cast_fp16")]; + tensor var_988_equation_0 = const()[name = tensor("op_988_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_988_cast_fp16 = einsum(equation = var_988_equation_0, values = (var_896_3, var_964_cast_fp16))[name = tensor("op_988_cast_fp16")]; + tensor var_990_equation_0 = const()[name = tensor("op_990_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_896_4, var_966_cast_fp16))[name = tensor("op_990_cast_fp16")]; + tensor var_992_equation_0 = const()[name = tensor("op_992_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_992_cast_fp16 = einsum(equation = var_992_equation_0, values = (var_896_5, var_968_cast_fp16))[name = tensor("op_992_cast_fp16")]; + tensor var_994_equation_0 = const()[name = tensor("op_994_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_896_6, var_970_cast_fp16))[name = tensor("op_994_cast_fp16")]; + tensor var_996_equation_0 = const()[name = tensor("op_996_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_996_cast_fp16 = einsum(equation = var_996_equation_0, values = (var_896_7, var_972_cast_fp16))[name = tensor("op_996_cast_fp16")]; + tensor var_998_equation_0 = const()[name = tensor("op_998_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_896_8, var_974_cast_fp16))[name = tensor("op_998_cast_fp16")]; + tensor var_1000_equation_0 = const()[name = tensor("op_1000_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1000_cast_fp16 = einsum(equation = var_1000_equation_0, values = (var_896_9, var_976_cast_fp16))[name = tensor("op_1000_cast_fp16")]; + tensor var_1002_equation_0 = const()[name = tensor("op_1002_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_896_10, var_978_cast_fp16))[name = tensor("op_1002_cast_fp16")]; tensor var_1004_equation_0 = const()[name = tensor("op_1004_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1004_cast_fp16 = einsum(equation = var_1004_equation_0, values = (var_918_0, var_980_cast_fp16))[name = tensor("op_1004_cast_fp16")]; - tensor var_1006_equation_0 = const()[name = tensor("op_1006_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1006_cast_fp16 = einsum(equation = var_1006_equation_0, values = (var_918_1, var_982_cast_fp16))[name = tensor("op_1006_cast_fp16")]; - tensor var_1008_equation_0 = const()[name = tensor("op_1008_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1008_cast_fp16 = einsum(equation = var_1008_equation_0, values = (var_918_2, var_984_cast_fp16))[name = tensor("op_1008_cast_fp16")]; - tensor var_1010_equation_0 = const()[name = tensor("op_1010_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1010_cast_fp16 = einsum(equation = var_1010_equation_0, values = (var_918_3, var_986_cast_fp16))[name = tensor("op_1010_cast_fp16")]; - tensor var_1012_equation_0 = const()[name = tensor("op_1012_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1012_cast_fp16 = einsum(equation = var_1012_equation_0, values = (var_918_4, var_988_cast_fp16))[name = tensor("op_1012_cast_fp16")]; - tensor var_1014_equation_0 = const()[name = tensor("op_1014_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1014_cast_fp16 = einsum(equation = var_1014_equation_0, values = (var_918_5, var_990_cast_fp16))[name = tensor("op_1014_cast_fp16")]; - tensor var_1016_equation_0 = const()[name = tensor("op_1016_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1016_cast_fp16 = einsum(equation = var_1016_equation_0, values = (var_918_6, var_992_cast_fp16))[name = tensor("op_1016_cast_fp16")]; - tensor var_1018_equation_0 = const()[name = tensor("op_1018_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1018_cast_fp16 = einsum(equation = var_1018_equation_0, values = (var_918_7, var_994_cast_fp16))[name = tensor("op_1018_cast_fp16")]; - tensor var_1020_equation_0 = const()[name = tensor("op_1020_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1020_cast_fp16 = einsum(equation = var_1020_equation_0, values = (var_918_8, var_996_cast_fp16))[name = tensor("op_1020_cast_fp16")]; - tensor var_1022_equation_0 = const()[name = tensor("op_1022_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1022_cast_fp16 = einsum(equation = var_1022_equation_0, values = (var_918_9, var_998_cast_fp16))[name = tensor("op_1022_cast_fp16")]; - tensor var_1024_equation_0 = const()[name = tensor("op_1024_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1024_cast_fp16 = einsum(equation = var_1024_equation_0, values = (var_918_10, var_1000_cast_fp16))[name = tensor("op_1024_cast_fp16")]; - tensor var_1026_equation_0 = const()[name = tensor("op_1026_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1026_cast_fp16 = einsum(equation = var_1026_equation_0, values = (var_918_11, var_1002_cast_fp16))[name = tensor("op_1026_cast_fp16")]; - tensor var_1028_interleave_0 = const()[name = tensor("op_1028_interleave_0"), val = tensor(false)]; - tensor var_1028_cast_fp16 = concat(axis = var_839, interleave = var_1028_interleave_0, values = (var_1004_cast_fp16, var_1006_cast_fp16, var_1008_cast_fp16, var_1010_cast_fp16, var_1012_cast_fp16, var_1014_cast_fp16, var_1016_cast_fp16, var_1018_cast_fp16, var_1020_cast_fp16, var_1022_cast_fp16, var_1024_cast_fp16, var_1026_cast_fp16))[name = tensor("op_1028_cast_fp16")]; - tensor var_1032 = const()[name = tensor("op_1032"), val = tensor([1, 1])]; - tensor var_1034 = const()[name = tensor("op_1034"), val = tensor([1, 1])]; - tensor var_1036_pad_type_0 = const()[name = tensor("op_1036_pad_type_0"), val = tensor("custom")]; - tensor var_1036_pad_0 = const()[name = tensor("op_1036_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1036 = conv(bias = layers_3_attention_o_proj_bias, dilations = var_1034, groups = var_839, pad = var_1036_pad_0, pad_type = var_1036_pad_type_0, strides = var_1032, weight = layers_3_attention_o_proj_weight, x = var_1028_cast_fp16)[name = tensor("op_1036")]; - tensor var_1038_interleave_0 = const()[name = tensor("op_1038_interleave_0"), val = tensor(false)]; - tensor var_1038 = concat(axis = var_840, interleave = var_1038_interleave_0, values = var_1036)[name = tensor("op_1038")]; - tensor x_29 = add(x = var_834_cast_fp16, y = var_1038)[name = tensor("x_29")]; - tensor var_837_promoted = const()[name = tensor("op_837_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_838_promoted = const()[name = tensor("op_838_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_31 = clip(alpha = var_837_promoted, beta = var_838_promoted, x = x_29)[name = tensor("x_31")]; - tensor var_1043 = const()[name = tensor("op_1043"), val = tensor([1])]; - tensor mean_15 = reduce_mean(axes = var_1043, keep_dims = var_841, x = x_31)[name = tensor("mean_15")]; + tensor var_1004_cast_fp16 = einsum(equation = var_1004_equation_0, values = (var_896_11, var_980_cast_fp16))[name = tensor("op_1004_cast_fp16")]; + tensor var_1006_interleave_0 = const()[name = tensor("op_1006_interleave_0"), val = tensor(false)]; + tensor var_1006_cast_fp16 = concat(axis = var_826, interleave = var_1006_interleave_0, values = (var_982_cast_fp16, var_984_cast_fp16, var_986_cast_fp16, var_988_cast_fp16, var_990_cast_fp16, var_992_cast_fp16, var_994_cast_fp16, var_996_cast_fp16, var_998_cast_fp16, var_1000_cast_fp16, var_1002_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1006_cast_fp16")]; + tensor var_1010 = const()[name = tensor("op_1010"), val = tensor([1, 1])]; + tensor var_1012 = const()[name = tensor("op_1012"), val = tensor([1, 1])]; + tensor var_1014_pad_type_0 = const()[name = tensor("op_1014_pad_type_0"), val = tensor("custom")]; + tensor var_1014_pad_0 = const()[name = tensor("op_1014_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1014 = conv(bias = layers_3_attention_o_proj_bias, dilations = var_1012, groups = var_826, pad = var_1014_pad_0, pad_type = var_1014_pad_type_0, strides = var_1010, weight = layers_3_attention_o_proj_weight, x = var_1006_cast_fp16)[name = tensor("op_1014")]; + tensor var_1016_interleave_0 = const()[name = tensor("op_1016_interleave_0"), val = tensor(false)]; + tensor var_1016 = concat(axis = var_827, interleave = var_1016_interleave_0, values = var_1014)[name = tensor("op_1016")]; + tensor x_29 = add(x = var_820_cast_fp16, y = var_1016)[name = tensor("x_29")]; + tensor var_823_promoted = const()[name = tensor("op_823_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_824_promoted = const()[name = tensor("op_824_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_31 = clip(alpha = var_823_promoted, beta = var_824_promoted, x = x_29)[name = tensor("x_31")]; + tensor var_1021 = const()[name = tensor("op_1021"), val = tensor([1])]; + tensor mean_15 = reduce_mean(axes = var_1021, keep_dims = var_828, x = x_31)[name = tensor("mean_15")]; tensor zero_mean_15 = sub(x = x_31, y = mean_15)[name = tensor("zero_mean_15")]; - tensor var_846_promoted = const()[name = tensor("op_846_promoted"), val = tensor(0x1p+1)]; - tensor var_1046 = pow(x = zero_mean_15, y = var_846_promoted)[name = tensor("op_1046")]; - tensor var_1047 = const()[name = tensor("op_1047"), val = tensor([1])]; - tensor var_1048 = reduce_mean(axes = var_1047, keep_dims = var_841, x = var_1046)[name = tensor("op_1048")]; - tensor var_1049_to_fp16 = const()[name = tensor("op_1049_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1050_cast_fp16 = add(x = var_1048, y = var_1049_to_fp16)[name = tensor("op_1050_cast_fp16")]; + tensor var_825_promoted = const()[name = tensor("op_825_promoted"), val = tensor(0x1p+1)]; + tensor var_1024 = pow(x = zero_mean_15, y = var_825_promoted)[name = tensor("op_1024")]; + tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1])]; + tensor var_1026 = reduce_mean(axes = var_1025, keep_dims = var_828, x = var_1024)[name = tensor("op_1026")]; + tensor var_1027_to_fp16 = const()[name = tensor("op_1027_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1028_cast_fp16 = add(x = var_1026, y = var_1027_to_fp16)[name = tensor("op_1028_cast_fp16")]; tensor denom_15_epsilon_0 = const()[name = tensor("denom_15_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0, x = var_1050_cast_fp16)[name = tensor("denom_15_cast_fp16")]; - tensor var_1052_cast_fp16 = mul(x = zero_mean_15, y = denom_15_cast_fp16)[name = tensor("op_1052_cast_fp16")]; - tensor var_1054_gamma_0_to_fp16 = const()[name = tensor("op_1054_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218524928)))]; - tensor var_1054_beta_0_to_fp16 = const()[name = tensor("op_1054_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218526528)))]; - tensor var_1054_epsilon_0_to_fp16 = const()[name = tensor("op_1054_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1054_cast_fp16 = batch_norm(beta = var_1054_beta_0_to_fp16, epsilon = var_1054_epsilon_0_to_fp16, gamma = var_1054_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1052_cast_fp16)[name = tensor("op_1054_cast_fp16")]; - tensor var_1060 = const()[name = tensor("op_1060"), val = tensor([1, 1])]; - tensor var_1062 = const()[name = tensor("op_1062"), val = tensor([1, 1])]; - tensor var_1064_pad_type_0 = const()[name = tensor("op_1064_pad_type_0"), val = tensor("custom")]; - tensor var_1064_pad_0 = const()[name = tensor("op_1064_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1064 = conv(bias = layers_3_mlp_fc1_bias, dilations = var_1062, groups = var_839, pad = var_1064_pad_0, pad_type = var_1064_pad_type_0, strides = var_1060, weight = layers_3_mlp_fc1_weight, x = var_1054_cast_fp16)[name = tensor("op_1064")]; - tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; - tensor input_31 = gelu(mode = input_31_mode_0, x = var_1064)[name = tensor("input_31")]; - tensor var_1068 = const()[name = tensor("op_1068"), val = tensor([1, 1])]; - tensor var_1070 = const()[name = tensor("op_1070"), val = tensor([1, 1])]; - tensor var_1072_pad_type_0 = const()[name = tensor("op_1072_pad_type_0"), val = tensor("custom")]; - tensor var_1072_pad_0 = const()[name = tensor("op_1072_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1072 = conv(bias = layers_3_mlp_fc2_bias, dilations = var_1070, groups = var_839, pad = var_1072_pad_0, pad_type = var_1072_pad_type_0, strides = var_1068, weight = layers_3_mlp_fc2_weight, x = input_31)[name = tensor("op_1072")]; - tensor x_33 = add(x = var_1054_cast_fp16, y = var_1072)[name = tensor("x_33")]; - tensor var_837_promoted_1 = const()[name = tensor("op_837_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_838_promoted_1 = const()[name = tensor("op_838_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_35 = clip(alpha = var_837_promoted_1, beta = var_838_promoted_1, x = x_33)[name = tensor("x_35")]; - tensor var_1077 = const()[name = tensor("op_1077"), val = tensor([1])]; - tensor mean_17 = reduce_mean(axes = var_1077, keep_dims = var_841, x = x_35)[name = tensor("mean_17")]; + tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0, x = var_1028_cast_fp16)[name = tensor("denom_15_cast_fp16")]; + tensor var_1030_cast_fp16 = mul(x = zero_mean_15, y = denom_15_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1032_gamma_0_to_fp16 = const()[name = tensor("op_1032_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218524928)))]; + tensor var_1032_beta_0_to_fp16 = const()[name = tensor("op_1032_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218526528)))]; + tensor var_1032_epsilon_0_to_fp16 = const()[name = tensor("op_1032_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1032_cast_fp16 = batch_norm(beta = var_1032_beta_0_to_fp16, epsilon = var_1032_epsilon_0_to_fp16, gamma = var_1032_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1030_cast_fp16)[name = tensor("op_1032_cast_fp16")]; + tensor var_1038 = const()[name = tensor("op_1038"), val = tensor([1, 1])]; + tensor var_1040 = const()[name = tensor("op_1040"), val = tensor([1, 1])]; + tensor var_1042_pad_type_0 = const()[name = tensor("op_1042_pad_type_0"), val = tensor("custom")]; + tensor var_1042_pad_0 = const()[name = tensor("op_1042_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1042 = conv(bias = layers_3_mlp_fc1_bias, dilations = var_1040, groups = var_826, pad = var_1042_pad_0, pad_type = var_1042_pad_type_0, strides = var_1038, weight = layers_3_mlp_fc1_weight, x = var_1032_cast_fp16)[name = tensor("op_1042")]; + tensor input_127_mode_0 = const()[name = tensor("input_127_mode_0"), val = tensor("EXACT")]; + tensor input_127 = gelu(mode = input_127_mode_0, x = var_1042)[name = tensor("input_127")]; + tensor var_1046 = const()[name = tensor("op_1046"), val = tensor([1, 1])]; + tensor var_1048 = const()[name = tensor("op_1048"), val = tensor([1, 1])]; + tensor var_1050_pad_type_0 = const()[name = tensor("op_1050_pad_type_0"), val = tensor("custom")]; + tensor var_1050_pad_0 = const()[name = tensor("op_1050_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1050 = conv(bias = layers_3_mlp_fc2_bias, dilations = var_1048, groups = var_826, pad = var_1050_pad_0, pad_type = var_1050_pad_type_0, strides = var_1046, weight = layers_3_mlp_fc2_weight, x = input_127)[name = tensor("op_1050")]; + tensor x_33 = add(x = var_1032_cast_fp16, y = var_1050)[name = tensor("x_33")]; + tensor var_823_promoted_1 = const()[name = tensor("op_823_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_824_promoted_1 = const()[name = tensor("op_824_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_35 = clip(alpha = var_823_promoted_1, beta = var_824_promoted_1, x = x_33)[name = tensor("x_35")]; + tensor var_1055 = const()[name = tensor("op_1055"), val = tensor([1])]; + tensor mean_17 = reduce_mean(axes = var_1055, keep_dims = var_828, x = x_35)[name = tensor("mean_17")]; tensor zero_mean_17 = sub(x = x_35, y = mean_17)[name = tensor("zero_mean_17")]; - tensor var_846_promoted_1 = const()[name = tensor("op_846_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1080 = pow(x = zero_mean_17, y = var_846_promoted_1)[name = tensor("op_1080")]; - tensor var_1081 = const()[name = tensor("op_1081"), val = tensor([1])]; - tensor var_1082 = reduce_mean(axes = var_1081, keep_dims = var_841, x = var_1080)[name = tensor("op_1082")]; - tensor var_1083_to_fp16 = const()[name = tensor("op_1083_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1084_cast_fp16 = add(x = var_1082, y = var_1083_to_fp16)[name = tensor("op_1084_cast_fp16")]; + tensor var_825_promoted_1 = const()[name = tensor("op_825_promoted_1"), val = tensor(0x1p+1)]; + tensor var_1058 = pow(x = zero_mean_17, y = var_825_promoted_1)[name = tensor("op_1058")]; + tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1])]; + tensor var_1060 = reduce_mean(axes = var_1059, keep_dims = var_828, x = var_1058)[name = tensor("op_1060")]; + tensor var_1061_to_fp16 = const()[name = tensor("op_1061_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1062_cast_fp16 = add(x = var_1060, y = var_1061_to_fp16)[name = tensor("op_1062_cast_fp16")]; tensor denom_17_epsilon_0 = const()[name = tensor("denom_17_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0, x = var_1084_cast_fp16)[name = tensor("denom_17_cast_fp16")]; - tensor var_1086_cast_fp16 = mul(x = zero_mean_17, y = denom_17_cast_fp16)[name = tensor("op_1086_cast_fp16")]; - tensor var_1088_gamma_0_to_fp16 = const()[name = tensor("op_1088_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218528128)))]; - tensor var_1088_beta_0_to_fp16 = const()[name = tensor("op_1088_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218529728)))]; - tensor var_1088_epsilon_0_to_fp16 = const()[name = tensor("op_1088_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1088_cast_fp16 = batch_norm(beta = var_1088_beta_0_to_fp16, epsilon = var_1088_epsilon_0_to_fp16, gamma = var_1088_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1086_cast_fp16)[name = tensor("op_1088_cast_fp16")]; - tensor var_1093 = const()[name = tensor("op_1093"), val = tensor(1)]; - tensor var_1094 = const()[name = tensor("op_1094"), val = tensor(0)]; - tensor var_1095 = const()[name = tensor("op_1095"), val = tensor(true)]; - tensor var_1120 = const()[name = tensor("op_1120"), val = tensor([1, 1])]; - tensor var_1122 = const()[name = tensor("op_1122"), val = tensor([1, 1])]; - tensor var_1124_pad_type_0 = const()[name = tensor("op_1124_pad_type_0"), val = tensor("custom")]; - tensor var_1124_pad_0 = const()[name = tensor("op_1124_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1124 = conv(bias = layers_4_attention_q_proj_bias, dilations = var_1122, groups = var_1093, pad = var_1124_pad_0, pad_type = var_1124_pad_type_0, strides = var_1120, weight = layers_4_attention_q_proj_weight, x = var_1088_cast_fp16)[name = tensor("op_1124")]; - tensor var_1125 = const()[name = tensor("op_1125"), val = tensor([1, 64, 12, 512])]; - tensor var_1126 = reshape(shape = var_1125, x = var_1124)[name = tensor("op_1126")]; - tensor var_1129 = const()[name = tensor("op_1129"), val = tensor([1, 1])]; - tensor var_1131 = const()[name = tensor("op_1131"), val = tensor([1, 1])]; - tensor var_1133_pad_type_0 = const()[name = tensor("op_1133_pad_type_0"), val = tensor("custom")]; - tensor var_1133_pad_0 = const()[name = tensor("op_1133_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1133 = conv(bias = layers_4_attention_k_proj_bias, dilations = var_1131, groups = var_1093, pad = var_1133_pad_0, pad_type = var_1133_pad_type_0, strides = var_1129, weight = layers_4_attention_k_proj_weight, x = var_1088_cast_fp16)[name = tensor("op_1133")]; - tensor var_1134 = const()[name = tensor("op_1134"), val = tensor([1, 64, 12, 512])]; - tensor ks_9 = reshape(shape = var_1134, x = var_1133)[name = tensor("ks_9")]; - tensor var_1138 = const()[name = tensor("op_1138"), val = tensor([1, 1])]; - tensor var_1140 = const()[name = tensor("op_1140"), val = tensor([1, 1])]; - tensor var_1142_pad_type_0 = const()[name = tensor("op_1142_pad_type_0"), val = tensor("custom")]; - tensor var_1142_pad_0 = const()[name = tensor("op_1142_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1142 = conv(bias = layers_4_attention_v_proj_bias, dilations = var_1140, groups = var_1093, pad = var_1142_pad_0, pad_type = var_1142_pad_type_0, strides = var_1138, weight = layers_4_attention_v_proj_weight, x = var_1088_cast_fp16)[name = tensor("op_1142")]; - tensor var_1143 = const()[name = tensor("op_1143"), val = tensor([1, 64, 12, 512])]; - tensor var_1144 = reshape(shape = var_1143, x = var_1142)[name = tensor("op_1144")]; - tensor tile_22 = const()[name = tensor("tile_22"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1145_axis_0 = const()[name = tensor("op_1145_axis_0"), val = tensor(2)]; - tensor var_1145_0, tensor var_1145_1, tensor var_1145_2, tensor var_1145_3, tensor var_1145_4, tensor var_1145_5, tensor var_1145_6, tensor var_1145_7, tensor var_1145_8, tensor var_1145_9, tensor var_1145_10, tensor var_1145_11 = split(axis = var_1145_axis_0, split_sizes = tile_22, x = var_1126)[name = tensor("op_1145")]; - tensor var_1158_perm_0 = const()[name = tensor("op_1158_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_23 = const()[name = tensor("tile_23"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1159_axis_0 = const()[name = tensor("op_1159_axis_0"), val = tensor(2)]; - tensor transpose_7 = transpose(perm = var_1158_perm_0, x = ks_9)[name = tensor("transpose_7")]; - tensor var_1159_0, tensor var_1159_1, tensor var_1159_2, tensor var_1159_3, tensor var_1159_4, tensor var_1159_5, tensor var_1159_6, tensor var_1159_7, tensor var_1159_8, tensor var_1159_9, tensor var_1159_10, tensor var_1159_11 = split(axis = var_1159_axis_0, split_sizes = tile_23, x = transpose_7)[name = tensor("op_1159")]; - tensor tile_24 = const()[name = tensor("tile_24"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1172_axis_0 = const()[name = tensor("op_1172_axis_0"), val = tensor(2)]; - tensor var_1172_0, tensor var_1172_1, tensor var_1172_2, tensor var_1172_3, tensor var_1172_4, tensor var_1172_5, tensor var_1172_6, tensor var_1172_7, tensor var_1172_8, tensor var_1172_9, tensor var_1172_10, tensor var_1172_11 = split(axis = var_1172_axis_0, split_sizes = tile_24, x = var_1144)[name = tensor("op_1172")]; - tensor var_1186_equation_0 = const()[name = tensor("op_1186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1186 = einsum(equation = var_1186_equation_0, values = (var_1159_0, var_1145_0))[name = tensor("op_1186")]; - tensor var_1187_to_fp16 = const()[name = tensor("op_1187_to_fp16"), val = tensor(0x1p-3)]; - tensor w_97_cast_fp16 = mul(x = var_1186, y = var_1187_to_fp16)[name = tensor("w_97_cast_fp16")]; - tensor var_1190_equation_0 = const()[name = tensor("op_1190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1190 = einsum(equation = var_1190_equation_0, values = (var_1159_1, var_1145_1))[name = tensor("op_1190")]; - tensor var_1191_to_fp16 = const()[name = tensor("op_1191_to_fp16"), val = tensor(0x1p-3)]; - tensor w_99_cast_fp16 = mul(x = var_1190, y = var_1191_to_fp16)[name = tensor("w_99_cast_fp16")]; - tensor var_1194_equation_0 = const()[name = tensor("op_1194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1194 = einsum(equation = var_1194_equation_0, values = (var_1159_2, var_1145_2))[name = tensor("op_1194")]; - tensor var_1195_to_fp16 = const()[name = tensor("op_1195_to_fp16"), val = tensor(0x1p-3)]; - tensor w_101_cast_fp16 = mul(x = var_1194, y = var_1195_to_fp16)[name = tensor("w_101_cast_fp16")]; - tensor var_1198_equation_0 = const()[name = tensor("op_1198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1198 = einsum(equation = var_1198_equation_0, values = (var_1159_3, var_1145_3))[name = tensor("op_1198")]; - tensor var_1199_to_fp16 = const()[name = tensor("op_1199_to_fp16"), val = tensor(0x1p-3)]; - tensor w_103_cast_fp16 = mul(x = var_1198, y = var_1199_to_fp16)[name = tensor("w_103_cast_fp16")]; - tensor var_1202_equation_0 = const()[name = tensor("op_1202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1202 = einsum(equation = var_1202_equation_0, values = (var_1159_4, var_1145_4))[name = tensor("op_1202")]; - tensor var_1203_to_fp16 = const()[name = tensor("op_1203_to_fp16"), val = tensor(0x1p-3)]; - tensor w_105_cast_fp16 = mul(x = var_1202, y = var_1203_to_fp16)[name = tensor("w_105_cast_fp16")]; - tensor var_1206_equation_0 = const()[name = tensor("op_1206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1206 = einsum(equation = var_1206_equation_0, values = (var_1159_5, var_1145_5))[name = tensor("op_1206")]; - tensor var_1207_to_fp16 = const()[name = tensor("op_1207_to_fp16"), val = tensor(0x1p-3)]; - tensor w_107_cast_fp16 = mul(x = var_1206, y = var_1207_to_fp16)[name = tensor("w_107_cast_fp16")]; - tensor var_1210_equation_0 = const()[name = tensor("op_1210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1210 = einsum(equation = var_1210_equation_0, values = (var_1159_6, var_1145_6))[name = tensor("op_1210")]; - tensor var_1211_to_fp16 = const()[name = tensor("op_1211_to_fp16"), val = tensor(0x1p-3)]; - tensor w_109_cast_fp16 = mul(x = var_1210, y = var_1211_to_fp16)[name = tensor("w_109_cast_fp16")]; - tensor var_1214_equation_0 = const()[name = tensor("op_1214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1214 = einsum(equation = var_1214_equation_0, values = (var_1159_7, var_1145_7))[name = tensor("op_1214")]; - tensor var_1215_to_fp16 = const()[name = tensor("op_1215_to_fp16"), val = tensor(0x1p-3)]; - tensor w_111_cast_fp16 = mul(x = var_1214, y = var_1215_to_fp16)[name = tensor("w_111_cast_fp16")]; - tensor var_1218_equation_0 = const()[name = tensor("op_1218_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1218 = einsum(equation = var_1218_equation_0, values = (var_1159_8, var_1145_8))[name = tensor("op_1218")]; - tensor var_1219_to_fp16 = const()[name = tensor("op_1219_to_fp16"), val = tensor(0x1p-3)]; - tensor w_113_cast_fp16 = mul(x = var_1218, y = var_1219_to_fp16)[name = tensor("w_113_cast_fp16")]; - tensor var_1222_equation_0 = const()[name = tensor("op_1222_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1222 = einsum(equation = var_1222_equation_0, values = (var_1159_9, var_1145_9))[name = tensor("op_1222")]; - tensor var_1223_to_fp16 = const()[name = tensor("op_1223_to_fp16"), val = tensor(0x1p-3)]; - tensor w_115_cast_fp16 = mul(x = var_1222, y = var_1223_to_fp16)[name = tensor("w_115_cast_fp16")]; - tensor var_1226_equation_0 = const()[name = tensor("op_1226_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1226 = einsum(equation = var_1226_equation_0, values = (var_1159_10, var_1145_10))[name = tensor("op_1226")]; - tensor var_1227_to_fp16 = const()[name = tensor("op_1227_to_fp16"), val = tensor(0x1p-3)]; - tensor w_117_cast_fp16 = mul(x = var_1226, y = var_1227_to_fp16)[name = tensor("w_117_cast_fp16")]; - tensor var_1230_equation_0 = const()[name = tensor("op_1230_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1230 = einsum(equation = var_1230_equation_0, values = (var_1159_11, var_1145_11))[name = tensor("op_1230")]; - tensor var_1231_to_fp16 = const()[name = tensor("op_1231_to_fp16"), val = tensor(0x1p-3)]; - tensor w_119_cast_fp16 = mul(x = var_1230, y = var_1231_to_fp16)[name = tensor("w_119_cast_fp16")]; - tensor var_1233_cast_fp16 = add(x = w_97_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1233_cast_fp16")]; - tensor var_1234_cast_fp16 = softmax(axis = var_1093, x = var_1233_cast_fp16)[name = tensor("op_1234_cast_fp16")]; - tensor var_1235_cast_fp16 = add(x = w_99_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1235_cast_fp16")]; - tensor var_1236_cast_fp16 = softmax(axis = var_1093, x = var_1235_cast_fp16)[name = tensor("op_1236_cast_fp16")]; - tensor var_1237_cast_fp16 = add(x = w_101_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1237_cast_fp16")]; - tensor var_1238_cast_fp16 = softmax(axis = var_1093, x = var_1237_cast_fp16)[name = tensor("op_1238_cast_fp16")]; - tensor var_1239_cast_fp16 = add(x = w_103_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1239_cast_fp16")]; - tensor var_1240_cast_fp16 = softmax(axis = var_1093, x = var_1239_cast_fp16)[name = tensor("op_1240_cast_fp16")]; - tensor var_1241_cast_fp16 = add(x = w_105_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1241_cast_fp16")]; - tensor var_1242_cast_fp16 = softmax(axis = var_1093, x = var_1241_cast_fp16)[name = tensor("op_1242_cast_fp16")]; - tensor var_1243_cast_fp16 = add(x = w_107_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1243_cast_fp16")]; - tensor var_1244_cast_fp16 = softmax(axis = var_1093, x = var_1243_cast_fp16)[name = tensor("op_1244_cast_fp16")]; - tensor var_1245_cast_fp16 = add(x = w_109_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1245_cast_fp16")]; - tensor var_1246_cast_fp16 = softmax(axis = var_1093, x = var_1245_cast_fp16)[name = tensor("op_1246_cast_fp16")]; - tensor var_1247_cast_fp16 = add(x = w_111_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1247_cast_fp16")]; - tensor var_1248_cast_fp16 = softmax(axis = var_1093, x = var_1247_cast_fp16)[name = tensor("op_1248_cast_fp16")]; - tensor var_1249_cast_fp16 = add(x = w_113_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1249_cast_fp16")]; - tensor var_1250_cast_fp16 = softmax(axis = var_1093, x = var_1249_cast_fp16)[name = tensor("op_1250_cast_fp16")]; - tensor var_1251_cast_fp16 = add(x = w_115_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1251_cast_fp16")]; - tensor var_1252_cast_fp16 = softmax(axis = var_1093, x = var_1251_cast_fp16)[name = tensor("op_1252_cast_fp16")]; - tensor var_1253_cast_fp16 = add(x = w_117_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1253_cast_fp16")]; - tensor var_1254_cast_fp16 = softmax(axis = var_1093, x = var_1253_cast_fp16)[name = tensor("op_1254_cast_fp16")]; - tensor var_1255_cast_fp16 = add(x = w_119_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1255_cast_fp16")]; - tensor var_1256_cast_fp16 = softmax(axis = var_1093, x = var_1255_cast_fp16)[name = tensor("op_1256_cast_fp16")]; - tensor var_1258_equation_0 = const()[name = tensor("op_1258_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1258_cast_fp16 = einsum(equation = var_1258_equation_0, values = (var_1172_0, var_1234_cast_fp16))[name = tensor("op_1258_cast_fp16")]; - tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1172_1, var_1236_cast_fp16))[name = tensor("op_1260_cast_fp16")]; - tensor var_1262_equation_0 = const()[name = tensor("op_1262_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1262_cast_fp16 = einsum(equation = var_1262_equation_0, values = (var_1172_2, var_1238_cast_fp16))[name = tensor("op_1262_cast_fp16")]; - tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1172_3, var_1240_cast_fp16))[name = tensor("op_1264_cast_fp16")]; - tensor var_1266_equation_0 = const()[name = tensor("op_1266_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1266_cast_fp16 = einsum(equation = var_1266_equation_0, values = (var_1172_4, var_1242_cast_fp16))[name = tensor("op_1266_cast_fp16")]; - tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1172_5, var_1244_cast_fp16))[name = tensor("op_1268_cast_fp16")]; - tensor var_1270_equation_0 = const()[name = tensor("op_1270_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1270_cast_fp16 = einsum(equation = var_1270_equation_0, values = (var_1172_6, var_1246_cast_fp16))[name = tensor("op_1270_cast_fp16")]; - tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1172_7, var_1248_cast_fp16))[name = tensor("op_1272_cast_fp16")]; - tensor var_1274_equation_0 = const()[name = tensor("op_1274_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1274_cast_fp16 = einsum(equation = var_1274_equation_0, values = (var_1172_8, var_1250_cast_fp16))[name = tensor("op_1274_cast_fp16")]; - tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1172_9, var_1252_cast_fp16))[name = tensor("op_1276_cast_fp16")]; - tensor var_1278_equation_0 = const()[name = tensor("op_1278_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1278_cast_fp16 = einsum(equation = var_1278_equation_0, values = (var_1172_10, var_1254_cast_fp16))[name = tensor("op_1278_cast_fp16")]; - tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1172_11, var_1256_cast_fp16))[name = tensor("op_1280_cast_fp16")]; - tensor var_1282_interleave_0 = const()[name = tensor("op_1282_interleave_0"), val = tensor(false)]; - tensor var_1282_cast_fp16 = concat(axis = var_1093, interleave = var_1282_interleave_0, values = (var_1258_cast_fp16, var_1260_cast_fp16, var_1262_cast_fp16, var_1264_cast_fp16, var_1266_cast_fp16, var_1268_cast_fp16, var_1270_cast_fp16, var_1272_cast_fp16, var_1274_cast_fp16, var_1276_cast_fp16, var_1278_cast_fp16, var_1280_cast_fp16))[name = tensor("op_1282_cast_fp16")]; - tensor var_1286 = const()[name = tensor("op_1286"), val = tensor([1, 1])]; - tensor var_1288 = const()[name = tensor("op_1288"), val = tensor([1, 1])]; - tensor var_1290_pad_type_0 = const()[name = tensor("op_1290_pad_type_0"), val = tensor("custom")]; - tensor var_1290_pad_0 = const()[name = tensor("op_1290_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1290 = conv(bias = layers_4_attention_o_proj_bias, dilations = var_1288, groups = var_1093, pad = var_1290_pad_0, pad_type = var_1290_pad_type_0, strides = var_1286, weight = layers_4_attention_o_proj_weight, x = var_1282_cast_fp16)[name = tensor("op_1290")]; - tensor var_1292_interleave_0 = const()[name = tensor("op_1292_interleave_0"), val = tensor(false)]; - tensor var_1292 = concat(axis = var_1094, interleave = var_1292_interleave_0, values = var_1290)[name = tensor("op_1292")]; - tensor x_37 = add(x = var_1088_cast_fp16, y = var_1292)[name = tensor("x_37")]; - tensor var_1091_promoted = const()[name = tensor("op_1091_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1092_promoted = const()[name = tensor("op_1092_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_39 = clip(alpha = var_1091_promoted, beta = var_1092_promoted, x = x_37)[name = tensor("x_39")]; - tensor var_1297 = const()[name = tensor("op_1297"), val = tensor([1])]; - tensor mean_19 = reduce_mean(axes = var_1297, keep_dims = var_1095, x = x_39)[name = tensor("mean_19")]; + tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0, x = var_1062_cast_fp16)[name = tensor("denom_17_cast_fp16")]; + tensor var_1064_cast_fp16 = mul(x = zero_mean_17, y = denom_17_cast_fp16)[name = tensor("op_1064_cast_fp16")]; + tensor var_1066_gamma_0_to_fp16 = const()[name = tensor("op_1066_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218528128)))]; + tensor var_1066_beta_0_to_fp16 = const()[name = tensor("op_1066_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218529728)))]; + tensor var_1066_epsilon_0_to_fp16 = const()[name = tensor("op_1066_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1066_cast_fp16 = batch_norm(beta = var_1066_beta_0_to_fp16, epsilon = var_1066_epsilon_0_to_fp16, gamma = var_1066_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1064_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1072 = const()[name = tensor("op_1072"), val = tensor(1)]; + tensor var_1073 = const()[name = tensor("op_1073"), val = tensor(0)]; + tensor var_1074 = const()[name = tensor("op_1074"), val = tensor(true)]; + tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([1, 1])]; + tensor var_1098 = const()[name = tensor("op_1098"), val = tensor([1, 1])]; + tensor var_1100_pad_type_0 = const()[name = tensor("op_1100_pad_type_0"), val = tensor("custom")]; + tensor var_1100_pad_0 = const()[name = tensor("op_1100_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1100 = conv(bias = layers_4_attention_q_proj_bias, dilations = var_1098, groups = var_1072, pad = var_1100_pad_0, pad_type = var_1100_pad_type_0, strides = var_1096, weight = layers_4_attention_q_proj_weight, x = var_1066_cast_fp16)[name = tensor("op_1100")]; + tensor var_1103 = const()[name = tensor("op_1103"), val = tensor([1, 1])]; + tensor var_1105 = const()[name = tensor("op_1105"), val = tensor([1, 1])]; + tensor ks_9_pad_type_0 = const()[name = tensor("ks_9_pad_type_0"), val = tensor("custom")]; + tensor ks_9_pad_0 = const()[name = tensor("ks_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_9 = conv(bias = layers_4_attention_k_proj_bias, dilations = var_1105, groups = var_1072, pad = ks_9_pad_0, pad_type = ks_9_pad_type_0, strides = var_1103, weight = layers_4_attention_k_proj_weight, x = var_1066_cast_fp16)[name = tensor("ks_9")]; + tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 1])]; + tensor var_1112 = const()[name = tensor("op_1112"), val = tensor([1, 1])]; + tensor var_1114_pad_type_0 = const()[name = tensor("op_1114_pad_type_0"), val = tensor("custom")]; + tensor var_1114_pad_0 = const()[name = tensor("op_1114_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1114 = conv(bias = layers_4_attention_v_proj_bias, dilations = var_1112, groups = var_1072, pad = var_1114_pad_0, pad_type = var_1114_pad_type_0, strides = var_1110, weight = layers_4_attention_v_proj_weight, x = var_1066_cast_fp16)[name = tensor("op_1114")]; + tensor tile_22 = const()[name = tensor("tile_22"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1115_axis_0 = const()[name = tensor("op_1115_axis_0"), val = tensor(1)]; + tensor var_1115_0, tensor var_1115_1, tensor var_1115_2, tensor var_1115_3, tensor var_1115_4, tensor var_1115_5, tensor var_1115_6, tensor var_1115_7, tensor var_1115_8, tensor var_1115_9, tensor var_1115_10, tensor var_1115_11 = split(axis = var_1115_axis_0, split_sizes = tile_22, x = var_1100)[name = tensor("op_1115")]; + tensor var_1128_perm_0 = const()[name = tensor("op_1128_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_23 = const()[name = tensor("tile_23"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1129_axis_0 = const()[name = tensor("op_1129_axis_0"), val = tensor(3)]; + tensor transpose_7 = transpose(perm = var_1128_perm_0, x = ks_9)[name = tensor("transpose_7")]; + tensor var_1129_0, tensor var_1129_1, tensor var_1129_2, tensor var_1129_3, tensor var_1129_4, tensor var_1129_5, tensor var_1129_6, tensor var_1129_7, tensor var_1129_8, tensor var_1129_9, tensor var_1129_10, tensor var_1129_11 = split(axis = var_1129_axis_0, split_sizes = tile_23, x = transpose_7)[name = tensor("op_1129")]; + tensor tile_24 = const()[name = tensor("tile_24"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1142_axis_0 = const()[name = tensor("op_1142_axis_0"), val = tensor(1)]; + tensor var_1142_0, tensor var_1142_1, tensor var_1142_2, tensor var_1142_3, tensor var_1142_4, tensor var_1142_5, tensor var_1142_6, tensor var_1142_7, tensor var_1142_8, tensor var_1142_9, tensor var_1142_10, tensor var_1142_11 = split(axis = var_1142_axis_0, split_sizes = tile_24, x = var_1114)[name = tensor("op_1142")]; + tensor var_1156_equation_0 = const()[name = tensor("op_1156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1156 = einsum(equation = var_1156_equation_0, values = (var_1129_0, var_1115_0))[name = tensor("op_1156")]; + tensor var_1157_to_fp16 = const()[name = tensor("op_1157_to_fp16"), val = tensor(0x1p-3)]; + tensor w_97_cast_fp16 = mul(x = var_1156, y = var_1157_to_fp16)[name = tensor("w_97_cast_fp16")]; + tensor var_1160_equation_0 = const()[name = tensor("op_1160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1160 = einsum(equation = var_1160_equation_0, values = (var_1129_1, var_1115_1))[name = tensor("op_1160")]; + tensor var_1161_to_fp16 = const()[name = tensor("op_1161_to_fp16"), val = tensor(0x1p-3)]; + tensor w_99_cast_fp16 = mul(x = var_1160, y = var_1161_to_fp16)[name = tensor("w_99_cast_fp16")]; + tensor var_1164_equation_0 = const()[name = tensor("op_1164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1164 = einsum(equation = var_1164_equation_0, values = (var_1129_2, var_1115_2))[name = tensor("op_1164")]; + tensor var_1165_to_fp16 = const()[name = tensor("op_1165_to_fp16"), val = tensor(0x1p-3)]; + tensor w_101_cast_fp16 = mul(x = var_1164, y = var_1165_to_fp16)[name = tensor("w_101_cast_fp16")]; + tensor var_1168_equation_0 = const()[name = tensor("op_1168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1168 = einsum(equation = var_1168_equation_0, values = (var_1129_3, var_1115_3))[name = tensor("op_1168")]; + tensor var_1169_to_fp16 = const()[name = tensor("op_1169_to_fp16"), val = tensor(0x1p-3)]; + tensor w_103_cast_fp16 = mul(x = var_1168, y = var_1169_to_fp16)[name = tensor("w_103_cast_fp16")]; + tensor var_1172_equation_0 = const()[name = tensor("op_1172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1172 = einsum(equation = var_1172_equation_0, values = (var_1129_4, var_1115_4))[name = tensor("op_1172")]; + tensor var_1173_to_fp16 = const()[name = tensor("op_1173_to_fp16"), val = tensor(0x1p-3)]; + tensor w_105_cast_fp16 = mul(x = var_1172, y = var_1173_to_fp16)[name = tensor("w_105_cast_fp16")]; + tensor var_1176_equation_0 = const()[name = tensor("op_1176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1176 = einsum(equation = var_1176_equation_0, values = (var_1129_5, var_1115_5))[name = tensor("op_1176")]; + tensor var_1177_to_fp16 = const()[name = tensor("op_1177_to_fp16"), val = tensor(0x1p-3)]; + tensor w_107_cast_fp16 = mul(x = var_1176, y = var_1177_to_fp16)[name = tensor("w_107_cast_fp16")]; + tensor var_1180_equation_0 = const()[name = tensor("op_1180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1180 = einsum(equation = var_1180_equation_0, values = (var_1129_6, var_1115_6))[name = tensor("op_1180")]; + tensor var_1181_to_fp16 = const()[name = tensor("op_1181_to_fp16"), val = tensor(0x1p-3)]; + tensor w_109_cast_fp16 = mul(x = var_1180, y = var_1181_to_fp16)[name = tensor("w_109_cast_fp16")]; + tensor var_1184_equation_0 = const()[name = tensor("op_1184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1184 = einsum(equation = var_1184_equation_0, values = (var_1129_7, var_1115_7))[name = tensor("op_1184")]; + tensor var_1185_to_fp16 = const()[name = tensor("op_1185_to_fp16"), val = tensor(0x1p-3)]; + tensor w_111_cast_fp16 = mul(x = var_1184, y = var_1185_to_fp16)[name = tensor("w_111_cast_fp16")]; + tensor var_1188_equation_0 = const()[name = tensor("op_1188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1188 = einsum(equation = var_1188_equation_0, values = (var_1129_8, var_1115_8))[name = tensor("op_1188")]; + tensor var_1189_to_fp16 = const()[name = tensor("op_1189_to_fp16"), val = tensor(0x1p-3)]; + tensor w_113_cast_fp16 = mul(x = var_1188, y = var_1189_to_fp16)[name = tensor("w_113_cast_fp16")]; + tensor var_1192_equation_0 = const()[name = tensor("op_1192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1192 = einsum(equation = var_1192_equation_0, values = (var_1129_9, var_1115_9))[name = tensor("op_1192")]; + tensor var_1193_to_fp16 = const()[name = tensor("op_1193_to_fp16"), val = tensor(0x1p-3)]; + tensor w_115_cast_fp16 = mul(x = var_1192, y = var_1193_to_fp16)[name = tensor("w_115_cast_fp16")]; + tensor var_1196_equation_0 = const()[name = tensor("op_1196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1196 = einsum(equation = var_1196_equation_0, values = (var_1129_10, var_1115_10))[name = tensor("op_1196")]; + tensor var_1197_to_fp16 = const()[name = tensor("op_1197_to_fp16"), val = tensor(0x1p-3)]; + tensor w_117_cast_fp16 = mul(x = var_1196, y = var_1197_to_fp16)[name = tensor("w_117_cast_fp16")]; + tensor var_1200_equation_0 = const()[name = tensor("op_1200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1200 = einsum(equation = var_1200_equation_0, values = (var_1129_11, var_1115_11))[name = tensor("op_1200")]; + tensor var_1201_to_fp16 = const()[name = tensor("op_1201_to_fp16"), val = tensor(0x1p-3)]; + tensor w_119_cast_fp16 = mul(x = var_1200, y = var_1201_to_fp16)[name = tensor("w_119_cast_fp16")]; + tensor input_131_cast_fp16 = add(x = w_97_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_131_cast_fp16")]; + tensor var_1204_cast_fp16 = softmax(axis = var_1072, x = input_131_cast_fp16)[name = tensor("op_1204_cast_fp16")]; + tensor input_133_cast_fp16 = add(x = w_99_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor var_1206_cast_fp16 = softmax(axis = var_1072, x = input_133_cast_fp16)[name = tensor("op_1206_cast_fp16")]; + tensor input_135_cast_fp16 = add(x = w_101_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor var_1208_cast_fp16 = softmax(axis = var_1072, x = input_135_cast_fp16)[name = tensor("op_1208_cast_fp16")]; + tensor input_137_cast_fp16 = add(x = w_103_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_137_cast_fp16")]; + tensor var_1210_cast_fp16 = softmax(axis = var_1072, x = input_137_cast_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor input_139_cast_fp16 = add(x = w_105_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor var_1212_cast_fp16 = softmax(axis = var_1072, x = input_139_cast_fp16)[name = tensor("op_1212_cast_fp16")]; + tensor input_141_cast_fp16 = add(x = w_107_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_141_cast_fp16")]; + tensor var_1214_cast_fp16 = softmax(axis = var_1072, x = input_141_cast_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor input_143_cast_fp16 = add(x = w_109_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_143_cast_fp16")]; + tensor var_1216_cast_fp16 = softmax(axis = var_1072, x = input_143_cast_fp16)[name = tensor("op_1216_cast_fp16")]; + tensor input_145_cast_fp16 = add(x = w_111_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_145_cast_fp16")]; + tensor var_1218_cast_fp16 = softmax(axis = var_1072, x = input_145_cast_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor input_147_cast_fp16 = add(x = w_113_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_147_cast_fp16")]; + tensor var_1220_cast_fp16 = softmax(axis = var_1072, x = input_147_cast_fp16)[name = tensor("op_1220_cast_fp16")]; + tensor input_149_cast_fp16 = add(x = w_115_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor var_1222_cast_fp16 = softmax(axis = var_1072, x = input_149_cast_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor input_151_cast_fp16 = add(x = w_117_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_151_cast_fp16")]; + tensor var_1224_cast_fp16 = softmax(axis = var_1072, x = input_151_cast_fp16)[name = tensor("op_1224_cast_fp16")]; + tensor input_153_cast_fp16 = add(x = w_119_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_153_cast_fp16")]; + tensor var_1226_cast_fp16 = softmax(axis = var_1072, x = input_153_cast_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1142_0, var_1204_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1230_equation_0 = const()[name = tensor("op_1230_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1230_cast_fp16 = einsum(equation = var_1230_equation_0, values = (var_1142_1, var_1206_cast_fp16))[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1142_2, var_1208_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1234_equation_0 = const()[name = tensor("op_1234_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1234_cast_fp16 = einsum(equation = var_1234_equation_0, values = (var_1142_3, var_1210_cast_fp16))[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1142_4, var_1212_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1238_equation_0 = const()[name = tensor("op_1238_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_1142_5, var_1214_cast_fp16))[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1142_6, var_1216_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1242_equation_0 = const()[name = tensor("op_1242_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_1142_7, var_1218_cast_fp16))[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1142_8, var_1220_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1246_equation_0 = const()[name = tensor("op_1246_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_1142_9, var_1222_cast_fp16))[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1142_10, var_1224_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1250_equation_0 = const()[name = tensor("op_1250_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_1142_11, var_1226_cast_fp16))[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_interleave_0 = const()[name = tensor("op_1252_interleave_0"), val = tensor(false)]; + tensor var_1252_cast_fp16 = concat(axis = var_1072, interleave = var_1252_interleave_0, values = (var_1228_cast_fp16, var_1230_cast_fp16, var_1232_cast_fp16, var_1234_cast_fp16, var_1236_cast_fp16, var_1238_cast_fp16, var_1240_cast_fp16, var_1242_cast_fp16, var_1244_cast_fp16, var_1246_cast_fp16, var_1248_cast_fp16, var_1250_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1256 = const()[name = tensor("op_1256"), val = tensor([1, 1])]; + tensor var_1258 = const()[name = tensor("op_1258"), val = tensor([1, 1])]; + tensor var_1260_pad_type_0 = const()[name = tensor("op_1260_pad_type_0"), val = tensor("custom")]; + tensor var_1260_pad_0 = const()[name = tensor("op_1260_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1260 = conv(bias = layers_4_attention_o_proj_bias, dilations = var_1258, groups = var_1072, pad = var_1260_pad_0, pad_type = var_1260_pad_type_0, strides = var_1256, weight = layers_4_attention_o_proj_weight, x = var_1252_cast_fp16)[name = tensor("op_1260")]; + tensor var_1262_interleave_0 = const()[name = tensor("op_1262_interleave_0"), val = tensor(false)]; + tensor var_1262 = concat(axis = var_1073, interleave = var_1262_interleave_0, values = var_1260)[name = tensor("op_1262")]; + tensor x_37 = add(x = var_1066_cast_fp16, y = var_1262)[name = tensor("x_37")]; + tensor var_1069_promoted = const()[name = tensor("op_1069_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_1070_promoted = const()[name = tensor("op_1070_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_39 = clip(alpha = var_1069_promoted, beta = var_1070_promoted, x = x_37)[name = tensor("x_39")]; + tensor var_1267 = const()[name = tensor("op_1267"), val = tensor([1])]; + tensor mean_19 = reduce_mean(axes = var_1267, keep_dims = var_1074, x = x_39)[name = tensor("mean_19")]; tensor zero_mean_19 = sub(x = x_39, y = mean_19)[name = tensor("zero_mean_19")]; - tensor var_1100_promoted = const()[name = tensor("op_1100_promoted"), val = tensor(0x1p+1)]; - tensor var_1300 = pow(x = zero_mean_19, y = var_1100_promoted)[name = tensor("op_1300")]; - tensor var_1301 = const()[name = tensor("op_1301"), val = tensor([1])]; - tensor var_1302 = reduce_mean(axes = var_1301, keep_dims = var_1095, x = var_1300)[name = tensor("op_1302")]; - tensor var_1303_to_fp16 = const()[name = tensor("op_1303_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1304_cast_fp16 = add(x = var_1302, y = var_1303_to_fp16)[name = tensor("op_1304_cast_fp16")]; + tensor var_1071_promoted = const()[name = tensor("op_1071_promoted"), val = tensor(0x1p+1)]; + tensor var_1270 = pow(x = zero_mean_19, y = var_1071_promoted)[name = tensor("op_1270")]; + tensor var_1271 = const()[name = tensor("op_1271"), val = tensor([1])]; + tensor var_1272 = reduce_mean(axes = var_1271, keep_dims = var_1074, x = var_1270)[name = tensor("op_1272")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1274_cast_fp16 = add(x = var_1272, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; tensor denom_19_epsilon_0 = const()[name = tensor("denom_19_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0, x = var_1304_cast_fp16)[name = tensor("denom_19_cast_fp16")]; - tensor var_1306_cast_fp16 = mul(x = zero_mean_19, y = denom_19_cast_fp16)[name = tensor("op_1306_cast_fp16")]; - tensor var_1308_gamma_0_to_fp16 = const()[name = tensor("op_1308_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218531328)))]; - tensor var_1308_beta_0_to_fp16 = const()[name = tensor("op_1308_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218532928)))]; - tensor var_1308_epsilon_0_to_fp16 = const()[name = tensor("op_1308_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1308_cast_fp16 = batch_norm(beta = var_1308_beta_0_to_fp16, epsilon = var_1308_epsilon_0_to_fp16, gamma = var_1308_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1306_cast_fp16)[name = tensor("op_1308_cast_fp16")]; - tensor var_1314 = const()[name = tensor("op_1314"), val = tensor([1, 1])]; - tensor var_1316 = const()[name = tensor("op_1316"), val = tensor([1, 1])]; - tensor var_1318_pad_type_0 = const()[name = tensor("op_1318_pad_type_0"), val = tensor("custom")]; - tensor var_1318_pad_0 = const()[name = tensor("op_1318_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1318 = conv(bias = layers_4_mlp_fc1_bias, dilations = var_1316, groups = var_1093, pad = var_1318_pad_0, pad_type = var_1318_pad_type_0, strides = var_1314, weight = layers_4_mlp_fc1_weight, x = var_1308_cast_fp16)[name = tensor("op_1318")]; - tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; - tensor input_39 = gelu(mode = input_39_mode_0, x = var_1318)[name = tensor("input_39")]; - tensor var_1322 = const()[name = tensor("op_1322"), val = tensor([1, 1])]; - tensor var_1324 = const()[name = tensor("op_1324"), val = tensor([1, 1])]; - tensor var_1326_pad_type_0 = const()[name = tensor("op_1326_pad_type_0"), val = tensor("custom")]; - tensor var_1326_pad_0 = const()[name = tensor("op_1326_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1326 = conv(bias = layers_4_mlp_fc2_bias, dilations = var_1324, groups = var_1093, pad = var_1326_pad_0, pad_type = var_1326_pad_type_0, strides = var_1322, weight = layers_4_mlp_fc2_weight, x = input_39)[name = tensor("op_1326")]; - tensor x_41 = add(x = var_1308_cast_fp16, y = var_1326)[name = tensor("x_41")]; - tensor var_1091_promoted_1 = const()[name = tensor("op_1091_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1092_promoted_1 = const()[name = tensor("op_1092_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_43 = clip(alpha = var_1091_promoted_1, beta = var_1092_promoted_1, x = x_41)[name = tensor("x_43")]; - tensor var_1331 = const()[name = tensor("op_1331"), val = tensor([1])]; - tensor mean_21 = reduce_mean(axes = var_1331, keep_dims = var_1095, x = x_43)[name = tensor("mean_21")]; + tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0, x = var_1274_cast_fp16)[name = tensor("denom_19_cast_fp16")]; + tensor var_1276_cast_fp16 = mul(x = zero_mean_19, y = denom_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor var_1278_gamma_0_to_fp16 = const()[name = tensor("op_1278_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218531328)))]; + tensor var_1278_beta_0_to_fp16 = const()[name = tensor("op_1278_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218532928)))]; + tensor var_1278_epsilon_0_to_fp16 = const()[name = tensor("op_1278_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1278_cast_fp16 = batch_norm(beta = var_1278_beta_0_to_fp16, epsilon = var_1278_epsilon_0_to_fp16, gamma = var_1278_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1276_cast_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1284 = const()[name = tensor("op_1284"), val = tensor([1, 1])]; + tensor var_1286 = const()[name = tensor("op_1286"), val = tensor([1, 1])]; + tensor var_1288_pad_type_0 = const()[name = tensor("op_1288_pad_type_0"), val = tensor("custom")]; + tensor var_1288_pad_0 = const()[name = tensor("op_1288_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1288 = conv(bias = layers_4_mlp_fc1_bias, dilations = var_1286, groups = var_1072, pad = var_1288_pad_0, pad_type = var_1288_pad_type_0, strides = var_1284, weight = layers_4_mlp_fc1_weight, x = var_1278_cast_fp16)[name = tensor("op_1288")]; + tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; + tensor input_159 = gelu(mode = input_159_mode_0, x = var_1288)[name = tensor("input_159")]; + tensor var_1292 = const()[name = tensor("op_1292"), val = tensor([1, 1])]; + tensor var_1294 = const()[name = tensor("op_1294"), val = tensor([1, 1])]; + tensor var_1296_pad_type_0 = const()[name = tensor("op_1296_pad_type_0"), val = tensor("custom")]; + tensor var_1296_pad_0 = const()[name = tensor("op_1296_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1296 = conv(bias = layers_4_mlp_fc2_bias, dilations = var_1294, groups = var_1072, pad = var_1296_pad_0, pad_type = var_1296_pad_type_0, strides = var_1292, weight = layers_4_mlp_fc2_weight, x = input_159)[name = tensor("op_1296")]; + tensor x_41 = add(x = var_1278_cast_fp16, y = var_1296)[name = tensor("x_41")]; + tensor var_1069_promoted_1 = const()[name = tensor("op_1069_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_1070_promoted_1 = const()[name = tensor("op_1070_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_43 = clip(alpha = var_1069_promoted_1, beta = var_1070_promoted_1, x = x_41)[name = tensor("x_43")]; + tensor var_1301 = const()[name = tensor("op_1301"), val = tensor([1])]; + tensor mean_21 = reduce_mean(axes = var_1301, keep_dims = var_1074, x = x_43)[name = tensor("mean_21")]; tensor zero_mean_21 = sub(x = x_43, y = mean_21)[name = tensor("zero_mean_21")]; - tensor var_1100_promoted_1 = const()[name = tensor("op_1100_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1334 = pow(x = zero_mean_21, y = var_1100_promoted_1)[name = tensor("op_1334")]; - tensor var_1335 = const()[name = tensor("op_1335"), val = tensor([1])]; - tensor var_1336 = reduce_mean(axes = var_1335, keep_dims = var_1095, x = var_1334)[name = tensor("op_1336")]; - tensor var_1337_to_fp16 = const()[name = tensor("op_1337_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1338_cast_fp16 = add(x = var_1336, y = var_1337_to_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1071_promoted_1 = const()[name = tensor("op_1071_promoted_1"), val = tensor(0x1p+1)]; + tensor var_1304 = pow(x = zero_mean_21, y = var_1071_promoted_1)[name = tensor("op_1304")]; + tensor var_1305 = const()[name = tensor("op_1305"), val = tensor([1])]; + tensor var_1306 = reduce_mean(axes = var_1305, keep_dims = var_1074, x = var_1304)[name = tensor("op_1306")]; + tensor var_1307_to_fp16 = const()[name = tensor("op_1307_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1308_cast_fp16 = add(x = var_1306, y = var_1307_to_fp16)[name = tensor("op_1308_cast_fp16")]; tensor denom_21_epsilon_0 = const()[name = tensor("denom_21_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0, x = var_1338_cast_fp16)[name = tensor("denom_21_cast_fp16")]; - tensor var_1340_cast_fp16 = mul(x = zero_mean_21, y = denom_21_cast_fp16)[name = tensor("op_1340_cast_fp16")]; - tensor var_1342_gamma_0_to_fp16 = const()[name = tensor("op_1342_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218534528)))]; - tensor var_1342_beta_0_to_fp16 = const()[name = tensor("op_1342_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218536128)))]; - tensor var_1342_epsilon_0_to_fp16 = const()[name = tensor("op_1342_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1342_cast_fp16 = batch_norm(beta = var_1342_beta_0_to_fp16, epsilon = var_1342_epsilon_0_to_fp16, gamma = var_1342_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1340_cast_fp16)[name = tensor("op_1342_cast_fp16")]; - tensor var_1347 = const()[name = tensor("op_1347"), val = tensor(1)]; - tensor var_1348 = const()[name = tensor("op_1348"), val = tensor(0)]; - tensor var_1349 = const()[name = tensor("op_1349"), val = tensor(true)]; - tensor var_1374 = const()[name = tensor("op_1374"), val = tensor([1, 1])]; - tensor var_1376 = const()[name = tensor("op_1376"), val = tensor([1, 1])]; - tensor var_1378_pad_type_0 = const()[name = tensor("op_1378_pad_type_0"), val = tensor("custom")]; - tensor var_1378_pad_0 = const()[name = tensor("op_1378_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1378 = conv(bias = layers_5_attention_q_proj_bias, dilations = var_1376, groups = var_1347, pad = var_1378_pad_0, pad_type = var_1378_pad_type_0, strides = var_1374, weight = layers_5_attention_q_proj_weight, x = var_1342_cast_fp16)[name = tensor("op_1378")]; - tensor var_1379 = const()[name = tensor("op_1379"), val = tensor([1, 64, 12, 512])]; - tensor var_1380 = reshape(shape = var_1379, x = var_1378)[name = tensor("op_1380")]; - tensor var_1383 = const()[name = tensor("op_1383"), val = tensor([1, 1])]; - tensor var_1385 = const()[name = tensor("op_1385"), val = tensor([1, 1])]; - tensor var_1387_pad_type_0 = const()[name = tensor("op_1387_pad_type_0"), val = tensor("custom")]; - tensor var_1387_pad_0 = const()[name = tensor("op_1387_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1387 = conv(bias = layers_5_attention_k_proj_bias, dilations = var_1385, groups = var_1347, pad = var_1387_pad_0, pad_type = var_1387_pad_type_0, strides = var_1383, weight = layers_5_attention_k_proj_weight, x = var_1342_cast_fp16)[name = tensor("op_1387")]; - tensor var_1388 = const()[name = tensor("op_1388"), val = tensor([1, 64, 12, 512])]; - tensor ks_11 = reshape(shape = var_1388, x = var_1387)[name = tensor("ks_11")]; - tensor var_1392 = const()[name = tensor("op_1392"), val = tensor([1, 1])]; - tensor var_1394 = const()[name = tensor("op_1394"), val = tensor([1, 1])]; - tensor var_1396_pad_type_0 = const()[name = tensor("op_1396_pad_type_0"), val = tensor("custom")]; - tensor var_1396_pad_0 = const()[name = tensor("op_1396_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1396 = conv(bias = layers_5_attention_v_proj_bias, dilations = var_1394, groups = var_1347, pad = var_1396_pad_0, pad_type = var_1396_pad_type_0, strides = var_1392, weight = layers_5_attention_v_proj_weight, x = var_1342_cast_fp16)[name = tensor("op_1396")]; - tensor var_1397 = const()[name = tensor("op_1397"), val = tensor([1, 64, 12, 512])]; - tensor var_1398 = reshape(shape = var_1397, x = var_1396)[name = tensor("op_1398")]; - tensor tile_27 = const()[name = tensor("tile_27"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1399_axis_0 = const()[name = tensor("op_1399_axis_0"), val = tensor(2)]; - tensor var_1399_0, tensor var_1399_1, tensor var_1399_2, tensor var_1399_3, tensor var_1399_4, tensor var_1399_5, tensor var_1399_6, tensor var_1399_7, tensor var_1399_8, tensor var_1399_9, tensor var_1399_10, tensor var_1399_11 = split(axis = var_1399_axis_0, split_sizes = tile_27, x = var_1380)[name = tensor("op_1399")]; - tensor var_1412_perm_0 = const()[name = tensor("op_1412_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_28 = const()[name = tensor("tile_28"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1413_axis_0 = const()[name = tensor("op_1413_axis_0"), val = tensor(2)]; - tensor transpose_6 = transpose(perm = var_1412_perm_0, x = ks_11)[name = tensor("transpose_6")]; - tensor var_1413_0, tensor var_1413_1, tensor var_1413_2, tensor var_1413_3, tensor var_1413_4, tensor var_1413_5, tensor var_1413_6, tensor var_1413_7, tensor var_1413_8, tensor var_1413_9, tensor var_1413_10, tensor var_1413_11 = split(axis = var_1413_axis_0, split_sizes = tile_28, x = transpose_6)[name = tensor("op_1413")]; - tensor tile_29 = const()[name = tensor("tile_29"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1426_axis_0 = const()[name = tensor("op_1426_axis_0"), val = tensor(2)]; - tensor var_1426_0, tensor var_1426_1, tensor var_1426_2, tensor var_1426_3, tensor var_1426_4, tensor var_1426_5, tensor var_1426_6, tensor var_1426_7, tensor var_1426_8, tensor var_1426_9, tensor var_1426_10, tensor var_1426_11 = split(axis = var_1426_axis_0, split_sizes = tile_29, x = var_1398)[name = tensor("op_1426")]; - tensor var_1440_equation_0 = const()[name = tensor("op_1440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1440 = einsum(equation = var_1440_equation_0, values = (var_1413_0, var_1399_0))[name = tensor("op_1440")]; - tensor var_1441_to_fp16 = const()[name = tensor("op_1441_to_fp16"), val = tensor(0x1p-3)]; - tensor w_121_cast_fp16 = mul(x = var_1440, y = var_1441_to_fp16)[name = tensor("w_121_cast_fp16")]; - tensor var_1444_equation_0 = const()[name = tensor("op_1444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1444 = einsum(equation = var_1444_equation_0, values = (var_1413_1, var_1399_1))[name = tensor("op_1444")]; - tensor var_1445_to_fp16 = const()[name = tensor("op_1445_to_fp16"), val = tensor(0x1p-3)]; - tensor w_123_cast_fp16 = mul(x = var_1444, y = var_1445_to_fp16)[name = tensor("w_123_cast_fp16")]; - tensor var_1448_equation_0 = const()[name = tensor("op_1448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1448 = einsum(equation = var_1448_equation_0, values = (var_1413_2, var_1399_2))[name = tensor("op_1448")]; - tensor var_1449_to_fp16 = const()[name = tensor("op_1449_to_fp16"), val = tensor(0x1p-3)]; - tensor w_125_cast_fp16 = mul(x = var_1448, y = var_1449_to_fp16)[name = tensor("w_125_cast_fp16")]; - tensor var_1452_equation_0 = const()[name = tensor("op_1452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1452 = einsum(equation = var_1452_equation_0, values = (var_1413_3, var_1399_3))[name = tensor("op_1452")]; - tensor var_1453_to_fp16 = const()[name = tensor("op_1453_to_fp16"), val = tensor(0x1p-3)]; - tensor w_127_cast_fp16 = mul(x = var_1452, y = var_1453_to_fp16)[name = tensor("w_127_cast_fp16")]; - tensor var_1456_equation_0 = const()[name = tensor("op_1456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1456 = einsum(equation = var_1456_equation_0, values = (var_1413_4, var_1399_4))[name = tensor("op_1456")]; - tensor var_1457_to_fp16 = const()[name = tensor("op_1457_to_fp16"), val = tensor(0x1p-3)]; - tensor w_129_cast_fp16 = mul(x = var_1456, y = var_1457_to_fp16)[name = tensor("w_129_cast_fp16")]; - tensor var_1460_equation_0 = const()[name = tensor("op_1460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1460 = einsum(equation = var_1460_equation_0, values = (var_1413_5, var_1399_5))[name = tensor("op_1460")]; - tensor var_1461_to_fp16 = const()[name = tensor("op_1461_to_fp16"), val = tensor(0x1p-3)]; - tensor w_131_cast_fp16 = mul(x = var_1460, y = var_1461_to_fp16)[name = tensor("w_131_cast_fp16")]; - tensor var_1464_equation_0 = const()[name = tensor("op_1464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1464 = einsum(equation = var_1464_equation_0, values = (var_1413_6, var_1399_6))[name = tensor("op_1464")]; - tensor var_1465_to_fp16 = const()[name = tensor("op_1465_to_fp16"), val = tensor(0x1p-3)]; - tensor w_133_cast_fp16 = mul(x = var_1464, y = var_1465_to_fp16)[name = tensor("w_133_cast_fp16")]; - tensor var_1468_equation_0 = const()[name = tensor("op_1468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1468 = einsum(equation = var_1468_equation_0, values = (var_1413_7, var_1399_7))[name = tensor("op_1468")]; - tensor var_1469_to_fp16 = const()[name = tensor("op_1469_to_fp16"), val = tensor(0x1p-3)]; - tensor w_135_cast_fp16 = mul(x = var_1468, y = var_1469_to_fp16)[name = tensor("w_135_cast_fp16")]; - tensor var_1472_equation_0 = const()[name = tensor("op_1472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1472 = einsum(equation = var_1472_equation_0, values = (var_1413_8, var_1399_8))[name = tensor("op_1472")]; - tensor var_1473_to_fp16 = const()[name = tensor("op_1473_to_fp16"), val = tensor(0x1p-3)]; - tensor w_137_cast_fp16 = mul(x = var_1472, y = var_1473_to_fp16)[name = tensor("w_137_cast_fp16")]; - tensor var_1476_equation_0 = const()[name = tensor("op_1476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1476 = einsum(equation = var_1476_equation_0, values = (var_1413_9, var_1399_9))[name = tensor("op_1476")]; - tensor var_1477_to_fp16 = const()[name = tensor("op_1477_to_fp16"), val = tensor(0x1p-3)]; - tensor w_139_cast_fp16 = mul(x = var_1476, y = var_1477_to_fp16)[name = tensor("w_139_cast_fp16")]; - tensor var_1480_equation_0 = const()[name = tensor("op_1480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1480 = einsum(equation = var_1480_equation_0, values = (var_1413_10, var_1399_10))[name = tensor("op_1480")]; - tensor var_1481_to_fp16 = const()[name = tensor("op_1481_to_fp16"), val = tensor(0x1p-3)]; - tensor w_141_cast_fp16 = mul(x = var_1480, y = var_1481_to_fp16)[name = tensor("w_141_cast_fp16")]; - tensor var_1484_equation_0 = const()[name = tensor("op_1484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1484 = einsum(equation = var_1484_equation_0, values = (var_1413_11, var_1399_11))[name = tensor("op_1484")]; - tensor var_1485_to_fp16 = const()[name = tensor("op_1485_to_fp16"), val = tensor(0x1p-3)]; - tensor w_143_cast_fp16 = mul(x = var_1484, y = var_1485_to_fp16)[name = tensor("w_143_cast_fp16")]; - tensor var_1487_cast_fp16 = add(x = w_121_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1487_cast_fp16")]; - tensor var_1488_cast_fp16 = softmax(axis = var_1347, x = var_1487_cast_fp16)[name = tensor("op_1488_cast_fp16")]; - tensor var_1489_cast_fp16 = add(x = w_123_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1489_cast_fp16")]; - tensor var_1490_cast_fp16 = softmax(axis = var_1347, x = var_1489_cast_fp16)[name = tensor("op_1490_cast_fp16")]; - tensor var_1491_cast_fp16 = add(x = w_125_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1491_cast_fp16")]; - tensor var_1492_cast_fp16 = softmax(axis = var_1347, x = var_1491_cast_fp16)[name = tensor("op_1492_cast_fp16")]; - tensor var_1493_cast_fp16 = add(x = w_127_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1493_cast_fp16")]; - tensor var_1494_cast_fp16 = softmax(axis = var_1347, x = var_1493_cast_fp16)[name = tensor("op_1494_cast_fp16")]; - tensor var_1495_cast_fp16 = add(x = w_129_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1495_cast_fp16")]; - tensor var_1496_cast_fp16 = softmax(axis = var_1347, x = var_1495_cast_fp16)[name = tensor("op_1496_cast_fp16")]; - tensor var_1497_cast_fp16 = add(x = w_131_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1497_cast_fp16")]; - tensor var_1498_cast_fp16 = softmax(axis = var_1347, x = var_1497_cast_fp16)[name = tensor("op_1498_cast_fp16")]; - tensor var_1499_cast_fp16 = add(x = w_133_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1499_cast_fp16")]; - tensor var_1500_cast_fp16 = softmax(axis = var_1347, x = var_1499_cast_fp16)[name = tensor("op_1500_cast_fp16")]; - tensor var_1501_cast_fp16 = add(x = w_135_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1501_cast_fp16")]; - tensor var_1502_cast_fp16 = softmax(axis = var_1347, x = var_1501_cast_fp16)[name = tensor("op_1502_cast_fp16")]; - tensor var_1503_cast_fp16 = add(x = w_137_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1503_cast_fp16")]; - tensor var_1504_cast_fp16 = softmax(axis = var_1347, x = var_1503_cast_fp16)[name = tensor("op_1504_cast_fp16")]; - tensor var_1505_cast_fp16 = add(x = w_139_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1505_cast_fp16")]; - tensor var_1506_cast_fp16 = softmax(axis = var_1347, x = var_1505_cast_fp16)[name = tensor("op_1506_cast_fp16")]; - tensor var_1507_cast_fp16 = add(x = w_141_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1507_cast_fp16")]; - tensor var_1508_cast_fp16 = softmax(axis = var_1347, x = var_1507_cast_fp16)[name = tensor("op_1508_cast_fp16")]; - tensor var_1509_cast_fp16 = add(x = w_143_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1509_cast_fp16")]; - tensor var_1510_cast_fp16 = softmax(axis = var_1347, x = var_1509_cast_fp16)[name = tensor("op_1510_cast_fp16")]; - tensor var_1512_equation_0 = const()[name = tensor("op_1512_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1512_cast_fp16 = einsum(equation = var_1512_equation_0, values = (var_1426_0, var_1488_cast_fp16))[name = tensor("op_1512_cast_fp16")]; - tensor var_1514_equation_0 = const()[name = tensor("op_1514_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1514_cast_fp16 = einsum(equation = var_1514_equation_0, values = (var_1426_1, var_1490_cast_fp16))[name = tensor("op_1514_cast_fp16")]; - tensor var_1516_equation_0 = const()[name = tensor("op_1516_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1516_cast_fp16 = einsum(equation = var_1516_equation_0, values = (var_1426_2, var_1492_cast_fp16))[name = tensor("op_1516_cast_fp16")]; - tensor var_1518_equation_0 = const()[name = tensor("op_1518_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1518_cast_fp16 = einsum(equation = var_1518_equation_0, values = (var_1426_3, var_1494_cast_fp16))[name = tensor("op_1518_cast_fp16")]; - tensor var_1520_equation_0 = const()[name = tensor("op_1520_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1520_cast_fp16 = einsum(equation = var_1520_equation_0, values = (var_1426_4, var_1496_cast_fp16))[name = tensor("op_1520_cast_fp16")]; - tensor var_1522_equation_0 = const()[name = tensor("op_1522_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1522_cast_fp16 = einsum(equation = var_1522_equation_0, values = (var_1426_5, var_1498_cast_fp16))[name = tensor("op_1522_cast_fp16")]; - tensor var_1524_equation_0 = const()[name = tensor("op_1524_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1524_cast_fp16 = einsum(equation = var_1524_equation_0, values = (var_1426_6, var_1500_cast_fp16))[name = tensor("op_1524_cast_fp16")]; - tensor var_1526_equation_0 = const()[name = tensor("op_1526_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1526_cast_fp16 = einsum(equation = var_1526_equation_0, values = (var_1426_7, var_1502_cast_fp16))[name = tensor("op_1526_cast_fp16")]; - tensor var_1528_equation_0 = const()[name = tensor("op_1528_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1528_cast_fp16 = einsum(equation = var_1528_equation_0, values = (var_1426_8, var_1504_cast_fp16))[name = tensor("op_1528_cast_fp16")]; - tensor var_1530_equation_0 = const()[name = tensor("op_1530_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1530_cast_fp16 = einsum(equation = var_1530_equation_0, values = (var_1426_9, var_1506_cast_fp16))[name = tensor("op_1530_cast_fp16")]; - tensor var_1532_equation_0 = const()[name = tensor("op_1532_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1532_cast_fp16 = einsum(equation = var_1532_equation_0, values = (var_1426_10, var_1508_cast_fp16))[name = tensor("op_1532_cast_fp16")]; - tensor var_1534_equation_0 = const()[name = tensor("op_1534_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1534_cast_fp16 = einsum(equation = var_1534_equation_0, values = (var_1426_11, var_1510_cast_fp16))[name = tensor("op_1534_cast_fp16")]; - tensor var_1536_interleave_0 = const()[name = tensor("op_1536_interleave_0"), val = tensor(false)]; - tensor var_1536_cast_fp16 = concat(axis = var_1347, interleave = var_1536_interleave_0, values = (var_1512_cast_fp16, var_1514_cast_fp16, var_1516_cast_fp16, var_1518_cast_fp16, var_1520_cast_fp16, var_1522_cast_fp16, var_1524_cast_fp16, var_1526_cast_fp16, var_1528_cast_fp16, var_1530_cast_fp16, var_1532_cast_fp16, var_1534_cast_fp16))[name = tensor("op_1536_cast_fp16")]; - tensor var_1540 = const()[name = tensor("op_1540"), val = tensor([1, 1])]; - tensor var_1542 = const()[name = tensor("op_1542"), val = tensor([1, 1])]; - tensor var_1544_pad_type_0 = const()[name = tensor("op_1544_pad_type_0"), val = tensor("custom")]; - tensor var_1544_pad_0 = const()[name = tensor("op_1544_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1544 = conv(bias = layers_5_attention_o_proj_bias, dilations = var_1542, groups = var_1347, pad = var_1544_pad_0, pad_type = var_1544_pad_type_0, strides = var_1540, weight = layers_5_attention_o_proj_weight, x = var_1536_cast_fp16)[name = tensor("op_1544")]; - tensor var_1546_interleave_0 = const()[name = tensor("op_1546_interleave_0"), val = tensor(false)]; - tensor var_1546 = concat(axis = var_1348, interleave = var_1546_interleave_0, values = var_1544)[name = tensor("op_1546")]; - tensor x_45 = add(x = var_1342_cast_fp16, y = var_1546)[name = tensor("x_45")]; - tensor var_1345_promoted = const()[name = tensor("op_1345_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1346_promoted = const()[name = tensor("op_1346_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_47 = clip(alpha = var_1345_promoted, beta = var_1346_promoted, x = x_45)[name = tensor("x_47")]; - tensor var_1551 = const()[name = tensor("op_1551"), val = tensor([1])]; - tensor mean_23 = reduce_mean(axes = var_1551, keep_dims = var_1349, x = x_47)[name = tensor("mean_23")]; + tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0, x = var_1308_cast_fp16)[name = tensor("denom_21_cast_fp16")]; + tensor var_1310_cast_fp16 = mul(x = zero_mean_21, y = denom_21_cast_fp16)[name = tensor("op_1310_cast_fp16")]; + tensor var_1312_gamma_0_to_fp16 = const()[name = tensor("op_1312_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218534528)))]; + tensor var_1312_beta_0_to_fp16 = const()[name = tensor("op_1312_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218536128)))]; + tensor var_1312_epsilon_0_to_fp16 = const()[name = tensor("op_1312_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1312_cast_fp16 = batch_norm(beta = var_1312_beta_0_to_fp16, epsilon = var_1312_epsilon_0_to_fp16, gamma = var_1312_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1310_cast_fp16)[name = tensor("op_1312_cast_fp16")]; + tensor var_1318 = const()[name = tensor("op_1318"), val = tensor(1)]; + tensor var_1319 = const()[name = tensor("op_1319"), val = tensor(0)]; + tensor var_1320 = const()[name = tensor("op_1320"), val = tensor(true)]; + tensor var_1342 = const()[name = tensor("op_1342"), val = tensor([1, 1])]; + tensor var_1344 = const()[name = tensor("op_1344"), val = tensor([1, 1])]; + tensor var_1346_pad_type_0 = const()[name = tensor("op_1346_pad_type_0"), val = tensor("custom")]; + tensor var_1346_pad_0 = const()[name = tensor("op_1346_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1346 = conv(bias = layers_5_attention_q_proj_bias, dilations = var_1344, groups = var_1318, pad = var_1346_pad_0, pad_type = var_1346_pad_type_0, strides = var_1342, weight = layers_5_attention_q_proj_weight, x = var_1312_cast_fp16)[name = tensor("op_1346")]; + tensor var_1349 = const()[name = tensor("op_1349"), val = tensor([1, 1])]; + tensor var_1351 = const()[name = tensor("op_1351"), val = tensor([1, 1])]; + tensor ks_11_pad_type_0 = const()[name = tensor("ks_11_pad_type_0"), val = tensor("custom")]; + tensor ks_11_pad_0 = const()[name = tensor("ks_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_11 = conv(bias = layers_5_attention_k_proj_bias, dilations = var_1351, groups = var_1318, pad = ks_11_pad_0, pad_type = ks_11_pad_type_0, strides = var_1349, weight = layers_5_attention_k_proj_weight, x = var_1312_cast_fp16)[name = tensor("ks_11")]; + tensor var_1356 = const()[name = tensor("op_1356"), val = tensor([1, 1])]; + tensor var_1358 = const()[name = tensor("op_1358"), val = tensor([1, 1])]; + tensor var_1360_pad_type_0 = const()[name = tensor("op_1360_pad_type_0"), val = tensor("custom")]; + tensor var_1360_pad_0 = const()[name = tensor("op_1360_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1360 = conv(bias = layers_5_attention_v_proj_bias, dilations = var_1358, groups = var_1318, pad = var_1360_pad_0, pad_type = var_1360_pad_type_0, strides = var_1356, weight = layers_5_attention_v_proj_weight, x = var_1312_cast_fp16)[name = tensor("op_1360")]; + tensor tile_27 = const()[name = tensor("tile_27"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1361_axis_0 = const()[name = tensor("op_1361_axis_0"), val = tensor(1)]; + tensor var_1361_0, tensor var_1361_1, tensor var_1361_2, tensor var_1361_3, tensor var_1361_4, tensor var_1361_5, tensor var_1361_6, tensor var_1361_7, tensor var_1361_8, tensor var_1361_9, tensor var_1361_10, tensor var_1361_11 = split(axis = var_1361_axis_0, split_sizes = tile_27, x = var_1346)[name = tensor("op_1361")]; + tensor var_1374_perm_0 = const()[name = tensor("op_1374_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_28 = const()[name = tensor("tile_28"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1375_axis_0 = const()[name = tensor("op_1375_axis_0"), val = tensor(3)]; + tensor transpose_6 = transpose(perm = var_1374_perm_0, x = ks_11)[name = tensor("transpose_6")]; + tensor var_1375_0, tensor var_1375_1, tensor var_1375_2, tensor var_1375_3, tensor var_1375_4, tensor var_1375_5, tensor var_1375_6, tensor var_1375_7, tensor var_1375_8, tensor var_1375_9, tensor var_1375_10, tensor var_1375_11 = split(axis = var_1375_axis_0, split_sizes = tile_28, x = transpose_6)[name = tensor("op_1375")]; + tensor tile_29 = const()[name = tensor("tile_29"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1388_axis_0 = const()[name = tensor("op_1388_axis_0"), val = tensor(1)]; + tensor var_1388_0, tensor var_1388_1, tensor var_1388_2, tensor var_1388_3, tensor var_1388_4, tensor var_1388_5, tensor var_1388_6, tensor var_1388_7, tensor var_1388_8, tensor var_1388_9, tensor var_1388_10, tensor var_1388_11 = split(axis = var_1388_axis_0, split_sizes = tile_29, x = var_1360)[name = tensor("op_1388")]; + tensor var_1402_equation_0 = const()[name = tensor("op_1402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1402 = einsum(equation = var_1402_equation_0, values = (var_1375_0, var_1361_0))[name = tensor("op_1402")]; + tensor var_1403_to_fp16 = const()[name = tensor("op_1403_to_fp16"), val = tensor(0x1p-3)]; + tensor w_121_cast_fp16 = mul(x = var_1402, y = var_1403_to_fp16)[name = tensor("w_121_cast_fp16")]; + tensor var_1406_equation_0 = const()[name = tensor("op_1406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1406 = einsum(equation = var_1406_equation_0, values = (var_1375_1, var_1361_1))[name = tensor("op_1406")]; + tensor var_1407_to_fp16 = const()[name = tensor("op_1407_to_fp16"), val = tensor(0x1p-3)]; + tensor w_123_cast_fp16 = mul(x = var_1406, y = var_1407_to_fp16)[name = tensor("w_123_cast_fp16")]; + tensor var_1410_equation_0 = const()[name = tensor("op_1410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1410 = einsum(equation = var_1410_equation_0, values = (var_1375_2, var_1361_2))[name = tensor("op_1410")]; + tensor var_1411_to_fp16 = const()[name = tensor("op_1411_to_fp16"), val = tensor(0x1p-3)]; + tensor w_125_cast_fp16 = mul(x = var_1410, y = var_1411_to_fp16)[name = tensor("w_125_cast_fp16")]; + tensor var_1414_equation_0 = const()[name = tensor("op_1414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1414 = einsum(equation = var_1414_equation_0, values = (var_1375_3, var_1361_3))[name = tensor("op_1414")]; + tensor var_1415_to_fp16 = const()[name = tensor("op_1415_to_fp16"), val = tensor(0x1p-3)]; + tensor w_127_cast_fp16 = mul(x = var_1414, y = var_1415_to_fp16)[name = tensor("w_127_cast_fp16")]; + tensor var_1418_equation_0 = const()[name = tensor("op_1418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1418 = einsum(equation = var_1418_equation_0, values = (var_1375_4, var_1361_4))[name = tensor("op_1418")]; + tensor var_1419_to_fp16 = const()[name = tensor("op_1419_to_fp16"), val = tensor(0x1p-3)]; + tensor w_129_cast_fp16 = mul(x = var_1418, y = var_1419_to_fp16)[name = tensor("w_129_cast_fp16")]; + tensor var_1422_equation_0 = const()[name = tensor("op_1422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1422 = einsum(equation = var_1422_equation_0, values = (var_1375_5, var_1361_5))[name = tensor("op_1422")]; + tensor var_1423_to_fp16 = const()[name = tensor("op_1423_to_fp16"), val = tensor(0x1p-3)]; + tensor w_131_cast_fp16 = mul(x = var_1422, y = var_1423_to_fp16)[name = tensor("w_131_cast_fp16")]; + tensor var_1426_equation_0 = const()[name = tensor("op_1426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1426 = einsum(equation = var_1426_equation_0, values = (var_1375_6, var_1361_6))[name = tensor("op_1426")]; + tensor var_1427_to_fp16 = const()[name = tensor("op_1427_to_fp16"), val = tensor(0x1p-3)]; + tensor w_133_cast_fp16 = mul(x = var_1426, y = var_1427_to_fp16)[name = tensor("w_133_cast_fp16")]; + tensor var_1430_equation_0 = const()[name = tensor("op_1430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1430 = einsum(equation = var_1430_equation_0, values = (var_1375_7, var_1361_7))[name = tensor("op_1430")]; + tensor var_1431_to_fp16 = const()[name = tensor("op_1431_to_fp16"), val = tensor(0x1p-3)]; + tensor w_135_cast_fp16 = mul(x = var_1430, y = var_1431_to_fp16)[name = tensor("w_135_cast_fp16")]; + tensor var_1434_equation_0 = const()[name = tensor("op_1434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1434 = einsum(equation = var_1434_equation_0, values = (var_1375_8, var_1361_8))[name = tensor("op_1434")]; + tensor var_1435_to_fp16 = const()[name = tensor("op_1435_to_fp16"), val = tensor(0x1p-3)]; + tensor w_137_cast_fp16 = mul(x = var_1434, y = var_1435_to_fp16)[name = tensor("w_137_cast_fp16")]; + tensor var_1438_equation_0 = const()[name = tensor("op_1438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1438 = einsum(equation = var_1438_equation_0, values = (var_1375_9, var_1361_9))[name = tensor("op_1438")]; + tensor var_1439_to_fp16 = const()[name = tensor("op_1439_to_fp16"), val = tensor(0x1p-3)]; + tensor w_139_cast_fp16 = mul(x = var_1438, y = var_1439_to_fp16)[name = tensor("w_139_cast_fp16")]; + tensor var_1442_equation_0 = const()[name = tensor("op_1442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1442 = einsum(equation = var_1442_equation_0, values = (var_1375_10, var_1361_10))[name = tensor("op_1442")]; + tensor var_1443_to_fp16 = const()[name = tensor("op_1443_to_fp16"), val = tensor(0x1p-3)]; + tensor w_141_cast_fp16 = mul(x = var_1442, y = var_1443_to_fp16)[name = tensor("w_141_cast_fp16")]; + tensor var_1446_equation_0 = const()[name = tensor("op_1446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1446 = einsum(equation = var_1446_equation_0, values = (var_1375_11, var_1361_11))[name = tensor("op_1446")]; + tensor var_1447_to_fp16 = const()[name = tensor("op_1447_to_fp16"), val = tensor(0x1p-3)]; + tensor w_143_cast_fp16 = mul(x = var_1446, y = var_1447_to_fp16)[name = tensor("w_143_cast_fp16")]; + tensor input_163_cast_fp16 = add(x = w_121_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor var_1450_cast_fp16 = softmax(axis = var_1318, x = input_163_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor input_165_cast_fp16 = add(x = w_123_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor var_1452_cast_fp16 = softmax(axis = var_1318, x = input_165_cast_fp16)[name = tensor("op_1452_cast_fp16")]; + tensor input_167_cast_fp16 = add(x = w_125_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor var_1454_cast_fp16 = softmax(axis = var_1318, x = input_167_cast_fp16)[name = tensor("op_1454_cast_fp16")]; + tensor input_169_cast_fp16 = add(x = w_127_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_169_cast_fp16")]; + tensor var_1456_cast_fp16 = softmax(axis = var_1318, x = input_169_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_171_cast_fp16 = add(x = w_129_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor var_1458_cast_fp16 = softmax(axis = var_1318, x = input_171_cast_fp16)[name = tensor("op_1458_cast_fp16")]; + tensor input_173_cast_fp16 = add(x = w_131_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_173_cast_fp16")]; + tensor var_1460_cast_fp16 = softmax(axis = var_1318, x = input_173_cast_fp16)[name = tensor("op_1460_cast_fp16")]; + tensor input_175_cast_fp16 = add(x = w_133_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_175_cast_fp16")]; + tensor var_1462_cast_fp16 = softmax(axis = var_1318, x = input_175_cast_fp16)[name = tensor("op_1462_cast_fp16")]; + tensor input_177_cast_fp16 = add(x = w_135_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_177_cast_fp16")]; + tensor var_1464_cast_fp16 = softmax(axis = var_1318, x = input_177_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + tensor input_179_cast_fp16 = add(x = w_137_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_179_cast_fp16")]; + tensor var_1466_cast_fp16 = softmax(axis = var_1318, x = input_179_cast_fp16)[name = tensor("op_1466_cast_fp16")]; + tensor input_181_cast_fp16 = add(x = w_139_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_181_cast_fp16")]; + tensor var_1468_cast_fp16 = softmax(axis = var_1318, x = input_181_cast_fp16)[name = tensor("op_1468_cast_fp16")]; + tensor input_183_cast_fp16 = add(x = w_141_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_183_cast_fp16")]; + tensor var_1470_cast_fp16 = softmax(axis = var_1318, x = input_183_cast_fp16)[name = tensor("op_1470_cast_fp16")]; + tensor input_185_cast_fp16 = add(x = w_143_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_185_cast_fp16")]; + tensor var_1472_cast_fp16 = softmax(axis = var_1318, x = input_185_cast_fp16)[name = tensor("op_1472_cast_fp16")]; + tensor var_1474_equation_0 = const()[name = tensor("op_1474_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1388_0, var_1450_cast_fp16))[name = tensor("op_1474_cast_fp16")]; + tensor var_1476_equation_0 = const()[name = tensor("op_1476_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1388_1, var_1452_cast_fp16))[name = tensor("op_1476_cast_fp16")]; + tensor var_1478_equation_0 = const()[name = tensor("op_1478_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1478_cast_fp16 = einsum(equation = var_1478_equation_0, values = (var_1388_2, var_1454_cast_fp16))[name = tensor("op_1478_cast_fp16")]; + tensor var_1480_equation_0 = const()[name = tensor("op_1480_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1480_cast_fp16 = einsum(equation = var_1480_equation_0, values = (var_1388_3, var_1456_cast_fp16))[name = tensor("op_1480_cast_fp16")]; + tensor var_1482_equation_0 = const()[name = tensor("op_1482_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1482_cast_fp16 = einsum(equation = var_1482_equation_0, values = (var_1388_4, var_1458_cast_fp16))[name = tensor("op_1482_cast_fp16")]; + tensor var_1484_equation_0 = const()[name = tensor("op_1484_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1484_cast_fp16 = einsum(equation = var_1484_equation_0, values = (var_1388_5, var_1460_cast_fp16))[name = tensor("op_1484_cast_fp16")]; + tensor var_1486_equation_0 = const()[name = tensor("op_1486_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1486_cast_fp16 = einsum(equation = var_1486_equation_0, values = (var_1388_6, var_1462_cast_fp16))[name = tensor("op_1486_cast_fp16")]; + tensor var_1488_equation_0 = const()[name = tensor("op_1488_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1488_cast_fp16 = einsum(equation = var_1488_equation_0, values = (var_1388_7, var_1464_cast_fp16))[name = tensor("op_1488_cast_fp16")]; + tensor var_1490_equation_0 = const()[name = tensor("op_1490_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1490_cast_fp16 = einsum(equation = var_1490_equation_0, values = (var_1388_8, var_1466_cast_fp16))[name = tensor("op_1490_cast_fp16")]; + tensor var_1492_equation_0 = const()[name = tensor("op_1492_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1492_cast_fp16 = einsum(equation = var_1492_equation_0, values = (var_1388_9, var_1468_cast_fp16))[name = tensor("op_1492_cast_fp16")]; + tensor var_1494_equation_0 = const()[name = tensor("op_1494_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1494_cast_fp16 = einsum(equation = var_1494_equation_0, values = (var_1388_10, var_1470_cast_fp16))[name = tensor("op_1494_cast_fp16")]; + tensor var_1496_equation_0 = const()[name = tensor("op_1496_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1496_cast_fp16 = einsum(equation = var_1496_equation_0, values = (var_1388_11, var_1472_cast_fp16))[name = tensor("op_1496_cast_fp16")]; + tensor var_1498_interleave_0 = const()[name = tensor("op_1498_interleave_0"), val = tensor(false)]; + tensor var_1498_cast_fp16 = concat(axis = var_1318, interleave = var_1498_interleave_0, values = (var_1474_cast_fp16, var_1476_cast_fp16, var_1478_cast_fp16, var_1480_cast_fp16, var_1482_cast_fp16, var_1484_cast_fp16, var_1486_cast_fp16, var_1488_cast_fp16, var_1490_cast_fp16, var_1492_cast_fp16, var_1494_cast_fp16, var_1496_cast_fp16))[name = tensor("op_1498_cast_fp16")]; + tensor var_1502 = const()[name = tensor("op_1502"), val = tensor([1, 1])]; + tensor var_1504 = const()[name = tensor("op_1504"), val = tensor([1, 1])]; + tensor var_1506_pad_type_0 = const()[name = tensor("op_1506_pad_type_0"), val = tensor("custom")]; + tensor var_1506_pad_0 = const()[name = tensor("op_1506_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1506 = conv(bias = layers_5_attention_o_proj_bias, dilations = var_1504, groups = var_1318, pad = var_1506_pad_0, pad_type = var_1506_pad_type_0, strides = var_1502, weight = layers_5_attention_o_proj_weight, x = var_1498_cast_fp16)[name = tensor("op_1506")]; + tensor var_1508_interleave_0 = const()[name = tensor("op_1508_interleave_0"), val = tensor(false)]; + tensor var_1508 = concat(axis = var_1319, interleave = var_1508_interleave_0, values = var_1506)[name = tensor("op_1508")]; + tensor x_45 = add(x = var_1312_cast_fp16, y = var_1508)[name = tensor("x_45")]; + tensor var_1315_promoted = const()[name = tensor("op_1315_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_1316_promoted = const()[name = tensor("op_1316_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_47 = clip(alpha = var_1315_promoted, beta = var_1316_promoted, x = x_45)[name = tensor("x_47")]; + tensor var_1513 = const()[name = tensor("op_1513"), val = tensor([1])]; + tensor mean_23 = reduce_mean(axes = var_1513, keep_dims = var_1320, x = x_47)[name = tensor("mean_23")]; tensor zero_mean_23 = sub(x = x_47, y = mean_23)[name = tensor("zero_mean_23")]; - tensor var_1354_promoted = const()[name = tensor("op_1354_promoted"), val = tensor(0x1p+1)]; - tensor var_1554 = pow(x = zero_mean_23, y = var_1354_promoted)[name = tensor("op_1554")]; - tensor var_1555 = const()[name = tensor("op_1555"), val = tensor([1])]; - tensor var_1556 = reduce_mean(axes = var_1555, keep_dims = var_1349, x = var_1554)[name = tensor("op_1556")]; - tensor var_1557_to_fp16 = const()[name = tensor("op_1557_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1558_cast_fp16 = add(x = var_1556, y = var_1557_to_fp16)[name = tensor("op_1558_cast_fp16")]; + tensor var_1317_promoted = const()[name = tensor("op_1317_promoted"), val = tensor(0x1p+1)]; + tensor var_1516 = pow(x = zero_mean_23, y = var_1317_promoted)[name = tensor("op_1516")]; + tensor var_1517 = const()[name = tensor("op_1517"), val = tensor([1])]; + tensor var_1518 = reduce_mean(axes = var_1517, keep_dims = var_1320, x = var_1516)[name = tensor("op_1518")]; + tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1520_cast_fp16 = add(x = var_1518, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; tensor denom_23_epsilon_0 = const()[name = tensor("denom_23_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0, x = var_1558_cast_fp16)[name = tensor("denom_23_cast_fp16")]; - tensor var_1560_cast_fp16 = mul(x = zero_mean_23, y = denom_23_cast_fp16)[name = tensor("op_1560_cast_fp16")]; - tensor var_1562_gamma_0_to_fp16 = const()[name = tensor("op_1562_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218537728)))]; - tensor var_1562_beta_0_to_fp16 = const()[name = tensor("op_1562_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218539328)))]; - tensor var_1562_epsilon_0_to_fp16 = const()[name = tensor("op_1562_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1562_cast_fp16 = batch_norm(beta = var_1562_beta_0_to_fp16, epsilon = var_1562_epsilon_0_to_fp16, gamma = var_1562_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1560_cast_fp16)[name = tensor("op_1562_cast_fp16")]; - tensor var_1568 = const()[name = tensor("op_1568"), val = tensor([1, 1])]; - tensor var_1570 = const()[name = tensor("op_1570"), val = tensor([1, 1])]; - tensor var_1572_pad_type_0 = const()[name = tensor("op_1572_pad_type_0"), val = tensor("custom")]; - tensor var_1572_pad_0 = const()[name = tensor("op_1572_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1572 = conv(bias = layers_5_mlp_fc1_bias, dilations = var_1570, groups = var_1347, pad = var_1572_pad_0, pad_type = var_1572_pad_type_0, strides = var_1568, weight = layers_5_mlp_fc1_weight, x = var_1562_cast_fp16)[name = tensor("op_1572")]; - tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; - tensor input_47 = gelu(mode = input_47_mode_0, x = var_1572)[name = tensor("input_47")]; - tensor var_1576 = const()[name = tensor("op_1576"), val = tensor([1, 1])]; - tensor var_1578 = const()[name = tensor("op_1578"), val = tensor([1, 1])]; - tensor var_1580_pad_type_0 = const()[name = tensor("op_1580_pad_type_0"), val = tensor("custom")]; - tensor var_1580_pad_0 = const()[name = tensor("op_1580_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1580 = conv(bias = layers_5_mlp_fc2_bias, dilations = var_1578, groups = var_1347, pad = var_1580_pad_0, pad_type = var_1580_pad_type_0, strides = var_1576, weight = layers_5_mlp_fc2_weight, x = input_47)[name = tensor("op_1580")]; - tensor x_49 = add(x = var_1562_cast_fp16, y = var_1580)[name = tensor("x_49")]; - tensor var_1345_promoted_1 = const()[name = tensor("op_1345_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1346_promoted_1 = const()[name = tensor("op_1346_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_51 = clip(alpha = var_1345_promoted_1, beta = var_1346_promoted_1, x = x_49)[name = tensor("x_51")]; - tensor var_1585 = const()[name = tensor("op_1585"), val = tensor([1])]; - tensor mean_25 = reduce_mean(axes = var_1585, keep_dims = var_1349, x = x_51)[name = tensor("mean_25")]; + tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0, x = var_1520_cast_fp16)[name = tensor("denom_23_cast_fp16")]; + tensor var_1522_cast_fp16 = mul(x = zero_mean_23, y = denom_23_cast_fp16)[name = tensor("op_1522_cast_fp16")]; + tensor var_1524_gamma_0_to_fp16 = const()[name = tensor("op_1524_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218537728)))]; + tensor var_1524_beta_0_to_fp16 = const()[name = tensor("op_1524_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218539328)))]; + tensor var_1524_epsilon_0_to_fp16 = const()[name = tensor("op_1524_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1524_cast_fp16 = batch_norm(beta = var_1524_beta_0_to_fp16, epsilon = var_1524_epsilon_0_to_fp16, gamma = var_1524_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1522_cast_fp16)[name = tensor("op_1524_cast_fp16")]; + tensor var_1530 = const()[name = tensor("op_1530"), val = tensor([1, 1])]; + tensor var_1532 = const()[name = tensor("op_1532"), val = tensor([1, 1])]; + tensor var_1534_pad_type_0 = const()[name = tensor("op_1534_pad_type_0"), val = tensor("custom")]; + tensor var_1534_pad_0 = const()[name = tensor("op_1534_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1534 = conv(bias = layers_5_mlp_fc1_bias, dilations = var_1532, groups = var_1318, pad = var_1534_pad_0, pad_type = var_1534_pad_type_0, strides = var_1530, weight = layers_5_mlp_fc1_weight, x = var_1524_cast_fp16)[name = tensor("op_1534")]; + tensor input_191_mode_0 = const()[name = tensor("input_191_mode_0"), val = tensor("EXACT")]; + tensor input_191 = gelu(mode = input_191_mode_0, x = var_1534)[name = tensor("input_191")]; + tensor var_1538 = const()[name = tensor("op_1538"), val = tensor([1, 1])]; + tensor var_1540 = const()[name = tensor("op_1540"), val = tensor([1, 1])]; + tensor var_1542_pad_type_0 = const()[name = tensor("op_1542_pad_type_0"), val = tensor("custom")]; + tensor var_1542_pad_0 = const()[name = tensor("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1542 = conv(bias = layers_5_mlp_fc2_bias, dilations = var_1540, groups = var_1318, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1538, weight = layers_5_mlp_fc2_weight, x = input_191)[name = tensor("op_1542")]; + tensor x_49 = add(x = var_1524_cast_fp16, y = var_1542)[name = tensor("x_49")]; + tensor var_1315_promoted_1 = const()[name = tensor("op_1315_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_1316_promoted_1 = const()[name = tensor("op_1316_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_51 = clip(alpha = var_1315_promoted_1, beta = var_1316_promoted_1, x = x_49)[name = tensor("x_51")]; + tensor var_1547 = const()[name = tensor("op_1547"), val = tensor([1])]; + tensor mean_25 = reduce_mean(axes = var_1547, keep_dims = var_1320, x = x_51)[name = tensor("mean_25")]; tensor zero_mean_25 = sub(x = x_51, y = mean_25)[name = tensor("zero_mean_25")]; - tensor var_1354_promoted_1 = const()[name = tensor("op_1354_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1588 = pow(x = zero_mean_25, y = var_1354_promoted_1)[name = tensor("op_1588")]; - tensor var_1589 = const()[name = tensor("op_1589"), val = tensor([1])]; - tensor var_1590 = reduce_mean(axes = var_1589, keep_dims = var_1349, x = var_1588)[name = tensor("op_1590")]; - tensor var_1591_to_fp16 = const()[name = tensor("op_1591_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1592_cast_fp16 = add(x = var_1590, y = var_1591_to_fp16)[name = tensor("op_1592_cast_fp16")]; + tensor var_1317_promoted_1 = const()[name = tensor("op_1317_promoted_1"), val = tensor(0x1p+1)]; + tensor var_1550 = pow(x = zero_mean_25, y = var_1317_promoted_1)[name = tensor("op_1550")]; + tensor var_1551 = const()[name = tensor("op_1551"), val = tensor([1])]; + tensor var_1552 = reduce_mean(axes = var_1551, keep_dims = var_1320, x = var_1550)[name = tensor("op_1552")]; + tensor var_1553_to_fp16 = const()[name = tensor("op_1553_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1554_cast_fp16 = add(x = var_1552, y = var_1553_to_fp16)[name = tensor("op_1554_cast_fp16")]; tensor denom_25_epsilon_0 = const()[name = tensor("denom_25_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0, x = var_1592_cast_fp16)[name = tensor("denom_25_cast_fp16")]; - tensor var_1594_cast_fp16 = mul(x = zero_mean_25, y = denom_25_cast_fp16)[name = tensor("op_1594_cast_fp16")]; - tensor var_1596_gamma_0_to_fp16 = const()[name = tensor("op_1596_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218540928)))]; - tensor var_1596_beta_0_to_fp16 = const()[name = tensor("op_1596_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218542528)))]; - tensor var_1596_epsilon_0_to_fp16 = const()[name = tensor("op_1596_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1596_cast_fp16 = batch_norm(beta = var_1596_beta_0_to_fp16, epsilon = var_1596_epsilon_0_to_fp16, gamma = var_1596_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1594_cast_fp16)[name = tensor("op_1596_cast_fp16")]; - tensor var_1601 = const()[name = tensor("op_1601"), val = tensor(1)]; - tensor var_1602 = const()[name = tensor("op_1602"), val = tensor(0)]; - tensor var_1603 = const()[name = tensor("op_1603"), val = tensor(true)]; - tensor var_1628 = const()[name = tensor("op_1628"), val = tensor([1, 1])]; - tensor var_1630 = const()[name = tensor("op_1630"), val = tensor([1, 1])]; - tensor var_1632_pad_type_0 = const()[name = tensor("op_1632_pad_type_0"), val = tensor("custom")]; - tensor var_1632_pad_0 = const()[name = tensor("op_1632_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1632 = conv(bias = layers_6_attention_q_proj_bias, dilations = var_1630, groups = var_1601, pad = var_1632_pad_0, pad_type = var_1632_pad_type_0, strides = var_1628, weight = layers_6_attention_q_proj_weight, x = var_1596_cast_fp16)[name = tensor("op_1632")]; - tensor var_1633 = const()[name = tensor("op_1633"), val = tensor([1, 64, 12, 512])]; - tensor var_1634 = reshape(shape = var_1633, x = var_1632)[name = tensor("op_1634")]; - tensor var_1637 = const()[name = tensor("op_1637"), val = tensor([1, 1])]; - tensor var_1639 = const()[name = tensor("op_1639"), val = tensor([1, 1])]; - tensor var_1641_pad_type_0 = const()[name = tensor("op_1641_pad_type_0"), val = tensor("custom")]; - tensor var_1641_pad_0 = const()[name = tensor("op_1641_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1641 = conv(bias = layers_6_attention_k_proj_bias, dilations = var_1639, groups = var_1601, pad = var_1641_pad_0, pad_type = var_1641_pad_type_0, strides = var_1637, weight = layers_6_attention_k_proj_weight, x = var_1596_cast_fp16)[name = tensor("op_1641")]; - tensor var_1642 = const()[name = tensor("op_1642"), val = tensor([1, 64, 12, 512])]; - tensor ks_13 = reshape(shape = var_1642, x = var_1641)[name = tensor("ks_13")]; - tensor var_1646 = const()[name = tensor("op_1646"), val = tensor([1, 1])]; - tensor var_1648 = const()[name = tensor("op_1648"), val = tensor([1, 1])]; - tensor var_1650_pad_type_0 = const()[name = tensor("op_1650_pad_type_0"), val = tensor("custom")]; - tensor var_1650_pad_0 = const()[name = tensor("op_1650_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1650 = conv(bias = layers_6_attention_v_proj_bias, dilations = var_1648, groups = var_1601, pad = var_1650_pad_0, pad_type = var_1650_pad_type_0, strides = var_1646, weight = layers_6_attention_v_proj_weight, x = var_1596_cast_fp16)[name = tensor("op_1650")]; - tensor var_1651 = const()[name = tensor("op_1651"), val = tensor([1, 64, 12, 512])]; - tensor var_1652 = reshape(shape = var_1651, x = var_1650)[name = tensor("op_1652")]; - tensor tile_32 = const()[name = tensor("tile_32"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1653_axis_0 = const()[name = tensor("op_1653_axis_0"), val = tensor(2)]; - tensor var_1653_0, tensor var_1653_1, tensor var_1653_2, tensor var_1653_3, tensor var_1653_4, tensor var_1653_5, tensor var_1653_6, tensor var_1653_7, tensor var_1653_8, tensor var_1653_9, tensor var_1653_10, tensor var_1653_11 = split(axis = var_1653_axis_0, split_sizes = tile_32, x = var_1634)[name = tensor("op_1653")]; - tensor var_1666_perm_0 = const()[name = tensor("op_1666_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_33 = const()[name = tensor("tile_33"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1667_axis_0 = const()[name = tensor("op_1667_axis_0"), val = tensor(2)]; - tensor transpose_5 = transpose(perm = var_1666_perm_0, x = ks_13)[name = tensor("transpose_5")]; - tensor var_1667_0, tensor var_1667_1, tensor var_1667_2, tensor var_1667_3, tensor var_1667_4, tensor var_1667_5, tensor var_1667_6, tensor var_1667_7, tensor var_1667_8, tensor var_1667_9, tensor var_1667_10, tensor var_1667_11 = split(axis = var_1667_axis_0, split_sizes = tile_33, x = transpose_5)[name = tensor("op_1667")]; - tensor tile_34 = const()[name = tensor("tile_34"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1680_axis_0 = const()[name = tensor("op_1680_axis_0"), val = tensor(2)]; - tensor var_1680_0, tensor var_1680_1, tensor var_1680_2, tensor var_1680_3, tensor var_1680_4, tensor var_1680_5, tensor var_1680_6, tensor var_1680_7, tensor var_1680_8, tensor var_1680_9, tensor var_1680_10, tensor var_1680_11 = split(axis = var_1680_axis_0, split_sizes = tile_34, x = var_1652)[name = tensor("op_1680")]; - tensor var_1694_equation_0 = const()[name = tensor("op_1694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1694 = einsum(equation = var_1694_equation_0, values = (var_1667_0, var_1653_0))[name = tensor("op_1694")]; - tensor var_1695_to_fp16 = const()[name = tensor("op_1695_to_fp16"), val = tensor(0x1p-3)]; - tensor w_145_cast_fp16 = mul(x = var_1694, y = var_1695_to_fp16)[name = tensor("w_145_cast_fp16")]; - tensor var_1698_equation_0 = const()[name = tensor("op_1698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1698 = einsum(equation = var_1698_equation_0, values = (var_1667_1, var_1653_1))[name = tensor("op_1698")]; - tensor var_1699_to_fp16 = const()[name = tensor("op_1699_to_fp16"), val = tensor(0x1p-3)]; - tensor w_147_cast_fp16 = mul(x = var_1698, y = var_1699_to_fp16)[name = tensor("w_147_cast_fp16")]; - tensor var_1702_equation_0 = const()[name = tensor("op_1702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1702 = einsum(equation = var_1702_equation_0, values = (var_1667_2, var_1653_2))[name = tensor("op_1702")]; - tensor var_1703_to_fp16 = const()[name = tensor("op_1703_to_fp16"), val = tensor(0x1p-3)]; - tensor w_149_cast_fp16 = mul(x = var_1702, y = var_1703_to_fp16)[name = tensor("w_149_cast_fp16")]; - tensor var_1706_equation_0 = const()[name = tensor("op_1706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1706 = einsum(equation = var_1706_equation_0, values = (var_1667_3, var_1653_3))[name = tensor("op_1706")]; - tensor var_1707_to_fp16 = const()[name = tensor("op_1707_to_fp16"), val = tensor(0x1p-3)]; - tensor w_151_cast_fp16 = mul(x = var_1706, y = var_1707_to_fp16)[name = tensor("w_151_cast_fp16")]; - tensor var_1710_equation_0 = const()[name = tensor("op_1710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1710 = einsum(equation = var_1710_equation_0, values = (var_1667_4, var_1653_4))[name = tensor("op_1710")]; - tensor var_1711_to_fp16 = const()[name = tensor("op_1711_to_fp16"), val = tensor(0x1p-3)]; - tensor w_153_cast_fp16 = mul(x = var_1710, y = var_1711_to_fp16)[name = tensor("w_153_cast_fp16")]; - tensor var_1714_equation_0 = const()[name = tensor("op_1714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1714 = einsum(equation = var_1714_equation_0, values = (var_1667_5, var_1653_5))[name = tensor("op_1714")]; - tensor var_1715_to_fp16 = const()[name = tensor("op_1715_to_fp16"), val = tensor(0x1p-3)]; - tensor w_155_cast_fp16 = mul(x = var_1714, y = var_1715_to_fp16)[name = tensor("w_155_cast_fp16")]; - tensor var_1718_equation_0 = const()[name = tensor("op_1718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1718 = einsum(equation = var_1718_equation_0, values = (var_1667_6, var_1653_6))[name = tensor("op_1718")]; - tensor var_1719_to_fp16 = const()[name = tensor("op_1719_to_fp16"), val = tensor(0x1p-3)]; - tensor w_157_cast_fp16 = mul(x = var_1718, y = var_1719_to_fp16)[name = tensor("w_157_cast_fp16")]; - tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1722 = einsum(equation = var_1722_equation_0, values = (var_1667_7, var_1653_7))[name = tensor("op_1722")]; - tensor var_1723_to_fp16 = const()[name = tensor("op_1723_to_fp16"), val = tensor(0x1p-3)]; - tensor w_159_cast_fp16 = mul(x = var_1722, y = var_1723_to_fp16)[name = tensor("w_159_cast_fp16")]; - tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1726 = einsum(equation = var_1726_equation_0, values = (var_1667_8, var_1653_8))[name = tensor("op_1726")]; - tensor var_1727_to_fp16 = const()[name = tensor("op_1727_to_fp16"), val = tensor(0x1p-3)]; - tensor w_161_cast_fp16 = mul(x = var_1726, y = var_1727_to_fp16)[name = tensor("w_161_cast_fp16")]; - tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1730 = einsum(equation = var_1730_equation_0, values = (var_1667_9, var_1653_9))[name = tensor("op_1730")]; - tensor var_1731_to_fp16 = const()[name = tensor("op_1731_to_fp16"), val = tensor(0x1p-3)]; - tensor w_163_cast_fp16 = mul(x = var_1730, y = var_1731_to_fp16)[name = tensor("w_163_cast_fp16")]; - tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1734 = einsum(equation = var_1734_equation_0, values = (var_1667_10, var_1653_10))[name = tensor("op_1734")]; - tensor var_1735_to_fp16 = const()[name = tensor("op_1735_to_fp16"), val = tensor(0x1p-3)]; - tensor w_165_cast_fp16 = mul(x = var_1734, y = var_1735_to_fp16)[name = tensor("w_165_cast_fp16")]; - tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1738 = einsum(equation = var_1738_equation_0, values = (var_1667_11, var_1653_11))[name = tensor("op_1738")]; - tensor var_1739_to_fp16 = const()[name = tensor("op_1739_to_fp16"), val = tensor(0x1p-3)]; - tensor w_167_cast_fp16 = mul(x = var_1738, y = var_1739_to_fp16)[name = tensor("w_167_cast_fp16")]; - tensor var_1741_cast_fp16 = add(x = w_145_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1741_cast_fp16")]; - tensor var_1742_cast_fp16 = softmax(axis = var_1601, x = var_1741_cast_fp16)[name = tensor("op_1742_cast_fp16")]; - tensor var_1743_cast_fp16 = add(x = w_147_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1743_cast_fp16")]; - tensor var_1744_cast_fp16 = softmax(axis = var_1601, x = var_1743_cast_fp16)[name = tensor("op_1744_cast_fp16")]; - tensor var_1745_cast_fp16 = add(x = w_149_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1745_cast_fp16")]; - tensor var_1746_cast_fp16 = softmax(axis = var_1601, x = var_1745_cast_fp16)[name = tensor("op_1746_cast_fp16")]; - tensor var_1747_cast_fp16 = add(x = w_151_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1747_cast_fp16")]; - tensor var_1748_cast_fp16 = softmax(axis = var_1601, x = var_1747_cast_fp16)[name = tensor("op_1748_cast_fp16")]; - tensor var_1749_cast_fp16 = add(x = w_153_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1749_cast_fp16")]; - tensor var_1750_cast_fp16 = softmax(axis = var_1601, x = var_1749_cast_fp16)[name = tensor("op_1750_cast_fp16")]; - tensor var_1751_cast_fp16 = add(x = w_155_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1751_cast_fp16")]; - tensor var_1752_cast_fp16 = softmax(axis = var_1601, x = var_1751_cast_fp16)[name = tensor("op_1752_cast_fp16")]; - tensor var_1753_cast_fp16 = add(x = w_157_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1753_cast_fp16")]; - tensor var_1754_cast_fp16 = softmax(axis = var_1601, x = var_1753_cast_fp16)[name = tensor("op_1754_cast_fp16")]; - tensor var_1755_cast_fp16 = add(x = w_159_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1755_cast_fp16")]; - tensor var_1756_cast_fp16 = softmax(axis = var_1601, x = var_1755_cast_fp16)[name = tensor("op_1756_cast_fp16")]; - tensor var_1757_cast_fp16 = add(x = w_161_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1757_cast_fp16")]; - tensor var_1758_cast_fp16 = softmax(axis = var_1601, x = var_1757_cast_fp16)[name = tensor("op_1758_cast_fp16")]; - tensor var_1759_cast_fp16 = add(x = w_163_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1759_cast_fp16")]; - tensor var_1760_cast_fp16 = softmax(axis = var_1601, x = var_1759_cast_fp16)[name = tensor("op_1760_cast_fp16")]; - tensor var_1761_cast_fp16 = add(x = w_165_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1761_cast_fp16")]; - tensor var_1762_cast_fp16 = softmax(axis = var_1601, x = var_1761_cast_fp16)[name = tensor("op_1762_cast_fp16")]; - tensor var_1763_cast_fp16 = add(x = w_167_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1763_cast_fp16")]; - tensor var_1764_cast_fp16 = softmax(axis = var_1601, x = var_1763_cast_fp16)[name = tensor("op_1764_cast_fp16")]; - tensor var_1766_equation_0 = const()[name = tensor("op_1766_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1766_cast_fp16 = einsum(equation = var_1766_equation_0, values = (var_1680_0, var_1742_cast_fp16))[name = tensor("op_1766_cast_fp16")]; - tensor var_1768_equation_0 = const()[name = tensor("op_1768_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1768_cast_fp16 = einsum(equation = var_1768_equation_0, values = (var_1680_1, var_1744_cast_fp16))[name = tensor("op_1768_cast_fp16")]; - tensor var_1770_equation_0 = const()[name = tensor("op_1770_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1770_cast_fp16 = einsum(equation = var_1770_equation_0, values = (var_1680_2, var_1746_cast_fp16))[name = tensor("op_1770_cast_fp16")]; - tensor var_1772_equation_0 = const()[name = tensor("op_1772_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1772_cast_fp16 = einsum(equation = var_1772_equation_0, values = (var_1680_3, var_1748_cast_fp16))[name = tensor("op_1772_cast_fp16")]; - tensor var_1774_equation_0 = const()[name = tensor("op_1774_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1774_cast_fp16 = einsum(equation = var_1774_equation_0, values = (var_1680_4, var_1750_cast_fp16))[name = tensor("op_1774_cast_fp16")]; - tensor var_1776_equation_0 = const()[name = tensor("op_1776_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1776_cast_fp16 = einsum(equation = var_1776_equation_0, values = (var_1680_5, var_1752_cast_fp16))[name = tensor("op_1776_cast_fp16")]; - tensor var_1778_equation_0 = const()[name = tensor("op_1778_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1778_cast_fp16 = einsum(equation = var_1778_equation_0, values = (var_1680_6, var_1754_cast_fp16))[name = tensor("op_1778_cast_fp16")]; - tensor var_1780_equation_0 = const()[name = tensor("op_1780_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1780_cast_fp16 = einsum(equation = var_1780_equation_0, values = (var_1680_7, var_1756_cast_fp16))[name = tensor("op_1780_cast_fp16")]; - tensor var_1782_equation_0 = const()[name = tensor("op_1782_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1782_cast_fp16 = einsum(equation = var_1782_equation_0, values = (var_1680_8, var_1758_cast_fp16))[name = tensor("op_1782_cast_fp16")]; - tensor var_1784_equation_0 = const()[name = tensor("op_1784_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1784_cast_fp16 = einsum(equation = var_1784_equation_0, values = (var_1680_9, var_1760_cast_fp16))[name = tensor("op_1784_cast_fp16")]; - tensor var_1786_equation_0 = const()[name = tensor("op_1786_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1786_cast_fp16 = einsum(equation = var_1786_equation_0, values = (var_1680_10, var_1762_cast_fp16))[name = tensor("op_1786_cast_fp16")]; - tensor var_1788_equation_0 = const()[name = tensor("op_1788_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1788_cast_fp16 = einsum(equation = var_1788_equation_0, values = (var_1680_11, var_1764_cast_fp16))[name = tensor("op_1788_cast_fp16")]; - tensor var_1790_interleave_0 = const()[name = tensor("op_1790_interleave_0"), val = tensor(false)]; - tensor var_1790_cast_fp16 = concat(axis = var_1601, interleave = var_1790_interleave_0, values = (var_1766_cast_fp16, var_1768_cast_fp16, var_1770_cast_fp16, var_1772_cast_fp16, var_1774_cast_fp16, var_1776_cast_fp16, var_1778_cast_fp16, var_1780_cast_fp16, var_1782_cast_fp16, var_1784_cast_fp16, var_1786_cast_fp16, var_1788_cast_fp16))[name = tensor("op_1790_cast_fp16")]; - tensor var_1794 = const()[name = tensor("op_1794"), val = tensor([1, 1])]; - tensor var_1796 = const()[name = tensor("op_1796"), val = tensor([1, 1])]; - tensor var_1798_pad_type_0 = const()[name = tensor("op_1798_pad_type_0"), val = tensor("custom")]; - tensor var_1798_pad_0 = const()[name = tensor("op_1798_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1798 = conv(bias = layers_6_attention_o_proj_bias, dilations = var_1796, groups = var_1601, pad = var_1798_pad_0, pad_type = var_1798_pad_type_0, strides = var_1794, weight = layers_6_attention_o_proj_weight, x = var_1790_cast_fp16)[name = tensor("op_1798")]; - tensor var_1800_interleave_0 = const()[name = tensor("op_1800_interleave_0"), val = tensor(false)]; - tensor var_1800 = concat(axis = var_1602, interleave = var_1800_interleave_0, values = var_1798)[name = tensor("op_1800")]; - tensor x_53 = add(x = var_1596_cast_fp16, y = var_1800)[name = tensor("x_53")]; - tensor var_1599_promoted = const()[name = tensor("op_1599_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1600_promoted = const()[name = tensor("op_1600_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_55 = clip(alpha = var_1599_promoted, beta = var_1600_promoted, x = x_53)[name = tensor("x_55")]; - tensor var_1805 = const()[name = tensor("op_1805"), val = tensor([1])]; - tensor mean_27 = reduce_mean(axes = var_1805, keep_dims = var_1603, x = x_55)[name = tensor("mean_27")]; + tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0, x = var_1554_cast_fp16)[name = tensor("denom_25_cast_fp16")]; + tensor var_1556_cast_fp16 = mul(x = zero_mean_25, y = denom_25_cast_fp16)[name = tensor("op_1556_cast_fp16")]; + tensor var_1558_gamma_0_to_fp16 = const()[name = tensor("op_1558_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218540928)))]; + tensor var_1558_beta_0_to_fp16 = const()[name = tensor("op_1558_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218542528)))]; + tensor var_1558_epsilon_0_to_fp16 = const()[name = tensor("op_1558_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1558_cast_fp16 = batch_norm(beta = var_1558_beta_0_to_fp16, epsilon = var_1558_epsilon_0_to_fp16, gamma = var_1558_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1556_cast_fp16)[name = tensor("op_1558_cast_fp16")]; + tensor var_1564 = const()[name = tensor("op_1564"), val = tensor(1)]; + tensor var_1565 = const()[name = tensor("op_1565"), val = tensor(0)]; + tensor var_1566 = const()[name = tensor("op_1566"), val = tensor(true)]; + tensor var_1588 = const()[name = tensor("op_1588"), val = tensor([1, 1])]; + tensor var_1590 = const()[name = tensor("op_1590"), val = tensor([1, 1])]; + tensor var_1592_pad_type_0 = const()[name = tensor("op_1592_pad_type_0"), val = tensor("custom")]; + tensor var_1592_pad_0 = const()[name = tensor("op_1592_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1592 = conv(bias = layers_6_attention_q_proj_bias, dilations = var_1590, groups = var_1564, pad = var_1592_pad_0, pad_type = var_1592_pad_type_0, strides = var_1588, weight = layers_6_attention_q_proj_weight, x = var_1558_cast_fp16)[name = tensor("op_1592")]; + tensor var_1595 = const()[name = tensor("op_1595"), val = tensor([1, 1])]; + tensor var_1597 = const()[name = tensor("op_1597"), val = tensor([1, 1])]; + tensor ks_13_pad_type_0 = const()[name = tensor("ks_13_pad_type_0"), val = tensor("custom")]; + tensor ks_13_pad_0 = const()[name = tensor("ks_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_13 = conv(bias = layers_6_attention_k_proj_bias, dilations = var_1597, groups = var_1564, pad = ks_13_pad_0, pad_type = ks_13_pad_type_0, strides = var_1595, weight = layers_6_attention_k_proj_weight, x = var_1558_cast_fp16)[name = tensor("ks_13")]; + tensor var_1602 = const()[name = tensor("op_1602"), val = tensor([1, 1])]; + tensor var_1604 = const()[name = tensor("op_1604"), val = tensor([1, 1])]; + tensor var_1606_pad_type_0 = const()[name = tensor("op_1606_pad_type_0"), val = tensor("custom")]; + tensor var_1606_pad_0 = const()[name = tensor("op_1606_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1606 = conv(bias = layers_6_attention_v_proj_bias, dilations = var_1604, groups = var_1564, pad = var_1606_pad_0, pad_type = var_1606_pad_type_0, strides = var_1602, weight = layers_6_attention_v_proj_weight, x = var_1558_cast_fp16)[name = tensor("op_1606")]; + tensor tile_32 = const()[name = tensor("tile_32"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1607_axis_0 = const()[name = tensor("op_1607_axis_0"), val = tensor(1)]; + tensor var_1607_0, tensor var_1607_1, tensor var_1607_2, tensor var_1607_3, tensor var_1607_4, tensor var_1607_5, tensor var_1607_6, tensor var_1607_7, tensor var_1607_8, tensor var_1607_9, tensor var_1607_10, tensor var_1607_11 = split(axis = var_1607_axis_0, split_sizes = tile_32, x = var_1592)[name = tensor("op_1607")]; + tensor var_1620_perm_0 = const()[name = tensor("op_1620_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_33 = const()[name = tensor("tile_33"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1621_axis_0 = const()[name = tensor("op_1621_axis_0"), val = tensor(3)]; + tensor transpose_5 = transpose(perm = var_1620_perm_0, x = ks_13)[name = tensor("transpose_5")]; + tensor var_1621_0, tensor var_1621_1, tensor var_1621_2, tensor var_1621_3, tensor var_1621_4, tensor var_1621_5, tensor var_1621_6, tensor var_1621_7, tensor var_1621_8, tensor var_1621_9, tensor var_1621_10, tensor var_1621_11 = split(axis = var_1621_axis_0, split_sizes = tile_33, x = transpose_5)[name = tensor("op_1621")]; + tensor tile_34 = const()[name = tensor("tile_34"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1634_axis_0 = const()[name = tensor("op_1634_axis_0"), val = tensor(1)]; + tensor var_1634_0, tensor var_1634_1, tensor var_1634_2, tensor var_1634_3, tensor var_1634_4, tensor var_1634_5, tensor var_1634_6, tensor var_1634_7, tensor var_1634_8, tensor var_1634_9, tensor var_1634_10, tensor var_1634_11 = split(axis = var_1634_axis_0, split_sizes = tile_34, x = var_1606)[name = tensor("op_1634")]; + tensor var_1648_equation_0 = const()[name = tensor("op_1648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1648 = einsum(equation = var_1648_equation_0, values = (var_1621_0, var_1607_0))[name = tensor("op_1648")]; + tensor var_1649_to_fp16 = const()[name = tensor("op_1649_to_fp16"), val = tensor(0x1p-3)]; + tensor w_145_cast_fp16 = mul(x = var_1648, y = var_1649_to_fp16)[name = tensor("w_145_cast_fp16")]; + tensor var_1652_equation_0 = const()[name = tensor("op_1652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1652 = einsum(equation = var_1652_equation_0, values = (var_1621_1, var_1607_1))[name = tensor("op_1652")]; + tensor var_1653_to_fp16 = const()[name = tensor("op_1653_to_fp16"), val = tensor(0x1p-3)]; + tensor w_147_cast_fp16 = mul(x = var_1652, y = var_1653_to_fp16)[name = tensor("w_147_cast_fp16")]; + tensor var_1656_equation_0 = const()[name = tensor("op_1656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1656 = einsum(equation = var_1656_equation_0, values = (var_1621_2, var_1607_2))[name = tensor("op_1656")]; + tensor var_1657_to_fp16 = const()[name = tensor("op_1657_to_fp16"), val = tensor(0x1p-3)]; + tensor w_149_cast_fp16 = mul(x = var_1656, y = var_1657_to_fp16)[name = tensor("w_149_cast_fp16")]; + tensor var_1660_equation_0 = const()[name = tensor("op_1660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1660 = einsum(equation = var_1660_equation_0, values = (var_1621_3, var_1607_3))[name = tensor("op_1660")]; + tensor var_1661_to_fp16 = const()[name = tensor("op_1661_to_fp16"), val = tensor(0x1p-3)]; + tensor w_151_cast_fp16 = mul(x = var_1660, y = var_1661_to_fp16)[name = tensor("w_151_cast_fp16")]; + tensor var_1664_equation_0 = const()[name = tensor("op_1664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1664 = einsum(equation = var_1664_equation_0, values = (var_1621_4, var_1607_4))[name = tensor("op_1664")]; + tensor var_1665_to_fp16 = const()[name = tensor("op_1665_to_fp16"), val = tensor(0x1p-3)]; + tensor w_153_cast_fp16 = mul(x = var_1664, y = var_1665_to_fp16)[name = tensor("w_153_cast_fp16")]; + tensor var_1668_equation_0 = const()[name = tensor("op_1668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1668 = einsum(equation = var_1668_equation_0, values = (var_1621_5, var_1607_5))[name = tensor("op_1668")]; + tensor var_1669_to_fp16 = const()[name = tensor("op_1669_to_fp16"), val = tensor(0x1p-3)]; + tensor w_155_cast_fp16 = mul(x = var_1668, y = var_1669_to_fp16)[name = tensor("w_155_cast_fp16")]; + tensor var_1672_equation_0 = const()[name = tensor("op_1672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1672 = einsum(equation = var_1672_equation_0, values = (var_1621_6, var_1607_6))[name = tensor("op_1672")]; + tensor var_1673_to_fp16 = const()[name = tensor("op_1673_to_fp16"), val = tensor(0x1p-3)]; + tensor w_157_cast_fp16 = mul(x = var_1672, y = var_1673_to_fp16)[name = tensor("w_157_cast_fp16")]; + tensor var_1676_equation_0 = const()[name = tensor("op_1676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1676 = einsum(equation = var_1676_equation_0, values = (var_1621_7, var_1607_7))[name = tensor("op_1676")]; + tensor var_1677_to_fp16 = const()[name = tensor("op_1677_to_fp16"), val = tensor(0x1p-3)]; + tensor w_159_cast_fp16 = mul(x = var_1676, y = var_1677_to_fp16)[name = tensor("w_159_cast_fp16")]; + tensor var_1680_equation_0 = const()[name = tensor("op_1680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1680 = einsum(equation = var_1680_equation_0, values = (var_1621_8, var_1607_8))[name = tensor("op_1680")]; + tensor var_1681_to_fp16 = const()[name = tensor("op_1681_to_fp16"), val = tensor(0x1p-3)]; + tensor w_161_cast_fp16 = mul(x = var_1680, y = var_1681_to_fp16)[name = tensor("w_161_cast_fp16")]; + tensor var_1684_equation_0 = const()[name = tensor("op_1684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1684 = einsum(equation = var_1684_equation_0, values = (var_1621_9, var_1607_9))[name = tensor("op_1684")]; + tensor var_1685_to_fp16 = const()[name = tensor("op_1685_to_fp16"), val = tensor(0x1p-3)]; + tensor w_163_cast_fp16 = mul(x = var_1684, y = var_1685_to_fp16)[name = tensor("w_163_cast_fp16")]; + tensor var_1688_equation_0 = const()[name = tensor("op_1688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1688 = einsum(equation = var_1688_equation_0, values = (var_1621_10, var_1607_10))[name = tensor("op_1688")]; + tensor var_1689_to_fp16 = const()[name = tensor("op_1689_to_fp16"), val = tensor(0x1p-3)]; + tensor w_165_cast_fp16 = mul(x = var_1688, y = var_1689_to_fp16)[name = tensor("w_165_cast_fp16")]; + tensor var_1692_equation_0 = const()[name = tensor("op_1692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1692 = einsum(equation = var_1692_equation_0, values = (var_1621_11, var_1607_11))[name = tensor("op_1692")]; + tensor var_1693_to_fp16 = const()[name = tensor("op_1693_to_fp16"), val = tensor(0x1p-3)]; + tensor w_167_cast_fp16 = mul(x = var_1692, y = var_1693_to_fp16)[name = tensor("w_167_cast_fp16")]; + tensor input_195_cast_fp16 = add(x = w_145_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_195_cast_fp16")]; + tensor var_1696_cast_fp16 = softmax(axis = var_1564, x = input_195_cast_fp16)[name = tensor("op_1696_cast_fp16")]; + tensor input_197_cast_fp16 = add(x = w_147_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_197_cast_fp16")]; + tensor var_1698_cast_fp16 = softmax(axis = var_1564, x = input_197_cast_fp16)[name = tensor("op_1698_cast_fp16")]; + tensor input_199_cast_fp16 = add(x = w_149_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_199_cast_fp16")]; + tensor var_1700_cast_fp16 = softmax(axis = var_1564, x = input_199_cast_fp16)[name = tensor("op_1700_cast_fp16")]; + tensor input_201_cast_fp16 = add(x = w_151_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_201_cast_fp16")]; + tensor var_1702_cast_fp16 = softmax(axis = var_1564, x = input_201_cast_fp16)[name = tensor("op_1702_cast_fp16")]; + tensor input_203_cast_fp16 = add(x = w_153_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_203_cast_fp16")]; + tensor var_1704_cast_fp16 = softmax(axis = var_1564, x = input_203_cast_fp16)[name = tensor("op_1704_cast_fp16")]; + tensor input_205_cast_fp16 = add(x = w_155_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_205_cast_fp16")]; + tensor var_1706_cast_fp16 = softmax(axis = var_1564, x = input_205_cast_fp16)[name = tensor("op_1706_cast_fp16")]; + tensor input_207_cast_fp16 = add(x = w_157_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor var_1708_cast_fp16 = softmax(axis = var_1564, x = input_207_cast_fp16)[name = tensor("op_1708_cast_fp16")]; + tensor input_209_cast_fp16 = add(x = w_159_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_209_cast_fp16")]; + tensor var_1710_cast_fp16 = softmax(axis = var_1564, x = input_209_cast_fp16)[name = tensor("op_1710_cast_fp16")]; + tensor input_211_cast_fp16 = add(x = w_161_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_211_cast_fp16")]; + tensor var_1712_cast_fp16 = softmax(axis = var_1564, x = input_211_cast_fp16)[name = tensor("op_1712_cast_fp16")]; + tensor input_213_cast_fp16 = add(x = w_163_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_213_cast_fp16")]; + tensor var_1714_cast_fp16 = softmax(axis = var_1564, x = input_213_cast_fp16)[name = tensor("op_1714_cast_fp16")]; + tensor input_215_cast_fp16 = add(x = w_165_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_215_cast_fp16")]; + tensor var_1716_cast_fp16 = softmax(axis = var_1564, x = input_215_cast_fp16)[name = tensor("op_1716_cast_fp16")]; + tensor input_217_cast_fp16 = add(x = w_167_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_217_cast_fp16")]; + tensor var_1718_cast_fp16 = softmax(axis = var_1564, x = input_217_cast_fp16)[name = tensor("op_1718_cast_fp16")]; + tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1634_0, var_1696_cast_fp16))[name = tensor("op_1720_cast_fp16")]; + tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1634_1, var_1698_cast_fp16))[name = tensor("op_1722_cast_fp16")]; + tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1634_2, var_1700_cast_fp16))[name = tensor("op_1724_cast_fp16")]; + tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1634_3, var_1702_cast_fp16))[name = tensor("op_1726_cast_fp16")]; + tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1634_4, var_1704_cast_fp16))[name = tensor("op_1728_cast_fp16")]; + tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1634_5, var_1706_cast_fp16))[name = tensor("op_1730_cast_fp16")]; + tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1634_6, var_1708_cast_fp16))[name = tensor("op_1732_cast_fp16")]; + tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1634_7, var_1710_cast_fp16))[name = tensor("op_1734_cast_fp16")]; + tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1634_8, var_1712_cast_fp16))[name = tensor("op_1736_cast_fp16")]; + tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1634_9, var_1714_cast_fp16))[name = tensor("op_1738_cast_fp16")]; + tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1634_10, var_1716_cast_fp16))[name = tensor("op_1740_cast_fp16")]; + tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1634_11, var_1718_cast_fp16))[name = tensor("op_1742_cast_fp16")]; + tensor var_1744_interleave_0 = const()[name = tensor("op_1744_interleave_0"), val = tensor(false)]; + tensor var_1744_cast_fp16 = concat(axis = var_1564, interleave = var_1744_interleave_0, values = (var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16))[name = tensor("op_1744_cast_fp16")]; + tensor var_1748 = const()[name = tensor("op_1748"), val = tensor([1, 1])]; + tensor var_1750 = const()[name = tensor("op_1750"), val = tensor([1, 1])]; + tensor var_1752_pad_type_0 = const()[name = tensor("op_1752_pad_type_0"), val = tensor("custom")]; + tensor var_1752_pad_0 = const()[name = tensor("op_1752_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1752 = conv(bias = layers_6_attention_o_proj_bias, dilations = var_1750, groups = var_1564, pad = var_1752_pad_0, pad_type = var_1752_pad_type_0, strides = var_1748, weight = layers_6_attention_o_proj_weight, x = var_1744_cast_fp16)[name = tensor("op_1752")]; + tensor var_1754_interleave_0 = const()[name = tensor("op_1754_interleave_0"), val = tensor(false)]; + tensor var_1754 = concat(axis = var_1565, interleave = var_1754_interleave_0, values = var_1752)[name = tensor("op_1754")]; + tensor x_53 = add(x = var_1558_cast_fp16, y = var_1754)[name = tensor("x_53")]; + tensor var_1561_promoted = const()[name = tensor("op_1561_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_1562_promoted = const()[name = tensor("op_1562_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_55 = clip(alpha = var_1561_promoted, beta = var_1562_promoted, x = x_53)[name = tensor("x_55")]; + tensor var_1759 = const()[name = tensor("op_1759"), val = tensor([1])]; + tensor mean_27 = reduce_mean(axes = var_1759, keep_dims = var_1566, x = x_55)[name = tensor("mean_27")]; tensor zero_mean_27 = sub(x = x_55, y = mean_27)[name = tensor("zero_mean_27")]; - tensor var_1608_promoted = const()[name = tensor("op_1608_promoted"), val = tensor(0x1p+1)]; - tensor var_1808 = pow(x = zero_mean_27, y = var_1608_promoted)[name = tensor("op_1808")]; - tensor var_1809 = const()[name = tensor("op_1809"), val = tensor([1])]; - tensor var_1810 = reduce_mean(axes = var_1809, keep_dims = var_1603, x = var_1808)[name = tensor("op_1810")]; - tensor var_1811_to_fp16 = const()[name = tensor("op_1811_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1812_cast_fp16 = add(x = var_1810, y = var_1811_to_fp16)[name = tensor("op_1812_cast_fp16")]; + tensor var_1563_promoted = const()[name = tensor("op_1563_promoted"), val = tensor(0x1p+1)]; + tensor var_1762 = pow(x = zero_mean_27, y = var_1563_promoted)[name = tensor("op_1762")]; + tensor var_1763 = const()[name = tensor("op_1763"), val = tensor([1])]; + tensor var_1764 = reduce_mean(axes = var_1763, keep_dims = var_1566, x = var_1762)[name = tensor("op_1764")]; + tensor var_1765_to_fp16 = const()[name = tensor("op_1765_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1766_cast_fp16 = add(x = var_1764, y = var_1765_to_fp16)[name = tensor("op_1766_cast_fp16")]; tensor denom_27_epsilon_0 = const()[name = tensor("denom_27_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0, x = var_1812_cast_fp16)[name = tensor("denom_27_cast_fp16")]; - tensor var_1814_cast_fp16 = mul(x = zero_mean_27, y = denom_27_cast_fp16)[name = tensor("op_1814_cast_fp16")]; - tensor var_1816_gamma_0_to_fp16 = const()[name = tensor("op_1816_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218544128)))]; - tensor var_1816_beta_0_to_fp16 = const()[name = tensor("op_1816_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218545728)))]; - tensor var_1816_epsilon_0_to_fp16 = const()[name = tensor("op_1816_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1816_cast_fp16 = batch_norm(beta = var_1816_beta_0_to_fp16, epsilon = var_1816_epsilon_0_to_fp16, gamma = var_1816_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1814_cast_fp16)[name = tensor("op_1816_cast_fp16")]; - tensor var_1822 = const()[name = tensor("op_1822"), val = tensor([1, 1])]; - tensor var_1824 = const()[name = tensor("op_1824"), val = tensor([1, 1])]; - tensor var_1826_pad_type_0 = const()[name = tensor("op_1826_pad_type_0"), val = tensor("custom")]; - tensor var_1826_pad_0 = const()[name = tensor("op_1826_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1826 = conv(bias = layers_6_mlp_fc1_bias, dilations = var_1824, groups = var_1601, pad = var_1826_pad_0, pad_type = var_1826_pad_type_0, strides = var_1822, weight = layers_6_mlp_fc1_weight, x = var_1816_cast_fp16)[name = tensor("op_1826")]; - tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; - tensor input_55 = gelu(mode = input_55_mode_0, x = var_1826)[name = tensor("input_55")]; - tensor var_1830 = const()[name = tensor("op_1830"), val = tensor([1, 1])]; - tensor var_1832 = const()[name = tensor("op_1832"), val = tensor([1, 1])]; - tensor var_1834_pad_type_0 = const()[name = tensor("op_1834_pad_type_0"), val = tensor("custom")]; - tensor var_1834_pad_0 = const()[name = tensor("op_1834_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1834 = conv(bias = layers_6_mlp_fc2_bias, dilations = var_1832, groups = var_1601, pad = var_1834_pad_0, pad_type = var_1834_pad_type_0, strides = var_1830, weight = layers_6_mlp_fc2_weight, x = input_55)[name = tensor("op_1834")]; - tensor x_57 = add(x = var_1816_cast_fp16, y = var_1834)[name = tensor("x_57")]; - tensor var_1599_promoted_1 = const()[name = tensor("op_1599_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1600_promoted_1 = const()[name = tensor("op_1600_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_59 = clip(alpha = var_1599_promoted_1, beta = var_1600_promoted_1, x = x_57)[name = tensor("x_59")]; - tensor var_1839 = const()[name = tensor("op_1839"), val = tensor([1])]; - tensor mean_29 = reduce_mean(axes = var_1839, keep_dims = var_1603, x = x_59)[name = tensor("mean_29")]; + tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0, x = var_1766_cast_fp16)[name = tensor("denom_27_cast_fp16")]; + tensor var_1768_cast_fp16 = mul(x = zero_mean_27, y = denom_27_cast_fp16)[name = tensor("op_1768_cast_fp16")]; + tensor var_1770_gamma_0_to_fp16 = const()[name = tensor("op_1770_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218544128)))]; + tensor var_1770_beta_0_to_fp16 = const()[name = tensor("op_1770_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218545728)))]; + tensor var_1770_epsilon_0_to_fp16 = const()[name = tensor("op_1770_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1770_cast_fp16 = batch_norm(beta = var_1770_beta_0_to_fp16, epsilon = var_1770_epsilon_0_to_fp16, gamma = var_1770_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1768_cast_fp16)[name = tensor("op_1770_cast_fp16")]; + tensor var_1776 = const()[name = tensor("op_1776"), val = tensor([1, 1])]; + tensor var_1778 = const()[name = tensor("op_1778"), val = tensor([1, 1])]; + tensor var_1780_pad_type_0 = const()[name = tensor("op_1780_pad_type_0"), val = tensor("custom")]; + tensor var_1780_pad_0 = const()[name = tensor("op_1780_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1780 = conv(bias = layers_6_mlp_fc1_bias, dilations = var_1778, groups = var_1564, pad = var_1780_pad_0, pad_type = var_1780_pad_type_0, strides = var_1776, weight = layers_6_mlp_fc1_weight, x = var_1770_cast_fp16)[name = tensor("op_1780")]; + tensor input_223_mode_0 = const()[name = tensor("input_223_mode_0"), val = tensor("EXACT")]; + tensor input_223 = gelu(mode = input_223_mode_0, x = var_1780)[name = tensor("input_223")]; + tensor var_1784 = const()[name = tensor("op_1784"), val = tensor([1, 1])]; + tensor var_1786 = const()[name = tensor("op_1786"), val = tensor([1, 1])]; + tensor var_1788_pad_type_0 = const()[name = tensor("op_1788_pad_type_0"), val = tensor("custom")]; + tensor var_1788_pad_0 = const()[name = tensor("op_1788_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1788 = conv(bias = layers_6_mlp_fc2_bias, dilations = var_1786, groups = var_1564, pad = var_1788_pad_0, pad_type = var_1788_pad_type_0, strides = var_1784, weight = layers_6_mlp_fc2_weight, x = input_223)[name = tensor("op_1788")]; + tensor x_57 = add(x = var_1770_cast_fp16, y = var_1788)[name = tensor("x_57")]; + tensor var_1561_promoted_1 = const()[name = tensor("op_1561_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_1562_promoted_1 = const()[name = tensor("op_1562_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_59 = clip(alpha = var_1561_promoted_1, beta = var_1562_promoted_1, x = x_57)[name = tensor("x_59")]; + tensor var_1793 = const()[name = tensor("op_1793"), val = tensor([1])]; + tensor mean_29 = reduce_mean(axes = var_1793, keep_dims = var_1566, x = x_59)[name = tensor("mean_29")]; tensor zero_mean_29 = sub(x = x_59, y = mean_29)[name = tensor("zero_mean_29")]; - tensor var_1608_promoted_1 = const()[name = tensor("op_1608_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1842 = pow(x = zero_mean_29, y = var_1608_promoted_1)[name = tensor("op_1842")]; - tensor var_1843 = const()[name = tensor("op_1843"), val = tensor([1])]; - tensor var_1844 = reduce_mean(axes = var_1843, keep_dims = var_1603, x = var_1842)[name = tensor("op_1844")]; - tensor var_1845_to_fp16 = const()[name = tensor("op_1845_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1846_cast_fp16 = add(x = var_1844, y = var_1845_to_fp16)[name = tensor("op_1846_cast_fp16")]; + tensor var_1563_promoted_1 = const()[name = tensor("op_1563_promoted_1"), val = tensor(0x1p+1)]; + tensor var_1796 = pow(x = zero_mean_29, y = var_1563_promoted_1)[name = tensor("op_1796")]; + tensor var_1797 = const()[name = tensor("op_1797"), val = tensor([1])]; + tensor var_1798 = reduce_mean(axes = var_1797, keep_dims = var_1566, x = var_1796)[name = tensor("op_1798")]; + tensor var_1799_to_fp16 = const()[name = tensor("op_1799_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1800_cast_fp16 = add(x = var_1798, y = var_1799_to_fp16)[name = tensor("op_1800_cast_fp16")]; tensor denom_29_epsilon_0 = const()[name = tensor("denom_29_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0, x = var_1846_cast_fp16)[name = tensor("denom_29_cast_fp16")]; - tensor var_1848_cast_fp16 = mul(x = zero_mean_29, y = denom_29_cast_fp16)[name = tensor("op_1848_cast_fp16")]; - tensor var_1850_gamma_0_to_fp16 = const()[name = tensor("op_1850_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218547328)))]; - tensor var_1850_beta_0_to_fp16 = const()[name = tensor("op_1850_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218548928)))]; - tensor var_1850_epsilon_0_to_fp16 = const()[name = tensor("op_1850_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1850_cast_fp16 = batch_norm(beta = var_1850_beta_0_to_fp16, epsilon = var_1850_epsilon_0_to_fp16, gamma = var_1850_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1848_cast_fp16)[name = tensor("op_1850_cast_fp16")]; - tensor var_1855 = const()[name = tensor("op_1855"), val = tensor(1)]; - tensor var_1856 = const()[name = tensor("op_1856"), val = tensor(0)]; - tensor var_1857 = const()[name = tensor("op_1857"), val = tensor(true)]; - tensor var_1882 = const()[name = tensor("op_1882"), val = tensor([1, 1])]; - tensor var_1884 = const()[name = tensor("op_1884"), val = tensor([1, 1])]; - tensor var_1886_pad_type_0 = const()[name = tensor("op_1886_pad_type_0"), val = tensor("custom")]; - tensor var_1886_pad_0 = const()[name = tensor("op_1886_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1886 = conv(bias = layers_7_attention_q_proj_bias, dilations = var_1884, groups = var_1855, pad = var_1886_pad_0, pad_type = var_1886_pad_type_0, strides = var_1882, weight = layers_7_attention_q_proj_weight, x = var_1850_cast_fp16)[name = tensor("op_1886")]; - tensor var_1887 = const()[name = tensor("op_1887"), val = tensor([1, 64, 12, 512])]; - tensor var_1888 = reshape(shape = var_1887, x = var_1886)[name = tensor("op_1888")]; - tensor var_1891 = const()[name = tensor("op_1891"), val = tensor([1, 1])]; - tensor var_1893 = const()[name = tensor("op_1893"), val = tensor([1, 1])]; - tensor var_1895_pad_type_0 = const()[name = tensor("op_1895_pad_type_0"), val = tensor("custom")]; - tensor var_1895_pad_0 = const()[name = tensor("op_1895_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1895 = conv(bias = layers_7_attention_k_proj_bias, dilations = var_1893, groups = var_1855, pad = var_1895_pad_0, pad_type = var_1895_pad_type_0, strides = var_1891, weight = layers_7_attention_k_proj_weight, x = var_1850_cast_fp16)[name = tensor("op_1895")]; - tensor var_1896 = const()[name = tensor("op_1896"), val = tensor([1, 64, 12, 512])]; - tensor ks_15 = reshape(shape = var_1896, x = var_1895)[name = tensor("ks_15")]; - tensor var_1900 = const()[name = tensor("op_1900"), val = tensor([1, 1])]; - tensor var_1902 = const()[name = tensor("op_1902"), val = tensor([1, 1])]; - tensor var_1904_pad_type_0 = const()[name = tensor("op_1904_pad_type_0"), val = tensor("custom")]; - tensor var_1904_pad_0 = const()[name = tensor("op_1904_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1904 = conv(bias = layers_7_attention_v_proj_bias, dilations = var_1902, groups = var_1855, pad = var_1904_pad_0, pad_type = var_1904_pad_type_0, strides = var_1900, weight = layers_7_attention_v_proj_weight, x = var_1850_cast_fp16)[name = tensor("op_1904")]; - tensor var_1905 = const()[name = tensor("op_1905"), val = tensor([1, 64, 12, 512])]; - tensor var_1906 = reshape(shape = var_1905, x = var_1904)[name = tensor("op_1906")]; - tensor tile_37 = const()[name = tensor("tile_37"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1907_axis_0 = const()[name = tensor("op_1907_axis_0"), val = tensor(2)]; - tensor var_1907_0, tensor var_1907_1, tensor var_1907_2, tensor var_1907_3, tensor var_1907_4, tensor var_1907_5, tensor var_1907_6, tensor var_1907_7, tensor var_1907_8, tensor var_1907_9, tensor var_1907_10, tensor var_1907_11 = split(axis = var_1907_axis_0, split_sizes = tile_37, x = var_1888)[name = tensor("op_1907")]; - tensor var_1920_perm_0 = const()[name = tensor("op_1920_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_38 = const()[name = tensor("tile_38"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1921_axis_0 = const()[name = tensor("op_1921_axis_0"), val = tensor(2)]; - tensor transpose_4 = transpose(perm = var_1920_perm_0, x = ks_15)[name = tensor("transpose_4")]; - tensor var_1921_0, tensor var_1921_1, tensor var_1921_2, tensor var_1921_3, tensor var_1921_4, tensor var_1921_5, tensor var_1921_6, tensor var_1921_7, tensor var_1921_8, tensor var_1921_9, tensor var_1921_10, tensor var_1921_11 = split(axis = var_1921_axis_0, split_sizes = tile_38, x = transpose_4)[name = tensor("op_1921")]; - tensor tile_39 = const()[name = tensor("tile_39"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_1934_axis_0 = const()[name = tensor("op_1934_axis_0"), val = tensor(2)]; - tensor var_1934_0, tensor var_1934_1, tensor var_1934_2, tensor var_1934_3, tensor var_1934_4, tensor var_1934_5, tensor var_1934_6, tensor var_1934_7, tensor var_1934_8, tensor var_1934_9, tensor var_1934_10, tensor var_1934_11 = split(axis = var_1934_axis_0, split_sizes = tile_39, x = var_1906)[name = tensor("op_1934")]; - tensor var_1948_equation_0 = const()[name = tensor("op_1948_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1948 = einsum(equation = var_1948_equation_0, values = (var_1921_0, var_1907_0))[name = tensor("op_1948")]; - tensor var_1949_to_fp16 = const()[name = tensor("op_1949_to_fp16"), val = tensor(0x1p-3)]; - tensor w_169_cast_fp16 = mul(x = var_1948, y = var_1949_to_fp16)[name = tensor("w_169_cast_fp16")]; - tensor var_1952_equation_0 = const()[name = tensor("op_1952_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1952 = einsum(equation = var_1952_equation_0, values = (var_1921_1, var_1907_1))[name = tensor("op_1952")]; - tensor var_1953_to_fp16 = const()[name = tensor("op_1953_to_fp16"), val = tensor(0x1p-3)]; - tensor w_171_cast_fp16 = mul(x = var_1952, y = var_1953_to_fp16)[name = tensor("w_171_cast_fp16")]; - tensor var_1956_equation_0 = const()[name = tensor("op_1956_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1956 = einsum(equation = var_1956_equation_0, values = (var_1921_2, var_1907_2))[name = tensor("op_1956")]; - tensor var_1957_to_fp16 = const()[name = tensor("op_1957_to_fp16"), val = tensor(0x1p-3)]; - tensor w_173_cast_fp16 = mul(x = var_1956, y = var_1957_to_fp16)[name = tensor("w_173_cast_fp16")]; - tensor var_1960_equation_0 = const()[name = tensor("op_1960_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1960 = einsum(equation = var_1960_equation_0, values = (var_1921_3, var_1907_3))[name = tensor("op_1960")]; - tensor var_1961_to_fp16 = const()[name = tensor("op_1961_to_fp16"), val = tensor(0x1p-3)]; - tensor w_175_cast_fp16 = mul(x = var_1960, y = var_1961_to_fp16)[name = tensor("w_175_cast_fp16")]; - tensor var_1964_equation_0 = const()[name = tensor("op_1964_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1964 = einsum(equation = var_1964_equation_0, values = (var_1921_4, var_1907_4))[name = tensor("op_1964")]; - tensor var_1965_to_fp16 = const()[name = tensor("op_1965_to_fp16"), val = tensor(0x1p-3)]; - tensor w_177_cast_fp16 = mul(x = var_1964, y = var_1965_to_fp16)[name = tensor("w_177_cast_fp16")]; - tensor var_1968_equation_0 = const()[name = tensor("op_1968_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1968 = einsum(equation = var_1968_equation_0, values = (var_1921_5, var_1907_5))[name = tensor("op_1968")]; - tensor var_1969_to_fp16 = const()[name = tensor("op_1969_to_fp16"), val = tensor(0x1p-3)]; - tensor w_179_cast_fp16 = mul(x = var_1968, y = var_1969_to_fp16)[name = tensor("w_179_cast_fp16")]; - tensor var_1972_equation_0 = const()[name = tensor("op_1972_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1972 = einsum(equation = var_1972_equation_0, values = (var_1921_6, var_1907_6))[name = tensor("op_1972")]; - tensor var_1973_to_fp16 = const()[name = tensor("op_1973_to_fp16"), val = tensor(0x1p-3)]; - tensor w_181_cast_fp16 = mul(x = var_1972, y = var_1973_to_fp16)[name = tensor("w_181_cast_fp16")]; - tensor var_1976_equation_0 = const()[name = tensor("op_1976_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1976 = einsum(equation = var_1976_equation_0, values = (var_1921_7, var_1907_7))[name = tensor("op_1976")]; - tensor var_1977_to_fp16 = const()[name = tensor("op_1977_to_fp16"), val = tensor(0x1p-3)]; - tensor w_183_cast_fp16 = mul(x = var_1976, y = var_1977_to_fp16)[name = tensor("w_183_cast_fp16")]; - tensor var_1980_equation_0 = const()[name = tensor("op_1980_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1980 = einsum(equation = var_1980_equation_0, values = (var_1921_8, var_1907_8))[name = tensor("op_1980")]; - tensor var_1981_to_fp16 = const()[name = tensor("op_1981_to_fp16"), val = tensor(0x1p-3)]; - tensor w_185_cast_fp16 = mul(x = var_1980, y = var_1981_to_fp16)[name = tensor("w_185_cast_fp16")]; - tensor var_1984_equation_0 = const()[name = tensor("op_1984_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1984 = einsum(equation = var_1984_equation_0, values = (var_1921_9, var_1907_9))[name = tensor("op_1984")]; - tensor var_1985_to_fp16 = const()[name = tensor("op_1985_to_fp16"), val = tensor(0x1p-3)]; - tensor w_187_cast_fp16 = mul(x = var_1984, y = var_1985_to_fp16)[name = tensor("w_187_cast_fp16")]; - tensor var_1988_equation_0 = const()[name = tensor("op_1988_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1988 = einsum(equation = var_1988_equation_0, values = (var_1921_10, var_1907_10))[name = tensor("op_1988")]; - tensor var_1989_to_fp16 = const()[name = tensor("op_1989_to_fp16"), val = tensor(0x1p-3)]; - tensor w_189_cast_fp16 = mul(x = var_1988, y = var_1989_to_fp16)[name = tensor("w_189_cast_fp16")]; - tensor var_1992_equation_0 = const()[name = tensor("op_1992_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1992 = einsum(equation = var_1992_equation_0, values = (var_1921_11, var_1907_11))[name = tensor("op_1992")]; - tensor var_1993_to_fp16 = const()[name = tensor("op_1993_to_fp16"), val = tensor(0x1p-3)]; - tensor w_191_cast_fp16 = mul(x = var_1992, y = var_1993_to_fp16)[name = tensor("w_191_cast_fp16")]; - tensor var_1995_cast_fp16 = add(x = w_169_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1995_cast_fp16")]; - tensor var_1996_cast_fp16 = softmax(axis = var_1855, x = var_1995_cast_fp16)[name = tensor("op_1996_cast_fp16")]; - tensor var_1997_cast_fp16 = add(x = w_171_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1997_cast_fp16")]; - tensor var_1998_cast_fp16 = softmax(axis = var_1855, x = var_1997_cast_fp16)[name = tensor("op_1998_cast_fp16")]; - tensor var_1999_cast_fp16 = add(x = w_173_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_1999_cast_fp16")]; - tensor var_2000_cast_fp16 = softmax(axis = var_1855, x = var_1999_cast_fp16)[name = tensor("op_2000_cast_fp16")]; - tensor var_2001_cast_fp16 = add(x = w_175_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2001_cast_fp16")]; - tensor var_2002_cast_fp16 = softmax(axis = var_1855, x = var_2001_cast_fp16)[name = tensor("op_2002_cast_fp16")]; - tensor var_2003_cast_fp16 = add(x = w_177_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2003_cast_fp16")]; - tensor var_2004_cast_fp16 = softmax(axis = var_1855, x = var_2003_cast_fp16)[name = tensor("op_2004_cast_fp16")]; - tensor var_2005_cast_fp16 = add(x = w_179_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2005_cast_fp16")]; - tensor var_2006_cast_fp16 = softmax(axis = var_1855, x = var_2005_cast_fp16)[name = tensor("op_2006_cast_fp16")]; - tensor var_2007_cast_fp16 = add(x = w_181_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2007_cast_fp16")]; - tensor var_2008_cast_fp16 = softmax(axis = var_1855, x = var_2007_cast_fp16)[name = tensor("op_2008_cast_fp16")]; - tensor var_2009_cast_fp16 = add(x = w_183_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2009_cast_fp16")]; - tensor var_2010_cast_fp16 = softmax(axis = var_1855, x = var_2009_cast_fp16)[name = tensor("op_2010_cast_fp16")]; - tensor var_2011_cast_fp16 = add(x = w_185_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2011_cast_fp16")]; - tensor var_2012_cast_fp16 = softmax(axis = var_1855, x = var_2011_cast_fp16)[name = tensor("op_2012_cast_fp16")]; - tensor var_2013_cast_fp16 = add(x = w_187_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2013_cast_fp16")]; - tensor var_2014_cast_fp16 = softmax(axis = var_1855, x = var_2013_cast_fp16)[name = tensor("op_2014_cast_fp16")]; - tensor var_2015_cast_fp16 = add(x = w_189_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2015_cast_fp16")]; - tensor var_2016_cast_fp16 = softmax(axis = var_1855, x = var_2015_cast_fp16)[name = tensor("op_2016_cast_fp16")]; - tensor var_2017_cast_fp16 = add(x = w_191_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2017_cast_fp16")]; - tensor var_2018_cast_fp16 = softmax(axis = var_1855, x = var_2017_cast_fp16)[name = tensor("op_2018_cast_fp16")]; - tensor var_2020_equation_0 = const()[name = tensor("op_2020_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2020_cast_fp16 = einsum(equation = var_2020_equation_0, values = (var_1934_0, var_1996_cast_fp16))[name = tensor("op_2020_cast_fp16")]; - tensor var_2022_equation_0 = const()[name = tensor("op_2022_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2022_cast_fp16 = einsum(equation = var_2022_equation_0, values = (var_1934_1, var_1998_cast_fp16))[name = tensor("op_2022_cast_fp16")]; - tensor var_2024_equation_0 = const()[name = tensor("op_2024_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2024_cast_fp16 = einsum(equation = var_2024_equation_0, values = (var_1934_2, var_2000_cast_fp16))[name = tensor("op_2024_cast_fp16")]; - tensor var_2026_equation_0 = const()[name = tensor("op_2026_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2026_cast_fp16 = einsum(equation = var_2026_equation_0, values = (var_1934_3, var_2002_cast_fp16))[name = tensor("op_2026_cast_fp16")]; - tensor var_2028_equation_0 = const()[name = tensor("op_2028_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2028_cast_fp16 = einsum(equation = var_2028_equation_0, values = (var_1934_4, var_2004_cast_fp16))[name = tensor("op_2028_cast_fp16")]; - tensor var_2030_equation_0 = const()[name = tensor("op_2030_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2030_cast_fp16 = einsum(equation = var_2030_equation_0, values = (var_1934_5, var_2006_cast_fp16))[name = tensor("op_2030_cast_fp16")]; - tensor var_2032_equation_0 = const()[name = tensor("op_2032_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2032_cast_fp16 = einsum(equation = var_2032_equation_0, values = (var_1934_6, var_2008_cast_fp16))[name = tensor("op_2032_cast_fp16")]; - tensor var_2034_equation_0 = const()[name = tensor("op_2034_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2034_cast_fp16 = einsum(equation = var_2034_equation_0, values = (var_1934_7, var_2010_cast_fp16))[name = tensor("op_2034_cast_fp16")]; - tensor var_2036_equation_0 = const()[name = tensor("op_2036_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2036_cast_fp16 = einsum(equation = var_2036_equation_0, values = (var_1934_8, var_2012_cast_fp16))[name = tensor("op_2036_cast_fp16")]; - tensor var_2038_equation_0 = const()[name = tensor("op_2038_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2038_cast_fp16 = einsum(equation = var_2038_equation_0, values = (var_1934_9, var_2014_cast_fp16))[name = tensor("op_2038_cast_fp16")]; - tensor var_2040_equation_0 = const()[name = tensor("op_2040_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2040_cast_fp16 = einsum(equation = var_2040_equation_0, values = (var_1934_10, var_2016_cast_fp16))[name = tensor("op_2040_cast_fp16")]; - tensor var_2042_equation_0 = const()[name = tensor("op_2042_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2042_cast_fp16 = einsum(equation = var_2042_equation_0, values = (var_1934_11, var_2018_cast_fp16))[name = tensor("op_2042_cast_fp16")]; - tensor var_2044_interleave_0 = const()[name = tensor("op_2044_interleave_0"), val = tensor(false)]; - tensor var_2044_cast_fp16 = concat(axis = var_1855, interleave = var_2044_interleave_0, values = (var_2020_cast_fp16, var_2022_cast_fp16, var_2024_cast_fp16, var_2026_cast_fp16, var_2028_cast_fp16, var_2030_cast_fp16, var_2032_cast_fp16, var_2034_cast_fp16, var_2036_cast_fp16, var_2038_cast_fp16, var_2040_cast_fp16, var_2042_cast_fp16))[name = tensor("op_2044_cast_fp16")]; - tensor var_2048 = const()[name = tensor("op_2048"), val = tensor([1, 1])]; - tensor var_2050 = const()[name = tensor("op_2050"), val = tensor([1, 1])]; - tensor var_2052_pad_type_0 = const()[name = tensor("op_2052_pad_type_0"), val = tensor("custom")]; - tensor var_2052_pad_0 = const()[name = tensor("op_2052_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2052 = conv(bias = layers_7_attention_o_proj_bias, dilations = var_2050, groups = var_1855, pad = var_2052_pad_0, pad_type = var_2052_pad_type_0, strides = var_2048, weight = layers_7_attention_o_proj_weight, x = var_2044_cast_fp16)[name = tensor("op_2052")]; - tensor var_2054_interleave_0 = const()[name = tensor("op_2054_interleave_0"), val = tensor(false)]; - tensor var_2054 = concat(axis = var_1856, interleave = var_2054_interleave_0, values = var_2052)[name = tensor("op_2054")]; - tensor x_61 = add(x = var_1850_cast_fp16, y = var_2054)[name = tensor("x_61")]; - tensor var_1853_promoted = const()[name = tensor("op_1853_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1854_promoted = const()[name = tensor("op_1854_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_63 = clip(alpha = var_1853_promoted, beta = var_1854_promoted, x = x_61)[name = tensor("x_63")]; - tensor var_2059 = const()[name = tensor("op_2059"), val = tensor([1])]; - tensor mean_31 = reduce_mean(axes = var_2059, keep_dims = var_1857, x = x_63)[name = tensor("mean_31")]; + tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0, x = var_1800_cast_fp16)[name = tensor("denom_29_cast_fp16")]; + tensor var_1802_cast_fp16 = mul(x = zero_mean_29, y = denom_29_cast_fp16)[name = tensor("op_1802_cast_fp16")]; + tensor var_1804_gamma_0_to_fp16 = const()[name = tensor("op_1804_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218547328)))]; + tensor var_1804_beta_0_to_fp16 = const()[name = tensor("op_1804_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218548928)))]; + tensor var_1804_epsilon_0_to_fp16 = const()[name = tensor("op_1804_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1804_cast_fp16 = batch_norm(beta = var_1804_beta_0_to_fp16, epsilon = var_1804_epsilon_0_to_fp16, gamma = var_1804_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1802_cast_fp16)[name = tensor("op_1804_cast_fp16")]; + tensor var_1810 = const()[name = tensor("op_1810"), val = tensor(1)]; + tensor var_1811 = const()[name = tensor("op_1811"), val = tensor(0)]; + tensor var_1812 = const()[name = tensor("op_1812"), val = tensor(true)]; + tensor var_1834 = const()[name = tensor("op_1834"), val = tensor([1, 1])]; + tensor var_1836 = const()[name = tensor("op_1836"), val = tensor([1, 1])]; + tensor var_1838_pad_type_0 = const()[name = tensor("op_1838_pad_type_0"), val = tensor("custom")]; + tensor var_1838_pad_0 = const()[name = tensor("op_1838_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1838 = conv(bias = layers_7_attention_q_proj_bias, dilations = var_1836, groups = var_1810, pad = var_1838_pad_0, pad_type = var_1838_pad_type_0, strides = var_1834, weight = layers_7_attention_q_proj_weight, x = var_1804_cast_fp16)[name = tensor("op_1838")]; + tensor var_1841 = const()[name = tensor("op_1841"), val = tensor([1, 1])]; + tensor var_1843 = const()[name = tensor("op_1843"), val = tensor([1, 1])]; + tensor ks_15_pad_type_0 = const()[name = tensor("ks_15_pad_type_0"), val = tensor("custom")]; + tensor ks_15_pad_0 = const()[name = tensor("ks_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_15 = conv(bias = layers_7_attention_k_proj_bias, dilations = var_1843, groups = var_1810, pad = ks_15_pad_0, pad_type = ks_15_pad_type_0, strides = var_1841, weight = layers_7_attention_k_proj_weight, x = var_1804_cast_fp16)[name = tensor("ks_15")]; + tensor var_1848 = const()[name = tensor("op_1848"), val = tensor([1, 1])]; + tensor var_1850 = const()[name = tensor("op_1850"), val = tensor([1, 1])]; + tensor var_1852_pad_type_0 = const()[name = tensor("op_1852_pad_type_0"), val = tensor("custom")]; + tensor var_1852_pad_0 = const()[name = tensor("op_1852_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1852 = conv(bias = layers_7_attention_v_proj_bias, dilations = var_1850, groups = var_1810, pad = var_1852_pad_0, pad_type = var_1852_pad_type_0, strides = var_1848, weight = layers_7_attention_v_proj_weight, x = var_1804_cast_fp16)[name = tensor("op_1852")]; + tensor tile_37 = const()[name = tensor("tile_37"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1853_axis_0 = const()[name = tensor("op_1853_axis_0"), val = tensor(1)]; + tensor var_1853_0, tensor var_1853_1, tensor var_1853_2, tensor var_1853_3, tensor var_1853_4, tensor var_1853_5, tensor var_1853_6, tensor var_1853_7, tensor var_1853_8, tensor var_1853_9, tensor var_1853_10, tensor var_1853_11 = split(axis = var_1853_axis_0, split_sizes = tile_37, x = var_1838)[name = tensor("op_1853")]; + tensor var_1866_perm_0 = const()[name = tensor("op_1866_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_38 = const()[name = tensor("tile_38"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1867_axis_0 = const()[name = tensor("op_1867_axis_0"), val = tensor(3)]; + tensor transpose_4 = transpose(perm = var_1866_perm_0, x = ks_15)[name = tensor("transpose_4")]; + tensor var_1867_0, tensor var_1867_1, tensor var_1867_2, tensor var_1867_3, tensor var_1867_4, tensor var_1867_5, tensor var_1867_6, tensor var_1867_7, tensor var_1867_8, tensor var_1867_9, tensor var_1867_10, tensor var_1867_11 = split(axis = var_1867_axis_0, split_sizes = tile_38, x = transpose_4)[name = tensor("op_1867")]; + tensor tile_39 = const()[name = tensor("tile_39"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_1880_axis_0 = const()[name = tensor("op_1880_axis_0"), val = tensor(1)]; + tensor var_1880_0, tensor var_1880_1, tensor var_1880_2, tensor var_1880_3, tensor var_1880_4, tensor var_1880_5, tensor var_1880_6, tensor var_1880_7, tensor var_1880_8, tensor var_1880_9, tensor var_1880_10, tensor var_1880_11 = split(axis = var_1880_axis_0, split_sizes = tile_39, x = var_1852)[name = tensor("op_1880")]; + tensor var_1894_equation_0 = const()[name = tensor("op_1894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1894 = einsum(equation = var_1894_equation_0, values = (var_1867_0, var_1853_0))[name = tensor("op_1894")]; + tensor var_1895_to_fp16 = const()[name = tensor("op_1895_to_fp16"), val = tensor(0x1p-3)]; + tensor w_169_cast_fp16 = mul(x = var_1894, y = var_1895_to_fp16)[name = tensor("w_169_cast_fp16")]; + tensor var_1898_equation_0 = const()[name = tensor("op_1898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1898 = einsum(equation = var_1898_equation_0, values = (var_1867_1, var_1853_1))[name = tensor("op_1898")]; + tensor var_1899_to_fp16 = const()[name = tensor("op_1899_to_fp16"), val = tensor(0x1p-3)]; + tensor w_171_cast_fp16 = mul(x = var_1898, y = var_1899_to_fp16)[name = tensor("w_171_cast_fp16")]; + tensor var_1902_equation_0 = const()[name = tensor("op_1902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1902 = einsum(equation = var_1902_equation_0, values = (var_1867_2, var_1853_2))[name = tensor("op_1902")]; + tensor var_1903_to_fp16 = const()[name = tensor("op_1903_to_fp16"), val = tensor(0x1p-3)]; + tensor w_173_cast_fp16 = mul(x = var_1902, y = var_1903_to_fp16)[name = tensor("w_173_cast_fp16")]; + tensor var_1906_equation_0 = const()[name = tensor("op_1906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1906 = einsum(equation = var_1906_equation_0, values = (var_1867_3, var_1853_3))[name = tensor("op_1906")]; + tensor var_1907_to_fp16 = const()[name = tensor("op_1907_to_fp16"), val = tensor(0x1p-3)]; + tensor w_175_cast_fp16 = mul(x = var_1906, y = var_1907_to_fp16)[name = tensor("w_175_cast_fp16")]; + tensor var_1910_equation_0 = const()[name = tensor("op_1910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1910 = einsum(equation = var_1910_equation_0, values = (var_1867_4, var_1853_4))[name = tensor("op_1910")]; + tensor var_1911_to_fp16 = const()[name = tensor("op_1911_to_fp16"), val = tensor(0x1p-3)]; + tensor w_177_cast_fp16 = mul(x = var_1910, y = var_1911_to_fp16)[name = tensor("w_177_cast_fp16")]; + tensor var_1914_equation_0 = const()[name = tensor("op_1914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1914 = einsum(equation = var_1914_equation_0, values = (var_1867_5, var_1853_5))[name = tensor("op_1914")]; + tensor var_1915_to_fp16 = const()[name = tensor("op_1915_to_fp16"), val = tensor(0x1p-3)]; + tensor w_179_cast_fp16 = mul(x = var_1914, y = var_1915_to_fp16)[name = tensor("w_179_cast_fp16")]; + tensor var_1918_equation_0 = const()[name = tensor("op_1918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1918 = einsum(equation = var_1918_equation_0, values = (var_1867_6, var_1853_6))[name = tensor("op_1918")]; + tensor var_1919_to_fp16 = const()[name = tensor("op_1919_to_fp16"), val = tensor(0x1p-3)]; + tensor w_181_cast_fp16 = mul(x = var_1918, y = var_1919_to_fp16)[name = tensor("w_181_cast_fp16")]; + tensor var_1922_equation_0 = const()[name = tensor("op_1922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1922 = einsum(equation = var_1922_equation_0, values = (var_1867_7, var_1853_7))[name = tensor("op_1922")]; + tensor var_1923_to_fp16 = const()[name = tensor("op_1923_to_fp16"), val = tensor(0x1p-3)]; + tensor w_183_cast_fp16 = mul(x = var_1922, y = var_1923_to_fp16)[name = tensor("w_183_cast_fp16")]; + tensor var_1926_equation_0 = const()[name = tensor("op_1926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1926 = einsum(equation = var_1926_equation_0, values = (var_1867_8, var_1853_8))[name = tensor("op_1926")]; + tensor var_1927_to_fp16 = const()[name = tensor("op_1927_to_fp16"), val = tensor(0x1p-3)]; + tensor w_185_cast_fp16 = mul(x = var_1926, y = var_1927_to_fp16)[name = tensor("w_185_cast_fp16")]; + tensor var_1930_equation_0 = const()[name = tensor("op_1930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1930 = einsum(equation = var_1930_equation_0, values = (var_1867_9, var_1853_9))[name = tensor("op_1930")]; + tensor var_1931_to_fp16 = const()[name = tensor("op_1931_to_fp16"), val = tensor(0x1p-3)]; + tensor w_187_cast_fp16 = mul(x = var_1930, y = var_1931_to_fp16)[name = tensor("w_187_cast_fp16")]; + tensor var_1934_equation_0 = const()[name = tensor("op_1934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1934 = einsum(equation = var_1934_equation_0, values = (var_1867_10, var_1853_10))[name = tensor("op_1934")]; + tensor var_1935_to_fp16 = const()[name = tensor("op_1935_to_fp16"), val = tensor(0x1p-3)]; + tensor w_189_cast_fp16 = mul(x = var_1934, y = var_1935_to_fp16)[name = tensor("w_189_cast_fp16")]; + tensor var_1938_equation_0 = const()[name = tensor("op_1938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1938 = einsum(equation = var_1938_equation_0, values = (var_1867_11, var_1853_11))[name = tensor("op_1938")]; + tensor var_1939_to_fp16 = const()[name = tensor("op_1939_to_fp16"), val = tensor(0x1p-3)]; + tensor w_191_cast_fp16 = mul(x = var_1938, y = var_1939_to_fp16)[name = tensor("w_191_cast_fp16")]; + tensor input_227_cast_fp16 = add(x = w_169_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_227_cast_fp16")]; + tensor var_1942_cast_fp16 = softmax(axis = var_1810, x = input_227_cast_fp16)[name = tensor("op_1942_cast_fp16")]; + tensor input_229_cast_fp16 = add(x = w_171_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_229_cast_fp16")]; + tensor var_1944_cast_fp16 = softmax(axis = var_1810, x = input_229_cast_fp16)[name = tensor("op_1944_cast_fp16")]; + tensor input_231_cast_fp16 = add(x = w_173_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_231_cast_fp16")]; + tensor var_1946_cast_fp16 = softmax(axis = var_1810, x = input_231_cast_fp16)[name = tensor("op_1946_cast_fp16")]; + tensor input_233_cast_fp16 = add(x = w_175_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_233_cast_fp16")]; + tensor var_1948_cast_fp16 = softmax(axis = var_1810, x = input_233_cast_fp16)[name = tensor("op_1948_cast_fp16")]; + tensor input_235_cast_fp16 = add(x = w_177_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_235_cast_fp16")]; + tensor var_1950_cast_fp16 = softmax(axis = var_1810, x = input_235_cast_fp16)[name = tensor("op_1950_cast_fp16")]; + tensor input_237_cast_fp16 = add(x = w_179_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_237_cast_fp16")]; + tensor var_1952_cast_fp16 = softmax(axis = var_1810, x = input_237_cast_fp16)[name = tensor("op_1952_cast_fp16")]; + tensor input_239_cast_fp16 = add(x = w_181_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_239_cast_fp16")]; + tensor var_1954_cast_fp16 = softmax(axis = var_1810, x = input_239_cast_fp16)[name = tensor("op_1954_cast_fp16")]; + tensor input_241_cast_fp16 = add(x = w_183_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_241_cast_fp16")]; + tensor var_1956_cast_fp16 = softmax(axis = var_1810, x = input_241_cast_fp16)[name = tensor("op_1956_cast_fp16")]; + tensor input_243_cast_fp16 = add(x = w_185_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_243_cast_fp16")]; + tensor var_1958_cast_fp16 = softmax(axis = var_1810, x = input_243_cast_fp16)[name = tensor("op_1958_cast_fp16")]; + tensor input_245_cast_fp16 = add(x = w_187_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_245_cast_fp16")]; + tensor var_1960_cast_fp16 = softmax(axis = var_1810, x = input_245_cast_fp16)[name = tensor("op_1960_cast_fp16")]; + tensor input_247_cast_fp16 = add(x = w_189_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_247_cast_fp16")]; + tensor var_1962_cast_fp16 = softmax(axis = var_1810, x = input_247_cast_fp16)[name = tensor("op_1962_cast_fp16")]; + tensor input_249_cast_fp16 = add(x = w_191_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_249_cast_fp16")]; + tensor var_1964_cast_fp16 = softmax(axis = var_1810, x = input_249_cast_fp16)[name = tensor("op_1964_cast_fp16")]; + tensor var_1966_equation_0 = const()[name = tensor("op_1966_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1966_cast_fp16 = einsum(equation = var_1966_equation_0, values = (var_1880_0, var_1942_cast_fp16))[name = tensor("op_1966_cast_fp16")]; + tensor var_1968_equation_0 = const()[name = tensor("op_1968_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1968_cast_fp16 = einsum(equation = var_1968_equation_0, values = (var_1880_1, var_1944_cast_fp16))[name = tensor("op_1968_cast_fp16")]; + tensor var_1970_equation_0 = const()[name = tensor("op_1970_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1970_cast_fp16 = einsum(equation = var_1970_equation_0, values = (var_1880_2, var_1946_cast_fp16))[name = tensor("op_1970_cast_fp16")]; + tensor var_1972_equation_0 = const()[name = tensor("op_1972_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1972_cast_fp16 = einsum(equation = var_1972_equation_0, values = (var_1880_3, var_1948_cast_fp16))[name = tensor("op_1972_cast_fp16")]; + tensor var_1974_equation_0 = const()[name = tensor("op_1974_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1974_cast_fp16 = einsum(equation = var_1974_equation_0, values = (var_1880_4, var_1950_cast_fp16))[name = tensor("op_1974_cast_fp16")]; + tensor var_1976_equation_0 = const()[name = tensor("op_1976_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1976_cast_fp16 = einsum(equation = var_1976_equation_0, values = (var_1880_5, var_1952_cast_fp16))[name = tensor("op_1976_cast_fp16")]; + tensor var_1978_equation_0 = const()[name = tensor("op_1978_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1978_cast_fp16 = einsum(equation = var_1978_equation_0, values = (var_1880_6, var_1954_cast_fp16))[name = tensor("op_1978_cast_fp16")]; + tensor var_1980_equation_0 = const()[name = tensor("op_1980_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1980_cast_fp16 = einsum(equation = var_1980_equation_0, values = (var_1880_7, var_1956_cast_fp16))[name = tensor("op_1980_cast_fp16")]; + tensor var_1982_equation_0 = const()[name = tensor("op_1982_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1982_cast_fp16 = einsum(equation = var_1982_equation_0, values = (var_1880_8, var_1958_cast_fp16))[name = tensor("op_1982_cast_fp16")]; + tensor var_1984_equation_0 = const()[name = tensor("op_1984_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1984_cast_fp16 = einsum(equation = var_1984_equation_0, values = (var_1880_9, var_1960_cast_fp16))[name = tensor("op_1984_cast_fp16")]; + tensor var_1986_equation_0 = const()[name = tensor("op_1986_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1986_cast_fp16 = einsum(equation = var_1986_equation_0, values = (var_1880_10, var_1962_cast_fp16))[name = tensor("op_1986_cast_fp16")]; + tensor var_1988_equation_0 = const()[name = tensor("op_1988_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1988_cast_fp16 = einsum(equation = var_1988_equation_0, values = (var_1880_11, var_1964_cast_fp16))[name = tensor("op_1988_cast_fp16")]; + tensor var_1990_interleave_0 = const()[name = tensor("op_1990_interleave_0"), val = tensor(false)]; + tensor var_1990_cast_fp16 = concat(axis = var_1810, interleave = var_1990_interleave_0, values = (var_1966_cast_fp16, var_1968_cast_fp16, var_1970_cast_fp16, var_1972_cast_fp16, var_1974_cast_fp16, var_1976_cast_fp16, var_1978_cast_fp16, var_1980_cast_fp16, var_1982_cast_fp16, var_1984_cast_fp16, var_1986_cast_fp16, var_1988_cast_fp16))[name = tensor("op_1990_cast_fp16")]; + tensor var_1994 = const()[name = tensor("op_1994"), val = tensor([1, 1])]; + tensor var_1996 = const()[name = tensor("op_1996"), val = tensor([1, 1])]; + tensor var_1998_pad_type_0 = const()[name = tensor("op_1998_pad_type_0"), val = tensor("custom")]; + tensor var_1998_pad_0 = const()[name = tensor("op_1998_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1998 = conv(bias = layers_7_attention_o_proj_bias, dilations = var_1996, groups = var_1810, pad = var_1998_pad_0, pad_type = var_1998_pad_type_0, strides = var_1994, weight = layers_7_attention_o_proj_weight, x = var_1990_cast_fp16)[name = tensor("op_1998")]; + tensor var_2000_interleave_0 = const()[name = tensor("op_2000_interleave_0"), val = tensor(false)]; + tensor var_2000 = concat(axis = var_1811, interleave = var_2000_interleave_0, values = var_1998)[name = tensor("op_2000")]; + tensor x_61 = add(x = var_1804_cast_fp16, y = var_2000)[name = tensor("x_61")]; + tensor var_1807_promoted = const()[name = tensor("op_1807_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_1808_promoted = const()[name = tensor("op_1808_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_63 = clip(alpha = var_1807_promoted, beta = var_1808_promoted, x = x_61)[name = tensor("x_63")]; + tensor var_2005 = const()[name = tensor("op_2005"), val = tensor([1])]; + tensor mean_31 = reduce_mean(axes = var_2005, keep_dims = var_1812, x = x_63)[name = tensor("mean_31")]; tensor zero_mean_31 = sub(x = x_63, y = mean_31)[name = tensor("zero_mean_31")]; - tensor var_1862_promoted = const()[name = tensor("op_1862_promoted"), val = tensor(0x1p+1)]; - tensor var_2062 = pow(x = zero_mean_31, y = var_1862_promoted)[name = tensor("op_2062")]; - tensor var_2063 = const()[name = tensor("op_2063"), val = tensor([1])]; - tensor var_2064 = reduce_mean(axes = var_2063, keep_dims = var_1857, x = var_2062)[name = tensor("op_2064")]; - tensor var_2065_to_fp16 = const()[name = tensor("op_2065_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2066_cast_fp16 = add(x = var_2064, y = var_2065_to_fp16)[name = tensor("op_2066_cast_fp16")]; + tensor var_1809_promoted = const()[name = tensor("op_1809_promoted"), val = tensor(0x1p+1)]; + tensor var_2008 = pow(x = zero_mean_31, y = var_1809_promoted)[name = tensor("op_2008")]; + tensor var_2009 = const()[name = tensor("op_2009"), val = tensor([1])]; + tensor var_2010 = reduce_mean(axes = var_2009, keep_dims = var_1812, x = var_2008)[name = tensor("op_2010")]; + tensor var_2011_to_fp16 = const()[name = tensor("op_2011_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2012_cast_fp16 = add(x = var_2010, y = var_2011_to_fp16)[name = tensor("op_2012_cast_fp16")]; tensor denom_31_epsilon_0 = const()[name = tensor("denom_31_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0, x = var_2066_cast_fp16)[name = tensor("denom_31_cast_fp16")]; - tensor var_2068_cast_fp16 = mul(x = zero_mean_31, y = denom_31_cast_fp16)[name = tensor("op_2068_cast_fp16")]; - tensor var_2070_gamma_0_to_fp16 = const()[name = tensor("op_2070_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218550528)))]; - tensor var_2070_beta_0_to_fp16 = const()[name = tensor("op_2070_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218552128)))]; - tensor var_2070_epsilon_0_to_fp16 = const()[name = tensor("op_2070_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2070_cast_fp16 = batch_norm(beta = var_2070_beta_0_to_fp16, epsilon = var_2070_epsilon_0_to_fp16, gamma = var_2070_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2068_cast_fp16)[name = tensor("op_2070_cast_fp16")]; - tensor var_2076 = const()[name = tensor("op_2076"), val = tensor([1, 1])]; - tensor var_2078 = const()[name = tensor("op_2078"), val = tensor([1, 1])]; - tensor var_2080_pad_type_0 = const()[name = tensor("op_2080_pad_type_0"), val = tensor("custom")]; - tensor var_2080_pad_0 = const()[name = tensor("op_2080_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2080 = conv(bias = layers_7_mlp_fc1_bias, dilations = var_2078, groups = var_1855, pad = var_2080_pad_0, pad_type = var_2080_pad_type_0, strides = var_2076, weight = layers_7_mlp_fc1_weight, x = var_2070_cast_fp16)[name = tensor("op_2080")]; - tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; - tensor input_63 = gelu(mode = input_63_mode_0, x = var_2080)[name = tensor("input_63")]; - tensor var_2084 = const()[name = tensor("op_2084"), val = tensor([1, 1])]; - tensor var_2086 = const()[name = tensor("op_2086"), val = tensor([1, 1])]; - tensor var_2088_pad_type_0 = const()[name = tensor("op_2088_pad_type_0"), val = tensor("custom")]; - tensor var_2088_pad_0 = const()[name = tensor("op_2088_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2088 = conv(bias = layers_7_mlp_fc2_bias, dilations = var_2086, groups = var_1855, pad = var_2088_pad_0, pad_type = var_2088_pad_type_0, strides = var_2084, weight = layers_7_mlp_fc2_weight, x = input_63)[name = tensor("op_2088")]; - tensor x_65 = add(x = var_2070_cast_fp16, y = var_2088)[name = tensor("x_65")]; - tensor var_1853_promoted_1 = const()[name = tensor("op_1853_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1854_promoted_1 = const()[name = tensor("op_1854_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_67 = clip(alpha = var_1853_promoted_1, beta = var_1854_promoted_1, x = x_65)[name = tensor("x_67")]; - tensor var_2093 = const()[name = tensor("op_2093"), val = tensor([1])]; - tensor mean_33 = reduce_mean(axes = var_2093, keep_dims = var_1857, x = x_67)[name = tensor("mean_33")]; + tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0, x = var_2012_cast_fp16)[name = tensor("denom_31_cast_fp16")]; + tensor var_2014_cast_fp16 = mul(x = zero_mean_31, y = denom_31_cast_fp16)[name = tensor("op_2014_cast_fp16")]; + tensor var_2016_gamma_0_to_fp16 = const()[name = tensor("op_2016_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218550528)))]; + tensor var_2016_beta_0_to_fp16 = const()[name = tensor("op_2016_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218552128)))]; + tensor var_2016_epsilon_0_to_fp16 = const()[name = tensor("op_2016_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2016_cast_fp16 = batch_norm(beta = var_2016_beta_0_to_fp16, epsilon = var_2016_epsilon_0_to_fp16, gamma = var_2016_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2014_cast_fp16)[name = tensor("op_2016_cast_fp16")]; + tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 1])]; + tensor var_2024 = const()[name = tensor("op_2024"), val = tensor([1, 1])]; + tensor var_2026_pad_type_0 = const()[name = tensor("op_2026_pad_type_0"), val = tensor("custom")]; + tensor var_2026_pad_0 = const()[name = tensor("op_2026_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2026 = conv(bias = layers_7_mlp_fc1_bias, dilations = var_2024, groups = var_1810, pad = var_2026_pad_0, pad_type = var_2026_pad_type_0, strides = var_2022, weight = layers_7_mlp_fc1_weight, x = var_2016_cast_fp16)[name = tensor("op_2026")]; + tensor input_255_mode_0 = const()[name = tensor("input_255_mode_0"), val = tensor("EXACT")]; + tensor input_255 = gelu(mode = input_255_mode_0, x = var_2026)[name = tensor("input_255")]; + tensor var_2030 = const()[name = tensor("op_2030"), val = tensor([1, 1])]; + tensor var_2032 = const()[name = tensor("op_2032"), val = tensor([1, 1])]; + tensor var_2034_pad_type_0 = const()[name = tensor("op_2034_pad_type_0"), val = tensor("custom")]; + tensor var_2034_pad_0 = const()[name = tensor("op_2034_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034 = conv(bias = layers_7_mlp_fc2_bias, dilations = var_2032, groups = var_1810, pad = var_2034_pad_0, pad_type = var_2034_pad_type_0, strides = var_2030, weight = layers_7_mlp_fc2_weight, x = input_255)[name = tensor("op_2034")]; + tensor x_65 = add(x = var_2016_cast_fp16, y = var_2034)[name = tensor("x_65")]; + tensor var_1807_promoted_1 = const()[name = tensor("op_1807_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_1808_promoted_1 = const()[name = tensor("op_1808_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_67 = clip(alpha = var_1807_promoted_1, beta = var_1808_promoted_1, x = x_65)[name = tensor("x_67")]; + tensor var_2039 = const()[name = tensor("op_2039"), val = tensor([1])]; + tensor mean_33 = reduce_mean(axes = var_2039, keep_dims = var_1812, x = x_67)[name = tensor("mean_33")]; tensor zero_mean_33 = sub(x = x_67, y = mean_33)[name = tensor("zero_mean_33")]; - tensor var_1862_promoted_1 = const()[name = tensor("op_1862_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2096 = pow(x = zero_mean_33, y = var_1862_promoted_1)[name = tensor("op_2096")]; - tensor var_2097 = const()[name = tensor("op_2097"), val = tensor([1])]; - tensor var_2098 = reduce_mean(axes = var_2097, keep_dims = var_1857, x = var_2096)[name = tensor("op_2098")]; - tensor var_2099_to_fp16 = const()[name = tensor("op_2099_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2100_cast_fp16 = add(x = var_2098, y = var_2099_to_fp16)[name = tensor("op_2100_cast_fp16")]; + tensor var_1809_promoted_1 = const()[name = tensor("op_1809_promoted_1"), val = tensor(0x1p+1)]; + tensor var_2042 = pow(x = zero_mean_33, y = var_1809_promoted_1)[name = tensor("op_2042")]; + tensor var_2043 = const()[name = tensor("op_2043"), val = tensor([1])]; + tensor var_2044 = reduce_mean(axes = var_2043, keep_dims = var_1812, x = var_2042)[name = tensor("op_2044")]; + tensor var_2045_to_fp16 = const()[name = tensor("op_2045_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2046_cast_fp16 = add(x = var_2044, y = var_2045_to_fp16)[name = tensor("op_2046_cast_fp16")]; tensor denom_33_epsilon_0 = const()[name = tensor("denom_33_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0, x = var_2100_cast_fp16)[name = tensor("denom_33_cast_fp16")]; - tensor var_2102_cast_fp16 = mul(x = zero_mean_33, y = denom_33_cast_fp16)[name = tensor("op_2102_cast_fp16")]; - tensor var_2104_gamma_0_to_fp16 = const()[name = tensor("op_2104_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218553728)))]; - tensor var_2104_beta_0_to_fp16 = const()[name = tensor("op_2104_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218555328)))]; - tensor var_2104_epsilon_0_to_fp16 = const()[name = tensor("op_2104_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2104_cast_fp16 = batch_norm(beta = var_2104_beta_0_to_fp16, epsilon = var_2104_epsilon_0_to_fp16, gamma = var_2104_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2102_cast_fp16)[name = tensor("op_2104_cast_fp16")]; - tensor var_2109 = const()[name = tensor("op_2109"), val = tensor(1)]; - tensor var_2110 = const()[name = tensor("op_2110"), val = tensor(0)]; - tensor var_2111 = const()[name = tensor("op_2111"), val = tensor(true)]; - tensor var_2136 = const()[name = tensor("op_2136"), val = tensor([1, 1])]; - tensor var_2138 = const()[name = tensor("op_2138"), val = tensor([1, 1])]; - tensor var_2140_pad_type_0 = const()[name = tensor("op_2140_pad_type_0"), val = tensor("custom")]; - tensor var_2140_pad_0 = const()[name = tensor("op_2140_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2140 = conv(bias = layers_8_attention_q_proj_bias, dilations = var_2138, groups = var_2109, pad = var_2140_pad_0, pad_type = var_2140_pad_type_0, strides = var_2136, weight = layers_8_attention_q_proj_weight, x = var_2104_cast_fp16)[name = tensor("op_2140")]; - tensor var_2141 = const()[name = tensor("op_2141"), val = tensor([1, 64, 12, 512])]; - tensor var_2142 = reshape(shape = var_2141, x = var_2140)[name = tensor("op_2142")]; - tensor var_2145 = const()[name = tensor("op_2145"), val = tensor([1, 1])]; - tensor var_2147 = const()[name = tensor("op_2147"), val = tensor([1, 1])]; - tensor var_2149_pad_type_0 = const()[name = tensor("op_2149_pad_type_0"), val = tensor("custom")]; - tensor var_2149_pad_0 = const()[name = tensor("op_2149_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2149 = conv(bias = layers_8_attention_k_proj_bias, dilations = var_2147, groups = var_2109, pad = var_2149_pad_0, pad_type = var_2149_pad_type_0, strides = var_2145, weight = layers_8_attention_k_proj_weight, x = var_2104_cast_fp16)[name = tensor("op_2149")]; - tensor var_2150 = const()[name = tensor("op_2150"), val = tensor([1, 64, 12, 512])]; - tensor ks_17 = reshape(shape = var_2150, x = var_2149)[name = tensor("ks_17")]; - tensor var_2154 = const()[name = tensor("op_2154"), val = tensor([1, 1])]; - tensor var_2156 = const()[name = tensor("op_2156"), val = tensor([1, 1])]; - tensor var_2158_pad_type_0 = const()[name = tensor("op_2158_pad_type_0"), val = tensor("custom")]; - tensor var_2158_pad_0 = const()[name = tensor("op_2158_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2158 = conv(bias = layers_8_attention_v_proj_bias, dilations = var_2156, groups = var_2109, pad = var_2158_pad_0, pad_type = var_2158_pad_type_0, strides = var_2154, weight = layers_8_attention_v_proj_weight, x = var_2104_cast_fp16)[name = tensor("op_2158")]; - tensor var_2159 = const()[name = tensor("op_2159"), val = tensor([1, 64, 12, 512])]; - tensor var_2160 = reshape(shape = var_2159, x = var_2158)[name = tensor("op_2160")]; - tensor tile_42 = const()[name = tensor("tile_42"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2161_axis_0 = const()[name = tensor("op_2161_axis_0"), val = tensor(2)]; - tensor var_2161_0, tensor var_2161_1, tensor var_2161_2, tensor var_2161_3, tensor var_2161_4, tensor var_2161_5, tensor var_2161_6, tensor var_2161_7, tensor var_2161_8, tensor var_2161_9, tensor var_2161_10, tensor var_2161_11 = split(axis = var_2161_axis_0, split_sizes = tile_42, x = var_2142)[name = tensor("op_2161")]; - tensor var_2174_perm_0 = const()[name = tensor("op_2174_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_43 = const()[name = tensor("tile_43"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2175_axis_0 = const()[name = tensor("op_2175_axis_0"), val = tensor(2)]; - tensor transpose_3 = transpose(perm = var_2174_perm_0, x = ks_17)[name = tensor("transpose_3")]; - tensor var_2175_0, tensor var_2175_1, tensor var_2175_2, tensor var_2175_3, tensor var_2175_4, tensor var_2175_5, tensor var_2175_6, tensor var_2175_7, tensor var_2175_8, tensor var_2175_9, tensor var_2175_10, tensor var_2175_11 = split(axis = var_2175_axis_0, split_sizes = tile_43, x = transpose_3)[name = tensor("op_2175")]; - tensor tile_44 = const()[name = tensor("tile_44"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2188_axis_0 = const()[name = tensor("op_2188_axis_0"), val = tensor(2)]; - tensor var_2188_0, tensor var_2188_1, tensor var_2188_2, tensor var_2188_3, tensor var_2188_4, tensor var_2188_5, tensor var_2188_6, tensor var_2188_7, tensor var_2188_8, tensor var_2188_9, tensor var_2188_10, tensor var_2188_11 = split(axis = var_2188_axis_0, split_sizes = tile_44, x = var_2160)[name = tensor("op_2188")]; - tensor var_2202_equation_0 = const()[name = tensor("op_2202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2202 = einsum(equation = var_2202_equation_0, values = (var_2175_0, var_2161_0))[name = tensor("op_2202")]; - tensor var_2203_to_fp16 = const()[name = tensor("op_2203_to_fp16"), val = tensor(0x1p-3)]; - tensor w_193_cast_fp16 = mul(x = var_2202, y = var_2203_to_fp16)[name = tensor("w_193_cast_fp16")]; - tensor var_2206_equation_0 = const()[name = tensor("op_2206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2206 = einsum(equation = var_2206_equation_0, values = (var_2175_1, var_2161_1))[name = tensor("op_2206")]; - tensor var_2207_to_fp16 = const()[name = tensor("op_2207_to_fp16"), val = tensor(0x1p-3)]; - tensor w_195_cast_fp16 = mul(x = var_2206, y = var_2207_to_fp16)[name = tensor("w_195_cast_fp16")]; - tensor var_2210_equation_0 = const()[name = tensor("op_2210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2210 = einsum(equation = var_2210_equation_0, values = (var_2175_2, var_2161_2))[name = tensor("op_2210")]; - tensor var_2211_to_fp16 = const()[name = tensor("op_2211_to_fp16"), val = tensor(0x1p-3)]; - tensor w_197_cast_fp16 = mul(x = var_2210, y = var_2211_to_fp16)[name = tensor("w_197_cast_fp16")]; - tensor var_2214_equation_0 = const()[name = tensor("op_2214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2214 = einsum(equation = var_2214_equation_0, values = (var_2175_3, var_2161_3))[name = tensor("op_2214")]; - tensor var_2215_to_fp16 = const()[name = tensor("op_2215_to_fp16"), val = tensor(0x1p-3)]; - tensor w_199_cast_fp16 = mul(x = var_2214, y = var_2215_to_fp16)[name = tensor("w_199_cast_fp16")]; - tensor var_2218_equation_0 = const()[name = tensor("op_2218_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2218 = einsum(equation = var_2218_equation_0, values = (var_2175_4, var_2161_4))[name = tensor("op_2218")]; - tensor var_2219_to_fp16 = const()[name = tensor("op_2219_to_fp16"), val = tensor(0x1p-3)]; - tensor w_201_cast_fp16 = mul(x = var_2218, y = var_2219_to_fp16)[name = tensor("w_201_cast_fp16")]; - tensor var_2222_equation_0 = const()[name = tensor("op_2222_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2222 = einsum(equation = var_2222_equation_0, values = (var_2175_5, var_2161_5))[name = tensor("op_2222")]; - tensor var_2223_to_fp16 = const()[name = tensor("op_2223_to_fp16"), val = tensor(0x1p-3)]; - tensor w_203_cast_fp16 = mul(x = var_2222, y = var_2223_to_fp16)[name = tensor("w_203_cast_fp16")]; - tensor var_2226_equation_0 = const()[name = tensor("op_2226_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2226 = einsum(equation = var_2226_equation_0, values = (var_2175_6, var_2161_6))[name = tensor("op_2226")]; - tensor var_2227_to_fp16 = const()[name = tensor("op_2227_to_fp16"), val = tensor(0x1p-3)]; - tensor w_205_cast_fp16 = mul(x = var_2226, y = var_2227_to_fp16)[name = tensor("w_205_cast_fp16")]; - tensor var_2230_equation_0 = const()[name = tensor("op_2230_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2230 = einsum(equation = var_2230_equation_0, values = (var_2175_7, var_2161_7))[name = tensor("op_2230")]; - tensor var_2231_to_fp16 = const()[name = tensor("op_2231_to_fp16"), val = tensor(0x1p-3)]; - tensor w_207_cast_fp16 = mul(x = var_2230, y = var_2231_to_fp16)[name = tensor("w_207_cast_fp16")]; - tensor var_2234_equation_0 = const()[name = tensor("op_2234_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2234 = einsum(equation = var_2234_equation_0, values = (var_2175_8, var_2161_8))[name = tensor("op_2234")]; - tensor var_2235_to_fp16 = const()[name = tensor("op_2235_to_fp16"), val = tensor(0x1p-3)]; - tensor w_209_cast_fp16 = mul(x = var_2234, y = var_2235_to_fp16)[name = tensor("w_209_cast_fp16")]; - tensor var_2238_equation_0 = const()[name = tensor("op_2238_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2238 = einsum(equation = var_2238_equation_0, values = (var_2175_9, var_2161_9))[name = tensor("op_2238")]; - tensor var_2239_to_fp16 = const()[name = tensor("op_2239_to_fp16"), val = tensor(0x1p-3)]; - tensor w_211_cast_fp16 = mul(x = var_2238, y = var_2239_to_fp16)[name = tensor("w_211_cast_fp16")]; - tensor var_2242_equation_0 = const()[name = tensor("op_2242_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2242 = einsum(equation = var_2242_equation_0, values = (var_2175_10, var_2161_10))[name = tensor("op_2242")]; - tensor var_2243_to_fp16 = const()[name = tensor("op_2243_to_fp16"), val = tensor(0x1p-3)]; - tensor w_213_cast_fp16 = mul(x = var_2242, y = var_2243_to_fp16)[name = tensor("w_213_cast_fp16")]; - tensor var_2246_equation_0 = const()[name = tensor("op_2246_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2246 = einsum(equation = var_2246_equation_0, values = (var_2175_11, var_2161_11))[name = tensor("op_2246")]; - tensor var_2247_to_fp16 = const()[name = tensor("op_2247_to_fp16"), val = tensor(0x1p-3)]; - tensor w_215_cast_fp16 = mul(x = var_2246, y = var_2247_to_fp16)[name = tensor("w_215_cast_fp16")]; - tensor var_2249_cast_fp16 = add(x = w_193_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2249_cast_fp16")]; - tensor var_2250_cast_fp16 = softmax(axis = var_2109, x = var_2249_cast_fp16)[name = tensor("op_2250_cast_fp16")]; - tensor var_2251_cast_fp16 = add(x = w_195_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2251_cast_fp16")]; - tensor var_2252_cast_fp16 = softmax(axis = var_2109, x = var_2251_cast_fp16)[name = tensor("op_2252_cast_fp16")]; - tensor var_2253_cast_fp16 = add(x = w_197_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2253_cast_fp16")]; - tensor var_2254_cast_fp16 = softmax(axis = var_2109, x = var_2253_cast_fp16)[name = tensor("op_2254_cast_fp16")]; - tensor var_2255_cast_fp16 = add(x = w_199_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2255_cast_fp16")]; - tensor var_2256_cast_fp16 = softmax(axis = var_2109, x = var_2255_cast_fp16)[name = tensor("op_2256_cast_fp16")]; - tensor var_2257_cast_fp16 = add(x = w_201_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2257_cast_fp16")]; - tensor var_2258_cast_fp16 = softmax(axis = var_2109, x = var_2257_cast_fp16)[name = tensor("op_2258_cast_fp16")]; - tensor var_2259_cast_fp16 = add(x = w_203_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2259_cast_fp16")]; - tensor var_2260_cast_fp16 = softmax(axis = var_2109, x = var_2259_cast_fp16)[name = tensor("op_2260_cast_fp16")]; - tensor var_2261_cast_fp16 = add(x = w_205_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2261_cast_fp16")]; - tensor var_2262_cast_fp16 = softmax(axis = var_2109, x = var_2261_cast_fp16)[name = tensor("op_2262_cast_fp16")]; - tensor var_2263_cast_fp16 = add(x = w_207_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2263_cast_fp16")]; - tensor var_2264_cast_fp16 = softmax(axis = var_2109, x = var_2263_cast_fp16)[name = tensor("op_2264_cast_fp16")]; - tensor var_2265_cast_fp16 = add(x = w_209_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2265_cast_fp16")]; - tensor var_2266_cast_fp16 = softmax(axis = var_2109, x = var_2265_cast_fp16)[name = tensor("op_2266_cast_fp16")]; - tensor var_2267_cast_fp16 = add(x = w_211_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2267_cast_fp16")]; - tensor var_2268_cast_fp16 = softmax(axis = var_2109, x = var_2267_cast_fp16)[name = tensor("op_2268_cast_fp16")]; - tensor var_2269_cast_fp16 = add(x = w_213_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2269_cast_fp16")]; - tensor var_2270_cast_fp16 = softmax(axis = var_2109, x = var_2269_cast_fp16)[name = tensor("op_2270_cast_fp16")]; - tensor var_2271_cast_fp16 = add(x = w_215_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2271_cast_fp16")]; - tensor var_2272_cast_fp16 = softmax(axis = var_2109, x = var_2271_cast_fp16)[name = tensor("op_2272_cast_fp16")]; - tensor var_2274_equation_0 = const()[name = tensor("op_2274_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2274_cast_fp16 = einsum(equation = var_2274_equation_0, values = (var_2188_0, var_2250_cast_fp16))[name = tensor("op_2274_cast_fp16")]; - tensor var_2276_equation_0 = const()[name = tensor("op_2276_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2276_cast_fp16 = einsum(equation = var_2276_equation_0, values = (var_2188_1, var_2252_cast_fp16))[name = tensor("op_2276_cast_fp16")]; - tensor var_2278_equation_0 = const()[name = tensor("op_2278_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2278_cast_fp16 = einsum(equation = var_2278_equation_0, values = (var_2188_2, var_2254_cast_fp16))[name = tensor("op_2278_cast_fp16")]; - tensor var_2280_equation_0 = const()[name = tensor("op_2280_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2280_cast_fp16 = einsum(equation = var_2280_equation_0, values = (var_2188_3, var_2256_cast_fp16))[name = tensor("op_2280_cast_fp16")]; - tensor var_2282_equation_0 = const()[name = tensor("op_2282_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2282_cast_fp16 = einsum(equation = var_2282_equation_0, values = (var_2188_4, var_2258_cast_fp16))[name = tensor("op_2282_cast_fp16")]; - tensor var_2284_equation_0 = const()[name = tensor("op_2284_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2284_cast_fp16 = einsum(equation = var_2284_equation_0, values = (var_2188_5, var_2260_cast_fp16))[name = tensor("op_2284_cast_fp16")]; - tensor var_2286_equation_0 = const()[name = tensor("op_2286_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2286_cast_fp16 = einsum(equation = var_2286_equation_0, values = (var_2188_6, var_2262_cast_fp16))[name = tensor("op_2286_cast_fp16")]; - tensor var_2288_equation_0 = const()[name = tensor("op_2288_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2288_cast_fp16 = einsum(equation = var_2288_equation_0, values = (var_2188_7, var_2264_cast_fp16))[name = tensor("op_2288_cast_fp16")]; - tensor var_2290_equation_0 = const()[name = tensor("op_2290_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2290_cast_fp16 = einsum(equation = var_2290_equation_0, values = (var_2188_8, var_2266_cast_fp16))[name = tensor("op_2290_cast_fp16")]; - tensor var_2292_equation_0 = const()[name = tensor("op_2292_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2292_cast_fp16 = einsum(equation = var_2292_equation_0, values = (var_2188_9, var_2268_cast_fp16))[name = tensor("op_2292_cast_fp16")]; - tensor var_2294_equation_0 = const()[name = tensor("op_2294_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2294_cast_fp16 = einsum(equation = var_2294_equation_0, values = (var_2188_10, var_2270_cast_fp16))[name = tensor("op_2294_cast_fp16")]; - tensor var_2296_equation_0 = const()[name = tensor("op_2296_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2296_cast_fp16 = einsum(equation = var_2296_equation_0, values = (var_2188_11, var_2272_cast_fp16))[name = tensor("op_2296_cast_fp16")]; - tensor var_2298_interleave_0 = const()[name = tensor("op_2298_interleave_0"), val = tensor(false)]; - tensor var_2298_cast_fp16 = concat(axis = var_2109, interleave = var_2298_interleave_0, values = (var_2274_cast_fp16, var_2276_cast_fp16, var_2278_cast_fp16, var_2280_cast_fp16, var_2282_cast_fp16, var_2284_cast_fp16, var_2286_cast_fp16, var_2288_cast_fp16, var_2290_cast_fp16, var_2292_cast_fp16, var_2294_cast_fp16, var_2296_cast_fp16))[name = tensor("op_2298_cast_fp16")]; - tensor var_2302 = const()[name = tensor("op_2302"), val = tensor([1, 1])]; - tensor var_2304 = const()[name = tensor("op_2304"), val = tensor([1, 1])]; - tensor var_2306_pad_type_0 = const()[name = tensor("op_2306_pad_type_0"), val = tensor("custom")]; - tensor var_2306_pad_0 = const()[name = tensor("op_2306_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2306 = conv(bias = layers_8_attention_o_proj_bias, dilations = var_2304, groups = var_2109, pad = var_2306_pad_0, pad_type = var_2306_pad_type_0, strides = var_2302, weight = layers_8_attention_o_proj_weight, x = var_2298_cast_fp16)[name = tensor("op_2306")]; - tensor var_2308_interleave_0 = const()[name = tensor("op_2308_interleave_0"), val = tensor(false)]; - tensor var_2308 = concat(axis = var_2110, interleave = var_2308_interleave_0, values = var_2306)[name = tensor("op_2308")]; - tensor x_69 = add(x = var_2104_cast_fp16, y = var_2308)[name = tensor("x_69")]; - tensor var_2107_promoted = const()[name = tensor("op_2107_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2108_promoted = const()[name = tensor("op_2108_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_71 = clip(alpha = var_2107_promoted, beta = var_2108_promoted, x = x_69)[name = tensor("x_71")]; - tensor var_2313 = const()[name = tensor("op_2313"), val = tensor([1])]; - tensor mean_35 = reduce_mean(axes = var_2313, keep_dims = var_2111, x = x_71)[name = tensor("mean_35")]; + tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0, x = var_2046_cast_fp16)[name = tensor("denom_33_cast_fp16")]; + tensor var_2048_cast_fp16 = mul(x = zero_mean_33, y = denom_33_cast_fp16)[name = tensor("op_2048_cast_fp16")]; + tensor var_2050_gamma_0_to_fp16 = const()[name = tensor("op_2050_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218553728)))]; + tensor var_2050_beta_0_to_fp16 = const()[name = tensor("op_2050_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218555328)))]; + tensor var_2050_epsilon_0_to_fp16 = const()[name = tensor("op_2050_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2050_cast_fp16 = batch_norm(beta = var_2050_beta_0_to_fp16, epsilon = var_2050_epsilon_0_to_fp16, gamma = var_2050_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2048_cast_fp16)[name = tensor("op_2050_cast_fp16")]; + tensor var_2056 = const()[name = tensor("op_2056"), val = tensor(1)]; + tensor var_2057 = const()[name = tensor("op_2057"), val = tensor(0)]; + tensor var_2058 = const()[name = tensor("op_2058"), val = tensor(true)]; + tensor var_2080 = const()[name = tensor("op_2080"), val = tensor([1, 1])]; + tensor var_2082 = const()[name = tensor("op_2082"), val = tensor([1, 1])]; + tensor var_2084_pad_type_0 = const()[name = tensor("op_2084_pad_type_0"), val = tensor("custom")]; + tensor var_2084_pad_0 = const()[name = tensor("op_2084_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2084 = conv(bias = layers_8_attention_q_proj_bias, dilations = var_2082, groups = var_2056, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2080, weight = layers_8_attention_q_proj_weight, x = var_2050_cast_fp16)[name = tensor("op_2084")]; + tensor var_2087 = const()[name = tensor("op_2087"), val = tensor([1, 1])]; + tensor var_2089 = const()[name = tensor("op_2089"), val = tensor([1, 1])]; + tensor ks_17_pad_type_0 = const()[name = tensor("ks_17_pad_type_0"), val = tensor("custom")]; + tensor ks_17_pad_0 = const()[name = tensor("ks_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_17 = conv(bias = layers_8_attention_k_proj_bias, dilations = var_2089, groups = var_2056, pad = ks_17_pad_0, pad_type = ks_17_pad_type_0, strides = var_2087, weight = layers_8_attention_k_proj_weight, x = var_2050_cast_fp16)[name = tensor("ks_17")]; + tensor var_2094 = const()[name = tensor("op_2094"), val = tensor([1, 1])]; + tensor var_2096 = const()[name = tensor("op_2096"), val = tensor([1, 1])]; + tensor var_2098_pad_type_0 = const()[name = tensor("op_2098_pad_type_0"), val = tensor("custom")]; + tensor var_2098_pad_0 = const()[name = tensor("op_2098_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2098 = conv(bias = layers_8_attention_v_proj_bias, dilations = var_2096, groups = var_2056, pad = var_2098_pad_0, pad_type = var_2098_pad_type_0, strides = var_2094, weight = layers_8_attention_v_proj_weight, x = var_2050_cast_fp16)[name = tensor("op_2098")]; + tensor tile_42 = const()[name = tensor("tile_42"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2099_axis_0 = const()[name = tensor("op_2099_axis_0"), val = tensor(1)]; + tensor var_2099_0, tensor var_2099_1, tensor var_2099_2, tensor var_2099_3, tensor var_2099_4, tensor var_2099_5, tensor var_2099_6, tensor var_2099_7, tensor var_2099_8, tensor var_2099_9, tensor var_2099_10, tensor var_2099_11 = split(axis = var_2099_axis_0, split_sizes = tile_42, x = var_2084)[name = tensor("op_2099")]; + tensor var_2112_perm_0 = const()[name = tensor("op_2112_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_43 = const()[name = tensor("tile_43"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2113_axis_0 = const()[name = tensor("op_2113_axis_0"), val = tensor(3)]; + tensor transpose_3 = transpose(perm = var_2112_perm_0, x = ks_17)[name = tensor("transpose_3")]; + tensor var_2113_0, tensor var_2113_1, tensor var_2113_2, tensor var_2113_3, tensor var_2113_4, tensor var_2113_5, tensor var_2113_6, tensor var_2113_7, tensor var_2113_8, tensor var_2113_9, tensor var_2113_10, tensor var_2113_11 = split(axis = var_2113_axis_0, split_sizes = tile_43, x = transpose_3)[name = tensor("op_2113")]; + tensor tile_44 = const()[name = tensor("tile_44"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2126_axis_0 = const()[name = tensor("op_2126_axis_0"), val = tensor(1)]; + tensor var_2126_0, tensor var_2126_1, tensor var_2126_2, tensor var_2126_3, tensor var_2126_4, tensor var_2126_5, tensor var_2126_6, tensor var_2126_7, tensor var_2126_8, tensor var_2126_9, tensor var_2126_10, tensor var_2126_11 = split(axis = var_2126_axis_0, split_sizes = tile_44, x = var_2098)[name = tensor("op_2126")]; + tensor var_2140_equation_0 = const()[name = tensor("op_2140_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2140 = einsum(equation = var_2140_equation_0, values = (var_2113_0, var_2099_0))[name = tensor("op_2140")]; + tensor var_2141_to_fp16 = const()[name = tensor("op_2141_to_fp16"), val = tensor(0x1p-3)]; + tensor w_193_cast_fp16 = mul(x = var_2140, y = var_2141_to_fp16)[name = tensor("w_193_cast_fp16")]; + tensor var_2144_equation_0 = const()[name = tensor("op_2144_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2144 = einsum(equation = var_2144_equation_0, values = (var_2113_1, var_2099_1))[name = tensor("op_2144")]; + tensor var_2145_to_fp16 = const()[name = tensor("op_2145_to_fp16"), val = tensor(0x1p-3)]; + tensor w_195_cast_fp16 = mul(x = var_2144, y = var_2145_to_fp16)[name = tensor("w_195_cast_fp16")]; + tensor var_2148_equation_0 = const()[name = tensor("op_2148_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2148 = einsum(equation = var_2148_equation_0, values = (var_2113_2, var_2099_2))[name = tensor("op_2148")]; + tensor var_2149_to_fp16 = const()[name = tensor("op_2149_to_fp16"), val = tensor(0x1p-3)]; + tensor w_197_cast_fp16 = mul(x = var_2148, y = var_2149_to_fp16)[name = tensor("w_197_cast_fp16")]; + tensor var_2152_equation_0 = const()[name = tensor("op_2152_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2152 = einsum(equation = var_2152_equation_0, values = (var_2113_3, var_2099_3))[name = tensor("op_2152")]; + tensor var_2153_to_fp16 = const()[name = tensor("op_2153_to_fp16"), val = tensor(0x1p-3)]; + tensor w_199_cast_fp16 = mul(x = var_2152, y = var_2153_to_fp16)[name = tensor("w_199_cast_fp16")]; + tensor var_2156_equation_0 = const()[name = tensor("op_2156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2156 = einsum(equation = var_2156_equation_0, values = (var_2113_4, var_2099_4))[name = tensor("op_2156")]; + tensor var_2157_to_fp16 = const()[name = tensor("op_2157_to_fp16"), val = tensor(0x1p-3)]; + tensor w_201_cast_fp16 = mul(x = var_2156, y = var_2157_to_fp16)[name = tensor("w_201_cast_fp16")]; + tensor var_2160_equation_0 = const()[name = tensor("op_2160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2160 = einsum(equation = var_2160_equation_0, values = (var_2113_5, var_2099_5))[name = tensor("op_2160")]; + tensor var_2161_to_fp16 = const()[name = tensor("op_2161_to_fp16"), val = tensor(0x1p-3)]; + tensor w_203_cast_fp16 = mul(x = var_2160, y = var_2161_to_fp16)[name = tensor("w_203_cast_fp16")]; + tensor var_2164_equation_0 = const()[name = tensor("op_2164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2164 = einsum(equation = var_2164_equation_0, values = (var_2113_6, var_2099_6))[name = tensor("op_2164")]; + tensor var_2165_to_fp16 = const()[name = tensor("op_2165_to_fp16"), val = tensor(0x1p-3)]; + tensor w_205_cast_fp16 = mul(x = var_2164, y = var_2165_to_fp16)[name = tensor("w_205_cast_fp16")]; + tensor var_2168_equation_0 = const()[name = tensor("op_2168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2168 = einsum(equation = var_2168_equation_0, values = (var_2113_7, var_2099_7))[name = tensor("op_2168")]; + tensor var_2169_to_fp16 = const()[name = tensor("op_2169_to_fp16"), val = tensor(0x1p-3)]; + tensor w_207_cast_fp16 = mul(x = var_2168, y = var_2169_to_fp16)[name = tensor("w_207_cast_fp16")]; + tensor var_2172_equation_0 = const()[name = tensor("op_2172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2172 = einsum(equation = var_2172_equation_0, values = (var_2113_8, var_2099_8))[name = tensor("op_2172")]; + tensor var_2173_to_fp16 = const()[name = tensor("op_2173_to_fp16"), val = tensor(0x1p-3)]; + tensor w_209_cast_fp16 = mul(x = var_2172, y = var_2173_to_fp16)[name = tensor("w_209_cast_fp16")]; + tensor var_2176_equation_0 = const()[name = tensor("op_2176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2176 = einsum(equation = var_2176_equation_0, values = (var_2113_9, var_2099_9))[name = tensor("op_2176")]; + tensor var_2177_to_fp16 = const()[name = tensor("op_2177_to_fp16"), val = tensor(0x1p-3)]; + tensor w_211_cast_fp16 = mul(x = var_2176, y = var_2177_to_fp16)[name = tensor("w_211_cast_fp16")]; + tensor var_2180_equation_0 = const()[name = tensor("op_2180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2180 = einsum(equation = var_2180_equation_0, values = (var_2113_10, var_2099_10))[name = tensor("op_2180")]; + tensor var_2181_to_fp16 = const()[name = tensor("op_2181_to_fp16"), val = tensor(0x1p-3)]; + tensor w_213_cast_fp16 = mul(x = var_2180, y = var_2181_to_fp16)[name = tensor("w_213_cast_fp16")]; + tensor var_2184_equation_0 = const()[name = tensor("op_2184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2184 = einsum(equation = var_2184_equation_0, values = (var_2113_11, var_2099_11))[name = tensor("op_2184")]; + tensor var_2185_to_fp16 = const()[name = tensor("op_2185_to_fp16"), val = tensor(0x1p-3)]; + tensor w_215_cast_fp16 = mul(x = var_2184, y = var_2185_to_fp16)[name = tensor("w_215_cast_fp16")]; + tensor input_259_cast_fp16 = add(x = w_193_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_259_cast_fp16")]; + tensor var_2188_cast_fp16 = softmax(axis = var_2056, x = input_259_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor input_261_cast_fp16 = add(x = w_195_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_261_cast_fp16")]; + tensor var_2190_cast_fp16 = softmax(axis = var_2056, x = input_261_cast_fp16)[name = tensor("op_2190_cast_fp16")]; + tensor input_263_cast_fp16 = add(x = w_197_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_263_cast_fp16")]; + tensor var_2192_cast_fp16 = softmax(axis = var_2056, x = input_263_cast_fp16)[name = tensor("op_2192_cast_fp16")]; + tensor input_265_cast_fp16 = add(x = w_199_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_265_cast_fp16")]; + tensor var_2194_cast_fp16 = softmax(axis = var_2056, x = input_265_cast_fp16)[name = tensor("op_2194_cast_fp16")]; + tensor input_267_cast_fp16 = add(x = w_201_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_267_cast_fp16")]; + tensor var_2196_cast_fp16 = softmax(axis = var_2056, x = input_267_cast_fp16)[name = tensor("op_2196_cast_fp16")]; + tensor input_269_cast_fp16 = add(x = w_203_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_269_cast_fp16")]; + tensor var_2198_cast_fp16 = softmax(axis = var_2056, x = input_269_cast_fp16)[name = tensor("op_2198_cast_fp16")]; + tensor input_271_cast_fp16 = add(x = w_205_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_271_cast_fp16")]; + tensor var_2200_cast_fp16 = softmax(axis = var_2056, x = input_271_cast_fp16)[name = tensor("op_2200_cast_fp16")]; + tensor input_273_cast_fp16 = add(x = w_207_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_273_cast_fp16")]; + tensor var_2202_cast_fp16 = softmax(axis = var_2056, x = input_273_cast_fp16)[name = tensor("op_2202_cast_fp16")]; + tensor input_275_cast_fp16 = add(x = w_209_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_275_cast_fp16")]; + tensor var_2204_cast_fp16 = softmax(axis = var_2056, x = input_275_cast_fp16)[name = tensor("op_2204_cast_fp16")]; + tensor input_277_cast_fp16 = add(x = w_211_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_277_cast_fp16")]; + tensor var_2206_cast_fp16 = softmax(axis = var_2056, x = input_277_cast_fp16)[name = tensor("op_2206_cast_fp16")]; + tensor input_279_cast_fp16 = add(x = w_213_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_279_cast_fp16")]; + tensor var_2208_cast_fp16 = softmax(axis = var_2056, x = input_279_cast_fp16)[name = tensor("op_2208_cast_fp16")]; + tensor input_281_cast_fp16 = add(x = w_215_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_281_cast_fp16")]; + tensor var_2210_cast_fp16 = softmax(axis = var_2056, x = input_281_cast_fp16)[name = tensor("op_2210_cast_fp16")]; + tensor var_2212_equation_0 = const()[name = tensor("op_2212_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2212_cast_fp16 = einsum(equation = var_2212_equation_0, values = (var_2126_0, var_2188_cast_fp16))[name = tensor("op_2212_cast_fp16")]; + tensor var_2214_equation_0 = const()[name = tensor("op_2214_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2214_cast_fp16 = einsum(equation = var_2214_equation_0, values = (var_2126_1, var_2190_cast_fp16))[name = tensor("op_2214_cast_fp16")]; + tensor var_2216_equation_0 = const()[name = tensor("op_2216_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2216_cast_fp16 = einsum(equation = var_2216_equation_0, values = (var_2126_2, var_2192_cast_fp16))[name = tensor("op_2216_cast_fp16")]; + tensor var_2218_equation_0 = const()[name = tensor("op_2218_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2218_cast_fp16 = einsum(equation = var_2218_equation_0, values = (var_2126_3, var_2194_cast_fp16))[name = tensor("op_2218_cast_fp16")]; + tensor var_2220_equation_0 = const()[name = tensor("op_2220_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2220_cast_fp16 = einsum(equation = var_2220_equation_0, values = (var_2126_4, var_2196_cast_fp16))[name = tensor("op_2220_cast_fp16")]; + tensor var_2222_equation_0 = const()[name = tensor("op_2222_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2222_cast_fp16 = einsum(equation = var_2222_equation_0, values = (var_2126_5, var_2198_cast_fp16))[name = tensor("op_2222_cast_fp16")]; + tensor var_2224_equation_0 = const()[name = tensor("op_2224_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2224_cast_fp16 = einsum(equation = var_2224_equation_0, values = (var_2126_6, var_2200_cast_fp16))[name = tensor("op_2224_cast_fp16")]; + tensor var_2226_equation_0 = const()[name = tensor("op_2226_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2226_cast_fp16 = einsum(equation = var_2226_equation_0, values = (var_2126_7, var_2202_cast_fp16))[name = tensor("op_2226_cast_fp16")]; + tensor var_2228_equation_0 = const()[name = tensor("op_2228_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2228_cast_fp16 = einsum(equation = var_2228_equation_0, values = (var_2126_8, var_2204_cast_fp16))[name = tensor("op_2228_cast_fp16")]; + tensor var_2230_equation_0 = const()[name = tensor("op_2230_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2230_cast_fp16 = einsum(equation = var_2230_equation_0, values = (var_2126_9, var_2206_cast_fp16))[name = tensor("op_2230_cast_fp16")]; + tensor var_2232_equation_0 = const()[name = tensor("op_2232_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2232_cast_fp16 = einsum(equation = var_2232_equation_0, values = (var_2126_10, var_2208_cast_fp16))[name = tensor("op_2232_cast_fp16")]; + tensor var_2234_equation_0 = const()[name = tensor("op_2234_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2234_cast_fp16 = einsum(equation = var_2234_equation_0, values = (var_2126_11, var_2210_cast_fp16))[name = tensor("op_2234_cast_fp16")]; + tensor var_2236_interleave_0 = const()[name = tensor("op_2236_interleave_0"), val = tensor(false)]; + tensor var_2236_cast_fp16 = concat(axis = var_2056, interleave = var_2236_interleave_0, values = (var_2212_cast_fp16, var_2214_cast_fp16, var_2216_cast_fp16, var_2218_cast_fp16, var_2220_cast_fp16, var_2222_cast_fp16, var_2224_cast_fp16, var_2226_cast_fp16, var_2228_cast_fp16, var_2230_cast_fp16, var_2232_cast_fp16, var_2234_cast_fp16))[name = tensor("op_2236_cast_fp16")]; + tensor var_2240 = const()[name = tensor("op_2240"), val = tensor([1, 1])]; + tensor var_2242 = const()[name = tensor("op_2242"), val = tensor([1, 1])]; + tensor var_2244_pad_type_0 = const()[name = tensor("op_2244_pad_type_0"), val = tensor("custom")]; + tensor var_2244_pad_0 = const()[name = tensor("op_2244_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2244 = conv(bias = layers_8_attention_o_proj_bias, dilations = var_2242, groups = var_2056, pad = var_2244_pad_0, pad_type = var_2244_pad_type_0, strides = var_2240, weight = layers_8_attention_o_proj_weight, x = var_2236_cast_fp16)[name = tensor("op_2244")]; + tensor var_2246_interleave_0 = const()[name = tensor("op_2246_interleave_0"), val = tensor(false)]; + tensor var_2246 = concat(axis = var_2057, interleave = var_2246_interleave_0, values = var_2244)[name = tensor("op_2246")]; + tensor x_69 = add(x = var_2050_cast_fp16, y = var_2246)[name = tensor("x_69")]; + tensor var_2053_promoted = const()[name = tensor("op_2053_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_2054_promoted = const()[name = tensor("op_2054_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_71 = clip(alpha = var_2053_promoted, beta = var_2054_promoted, x = x_69)[name = tensor("x_71")]; + tensor var_2251 = const()[name = tensor("op_2251"), val = tensor([1])]; + tensor mean_35 = reduce_mean(axes = var_2251, keep_dims = var_2058, x = x_71)[name = tensor("mean_35")]; tensor zero_mean_35 = sub(x = x_71, y = mean_35)[name = tensor("zero_mean_35")]; - tensor var_2116_promoted = const()[name = tensor("op_2116_promoted"), val = tensor(0x1p+1)]; - tensor var_2316 = pow(x = zero_mean_35, y = var_2116_promoted)[name = tensor("op_2316")]; - tensor var_2317 = const()[name = tensor("op_2317"), val = tensor([1])]; - tensor var_2318 = reduce_mean(axes = var_2317, keep_dims = var_2111, x = var_2316)[name = tensor("op_2318")]; - tensor var_2319_to_fp16 = const()[name = tensor("op_2319_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2320_cast_fp16 = add(x = var_2318, y = var_2319_to_fp16)[name = tensor("op_2320_cast_fp16")]; + tensor var_2055_promoted = const()[name = tensor("op_2055_promoted"), val = tensor(0x1p+1)]; + tensor var_2254 = pow(x = zero_mean_35, y = var_2055_promoted)[name = tensor("op_2254")]; + tensor var_2255 = const()[name = tensor("op_2255"), val = tensor([1])]; + tensor var_2256 = reduce_mean(axes = var_2255, keep_dims = var_2058, x = var_2254)[name = tensor("op_2256")]; + tensor var_2257_to_fp16 = const()[name = tensor("op_2257_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2258_cast_fp16 = add(x = var_2256, y = var_2257_to_fp16)[name = tensor("op_2258_cast_fp16")]; tensor denom_35_epsilon_0 = const()[name = tensor("denom_35_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0, x = var_2320_cast_fp16)[name = tensor("denom_35_cast_fp16")]; - tensor var_2322_cast_fp16 = mul(x = zero_mean_35, y = denom_35_cast_fp16)[name = tensor("op_2322_cast_fp16")]; - tensor var_2324_gamma_0_to_fp16 = const()[name = tensor("op_2324_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218556928)))]; - tensor var_2324_beta_0_to_fp16 = const()[name = tensor("op_2324_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218558528)))]; - tensor var_2324_epsilon_0_to_fp16 = const()[name = tensor("op_2324_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2324_cast_fp16 = batch_norm(beta = var_2324_beta_0_to_fp16, epsilon = var_2324_epsilon_0_to_fp16, gamma = var_2324_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2322_cast_fp16)[name = tensor("op_2324_cast_fp16")]; - tensor var_2330 = const()[name = tensor("op_2330"), val = tensor([1, 1])]; - tensor var_2332 = const()[name = tensor("op_2332"), val = tensor([1, 1])]; - tensor var_2334_pad_type_0 = const()[name = tensor("op_2334_pad_type_0"), val = tensor("custom")]; - tensor var_2334_pad_0 = const()[name = tensor("op_2334_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2334 = conv(bias = layers_8_mlp_fc1_bias, dilations = var_2332, groups = var_2109, pad = var_2334_pad_0, pad_type = var_2334_pad_type_0, strides = var_2330, weight = layers_8_mlp_fc1_weight, x = var_2324_cast_fp16)[name = tensor("op_2334")]; - tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; - tensor input_71 = gelu(mode = input_71_mode_0, x = var_2334)[name = tensor("input_71")]; - tensor var_2338 = const()[name = tensor("op_2338"), val = tensor([1, 1])]; - tensor var_2340 = const()[name = tensor("op_2340"), val = tensor([1, 1])]; - tensor var_2342_pad_type_0 = const()[name = tensor("op_2342_pad_type_0"), val = tensor("custom")]; - tensor var_2342_pad_0 = const()[name = tensor("op_2342_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2342 = conv(bias = layers_8_mlp_fc2_bias, dilations = var_2340, groups = var_2109, pad = var_2342_pad_0, pad_type = var_2342_pad_type_0, strides = var_2338, weight = layers_8_mlp_fc2_weight, x = input_71)[name = tensor("op_2342")]; - tensor x_73 = add(x = var_2324_cast_fp16, y = var_2342)[name = tensor("x_73")]; - tensor var_2107_promoted_1 = const()[name = tensor("op_2107_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2108_promoted_1 = const()[name = tensor("op_2108_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_75 = clip(alpha = var_2107_promoted_1, beta = var_2108_promoted_1, x = x_73)[name = tensor("x_75")]; - tensor var_2347 = const()[name = tensor("op_2347"), val = tensor([1])]; - tensor mean_37 = reduce_mean(axes = var_2347, keep_dims = var_2111, x = x_75)[name = tensor("mean_37")]; + tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0, x = var_2258_cast_fp16)[name = tensor("denom_35_cast_fp16")]; + tensor var_2260_cast_fp16 = mul(x = zero_mean_35, y = denom_35_cast_fp16)[name = tensor("op_2260_cast_fp16")]; + tensor var_2262_gamma_0_to_fp16 = const()[name = tensor("op_2262_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218556928)))]; + tensor var_2262_beta_0_to_fp16 = const()[name = tensor("op_2262_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218558528)))]; + tensor var_2262_epsilon_0_to_fp16 = const()[name = tensor("op_2262_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2262_cast_fp16 = batch_norm(beta = var_2262_beta_0_to_fp16, epsilon = var_2262_epsilon_0_to_fp16, gamma = var_2262_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2260_cast_fp16)[name = tensor("op_2262_cast_fp16")]; + tensor var_2268 = const()[name = tensor("op_2268"), val = tensor([1, 1])]; + tensor var_2270 = const()[name = tensor("op_2270"), val = tensor([1, 1])]; + tensor var_2272_pad_type_0 = const()[name = tensor("op_2272_pad_type_0"), val = tensor("custom")]; + tensor var_2272_pad_0 = const()[name = tensor("op_2272_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2272 = conv(bias = layers_8_mlp_fc1_bias, dilations = var_2270, groups = var_2056, pad = var_2272_pad_0, pad_type = var_2272_pad_type_0, strides = var_2268, weight = layers_8_mlp_fc1_weight, x = var_2262_cast_fp16)[name = tensor("op_2272")]; + tensor input_287_mode_0 = const()[name = tensor("input_287_mode_0"), val = tensor("EXACT")]; + tensor input_287 = gelu(mode = input_287_mode_0, x = var_2272)[name = tensor("input_287")]; + tensor var_2276 = const()[name = tensor("op_2276"), val = tensor([1, 1])]; + tensor var_2278 = const()[name = tensor("op_2278"), val = tensor([1, 1])]; + tensor var_2280_pad_type_0 = const()[name = tensor("op_2280_pad_type_0"), val = tensor("custom")]; + tensor var_2280_pad_0 = const()[name = tensor("op_2280_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2280 = conv(bias = layers_8_mlp_fc2_bias, dilations = var_2278, groups = var_2056, pad = var_2280_pad_0, pad_type = var_2280_pad_type_0, strides = var_2276, weight = layers_8_mlp_fc2_weight, x = input_287)[name = tensor("op_2280")]; + tensor x_73 = add(x = var_2262_cast_fp16, y = var_2280)[name = tensor("x_73")]; + tensor var_2053_promoted_1 = const()[name = tensor("op_2053_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_2054_promoted_1 = const()[name = tensor("op_2054_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_75 = clip(alpha = var_2053_promoted_1, beta = var_2054_promoted_1, x = x_73)[name = tensor("x_75")]; + tensor var_2285 = const()[name = tensor("op_2285"), val = tensor([1])]; + tensor mean_37 = reduce_mean(axes = var_2285, keep_dims = var_2058, x = x_75)[name = tensor("mean_37")]; tensor zero_mean_37 = sub(x = x_75, y = mean_37)[name = tensor("zero_mean_37")]; - tensor var_2116_promoted_1 = const()[name = tensor("op_2116_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2350 = pow(x = zero_mean_37, y = var_2116_promoted_1)[name = tensor("op_2350")]; - tensor var_2351 = const()[name = tensor("op_2351"), val = tensor([1])]; - tensor var_2352 = reduce_mean(axes = var_2351, keep_dims = var_2111, x = var_2350)[name = tensor("op_2352")]; - tensor var_2353_to_fp16 = const()[name = tensor("op_2353_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2354_cast_fp16 = add(x = var_2352, y = var_2353_to_fp16)[name = tensor("op_2354_cast_fp16")]; + tensor var_2055_promoted_1 = const()[name = tensor("op_2055_promoted_1"), val = tensor(0x1p+1)]; + tensor var_2288 = pow(x = zero_mean_37, y = var_2055_promoted_1)[name = tensor("op_2288")]; + tensor var_2289 = const()[name = tensor("op_2289"), val = tensor([1])]; + tensor var_2290 = reduce_mean(axes = var_2289, keep_dims = var_2058, x = var_2288)[name = tensor("op_2290")]; + tensor var_2291_to_fp16 = const()[name = tensor("op_2291_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2292_cast_fp16 = add(x = var_2290, y = var_2291_to_fp16)[name = tensor("op_2292_cast_fp16")]; tensor denom_37_epsilon_0 = const()[name = tensor("denom_37_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0, x = var_2354_cast_fp16)[name = tensor("denom_37_cast_fp16")]; - tensor var_2356_cast_fp16 = mul(x = zero_mean_37, y = denom_37_cast_fp16)[name = tensor("op_2356_cast_fp16")]; - tensor var_2358_gamma_0_to_fp16 = const()[name = tensor("op_2358_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218560128)))]; - tensor var_2358_beta_0_to_fp16 = const()[name = tensor("op_2358_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218561728)))]; - tensor var_2358_epsilon_0_to_fp16 = const()[name = tensor("op_2358_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2358_cast_fp16 = batch_norm(beta = var_2358_beta_0_to_fp16, epsilon = var_2358_epsilon_0_to_fp16, gamma = var_2358_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2356_cast_fp16)[name = tensor("op_2358_cast_fp16")]; - tensor var_2363 = const()[name = tensor("op_2363"), val = tensor(1)]; - tensor var_2364 = const()[name = tensor("op_2364"), val = tensor(0)]; - tensor var_2365 = const()[name = tensor("op_2365"), val = tensor(true)]; - tensor var_2390 = const()[name = tensor("op_2390"), val = tensor([1, 1])]; - tensor var_2392 = const()[name = tensor("op_2392"), val = tensor([1, 1])]; - tensor var_2394_pad_type_0 = const()[name = tensor("op_2394_pad_type_0"), val = tensor("custom")]; - tensor var_2394_pad_0 = const()[name = tensor("op_2394_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2394 = conv(bias = layers_9_attention_q_proj_bias, dilations = var_2392, groups = var_2363, pad = var_2394_pad_0, pad_type = var_2394_pad_type_0, strides = var_2390, weight = layers_9_attention_q_proj_weight, x = var_2358_cast_fp16)[name = tensor("op_2394")]; - tensor var_2395 = const()[name = tensor("op_2395"), val = tensor([1, 64, 12, 512])]; - tensor var_2396 = reshape(shape = var_2395, x = var_2394)[name = tensor("op_2396")]; - tensor var_2399 = const()[name = tensor("op_2399"), val = tensor([1, 1])]; - tensor var_2401 = const()[name = tensor("op_2401"), val = tensor([1, 1])]; - tensor var_2403_pad_type_0 = const()[name = tensor("op_2403_pad_type_0"), val = tensor("custom")]; - tensor var_2403_pad_0 = const()[name = tensor("op_2403_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2403 = conv(bias = layers_9_attention_k_proj_bias, dilations = var_2401, groups = var_2363, pad = var_2403_pad_0, pad_type = var_2403_pad_type_0, strides = var_2399, weight = layers_9_attention_k_proj_weight, x = var_2358_cast_fp16)[name = tensor("op_2403")]; - tensor var_2404 = const()[name = tensor("op_2404"), val = tensor([1, 64, 12, 512])]; - tensor ks_19 = reshape(shape = var_2404, x = var_2403)[name = tensor("ks_19")]; - tensor var_2408 = const()[name = tensor("op_2408"), val = tensor([1, 1])]; - tensor var_2410 = const()[name = tensor("op_2410"), val = tensor([1, 1])]; - tensor var_2412_pad_type_0 = const()[name = tensor("op_2412_pad_type_0"), val = tensor("custom")]; - tensor var_2412_pad_0 = const()[name = tensor("op_2412_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2412 = conv(bias = layers_9_attention_v_proj_bias, dilations = var_2410, groups = var_2363, pad = var_2412_pad_0, pad_type = var_2412_pad_type_0, strides = var_2408, weight = layers_9_attention_v_proj_weight, x = var_2358_cast_fp16)[name = tensor("op_2412")]; - tensor var_2413 = const()[name = tensor("op_2413"), val = tensor([1, 64, 12, 512])]; - tensor var_2414 = reshape(shape = var_2413, x = var_2412)[name = tensor("op_2414")]; - tensor tile_47 = const()[name = tensor("tile_47"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2415_axis_0 = const()[name = tensor("op_2415_axis_0"), val = tensor(2)]; - tensor var_2415_0, tensor var_2415_1, tensor var_2415_2, tensor var_2415_3, tensor var_2415_4, tensor var_2415_5, tensor var_2415_6, tensor var_2415_7, tensor var_2415_8, tensor var_2415_9, tensor var_2415_10, tensor var_2415_11 = split(axis = var_2415_axis_0, split_sizes = tile_47, x = var_2396)[name = tensor("op_2415")]; - tensor var_2428_perm_0 = const()[name = tensor("op_2428_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_48 = const()[name = tensor("tile_48"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2429_axis_0 = const()[name = tensor("op_2429_axis_0"), val = tensor(2)]; - tensor transpose_2 = transpose(perm = var_2428_perm_0, x = ks_19)[name = tensor("transpose_2")]; - tensor var_2429_0, tensor var_2429_1, tensor var_2429_2, tensor var_2429_3, tensor var_2429_4, tensor var_2429_5, tensor var_2429_6, tensor var_2429_7, tensor var_2429_8, tensor var_2429_9, tensor var_2429_10, tensor var_2429_11 = split(axis = var_2429_axis_0, split_sizes = tile_48, x = transpose_2)[name = tensor("op_2429")]; - tensor tile_49 = const()[name = tensor("tile_49"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2442_axis_0 = const()[name = tensor("op_2442_axis_0"), val = tensor(2)]; - tensor var_2442_0, tensor var_2442_1, tensor var_2442_2, tensor var_2442_3, tensor var_2442_4, tensor var_2442_5, tensor var_2442_6, tensor var_2442_7, tensor var_2442_8, tensor var_2442_9, tensor var_2442_10, tensor var_2442_11 = split(axis = var_2442_axis_0, split_sizes = tile_49, x = var_2414)[name = tensor("op_2442")]; - tensor var_2456_equation_0 = const()[name = tensor("op_2456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2456 = einsum(equation = var_2456_equation_0, values = (var_2429_0, var_2415_0))[name = tensor("op_2456")]; - tensor var_2457_to_fp16 = const()[name = tensor("op_2457_to_fp16"), val = tensor(0x1p-3)]; - tensor w_217_cast_fp16 = mul(x = var_2456, y = var_2457_to_fp16)[name = tensor("w_217_cast_fp16")]; - tensor var_2460_equation_0 = const()[name = tensor("op_2460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2460 = einsum(equation = var_2460_equation_0, values = (var_2429_1, var_2415_1))[name = tensor("op_2460")]; - tensor var_2461_to_fp16 = const()[name = tensor("op_2461_to_fp16"), val = tensor(0x1p-3)]; - tensor w_219_cast_fp16 = mul(x = var_2460, y = var_2461_to_fp16)[name = tensor("w_219_cast_fp16")]; - tensor var_2464_equation_0 = const()[name = tensor("op_2464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2464 = einsum(equation = var_2464_equation_0, values = (var_2429_2, var_2415_2))[name = tensor("op_2464")]; - tensor var_2465_to_fp16 = const()[name = tensor("op_2465_to_fp16"), val = tensor(0x1p-3)]; - tensor w_221_cast_fp16 = mul(x = var_2464, y = var_2465_to_fp16)[name = tensor("w_221_cast_fp16")]; - tensor var_2468_equation_0 = const()[name = tensor("op_2468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2468 = einsum(equation = var_2468_equation_0, values = (var_2429_3, var_2415_3))[name = tensor("op_2468")]; - tensor var_2469_to_fp16 = const()[name = tensor("op_2469_to_fp16"), val = tensor(0x1p-3)]; - tensor w_223_cast_fp16 = mul(x = var_2468, y = var_2469_to_fp16)[name = tensor("w_223_cast_fp16")]; - tensor var_2472_equation_0 = const()[name = tensor("op_2472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2472 = einsum(equation = var_2472_equation_0, values = (var_2429_4, var_2415_4))[name = tensor("op_2472")]; - tensor var_2473_to_fp16 = const()[name = tensor("op_2473_to_fp16"), val = tensor(0x1p-3)]; - tensor w_225_cast_fp16 = mul(x = var_2472, y = var_2473_to_fp16)[name = tensor("w_225_cast_fp16")]; - tensor var_2476_equation_0 = const()[name = tensor("op_2476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2476 = einsum(equation = var_2476_equation_0, values = (var_2429_5, var_2415_5))[name = tensor("op_2476")]; - tensor var_2477_to_fp16 = const()[name = tensor("op_2477_to_fp16"), val = tensor(0x1p-3)]; - tensor w_227_cast_fp16 = mul(x = var_2476, y = var_2477_to_fp16)[name = tensor("w_227_cast_fp16")]; - tensor var_2480_equation_0 = const()[name = tensor("op_2480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2480 = einsum(equation = var_2480_equation_0, values = (var_2429_6, var_2415_6))[name = tensor("op_2480")]; - tensor var_2481_to_fp16 = const()[name = tensor("op_2481_to_fp16"), val = tensor(0x1p-3)]; - tensor w_229_cast_fp16 = mul(x = var_2480, y = var_2481_to_fp16)[name = tensor("w_229_cast_fp16")]; - tensor var_2484_equation_0 = const()[name = tensor("op_2484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2484 = einsum(equation = var_2484_equation_0, values = (var_2429_7, var_2415_7))[name = tensor("op_2484")]; - tensor var_2485_to_fp16 = const()[name = tensor("op_2485_to_fp16"), val = tensor(0x1p-3)]; - tensor w_231_cast_fp16 = mul(x = var_2484, y = var_2485_to_fp16)[name = tensor("w_231_cast_fp16")]; - tensor var_2488_equation_0 = const()[name = tensor("op_2488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2488 = einsum(equation = var_2488_equation_0, values = (var_2429_8, var_2415_8))[name = tensor("op_2488")]; - tensor var_2489_to_fp16 = const()[name = tensor("op_2489_to_fp16"), val = tensor(0x1p-3)]; - tensor w_233_cast_fp16 = mul(x = var_2488, y = var_2489_to_fp16)[name = tensor("w_233_cast_fp16")]; - tensor var_2492_equation_0 = const()[name = tensor("op_2492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2492 = einsum(equation = var_2492_equation_0, values = (var_2429_9, var_2415_9))[name = tensor("op_2492")]; - tensor var_2493_to_fp16 = const()[name = tensor("op_2493_to_fp16"), val = tensor(0x1p-3)]; - tensor w_235_cast_fp16 = mul(x = var_2492, y = var_2493_to_fp16)[name = tensor("w_235_cast_fp16")]; - tensor var_2496_equation_0 = const()[name = tensor("op_2496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2496 = einsum(equation = var_2496_equation_0, values = (var_2429_10, var_2415_10))[name = tensor("op_2496")]; - tensor var_2497_to_fp16 = const()[name = tensor("op_2497_to_fp16"), val = tensor(0x1p-3)]; - tensor w_237_cast_fp16 = mul(x = var_2496, y = var_2497_to_fp16)[name = tensor("w_237_cast_fp16")]; - tensor var_2500_equation_0 = const()[name = tensor("op_2500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2500 = einsum(equation = var_2500_equation_0, values = (var_2429_11, var_2415_11))[name = tensor("op_2500")]; - tensor var_2501_to_fp16 = const()[name = tensor("op_2501_to_fp16"), val = tensor(0x1p-3)]; - tensor w_239_cast_fp16 = mul(x = var_2500, y = var_2501_to_fp16)[name = tensor("w_239_cast_fp16")]; - tensor var_2503_cast_fp16 = add(x = w_217_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2503_cast_fp16")]; - tensor var_2504_cast_fp16 = softmax(axis = var_2363, x = var_2503_cast_fp16)[name = tensor("op_2504_cast_fp16")]; - tensor var_2505_cast_fp16 = add(x = w_219_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2505_cast_fp16")]; - tensor var_2506_cast_fp16 = softmax(axis = var_2363, x = var_2505_cast_fp16)[name = tensor("op_2506_cast_fp16")]; - tensor var_2507_cast_fp16 = add(x = w_221_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2507_cast_fp16")]; - tensor var_2508_cast_fp16 = softmax(axis = var_2363, x = var_2507_cast_fp16)[name = tensor("op_2508_cast_fp16")]; - tensor var_2509_cast_fp16 = add(x = w_223_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2509_cast_fp16")]; - tensor var_2510_cast_fp16 = softmax(axis = var_2363, x = var_2509_cast_fp16)[name = tensor("op_2510_cast_fp16")]; - tensor var_2511_cast_fp16 = add(x = w_225_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2511_cast_fp16")]; - tensor var_2512_cast_fp16 = softmax(axis = var_2363, x = var_2511_cast_fp16)[name = tensor("op_2512_cast_fp16")]; - tensor var_2513_cast_fp16 = add(x = w_227_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2513_cast_fp16")]; - tensor var_2514_cast_fp16 = softmax(axis = var_2363, x = var_2513_cast_fp16)[name = tensor("op_2514_cast_fp16")]; - tensor var_2515_cast_fp16 = add(x = w_229_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2515_cast_fp16")]; - tensor var_2516_cast_fp16 = softmax(axis = var_2363, x = var_2515_cast_fp16)[name = tensor("op_2516_cast_fp16")]; - tensor var_2517_cast_fp16 = add(x = w_231_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2517_cast_fp16")]; - tensor var_2518_cast_fp16 = softmax(axis = var_2363, x = var_2517_cast_fp16)[name = tensor("op_2518_cast_fp16")]; - tensor var_2519_cast_fp16 = add(x = w_233_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2519_cast_fp16")]; - tensor var_2520_cast_fp16 = softmax(axis = var_2363, x = var_2519_cast_fp16)[name = tensor("op_2520_cast_fp16")]; - tensor var_2521_cast_fp16 = add(x = w_235_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2521_cast_fp16")]; - tensor var_2522_cast_fp16 = softmax(axis = var_2363, x = var_2521_cast_fp16)[name = tensor("op_2522_cast_fp16")]; - tensor var_2523_cast_fp16 = add(x = w_237_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2523_cast_fp16")]; - tensor var_2524_cast_fp16 = softmax(axis = var_2363, x = var_2523_cast_fp16)[name = tensor("op_2524_cast_fp16")]; - tensor var_2525_cast_fp16 = add(x = w_239_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2525_cast_fp16")]; - tensor var_2526_cast_fp16 = softmax(axis = var_2363, x = var_2525_cast_fp16)[name = tensor("op_2526_cast_fp16")]; - tensor var_2528_equation_0 = const()[name = tensor("op_2528_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2528_cast_fp16 = einsum(equation = var_2528_equation_0, values = (var_2442_0, var_2504_cast_fp16))[name = tensor("op_2528_cast_fp16")]; - tensor var_2530_equation_0 = const()[name = tensor("op_2530_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2530_cast_fp16 = einsum(equation = var_2530_equation_0, values = (var_2442_1, var_2506_cast_fp16))[name = tensor("op_2530_cast_fp16")]; - tensor var_2532_equation_0 = const()[name = tensor("op_2532_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2532_cast_fp16 = einsum(equation = var_2532_equation_0, values = (var_2442_2, var_2508_cast_fp16))[name = tensor("op_2532_cast_fp16")]; - tensor var_2534_equation_0 = const()[name = tensor("op_2534_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2534_cast_fp16 = einsum(equation = var_2534_equation_0, values = (var_2442_3, var_2510_cast_fp16))[name = tensor("op_2534_cast_fp16")]; - tensor var_2536_equation_0 = const()[name = tensor("op_2536_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2536_cast_fp16 = einsum(equation = var_2536_equation_0, values = (var_2442_4, var_2512_cast_fp16))[name = tensor("op_2536_cast_fp16")]; - tensor var_2538_equation_0 = const()[name = tensor("op_2538_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2538_cast_fp16 = einsum(equation = var_2538_equation_0, values = (var_2442_5, var_2514_cast_fp16))[name = tensor("op_2538_cast_fp16")]; - tensor var_2540_equation_0 = const()[name = tensor("op_2540_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2540_cast_fp16 = einsum(equation = var_2540_equation_0, values = (var_2442_6, var_2516_cast_fp16))[name = tensor("op_2540_cast_fp16")]; - tensor var_2542_equation_0 = const()[name = tensor("op_2542_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2542_cast_fp16 = einsum(equation = var_2542_equation_0, values = (var_2442_7, var_2518_cast_fp16))[name = tensor("op_2542_cast_fp16")]; - tensor var_2544_equation_0 = const()[name = tensor("op_2544_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2544_cast_fp16 = einsum(equation = var_2544_equation_0, values = (var_2442_8, var_2520_cast_fp16))[name = tensor("op_2544_cast_fp16")]; - tensor var_2546_equation_0 = const()[name = tensor("op_2546_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2546_cast_fp16 = einsum(equation = var_2546_equation_0, values = (var_2442_9, var_2522_cast_fp16))[name = tensor("op_2546_cast_fp16")]; - tensor var_2548_equation_0 = const()[name = tensor("op_2548_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2548_cast_fp16 = einsum(equation = var_2548_equation_0, values = (var_2442_10, var_2524_cast_fp16))[name = tensor("op_2548_cast_fp16")]; - tensor var_2550_equation_0 = const()[name = tensor("op_2550_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2550_cast_fp16 = einsum(equation = var_2550_equation_0, values = (var_2442_11, var_2526_cast_fp16))[name = tensor("op_2550_cast_fp16")]; - tensor var_2552_interleave_0 = const()[name = tensor("op_2552_interleave_0"), val = tensor(false)]; - tensor var_2552_cast_fp16 = concat(axis = var_2363, interleave = var_2552_interleave_0, values = (var_2528_cast_fp16, var_2530_cast_fp16, var_2532_cast_fp16, var_2534_cast_fp16, var_2536_cast_fp16, var_2538_cast_fp16, var_2540_cast_fp16, var_2542_cast_fp16, var_2544_cast_fp16, var_2546_cast_fp16, var_2548_cast_fp16, var_2550_cast_fp16))[name = tensor("op_2552_cast_fp16")]; - tensor var_2556 = const()[name = tensor("op_2556"), val = tensor([1, 1])]; - tensor var_2558 = const()[name = tensor("op_2558"), val = tensor([1, 1])]; - tensor var_2560_pad_type_0 = const()[name = tensor("op_2560_pad_type_0"), val = tensor("custom")]; - tensor var_2560_pad_0 = const()[name = tensor("op_2560_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2560 = conv(bias = layers_9_attention_o_proj_bias, dilations = var_2558, groups = var_2363, pad = var_2560_pad_0, pad_type = var_2560_pad_type_0, strides = var_2556, weight = layers_9_attention_o_proj_weight, x = var_2552_cast_fp16)[name = tensor("op_2560")]; - tensor var_2562_interleave_0 = const()[name = tensor("op_2562_interleave_0"), val = tensor(false)]; - tensor var_2562 = concat(axis = var_2364, interleave = var_2562_interleave_0, values = var_2560)[name = tensor("op_2562")]; - tensor x_77 = add(x = var_2358_cast_fp16, y = var_2562)[name = tensor("x_77")]; - tensor var_2361_promoted = const()[name = tensor("op_2361_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2362_promoted = const()[name = tensor("op_2362_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_79 = clip(alpha = var_2361_promoted, beta = var_2362_promoted, x = x_77)[name = tensor("x_79")]; - tensor var_2567 = const()[name = tensor("op_2567"), val = tensor([1])]; - tensor mean_39 = reduce_mean(axes = var_2567, keep_dims = var_2365, x = x_79)[name = tensor("mean_39")]; + tensor denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0, x = var_2292_cast_fp16)[name = tensor("denom_37_cast_fp16")]; + tensor var_2294_cast_fp16 = mul(x = zero_mean_37, y = denom_37_cast_fp16)[name = tensor("op_2294_cast_fp16")]; + tensor var_2296_gamma_0_to_fp16 = const()[name = tensor("op_2296_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218560128)))]; + tensor var_2296_beta_0_to_fp16 = const()[name = tensor("op_2296_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218561728)))]; + tensor var_2296_epsilon_0_to_fp16 = const()[name = tensor("op_2296_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2296_cast_fp16 = batch_norm(beta = var_2296_beta_0_to_fp16, epsilon = var_2296_epsilon_0_to_fp16, gamma = var_2296_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2294_cast_fp16)[name = tensor("op_2296_cast_fp16")]; + tensor var_2302 = const()[name = tensor("op_2302"), val = tensor(1)]; + tensor var_2303 = const()[name = tensor("op_2303"), val = tensor(0)]; + tensor var_2304 = const()[name = tensor("op_2304"), val = tensor(true)]; + tensor var_2326 = const()[name = tensor("op_2326"), val = tensor([1, 1])]; + tensor var_2328 = const()[name = tensor("op_2328"), val = tensor([1, 1])]; + tensor var_2330_pad_type_0 = const()[name = tensor("op_2330_pad_type_0"), val = tensor("custom")]; + tensor var_2330_pad_0 = const()[name = tensor("op_2330_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2330 = conv(bias = layers_9_attention_q_proj_bias, dilations = var_2328, groups = var_2302, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2326, weight = layers_9_attention_q_proj_weight, x = var_2296_cast_fp16)[name = tensor("op_2330")]; + tensor var_2333 = const()[name = tensor("op_2333"), val = tensor([1, 1])]; + tensor var_2335 = const()[name = tensor("op_2335"), val = tensor([1, 1])]; + tensor ks_19_pad_type_0 = const()[name = tensor("ks_19_pad_type_0"), val = tensor("custom")]; + tensor ks_19_pad_0 = const()[name = tensor("ks_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_19 = conv(bias = layers_9_attention_k_proj_bias, dilations = var_2335, groups = var_2302, pad = ks_19_pad_0, pad_type = ks_19_pad_type_0, strides = var_2333, weight = layers_9_attention_k_proj_weight, x = var_2296_cast_fp16)[name = tensor("ks_19")]; + tensor var_2340 = const()[name = tensor("op_2340"), val = tensor([1, 1])]; + tensor var_2342 = const()[name = tensor("op_2342"), val = tensor([1, 1])]; + tensor var_2344_pad_type_0 = const()[name = tensor("op_2344_pad_type_0"), val = tensor("custom")]; + tensor var_2344_pad_0 = const()[name = tensor("op_2344_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2344 = conv(bias = layers_9_attention_v_proj_bias, dilations = var_2342, groups = var_2302, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2340, weight = layers_9_attention_v_proj_weight, x = var_2296_cast_fp16)[name = tensor("op_2344")]; + tensor tile_47 = const()[name = tensor("tile_47"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2345_axis_0 = const()[name = tensor("op_2345_axis_0"), val = tensor(1)]; + tensor var_2345_0, tensor var_2345_1, tensor var_2345_2, tensor var_2345_3, tensor var_2345_4, tensor var_2345_5, tensor var_2345_6, tensor var_2345_7, tensor var_2345_8, tensor var_2345_9, tensor var_2345_10, tensor var_2345_11 = split(axis = var_2345_axis_0, split_sizes = tile_47, x = var_2330)[name = tensor("op_2345")]; + tensor var_2358_perm_0 = const()[name = tensor("op_2358_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_48 = const()[name = tensor("tile_48"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2359_axis_0 = const()[name = tensor("op_2359_axis_0"), val = tensor(3)]; + tensor transpose_2 = transpose(perm = var_2358_perm_0, x = ks_19)[name = tensor("transpose_2")]; + tensor var_2359_0, tensor var_2359_1, tensor var_2359_2, tensor var_2359_3, tensor var_2359_4, tensor var_2359_5, tensor var_2359_6, tensor var_2359_7, tensor var_2359_8, tensor var_2359_9, tensor var_2359_10, tensor var_2359_11 = split(axis = var_2359_axis_0, split_sizes = tile_48, x = transpose_2)[name = tensor("op_2359")]; + tensor tile_49 = const()[name = tensor("tile_49"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2372_axis_0 = const()[name = tensor("op_2372_axis_0"), val = tensor(1)]; + tensor var_2372_0, tensor var_2372_1, tensor var_2372_2, tensor var_2372_3, tensor var_2372_4, tensor var_2372_5, tensor var_2372_6, tensor var_2372_7, tensor var_2372_8, tensor var_2372_9, tensor var_2372_10, tensor var_2372_11 = split(axis = var_2372_axis_0, split_sizes = tile_49, x = var_2344)[name = tensor("op_2372")]; + tensor var_2386_equation_0 = const()[name = tensor("op_2386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2386 = einsum(equation = var_2386_equation_0, values = (var_2359_0, var_2345_0))[name = tensor("op_2386")]; + tensor var_2387_to_fp16 = const()[name = tensor("op_2387_to_fp16"), val = tensor(0x1p-3)]; + tensor w_217_cast_fp16 = mul(x = var_2386, y = var_2387_to_fp16)[name = tensor("w_217_cast_fp16")]; + tensor var_2390_equation_0 = const()[name = tensor("op_2390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2390 = einsum(equation = var_2390_equation_0, values = (var_2359_1, var_2345_1))[name = tensor("op_2390")]; + tensor var_2391_to_fp16 = const()[name = tensor("op_2391_to_fp16"), val = tensor(0x1p-3)]; + tensor w_219_cast_fp16 = mul(x = var_2390, y = var_2391_to_fp16)[name = tensor("w_219_cast_fp16")]; + tensor var_2394_equation_0 = const()[name = tensor("op_2394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2394 = einsum(equation = var_2394_equation_0, values = (var_2359_2, var_2345_2))[name = tensor("op_2394")]; + tensor var_2395_to_fp16 = const()[name = tensor("op_2395_to_fp16"), val = tensor(0x1p-3)]; + tensor w_221_cast_fp16 = mul(x = var_2394, y = var_2395_to_fp16)[name = tensor("w_221_cast_fp16")]; + tensor var_2398_equation_0 = const()[name = tensor("op_2398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2398 = einsum(equation = var_2398_equation_0, values = (var_2359_3, var_2345_3))[name = tensor("op_2398")]; + tensor var_2399_to_fp16 = const()[name = tensor("op_2399_to_fp16"), val = tensor(0x1p-3)]; + tensor w_223_cast_fp16 = mul(x = var_2398, y = var_2399_to_fp16)[name = tensor("w_223_cast_fp16")]; + tensor var_2402_equation_0 = const()[name = tensor("op_2402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2402 = einsum(equation = var_2402_equation_0, values = (var_2359_4, var_2345_4))[name = tensor("op_2402")]; + tensor var_2403_to_fp16 = const()[name = tensor("op_2403_to_fp16"), val = tensor(0x1p-3)]; + tensor w_225_cast_fp16 = mul(x = var_2402, y = var_2403_to_fp16)[name = tensor("w_225_cast_fp16")]; + tensor var_2406_equation_0 = const()[name = tensor("op_2406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2406 = einsum(equation = var_2406_equation_0, values = (var_2359_5, var_2345_5))[name = tensor("op_2406")]; + tensor var_2407_to_fp16 = const()[name = tensor("op_2407_to_fp16"), val = tensor(0x1p-3)]; + tensor w_227_cast_fp16 = mul(x = var_2406, y = var_2407_to_fp16)[name = tensor("w_227_cast_fp16")]; + tensor var_2410_equation_0 = const()[name = tensor("op_2410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2410 = einsum(equation = var_2410_equation_0, values = (var_2359_6, var_2345_6))[name = tensor("op_2410")]; + tensor var_2411_to_fp16 = const()[name = tensor("op_2411_to_fp16"), val = tensor(0x1p-3)]; + tensor w_229_cast_fp16 = mul(x = var_2410, y = var_2411_to_fp16)[name = tensor("w_229_cast_fp16")]; + tensor var_2414_equation_0 = const()[name = tensor("op_2414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2414 = einsum(equation = var_2414_equation_0, values = (var_2359_7, var_2345_7))[name = tensor("op_2414")]; + tensor var_2415_to_fp16 = const()[name = tensor("op_2415_to_fp16"), val = tensor(0x1p-3)]; + tensor w_231_cast_fp16 = mul(x = var_2414, y = var_2415_to_fp16)[name = tensor("w_231_cast_fp16")]; + tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2418 = einsum(equation = var_2418_equation_0, values = (var_2359_8, var_2345_8))[name = tensor("op_2418")]; + tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1p-3)]; + tensor w_233_cast_fp16 = mul(x = var_2418, y = var_2419_to_fp16)[name = tensor("w_233_cast_fp16")]; + tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2422 = einsum(equation = var_2422_equation_0, values = (var_2359_9, var_2345_9))[name = tensor("op_2422")]; + tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1p-3)]; + tensor w_235_cast_fp16 = mul(x = var_2422, y = var_2423_to_fp16)[name = tensor("w_235_cast_fp16")]; + tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2426 = einsum(equation = var_2426_equation_0, values = (var_2359_10, var_2345_10))[name = tensor("op_2426")]; + tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1p-3)]; + tensor w_237_cast_fp16 = mul(x = var_2426, y = var_2427_to_fp16)[name = tensor("w_237_cast_fp16")]; + tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2430 = einsum(equation = var_2430_equation_0, values = (var_2359_11, var_2345_11))[name = tensor("op_2430")]; + tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1p-3)]; + tensor w_239_cast_fp16 = mul(x = var_2430, y = var_2431_to_fp16)[name = tensor("w_239_cast_fp16")]; + tensor input_291_cast_fp16 = add(x = w_217_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_291_cast_fp16")]; + tensor var_2434_cast_fp16 = softmax(axis = var_2302, x = input_291_cast_fp16)[name = tensor("op_2434_cast_fp16")]; + tensor input_293_cast_fp16 = add(x = w_219_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_293_cast_fp16")]; + tensor var_2436_cast_fp16 = softmax(axis = var_2302, x = input_293_cast_fp16)[name = tensor("op_2436_cast_fp16")]; + tensor input_295_cast_fp16 = add(x = w_221_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_295_cast_fp16")]; + tensor var_2438_cast_fp16 = softmax(axis = var_2302, x = input_295_cast_fp16)[name = tensor("op_2438_cast_fp16")]; + tensor input_297_cast_fp16 = add(x = w_223_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_297_cast_fp16")]; + tensor var_2440_cast_fp16 = softmax(axis = var_2302, x = input_297_cast_fp16)[name = tensor("op_2440_cast_fp16")]; + tensor input_299_cast_fp16 = add(x = w_225_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_299_cast_fp16")]; + tensor var_2442_cast_fp16 = softmax(axis = var_2302, x = input_299_cast_fp16)[name = tensor("op_2442_cast_fp16")]; + tensor input_301_cast_fp16 = add(x = w_227_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_301_cast_fp16")]; + tensor var_2444_cast_fp16 = softmax(axis = var_2302, x = input_301_cast_fp16)[name = tensor("op_2444_cast_fp16")]; + tensor input_303_cast_fp16 = add(x = w_229_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_303_cast_fp16")]; + tensor var_2446_cast_fp16 = softmax(axis = var_2302, x = input_303_cast_fp16)[name = tensor("op_2446_cast_fp16")]; + tensor input_305_cast_fp16 = add(x = w_231_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_305_cast_fp16")]; + tensor var_2448_cast_fp16 = softmax(axis = var_2302, x = input_305_cast_fp16)[name = tensor("op_2448_cast_fp16")]; + tensor input_307_cast_fp16 = add(x = w_233_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_307_cast_fp16")]; + tensor var_2450_cast_fp16 = softmax(axis = var_2302, x = input_307_cast_fp16)[name = tensor("op_2450_cast_fp16")]; + tensor input_309_cast_fp16 = add(x = w_235_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_309_cast_fp16")]; + tensor var_2452_cast_fp16 = softmax(axis = var_2302, x = input_309_cast_fp16)[name = tensor("op_2452_cast_fp16")]; + tensor input_311_cast_fp16 = add(x = w_237_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_311_cast_fp16")]; + tensor var_2454_cast_fp16 = softmax(axis = var_2302, x = input_311_cast_fp16)[name = tensor("op_2454_cast_fp16")]; + tensor input_313_cast_fp16 = add(x = w_239_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_313_cast_fp16")]; + tensor var_2456_cast_fp16 = softmax(axis = var_2302, x = input_313_cast_fp16)[name = tensor("op_2456_cast_fp16")]; + tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2372_0, var_2434_cast_fp16))[name = tensor("op_2458_cast_fp16")]; + tensor var_2460_equation_0 = const()[name = tensor("op_2460_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2460_cast_fp16 = einsum(equation = var_2460_equation_0, values = (var_2372_1, var_2436_cast_fp16))[name = tensor("op_2460_cast_fp16")]; + tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2372_2, var_2438_cast_fp16))[name = tensor("op_2462_cast_fp16")]; + tensor var_2464_equation_0 = const()[name = tensor("op_2464_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2464_cast_fp16 = einsum(equation = var_2464_equation_0, values = (var_2372_3, var_2440_cast_fp16))[name = tensor("op_2464_cast_fp16")]; + tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2372_4, var_2442_cast_fp16))[name = tensor("op_2466_cast_fp16")]; + tensor var_2468_equation_0 = const()[name = tensor("op_2468_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2372_5, var_2444_cast_fp16))[name = tensor("op_2468_cast_fp16")]; + tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2372_6, var_2446_cast_fp16))[name = tensor("op_2470_cast_fp16")]; + tensor var_2472_equation_0 = const()[name = tensor("op_2472_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2372_7, var_2448_cast_fp16))[name = tensor("op_2472_cast_fp16")]; + tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2372_8, var_2450_cast_fp16))[name = tensor("op_2474_cast_fp16")]; + tensor var_2476_equation_0 = const()[name = tensor("op_2476_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2372_9, var_2452_cast_fp16))[name = tensor("op_2476_cast_fp16")]; + tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2372_10, var_2454_cast_fp16))[name = tensor("op_2478_cast_fp16")]; + tensor var_2480_equation_0 = const()[name = tensor("op_2480_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2372_11, var_2456_cast_fp16))[name = tensor("op_2480_cast_fp16")]; + tensor var_2482_interleave_0 = const()[name = tensor("op_2482_interleave_0"), val = tensor(false)]; + tensor var_2482_cast_fp16 = concat(axis = var_2302, interleave = var_2482_interleave_0, values = (var_2458_cast_fp16, var_2460_cast_fp16, var_2462_cast_fp16, var_2464_cast_fp16, var_2466_cast_fp16, var_2468_cast_fp16, var_2470_cast_fp16, var_2472_cast_fp16, var_2474_cast_fp16, var_2476_cast_fp16, var_2478_cast_fp16, var_2480_cast_fp16))[name = tensor("op_2482_cast_fp16")]; + tensor var_2486 = const()[name = tensor("op_2486"), val = tensor([1, 1])]; + tensor var_2488 = const()[name = tensor("op_2488"), val = tensor([1, 1])]; + tensor var_2490_pad_type_0 = const()[name = tensor("op_2490_pad_type_0"), val = tensor("custom")]; + tensor var_2490_pad_0 = const()[name = tensor("op_2490_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2490 = conv(bias = layers_9_attention_o_proj_bias, dilations = var_2488, groups = var_2302, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2486, weight = layers_9_attention_o_proj_weight, x = var_2482_cast_fp16)[name = tensor("op_2490")]; + tensor var_2492_interleave_0 = const()[name = tensor("op_2492_interleave_0"), val = tensor(false)]; + tensor var_2492 = concat(axis = var_2303, interleave = var_2492_interleave_0, values = var_2490)[name = tensor("op_2492")]; + tensor x_77 = add(x = var_2296_cast_fp16, y = var_2492)[name = tensor("x_77")]; + tensor var_2299_promoted = const()[name = tensor("op_2299_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_2300_promoted = const()[name = tensor("op_2300_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_79 = clip(alpha = var_2299_promoted, beta = var_2300_promoted, x = x_77)[name = tensor("x_79")]; + tensor var_2497 = const()[name = tensor("op_2497"), val = tensor([1])]; + tensor mean_39 = reduce_mean(axes = var_2497, keep_dims = var_2304, x = x_79)[name = tensor("mean_39")]; tensor zero_mean_39 = sub(x = x_79, y = mean_39)[name = tensor("zero_mean_39")]; - tensor var_2370_promoted = const()[name = tensor("op_2370_promoted"), val = tensor(0x1p+1)]; - tensor var_2570 = pow(x = zero_mean_39, y = var_2370_promoted)[name = tensor("op_2570")]; - tensor var_2571 = const()[name = tensor("op_2571"), val = tensor([1])]; - tensor var_2572 = reduce_mean(axes = var_2571, keep_dims = var_2365, x = var_2570)[name = tensor("op_2572")]; - tensor var_2573_to_fp16 = const()[name = tensor("op_2573_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2574_cast_fp16 = add(x = var_2572, y = var_2573_to_fp16)[name = tensor("op_2574_cast_fp16")]; + tensor var_2301_promoted = const()[name = tensor("op_2301_promoted"), val = tensor(0x1p+1)]; + tensor var_2500 = pow(x = zero_mean_39, y = var_2301_promoted)[name = tensor("op_2500")]; + tensor var_2501 = const()[name = tensor("op_2501"), val = tensor([1])]; + tensor var_2502 = reduce_mean(axes = var_2501, keep_dims = var_2304, x = var_2500)[name = tensor("op_2502")]; + tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2504_cast_fp16 = add(x = var_2502, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; tensor denom_39_epsilon_0 = const()[name = tensor("denom_39_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0, x = var_2574_cast_fp16)[name = tensor("denom_39_cast_fp16")]; - tensor var_2576_cast_fp16 = mul(x = zero_mean_39, y = denom_39_cast_fp16)[name = tensor("op_2576_cast_fp16")]; - tensor var_2578_gamma_0_to_fp16 = const()[name = tensor("op_2578_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218563328)))]; - tensor var_2578_beta_0_to_fp16 = const()[name = tensor("op_2578_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218564928)))]; - tensor var_2578_epsilon_0_to_fp16 = const()[name = tensor("op_2578_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2578_cast_fp16 = batch_norm(beta = var_2578_beta_0_to_fp16, epsilon = var_2578_epsilon_0_to_fp16, gamma = var_2578_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2576_cast_fp16)[name = tensor("op_2578_cast_fp16")]; - tensor var_2584 = const()[name = tensor("op_2584"), val = tensor([1, 1])]; - tensor var_2586 = const()[name = tensor("op_2586"), val = tensor([1, 1])]; - tensor var_2588_pad_type_0 = const()[name = tensor("op_2588_pad_type_0"), val = tensor("custom")]; - tensor var_2588_pad_0 = const()[name = tensor("op_2588_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2588 = conv(bias = layers_9_mlp_fc1_bias, dilations = var_2586, groups = var_2363, pad = var_2588_pad_0, pad_type = var_2588_pad_type_0, strides = var_2584, weight = layers_9_mlp_fc1_weight, x = var_2578_cast_fp16)[name = tensor("op_2588")]; - tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; - tensor input_79 = gelu(mode = input_79_mode_0, x = var_2588)[name = tensor("input_79")]; - tensor var_2592 = const()[name = tensor("op_2592"), val = tensor([1, 1])]; - tensor var_2594 = const()[name = tensor("op_2594"), val = tensor([1, 1])]; - tensor var_2596_pad_type_0 = const()[name = tensor("op_2596_pad_type_0"), val = tensor("custom")]; - tensor var_2596_pad_0 = const()[name = tensor("op_2596_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2596 = conv(bias = layers_9_mlp_fc2_bias, dilations = var_2594, groups = var_2363, pad = var_2596_pad_0, pad_type = var_2596_pad_type_0, strides = var_2592, weight = layers_9_mlp_fc2_weight, x = input_79)[name = tensor("op_2596")]; - tensor x_81 = add(x = var_2578_cast_fp16, y = var_2596)[name = tensor("x_81")]; - tensor var_2361_promoted_1 = const()[name = tensor("op_2361_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2362_promoted_1 = const()[name = tensor("op_2362_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_83 = clip(alpha = var_2361_promoted_1, beta = var_2362_promoted_1, x = x_81)[name = tensor("x_83")]; - tensor var_2601 = const()[name = tensor("op_2601"), val = tensor([1])]; - tensor mean_41 = reduce_mean(axes = var_2601, keep_dims = var_2365, x = x_83)[name = tensor("mean_41")]; + tensor denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0, x = var_2504_cast_fp16)[name = tensor("denom_39_cast_fp16")]; + tensor var_2506_cast_fp16 = mul(x = zero_mean_39, y = denom_39_cast_fp16)[name = tensor("op_2506_cast_fp16")]; + tensor var_2508_gamma_0_to_fp16 = const()[name = tensor("op_2508_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218563328)))]; + tensor var_2508_beta_0_to_fp16 = const()[name = tensor("op_2508_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218564928)))]; + tensor var_2508_epsilon_0_to_fp16 = const()[name = tensor("op_2508_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2508_cast_fp16 = batch_norm(beta = var_2508_beta_0_to_fp16, epsilon = var_2508_epsilon_0_to_fp16, gamma = var_2508_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2506_cast_fp16)[name = tensor("op_2508_cast_fp16")]; + tensor var_2514 = const()[name = tensor("op_2514"), val = tensor([1, 1])]; + tensor var_2516 = const()[name = tensor("op_2516"), val = tensor([1, 1])]; + tensor var_2518_pad_type_0 = const()[name = tensor("op_2518_pad_type_0"), val = tensor("custom")]; + tensor var_2518_pad_0 = const()[name = tensor("op_2518_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2518 = conv(bias = layers_9_mlp_fc1_bias, dilations = var_2516, groups = var_2302, pad = var_2518_pad_0, pad_type = var_2518_pad_type_0, strides = var_2514, weight = layers_9_mlp_fc1_weight, x = var_2508_cast_fp16)[name = tensor("op_2518")]; + tensor input_319_mode_0 = const()[name = tensor("input_319_mode_0"), val = tensor("EXACT")]; + tensor input_319 = gelu(mode = input_319_mode_0, x = var_2518)[name = tensor("input_319")]; + tensor var_2522 = const()[name = tensor("op_2522"), val = tensor([1, 1])]; + tensor var_2524 = const()[name = tensor("op_2524"), val = tensor([1, 1])]; + tensor var_2526_pad_type_0 = const()[name = tensor("op_2526_pad_type_0"), val = tensor("custom")]; + tensor var_2526_pad_0 = const()[name = tensor("op_2526_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2526 = conv(bias = layers_9_mlp_fc2_bias, dilations = var_2524, groups = var_2302, pad = var_2526_pad_0, pad_type = var_2526_pad_type_0, strides = var_2522, weight = layers_9_mlp_fc2_weight, x = input_319)[name = tensor("op_2526")]; + tensor x_81 = add(x = var_2508_cast_fp16, y = var_2526)[name = tensor("x_81")]; + tensor var_2299_promoted_1 = const()[name = tensor("op_2299_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_2300_promoted_1 = const()[name = tensor("op_2300_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_83 = clip(alpha = var_2299_promoted_1, beta = var_2300_promoted_1, x = x_81)[name = tensor("x_83")]; + tensor var_2531 = const()[name = tensor("op_2531"), val = tensor([1])]; + tensor mean_41 = reduce_mean(axes = var_2531, keep_dims = var_2304, x = x_83)[name = tensor("mean_41")]; tensor zero_mean_41 = sub(x = x_83, y = mean_41)[name = tensor("zero_mean_41")]; - tensor var_2370_promoted_1 = const()[name = tensor("op_2370_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2604 = pow(x = zero_mean_41, y = var_2370_promoted_1)[name = tensor("op_2604")]; - tensor var_2605 = const()[name = tensor("op_2605"), val = tensor([1])]; - tensor var_2606 = reduce_mean(axes = var_2605, keep_dims = var_2365, x = var_2604)[name = tensor("op_2606")]; - tensor var_2607_to_fp16 = const()[name = tensor("op_2607_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2608_cast_fp16 = add(x = var_2606, y = var_2607_to_fp16)[name = tensor("op_2608_cast_fp16")]; + tensor var_2301_promoted_1 = const()[name = tensor("op_2301_promoted_1"), val = tensor(0x1p+1)]; + tensor var_2534 = pow(x = zero_mean_41, y = var_2301_promoted_1)[name = tensor("op_2534")]; + tensor var_2535 = const()[name = tensor("op_2535"), val = tensor([1])]; + tensor var_2536 = reduce_mean(axes = var_2535, keep_dims = var_2304, x = var_2534)[name = tensor("op_2536")]; + tensor var_2537_to_fp16 = const()[name = tensor("op_2537_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2538_cast_fp16 = add(x = var_2536, y = var_2537_to_fp16)[name = tensor("op_2538_cast_fp16")]; tensor denom_41_epsilon_0 = const()[name = tensor("denom_41_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0, x = var_2608_cast_fp16)[name = tensor("denom_41_cast_fp16")]; - tensor var_2610_cast_fp16 = mul(x = zero_mean_41, y = denom_41_cast_fp16)[name = tensor("op_2610_cast_fp16")]; - tensor var_2612_gamma_0_to_fp16 = const()[name = tensor("op_2612_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218566528)))]; - tensor var_2612_beta_0_to_fp16 = const()[name = tensor("op_2612_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218568128)))]; - tensor var_2612_epsilon_0_to_fp16 = const()[name = tensor("op_2612_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2612_cast_fp16 = batch_norm(beta = var_2612_beta_0_to_fp16, epsilon = var_2612_epsilon_0_to_fp16, gamma = var_2612_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2610_cast_fp16)[name = tensor("op_2612_cast_fp16")]; - tensor var_2617 = const()[name = tensor("op_2617"), val = tensor(1)]; - tensor var_2618 = const()[name = tensor("op_2618"), val = tensor(0)]; - tensor var_2619 = const()[name = tensor("op_2619"), val = tensor(true)]; - tensor var_2644 = const()[name = tensor("op_2644"), val = tensor([1, 1])]; - tensor var_2646 = const()[name = tensor("op_2646"), val = tensor([1, 1])]; - tensor var_2648_pad_type_0 = const()[name = tensor("op_2648_pad_type_0"), val = tensor("custom")]; - tensor var_2648_pad_0 = const()[name = tensor("op_2648_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2648 = conv(bias = layers_10_attention_q_proj_bias, dilations = var_2646, groups = var_2617, pad = var_2648_pad_0, pad_type = var_2648_pad_type_0, strides = var_2644, weight = layers_10_attention_q_proj_weight, x = var_2612_cast_fp16)[name = tensor("op_2648")]; - tensor var_2649 = const()[name = tensor("op_2649"), val = tensor([1, 64, 12, 512])]; - tensor var_2650 = reshape(shape = var_2649, x = var_2648)[name = tensor("op_2650")]; - tensor var_2653 = const()[name = tensor("op_2653"), val = tensor([1, 1])]; - tensor var_2655 = const()[name = tensor("op_2655"), val = tensor([1, 1])]; - tensor var_2657_pad_type_0 = const()[name = tensor("op_2657_pad_type_0"), val = tensor("custom")]; - tensor var_2657_pad_0 = const()[name = tensor("op_2657_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2657 = conv(bias = layers_10_attention_k_proj_bias, dilations = var_2655, groups = var_2617, pad = var_2657_pad_0, pad_type = var_2657_pad_type_0, strides = var_2653, weight = layers_10_attention_k_proj_weight, x = var_2612_cast_fp16)[name = tensor("op_2657")]; - tensor var_2658 = const()[name = tensor("op_2658"), val = tensor([1, 64, 12, 512])]; - tensor ks_21 = reshape(shape = var_2658, x = var_2657)[name = tensor("ks_21")]; - tensor var_2662 = const()[name = tensor("op_2662"), val = tensor([1, 1])]; - tensor var_2664 = const()[name = tensor("op_2664"), val = tensor([1, 1])]; - tensor var_2666_pad_type_0 = const()[name = tensor("op_2666_pad_type_0"), val = tensor("custom")]; - tensor var_2666_pad_0 = const()[name = tensor("op_2666_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2666 = conv(bias = layers_10_attention_v_proj_bias, dilations = var_2664, groups = var_2617, pad = var_2666_pad_0, pad_type = var_2666_pad_type_0, strides = var_2662, weight = layers_10_attention_v_proj_weight, x = var_2612_cast_fp16)[name = tensor("op_2666")]; - tensor var_2667 = const()[name = tensor("op_2667"), val = tensor([1, 64, 12, 512])]; - tensor var_2668 = reshape(shape = var_2667, x = var_2666)[name = tensor("op_2668")]; - tensor tile_52 = const()[name = tensor("tile_52"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2669_axis_0 = const()[name = tensor("op_2669_axis_0"), val = tensor(2)]; - tensor var_2669_0, tensor var_2669_1, tensor var_2669_2, tensor var_2669_3, tensor var_2669_4, tensor var_2669_5, tensor var_2669_6, tensor var_2669_7, tensor var_2669_8, tensor var_2669_9, tensor var_2669_10, tensor var_2669_11 = split(axis = var_2669_axis_0, split_sizes = tile_52, x = var_2650)[name = tensor("op_2669")]; - tensor var_2682_perm_0 = const()[name = tensor("op_2682_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_53 = const()[name = tensor("tile_53"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2683_axis_0 = const()[name = tensor("op_2683_axis_0"), val = tensor(2)]; - tensor transpose_1 = transpose(perm = var_2682_perm_0, x = ks_21)[name = tensor("transpose_1")]; - tensor var_2683_0, tensor var_2683_1, tensor var_2683_2, tensor var_2683_3, tensor var_2683_4, tensor var_2683_5, tensor var_2683_6, tensor var_2683_7, tensor var_2683_8, tensor var_2683_9, tensor var_2683_10, tensor var_2683_11 = split(axis = var_2683_axis_0, split_sizes = tile_53, x = transpose_1)[name = tensor("op_2683")]; - tensor tile_54 = const()[name = tensor("tile_54"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2696_axis_0 = const()[name = tensor("op_2696_axis_0"), val = tensor(2)]; - tensor var_2696_0, tensor var_2696_1, tensor var_2696_2, tensor var_2696_3, tensor var_2696_4, tensor var_2696_5, tensor var_2696_6, tensor var_2696_7, tensor var_2696_8, tensor var_2696_9, tensor var_2696_10, tensor var_2696_11 = split(axis = var_2696_axis_0, split_sizes = tile_54, x = var_2668)[name = tensor("op_2696")]; - tensor var_2710_equation_0 = const()[name = tensor("op_2710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2710 = einsum(equation = var_2710_equation_0, values = (var_2683_0, var_2669_0))[name = tensor("op_2710")]; - tensor var_2711_to_fp16 = const()[name = tensor("op_2711_to_fp16"), val = tensor(0x1p-3)]; - tensor w_241_cast_fp16 = mul(x = var_2710, y = var_2711_to_fp16)[name = tensor("w_241_cast_fp16")]; - tensor var_2714_equation_0 = const()[name = tensor("op_2714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2714 = einsum(equation = var_2714_equation_0, values = (var_2683_1, var_2669_1))[name = tensor("op_2714")]; - tensor var_2715_to_fp16 = const()[name = tensor("op_2715_to_fp16"), val = tensor(0x1p-3)]; - tensor w_243_cast_fp16 = mul(x = var_2714, y = var_2715_to_fp16)[name = tensor("w_243_cast_fp16")]; - tensor var_2718_equation_0 = const()[name = tensor("op_2718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2718 = einsum(equation = var_2718_equation_0, values = (var_2683_2, var_2669_2))[name = tensor("op_2718")]; - tensor var_2719_to_fp16 = const()[name = tensor("op_2719_to_fp16"), val = tensor(0x1p-3)]; - tensor w_245_cast_fp16 = mul(x = var_2718, y = var_2719_to_fp16)[name = tensor("w_245_cast_fp16")]; - tensor var_2722_equation_0 = const()[name = tensor("op_2722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2722 = einsum(equation = var_2722_equation_0, values = (var_2683_3, var_2669_3))[name = tensor("op_2722")]; - tensor var_2723_to_fp16 = const()[name = tensor("op_2723_to_fp16"), val = tensor(0x1p-3)]; - tensor w_247_cast_fp16 = mul(x = var_2722, y = var_2723_to_fp16)[name = tensor("w_247_cast_fp16")]; - tensor var_2726_equation_0 = const()[name = tensor("op_2726_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2726 = einsum(equation = var_2726_equation_0, values = (var_2683_4, var_2669_4))[name = tensor("op_2726")]; - tensor var_2727_to_fp16 = const()[name = tensor("op_2727_to_fp16"), val = tensor(0x1p-3)]; - tensor w_249_cast_fp16 = mul(x = var_2726, y = var_2727_to_fp16)[name = tensor("w_249_cast_fp16")]; - tensor var_2730_equation_0 = const()[name = tensor("op_2730_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2730 = einsum(equation = var_2730_equation_0, values = (var_2683_5, var_2669_5))[name = tensor("op_2730")]; - tensor var_2731_to_fp16 = const()[name = tensor("op_2731_to_fp16"), val = tensor(0x1p-3)]; - tensor w_251_cast_fp16 = mul(x = var_2730, y = var_2731_to_fp16)[name = tensor("w_251_cast_fp16")]; - tensor var_2734_equation_0 = const()[name = tensor("op_2734_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2734 = einsum(equation = var_2734_equation_0, values = (var_2683_6, var_2669_6))[name = tensor("op_2734")]; - tensor var_2735_to_fp16 = const()[name = tensor("op_2735_to_fp16"), val = tensor(0x1p-3)]; - tensor w_253_cast_fp16 = mul(x = var_2734, y = var_2735_to_fp16)[name = tensor("w_253_cast_fp16")]; - tensor var_2738_equation_0 = const()[name = tensor("op_2738_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2738 = einsum(equation = var_2738_equation_0, values = (var_2683_7, var_2669_7))[name = tensor("op_2738")]; - tensor var_2739_to_fp16 = const()[name = tensor("op_2739_to_fp16"), val = tensor(0x1p-3)]; - tensor w_255_cast_fp16 = mul(x = var_2738, y = var_2739_to_fp16)[name = tensor("w_255_cast_fp16")]; - tensor var_2742_equation_0 = const()[name = tensor("op_2742_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2742 = einsum(equation = var_2742_equation_0, values = (var_2683_8, var_2669_8))[name = tensor("op_2742")]; - tensor var_2743_to_fp16 = const()[name = tensor("op_2743_to_fp16"), val = tensor(0x1p-3)]; - tensor w_257_cast_fp16 = mul(x = var_2742, y = var_2743_to_fp16)[name = tensor("w_257_cast_fp16")]; - tensor var_2746_equation_0 = const()[name = tensor("op_2746_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2746 = einsum(equation = var_2746_equation_0, values = (var_2683_9, var_2669_9))[name = tensor("op_2746")]; - tensor var_2747_to_fp16 = const()[name = tensor("op_2747_to_fp16"), val = tensor(0x1p-3)]; - tensor w_259_cast_fp16 = mul(x = var_2746, y = var_2747_to_fp16)[name = tensor("w_259_cast_fp16")]; - tensor var_2750_equation_0 = const()[name = tensor("op_2750_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2750 = einsum(equation = var_2750_equation_0, values = (var_2683_10, var_2669_10))[name = tensor("op_2750")]; - tensor var_2751_to_fp16 = const()[name = tensor("op_2751_to_fp16"), val = tensor(0x1p-3)]; - tensor w_261_cast_fp16 = mul(x = var_2750, y = var_2751_to_fp16)[name = tensor("w_261_cast_fp16")]; - tensor var_2754_equation_0 = const()[name = tensor("op_2754_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2754 = einsum(equation = var_2754_equation_0, values = (var_2683_11, var_2669_11))[name = tensor("op_2754")]; - tensor var_2755_to_fp16 = const()[name = tensor("op_2755_to_fp16"), val = tensor(0x1p-3)]; - tensor w_263_cast_fp16 = mul(x = var_2754, y = var_2755_to_fp16)[name = tensor("w_263_cast_fp16")]; - tensor var_2757_cast_fp16 = add(x = w_241_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2757_cast_fp16")]; - tensor var_2758_cast_fp16 = softmax(axis = var_2617, x = var_2757_cast_fp16)[name = tensor("op_2758_cast_fp16")]; - tensor var_2759_cast_fp16 = add(x = w_243_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2759_cast_fp16")]; - tensor var_2760_cast_fp16 = softmax(axis = var_2617, x = var_2759_cast_fp16)[name = tensor("op_2760_cast_fp16")]; - tensor var_2761_cast_fp16 = add(x = w_245_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2761_cast_fp16")]; - tensor var_2762_cast_fp16 = softmax(axis = var_2617, x = var_2761_cast_fp16)[name = tensor("op_2762_cast_fp16")]; - tensor var_2763_cast_fp16 = add(x = w_247_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2763_cast_fp16")]; - tensor var_2764_cast_fp16 = softmax(axis = var_2617, x = var_2763_cast_fp16)[name = tensor("op_2764_cast_fp16")]; - tensor var_2765_cast_fp16 = add(x = w_249_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2765_cast_fp16")]; - tensor var_2766_cast_fp16 = softmax(axis = var_2617, x = var_2765_cast_fp16)[name = tensor("op_2766_cast_fp16")]; - tensor var_2767_cast_fp16 = add(x = w_251_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2767_cast_fp16")]; - tensor var_2768_cast_fp16 = softmax(axis = var_2617, x = var_2767_cast_fp16)[name = tensor("op_2768_cast_fp16")]; - tensor var_2769_cast_fp16 = add(x = w_253_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2769_cast_fp16")]; - tensor var_2770_cast_fp16 = softmax(axis = var_2617, x = var_2769_cast_fp16)[name = tensor("op_2770_cast_fp16")]; - tensor var_2771_cast_fp16 = add(x = w_255_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2771_cast_fp16")]; - tensor var_2772_cast_fp16 = softmax(axis = var_2617, x = var_2771_cast_fp16)[name = tensor("op_2772_cast_fp16")]; - tensor var_2773_cast_fp16 = add(x = w_257_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2773_cast_fp16")]; - tensor var_2774_cast_fp16 = softmax(axis = var_2617, x = var_2773_cast_fp16)[name = tensor("op_2774_cast_fp16")]; - tensor var_2775_cast_fp16 = add(x = w_259_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2775_cast_fp16")]; - tensor var_2776_cast_fp16 = softmax(axis = var_2617, x = var_2775_cast_fp16)[name = tensor("op_2776_cast_fp16")]; - tensor var_2777_cast_fp16 = add(x = w_261_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2777_cast_fp16")]; - tensor var_2778_cast_fp16 = softmax(axis = var_2617, x = var_2777_cast_fp16)[name = tensor("op_2778_cast_fp16")]; - tensor var_2779_cast_fp16 = add(x = w_263_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_2779_cast_fp16")]; - tensor var_2780_cast_fp16 = softmax(axis = var_2617, x = var_2779_cast_fp16)[name = tensor("op_2780_cast_fp16")]; - tensor var_2782_equation_0 = const()[name = tensor("op_2782_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2782_cast_fp16 = einsum(equation = var_2782_equation_0, values = (var_2696_0, var_2758_cast_fp16))[name = tensor("op_2782_cast_fp16")]; - tensor var_2784_equation_0 = const()[name = tensor("op_2784_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2784_cast_fp16 = einsum(equation = var_2784_equation_0, values = (var_2696_1, var_2760_cast_fp16))[name = tensor("op_2784_cast_fp16")]; - tensor var_2786_equation_0 = const()[name = tensor("op_2786_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2786_cast_fp16 = einsum(equation = var_2786_equation_0, values = (var_2696_2, var_2762_cast_fp16))[name = tensor("op_2786_cast_fp16")]; - tensor var_2788_equation_0 = const()[name = tensor("op_2788_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2788_cast_fp16 = einsum(equation = var_2788_equation_0, values = (var_2696_3, var_2764_cast_fp16))[name = tensor("op_2788_cast_fp16")]; - tensor var_2790_equation_0 = const()[name = tensor("op_2790_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2790_cast_fp16 = einsum(equation = var_2790_equation_0, values = (var_2696_4, var_2766_cast_fp16))[name = tensor("op_2790_cast_fp16")]; - tensor var_2792_equation_0 = const()[name = tensor("op_2792_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2792_cast_fp16 = einsum(equation = var_2792_equation_0, values = (var_2696_5, var_2768_cast_fp16))[name = tensor("op_2792_cast_fp16")]; - tensor var_2794_equation_0 = const()[name = tensor("op_2794_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2794_cast_fp16 = einsum(equation = var_2794_equation_0, values = (var_2696_6, var_2770_cast_fp16))[name = tensor("op_2794_cast_fp16")]; - tensor var_2796_equation_0 = const()[name = tensor("op_2796_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2796_cast_fp16 = einsum(equation = var_2796_equation_0, values = (var_2696_7, var_2772_cast_fp16))[name = tensor("op_2796_cast_fp16")]; - tensor var_2798_equation_0 = const()[name = tensor("op_2798_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2798_cast_fp16 = einsum(equation = var_2798_equation_0, values = (var_2696_8, var_2774_cast_fp16))[name = tensor("op_2798_cast_fp16")]; - tensor var_2800_equation_0 = const()[name = tensor("op_2800_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2800_cast_fp16 = einsum(equation = var_2800_equation_0, values = (var_2696_9, var_2776_cast_fp16))[name = tensor("op_2800_cast_fp16")]; - tensor var_2802_equation_0 = const()[name = tensor("op_2802_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2802_cast_fp16 = einsum(equation = var_2802_equation_0, values = (var_2696_10, var_2778_cast_fp16))[name = tensor("op_2802_cast_fp16")]; - tensor var_2804_equation_0 = const()[name = tensor("op_2804_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2804_cast_fp16 = einsum(equation = var_2804_equation_0, values = (var_2696_11, var_2780_cast_fp16))[name = tensor("op_2804_cast_fp16")]; - tensor var_2806_interleave_0 = const()[name = tensor("op_2806_interleave_0"), val = tensor(false)]; - tensor var_2806_cast_fp16 = concat(axis = var_2617, interleave = var_2806_interleave_0, values = (var_2782_cast_fp16, var_2784_cast_fp16, var_2786_cast_fp16, var_2788_cast_fp16, var_2790_cast_fp16, var_2792_cast_fp16, var_2794_cast_fp16, var_2796_cast_fp16, var_2798_cast_fp16, var_2800_cast_fp16, var_2802_cast_fp16, var_2804_cast_fp16))[name = tensor("op_2806_cast_fp16")]; - tensor var_2810 = const()[name = tensor("op_2810"), val = tensor([1, 1])]; - tensor var_2812 = const()[name = tensor("op_2812"), val = tensor([1, 1])]; - tensor var_2814_pad_type_0 = const()[name = tensor("op_2814_pad_type_0"), val = tensor("custom")]; - tensor var_2814_pad_0 = const()[name = tensor("op_2814_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2814 = conv(bias = layers_10_attention_o_proj_bias, dilations = var_2812, groups = var_2617, pad = var_2814_pad_0, pad_type = var_2814_pad_type_0, strides = var_2810, weight = layers_10_attention_o_proj_weight, x = var_2806_cast_fp16)[name = tensor("op_2814")]; - tensor var_2816_interleave_0 = const()[name = tensor("op_2816_interleave_0"), val = tensor(false)]; - tensor var_2816 = concat(axis = var_2618, interleave = var_2816_interleave_0, values = var_2814)[name = tensor("op_2816")]; - tensor x_85 = add(x = var_2612_cast_fp16, y = var_2816)[name = tensor("x_85")]; - tensor var_2615_promoted = const()[name = tensor("op_2615_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2616_promoted = const()[name = tensor("op_2616_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_87 = clip(alpha = var_2615_promoted, beta = var_2616_promoted, x = x_85)[name = tensor("x_87")]; - tensor var_2821 = const()[name = tensor("op_2821"), val = tensor([1])]; - tensor mean_43 = reduce_mean(axes = var_2821, keep_dims = var_2619, x = x_87)[name = tensor("mean_43")]; + tensor denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0, x = var_2538_cast_fp16)[name = tensor("denom_41_cast_fp16")]; + tensor var_2540_cast_fp16 = mul(x = zero_mean_41, y = denom_41_cast_fp16)[name = tensor("op_2540_cast_fp16")]; + tensor var_2542_gamma_0_to_fp16 = const()[name = tensor("op_2542_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218566528)))]; + tensor var_2542_beta_0_to_fp16 = const()[name = tensor("op_2542_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218568128)))]; + tensor var_2542_epsilon_0_to_fp16 = const()[name = tensor("op_2542_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2542_cast_fp16 = batch_norm(beta = var_2542_beta_0_to_fp16, epsilon = var_2542_epsilon_0_to_fp16, gamma = var_2542_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2540_cast_fp16)[name = tensor("op_2542_cast_fp16")]; + tensor var_2548 = const()[name = tensor("op_2548"), val = tensor(1)]; + tensor var_2549 = const()[name = tensor("op_2549"), val = tensor(0)]; + tensor var_2550 = const()[name = tensor("op_2550"), val = tensor(true)]; + tensor var_2572 = const()[name = tensor("op_2572"), val = tensor([1, 1])]; + tensor var_2574 = const()[name = tensor("op_2574"), val = tensor([1, 1])]; + tensor var_2576_pad_type_0 = const()[name = tensor("op_2576_pad_type_0"), val = tensor("custom")]; + tensor var_2576_pad_0 = const()[name = tensor("op_2576_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2576 = conv(bias = layers_10_attention_q_proj_bias, dilations = var_2574, groups = var_2548, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2572, weight = layers_10_attention_q_proj_weight, x = var_2542_cast_fp16)[name = tensor("op_2576")]; + tensor var_2579 = const()[name = tensor("op_2579"), val = tensor([1, 1])]; + tensor var_2581 = const()[name = tensor("op_2581"), val = tensor([1, 1])]; + tensor ks_21_pad_type_0 = const()[name = tensor("ks_21_pad_type_0"), val = tensor("custom")]; + tensor ks_21_pad_0 = const()[name = tensor("ks_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks_21 = conv(bias = layers_10_attention_k_proj_bias, dilations = var_2581, groups = var_2548, pad = ks_21_pad_0, pad_type = ks_21_pad_type_0, strides = var_2579, weight = layers_10_attention_k_proj_weight, x = var_2542_cast_fp16)[name = tensor("ks_21")]; + tensor var_2586 = const()[name = tensor("op_2586"), val = tensor([1, 1])]; + tensor var_2588 = const()[name = tensor("op_2588"), val = tensor([1, 1])]; + tensor var_2590_pad_type_0 = const()[name = tensor("op_2590_pad_type_0"), val = tensor("custom")]; + tensor var_2590_pad_0 = const()[name = tensor("op_2590_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2590 = conv(bias = layers_10_attention_v_proj_bias, dilations = var_2588, groups = var_2548, pad = var_2590_pad_0, pad_type = var_2590_pad_type_0, strides = var_2586, weight = layers_10_attention_v_proj_weight, x = var_2542_cast_fp16)[name = tensor("op_2590")]; + tensor tile_52 = const()[name = tensor("tile_52"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2591_axis_0 = const()[name = tensor("op_2591_axis_0"), val = tensor(1)]; + tensor var_2591_0, tensor var_2591_1, tensor var_2591_2, tensor var_2591_3, tensor var_2591_4, tensor var_2591_5, tensor var_2591_6, tensor var_2591_7, tensor var_2591_8, tensor var_2591_9, tensor var_2591_10, tensor var_2591_11 = split(axis = var_2591_axis_0, split_sizes = tile_52, x = var_2576)[name = tensor("op_2591")]; + tensor var_2604_perm_0 = const()[name = tensor("op_2604_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_53 = const()[name = tensor("tile_53"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2605_axis_0 = const()[name = tensor("op_2605_axis_0"), val = tensor(3)]; + tensor transpose_1 = transpose(perm = var_2604_perm_0, x = ks_21)[name = tensor("transpose_1")]; + tensor var_2605_0, tensor var_2605_1, tensor var_2605_2, tensor var_2605_3, tensor var_2605_4, tensor var_2605_5, tensor var_2605_6, tensor var_2605_7, tensor var_2605_8, tensor var_2605_9, tensor var_2605_10, tensor var_2605_11 = split(axis = var_2605_axis_0, split_sizes = tile_53, x = transpose_1)[name = tensor("op_2605")]; + tensor tile_54 = const()[name = tensor("tile_54"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2618_axis_0 = const()[name = tensor("op_2618_axis_0"), val = tensor(1)]; + tensor var_2618_0, tensor var_2618_1, tensor var_2618_2, tensor var_2618_3, tensor var_2618_4, tensor var_2618_5, tensor var_2618_6, tensor var_2618_7, tensor var_2618_8, tensor var_2618_9, tensor var_2618_10, tensor var_2618_11 = split(axis = var_2618_axis_0, split_sizes = tile_54, x = var_2590)[name = tensor("op_2618")]; + tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2632 = einsum(equation = var_2632_equation_0, values = (var_2605_0, var_2591_0))[name = tensor("op_2632")]; + tensor var_2633_to_fp16 = const()[name = tensor("op_2633_to_fp16"), val = tensor(0x1p-3)]; + tensor w_241_cast_fp16 = mul(x = var_2632, y = var_2633_to_fp16)[name = tensor("w_241_cast_fp16")]; + tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2636 = einsum(equation = var_2636_equation_0, values = (var_2605_1, var_2591_1))[name = tensor("op_2636")]; + tensor var_2637_to_fp16 = const()[name = tensor("op_2637_to_fp16"), val = tensor(0x1p-3)]; + tensor w_243_cast_fp16 = mul(x = var_2636, y = var_2637_to_fp16)[name = tensor("w_243_cast_fp16")]; + tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2640 = einsum(equation = var_2640_equation_0, values = (var_2605_2, var_2591_2))[name = tensor("op_2640")]; + tensor var_2641_to_fp16 = const()[name = tensor("op_2641_to_fp16"), val = tensor(0x1p-3)]; + tensor w_245_cast_fp16 = mul(x = var_2640, y = var_2641_to_fp16)[name = tensor("w_245_cast_fp16")]; + tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2644 = einsum(equation = var_2644_equation_0, values = (var_2605_3, var_2591_3))[name = tensor("op_2644")]; + tensor var_2645_to_fp16 = const()[name = tensor("op_2645_to_fp16"), val = tensor(0x1p-3)]; + tensor w_247_cast_fp16 = mul(x = var_2644, y = var_2645_to_fp16)[name = tensor("w_247_cast_fp16")]; + tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2648 = einsum(equation = var_2648_equation_0, values = (var_2605_4, var_2591_4))[name = tensor("op_2648")]; + tensor var_2649_to_fp16 = const()[name = tensor("op_2649_to_fp16"), val = tensor(0x1p-3)]; + tensor w_249_cast_fp16 = mul(x = var_2648, y = var_2649_to_fp16)[name = tensor("w_249_cast_fp16")]; + tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2652 = einsum(equation = var_2652_equation_0, values = (var_2605_5, var_2591_5))[name = tensor("op_2652")]; + tensor var_2653_to_fp16 = const()[name = tensor("op_2653_to_fp16"), val = tensor(0x1p-3)]; + tensor w_251_cast_fp16 = mul(x = var_2652, y = var_2653_to_fp16)[name = tensor("w_251_cast_fp16")]; + tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2656 = einsum(equation = var_2656_equation_0, values = (var_2605_6, var_2591_6))[name = tensor("op_2656")]; + tensor var_2657_to_fp16 = const()[name = tensor("op_2657_to_fp16"), val = tensor(0x1p-3)]; + tensor w_253_cast_fp16 = mul(x = var_2656, y = var_2657_to_fp16)[name = tensor("w_253_cast_fp16")]; + tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2660 = einsum(equation = var_2660_equation_0, values = (var_2605_7, var_2591_7))[name = tensor("op_2660")]; + tensor var_2661_to_fp16 = const()[name = tensor("op_2661_to_fp16"), val = tensor(0x1p-3)]; + tensor w_255_cast_fp16 = mul(x = var_2660, y = var_2661_to_fp16)[name = tensor("w_255_cast_fp16")]; + tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2664 = einsum(equation = var_2664_equation_0, values = (var_2605_8, var_2591_8))[name = tensor("op_2664")]; + tensor var_2665_to_fp16 = const()[name = tensor("op_2665_to_fp16"), val = tensor(0x1p-3)]; + tensor w_257_cast_fp16 = mul(x = var_2664, y = var_2665_to_fp16)[name = tensor("w_257_cast_fp16")]; + tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2668 = einsum(equation = var_2668_equation_0, values = (var_2605_9, var_2591_9))[name = tensor("op_2668")]; + tensor var_2669_to_fp16 = const()[name = tensor("op_2669_to_fp16"), val = tensor(0x1p-3)]; + tensor w_259_cast_fp16 = mul(x = var_2668, y = var_2669_to_fp16)[name = tensor("w_259_cast_fp16")]; + tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2672 = einsum(equation = var_2672_equation_0, values = (var_2605_10, var_2591_10))[name = tensor("op_2672")]; + tensor var_2673_to_fp16 = const()[name = tensor("op_2673_to_fp16"), val = tensor(0x1p-3)]; + tensor w_261_cast_fp16 = mul(x = var_2672, y = var_2673_to_fp16)[name = tensor("w_261_cast_fp16")]; + tensor var_2676_equation_0 = const()[name = tensor("op_2676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2676 = einsum(equation = var_2676_equation_0, values = (var_2605_11, var_2591_11))[name = tensor("op_2676")]; + tensor var_2677_to_fp16 = const()[name = tensor("op_2677_to_fp16"), val = tensor(0x1p-3)]; + tensor w_263_cast_fp16 = mul(x = var_2676, y = var_2677_to_fp16)[name = tensor("w_263_cast_fp16")]; + tensor input_323_cast_fp16 = add(x = w_241_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_323_cast_fp16")]; + tensor var_2680_cast_fp16 = softmax(axis = var_2548, x = input_323_cast_fp16)[name = tensor("op_2680_cast_fp16")]; + tensor input_325_cast_fp16 = add(x = w_243_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_325_cast_fp16")]; + tensor var_2682_cast_fp16 = softmax(axis = var_2548, x = input_325_cast_fp16)[name = tensor("op_2682_cast_fp16")]; + tensor input_327_cast_fp16 = add(x = w_245_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_327_cast_fp16")]; + tensor var_2684_cast_fp16 = softmax(axis = var_2548, x = input_327_cast_fp16)[name = tensor("op_2684_cast_fp16")]; + tensor input_329_cast_fp16 = add(x = w_247_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_329_cast_fp16")]; + tensor var_2686_cast_fp16 = softmax(axis = var_2548, x = input_329_cast_fp16)[name = tensor("op_2686_cast_fp16")]; + tensor input_331_cast_fp16 = add(x = w_249_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_331_cast_fp16")]; + tensor var_2688_cast_fp16 = softmax(axis = var_2548, x = input_331_cast_fp16)[name = tensor("op_2688_cast_fp16")]; + tensor input_333_cast_fp16 = add(x = w_251_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_333_cast_fp16")]; + tensor var_2690_cast_fp16 = softmax(axis = var_2548, x = input_333_cast_fp16)[name = tensor("op_2690_cast_fp16")]; + tensor input_335_cast_fp16 = add(x = w_253_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_335_cast_fp16")]; + tensor var_2692_cast_fp16 = softmax(axis = var_2548, x = input_335_cast_fp16)[name = tensor("op_2692_cast_fp16")]; + tensor input_337_cast_fp16 = add(x = w_255_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_337_cast_fp16")]; + tensor var_2694_cast_fp16 = softmax(axis = var_2548, x = input_337_cast_fp16)[name = tensor("op_2694_cast_fp16")]; + tensor input_339_cast_fp16 = add(x = w_257_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_339_cast_fp16")]; + tensor var_2696_cast_fp16 = softmax(axis = var_2548, x = input_339_cast_fp16)[name = tensor("op_2696_cast_fp16")]; + tensor input_341_cast_fp16 = add(x = w_259_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_341_cast_fp16")]; + tensor var_2698_cast_fp16 = softmax(axis = var_2548, x = input_341_cast_fp16)[name = tensor("op_2698_cast_fp16")]; + tensor input_343_cast_fp16 = add(x = w_261_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_343_cast_fp16")]; + tensor var_2700_cast_fp16 = softmax(axis = var_2548, x = input_343_cast_fp16)[name = tensor("op_2700_cast_fp16")]; + tensor input_345_cast_fp16 = add(x = w_263_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_345_cast_fp16")]; + tensor var_2702_cast_fp16 = softmax(axis = var_2548, x = input_345_cast_fp16)[name = tensor("op_2702_cast_fp16")]; + tensor var_2704_equation_0 = const()[name = tensor("op_2704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2704_cast_fp16 = einsum(equation = var_2704_equation_0, values = (var_2618_0, var_2680_cast_fp16))[name = tensor("op_2704_cast_fp16")]; + tensor var_2706_equation_0 = const()[name = tensor("op_2706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2706_cast_fp16 = einsum(equation = var_2706_equation_0, values = (var_2618_1, var_2682_cast_fp16))[name = tensor("op_2706_cast_fp16")]; + tensor var_2708_equation_0 = const()[name = tensor("op_2708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2708_cast_fp16 = einsum(equation = var_2708_equation_0, values = (var_2618_2, var_2684_cast_fp16))[name = tensor("op_2708_cast_fp16")]; + tensor var_2710_equation_0 = const()[name = tensor("op_2710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2710_cast_fp16 = einsum(equation = var_2710_equation_0, values = (var_2618_3, var_2686_cast_fp16))[name = tensor("op_2710_cast_fp16")]; + tensor var_2712_equation_0 = const()[name = tensor("op_2712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2712_cast_fp16 = einsum(equation = var_2712_equation_0, values = (var_2618_4, var_2688_cast_fp16))[name = tensor("op_2712_cast_fp16")]; + tensor var_2714_equation_0 = const()[name = tensor("op_2714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2714_cast_fp16 = einsum(equation = var_2714_equation_0, values = (var_2618_5, var_2690_cast_fp16))[name = tensor("op_2714_cast_fp16")]; + tensor var_2716_equation_0 = const()[name = tensor("op_2716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2716_cast_fp16 = einsum(equation = var_2716_equation_0, values = (var_2618_6, var_2692_cast_fp16))[name = tensor("op_2716_cast_fp16")]; + tensor var_2718_equation_0 = const()[name = tensor("op_2718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2718_cast_fp16 = einsum(equation = var_2718_equation_0, values = (var_2618_7, var_2694_cast_fp16))[name = tensor("op_2718_cast_fp16")]; + tensor var_2720_equation_0 = const()[name = tensor("op_2720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2720_cast_fp16 = einsum(equation = var_2720_equation_0, values = (var_2618_8, var_2696_cast_fp16))[name = tensor("op_2720_cast_fp16")]; + tensor var_2722_equation_0 = const()[name = tensor("op_2722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2618_9, var_2698_cast_fp16))[name = tensor("op_2722_cast_fp16")]; + tensor var_2724_equation_0 = const()[name = tensor("op_2724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2618_10, var_2700_cast_fp16))[name = tensor("op_2724_cast_fp16")]; + tensor var_2726_equation_0 = const()[name = tensor("op_2726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2618_11, var_2702_cast_fp16))[name = tensor("op_2726_cast_fp16")]; + tensor var_2728_interleave_0 = const()[name = tensor("op_2728_interleave_0"), val = tensor(false)]; + tensor var_2728_cast_fp16 = concat(axis = var_2548, interleave = var_2728_interleave_0, values = (var_2704_cast_fp16, var_2706_cast_fp16, var_2708_cast_fp16, var_2710_cast_fp16, var_2712_cast_fp16, var_2714_cast_fp16, var_2716_cast_fp16, var_2718_cast_fp16, var_2720_cast_fp16, var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16))[name = tensor("op_2728_cast_fp16")]; + tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; + tensor var_2734 = const()[name = tensor("op_2734"), val = tensor([1, 1])]; + tensor var_2736_pad_type_0 = const()[name = tensor("op_2736_pad_type_0"), val = tensor("custom")]; + tensor var_2736_pad_0 = const()[name = tensor("op_2736_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2736 = conv(bias = layers_10_attention_o_proj_bias, dilations = var_2734, groups = var_2548, pad = var_2736_pad_0, pad_type = var_2736_pad_type_0, strides = var_2732, weight = layers_10_attention_o_proj_weight, x = var_2728_cast_fp16)[name = tensor("op_2736")]; + tensor var_2738_interleave_0 = const()[name = tensor("op_2738_interleave_0"), val = tensor(false)]; + tensor var_2738 = concat(axis = var_2549, interleave = var_2738_interleave_0, values = var_2736)[name = tensor("op_2738")]; + tensor x_85 = add(x = var_2542_cast_fp16, y = var_2738)[name = tensor("x_85")]; + tensor var_2545_promoted = const()[name = tensor("op_2545_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_2546_promoted = const()[name = tensor("op_2546_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_87 = clip(alpha = var_2545_promoted, beta = var_2546_promoted, x = x_85)[name = tensor("x_87")]; + tensor var_2743 = const()[name = tensor("op_2743"), val = tensor([1])]; + tensor mean_43 = reduce_mean(axes = var_2743, keep_dims = var_2550, x = x_87)[name = tensor("mean_43")]; tensor zero_mean_43 = sub(x = x_87, y = mean_43)[name = tensor("zero_mean_43")]; - tensor var_2624_promoted = const()[name = tensor("op_2624_promoted"), val = tensor(0x1p+1)]; - tensor var_2824 = pow(x = zero_mean_43, y = var_2624_promoted)[name = tensor("op_2824")]; - tensor var_2825 = const()[name = tensor("op_2825"), val = tensor([1])]; - tensor var_2826 = reduce_mean(axes = var_2825, keep_dims = var_2619, x = var_2824)[name = tensor("op_2826")]; - tensor var_2827_to_fp16 = const()[name = tensor("op_2827_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2828_cast_fp16 = add(x = var_2826, y = var_2827_to_fp16)[name = tensor("op_2828_cast_fp16")]; + tensor var_2547_promoted = const()[name = tensor("op_2547_promoted"), val = tensor(0x1p+1)]; + tensor var_2746 = pow(x = zero_mean_43, y = var_2547_promoted)[name = tensor("op_2746")]; + tensor var_2747 = const()[name = tensor("op_2747"), val = tensor([1])]; + tensor var_2748 = reduce_mean(axes = var_2747, keep_dims = var_2550, x = var_2746)[name = tensor("op_2748")]; + tensor var_2749_to_fp16 = const()[name = tensor("op_2749_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2750_cast_fp16 = add(x = var_2748, y = var_2749_to_fp16)[name = tensor("op_2750_cast_fp16")]; tensor denom_43_epsilon_0 = const()[name = tensor("denom_43_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0, x = var_2828_cast_fp16)[name = tensor("denom_43_cast_fp16")]; - tensor var_2830_cast_fp16 = mul(x = zero_mean_43, y = denom_43_cast_fp16)[name = tensor("op_2830_cast_fp16")]; - tensor var_2832_gamma_0_to_fp16 = const()[name = tensor("op_2832_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218569728)))]; - tensor var_2832_beta_0_to_fp16 = const()[name = tensor("op_2832_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218571328)))]; - tensor var_2832_epsilon_0_to_fp16 = const()[name = tensor("op_2832_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2832_cast_fp16 = batch_norm(beta = var_2832_beta_0_to_fp16, epsilon = var_2832_epsilon_0_to_fp16, gamma = var_2832_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2830_cast_fp16)[name = tensor("op_2832_cast_fp16")]; - tensor var_2838 = const()[name = tensor("op_2838"), val = tensor([1, 1])]; - tensor var_2840 = const()[name = tensor("op_2840"), val = tensor([1, 1])]; - tensor var_2842_pad_type_0 = const()[name = tensor("op_2842_pad_type_0"), val = tensor("custom")]; - tensor var_2842_pad_0 = const()[name = tensor("op_2842_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2842 = conv(bias = layers_10_mlp_fc1_bias, dilations = var_2840, groups = var_2617, pad = var_2842_pad_0, pad_type = var_2842_pad_type_0, strides = var_2838, weight = layers_10_mlp_fc1_weight, x = var_2832_cast_fp16)[name = tensor("op_2842")]; - tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; - tensor input_87 = gelu(mode = input_87_mode_0, x = var_2842)[name = tensor("input_87")]; - tensor var_2846 = const()[name = tensor("op_2846"), val = tensor([1, 1])]; - tensor var_2848 = const()[name = tensor("op_2848"), val = tensor([1, 1])]; - tensor var_2850_pad_type_0 = const()[name = tensor("op_2850_pad_type_0"), val = tensor("custom")]; - tensor var_2850_pad_0 = const()[name = tensor("op_2850_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2850 = conv(bias = layers_10_mlp_fc2_bias, dilations = var_2848, groups = var_2617, pad = var_2850_pad_0, pad_type = var_2850_pad_type_0, strides = var_2846, weight = layers_10_mlp_fc2_weight, x = input_87)[name = tensor("op_2850")]; - tensor x_89 = add(x = var_2832_cast_fp16, y = var_2850)[name = tensor("x_89")]; - tensor var_2615_promoted_1 = const()[name = tensor("op_2615_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2616_promoted_1 = const()[name = tensor("op_2616_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_91 = clip(alpha = var_2615_promoted_1, beta = var_2616_promoted_1, x = x_89)[name = tensor("x_91")]; - tensor var_2855 = const()[name = tensor("op_2855"), val = tensor([1])]; - tensor mean_45 = reduce_mean(axes = var_2855, keep_dims = var_2619, x = x_91)[name = tensor("mean_45")]; + tensor denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0, x = var_2750_cast_fp16)[name = tensor("denom_43_cast_fp16")]; + tensor var_2752_cast_fp16 = mul(x = zero_mean_43, y = denom_43_cast_fp16)[name = tensor("op_2752_cast_fp16")]; + tensor var_2754_gamma_0_to_fp16 = const()[name = tensor("op_2754_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218569728)))]; + tensor var_2754_beta_0_to_fp16 = const()[name = tensor("op_2754_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218571328)))]; + tensor var_2754_epsilon_0_to_fp16 = const()[name = tensor("op_2754_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2754_cast_fp16 = batch_norm(beta = var_2754_beta_0_to_fp16, epsilon = var_2754_epsilon_0_to_fp16, gamma = var_2754_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2752_cast_fp16)[name = tensor("op_2754_cast_fp16")]; + tensor var_2760 = const()[name = tensor("op_2760"), val = tensor([1, 1])]; + tensor var_2762 = const()[name = tensor("op_2762"), val = tensor([1, 1])]; + tensor var_2764_pad_type_0 = const()[name = tensor("op_2764_pad_type_0"), val = tensor("custom")]; + tensor var_2764_pad_0 = const()[name = tensor("op_2764_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2764 = conv(bias = layers_10_mlp_fc1_bias, dilations = var_2762, groups = var_2548, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2760, weight = layers_10_mlp_fc1_weight, x = var_2754_cast_fp16)[name = tensor("op_2764")]; + tensor input_351_mode_0 = const()[name = tensor("input_351_mode_0"), val = tensor("EXACT")]; + tensor input_351 = gelu(mode = input_351_mode_0, x = var_2764)[name = tensor("input_351")]; + tensor var_2768 = const()[name = tensor("op_2768"), val = tensor([1, 1])]; + tensor var_2770 = const()[name = tensor("op_2770"), val = tensor([1, 1])]; + tensor var_2772_pad_type_0 = const()[name = tensor("op_2772_pad_type_0"), val = tensor("custom")]; + tensor var_2772_pad_0 = const()[name = tensor("op_2772_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2772 = conv(bias = layers_10_mlp_fc2_bias, dilations = var_2770, groups = var_2548, pad = var_2772_pad_0, pad_type = var_2772_pad_type_0, strides = var_2768, weight = layers_10_mlp_fc2_weight, x = input_351)[name = tensor("op_2772")]; + tensor x_89 = add(x = var_2754_cast_fp16, y = var_2772)[name = tensor("x_89")]; + tensor var_2545_promoted_1 = const()[name = tensor("op_2545_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_2546_promoted_1 = const()[name = tensor("op_2546_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x_91 = clip(alpha = var_2545_promoted_1, beta = var_2546_promoted_1, x = x_89)[name = tensor("x_91")]; + tensor var_2777 = const()[name = tensor("op_2777"), val = tensor([1])]; + tensor mean_45 = reduce_mean(axes = var_2777, keep_dims = var_2550, x = x_91)[name = tensor("mean_45")]; tensor zero_mean_45 = sub(x = x_91, y = mean_45)[name = tensor("zero_mean_45")]; - tensor var_2624_promoted_1 = const()[name = tensor("op_2624_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2858 = pow(x = zero_mean_45, y = var_2624_promoted_1)[name = tensor("op_2858")]; - tensor var_2859 = const()[name = tensor("op_2859"), val = tensor([1])]; - tensor var_2860 = reduce_mean(axes = var_2859, keep_dims = var_2619, x = var_2858)[name = tensor("op_2860")]; - tensor var_2861_to_fp16 = const()[name = tensor("op_2861_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2862_cast_fp16 = add(x = var_2860, y = var_2861_to_fp16)[name = tensor("op_2862_cast_fp16")]; + tensor var_2547_promoted_1 = const()[name = tensor("op_2547_promoted_1"), val = tensor(0x1p+1)]; + tensor var_2780 = pow(x = zero_mean_45, y = var_2547_promoted_1)[name = tensor("op_2780")]; + tensor var_2781 = const()[name = tensor("op_2781"), val = tensor([1])]; + tensor var_2782 = reduce_mean(axes = var_2781, keep_dims = var_2550, x = var_2780)[name = tensor("op_2782")]; + tensor var_2783_to_fp16 = const()[name = tensor("op_2783_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2784_cast_fp16 = add(x = var_2782, y = var_2783_to_fp16)[name = tensor("op_2784_cast_fp16")]; tensor denom_45_epsilon_0 = const()[name = tensor("denom_45_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0, x = var_2862_cast_fp16)[name = tensor("denom_45_cast_fp16")]; - tensor var_2864_cast_fp16 = mul(x = zero_mean_45, y = denom_45_cast_fp16)[name = tensor("op_2864_cast_fp16")]; - tensor var_2866_gamma_0_to_fp16 = const()[name = tensor("op_2866_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218572928)))]; - tensor var_2866_beta_0_to_fp16 = const()[name = tensor("op_2866_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218574528)))]; - tensor var_2866_epsilon_0_to_fp16 = const()[name = tensor("op_2866_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2866_cast_fp16 = batch_norm(beta = var_2866_beta_0_to_fp16, epsilon = var_2866_epsilon_0_to_fp16, gamma = var_2866_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2864_cast_fp16)[name = tensor("op_2866_cast_fp16")]; - tensor var_2871 = const()[name = tensor("op_2871"), val = tensor(1)]; - tensor var_2872 = const()[name = tensor("op_2872"), val = tensor(0)]; - tensor var_2873 = const()[name = tensor("op_2873"), val = tensor(true)]; - tensor var_2898 = const()[name = tensor("op_2898"), val = tensor([1, 1])]; - tensor var_2900 = const()[name = tensor("op_2900"), val = tensor([1, 1])]; - tensor var_2902_pad_type_0 = const()[name = tensor("op_2902_pad_type_0"), val = tensor("custom")]; - tensor var_2902_pad_0 = const()[name = tensor("op_2902_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2902 = conv(bias = layers_11_attention_q_proj_bias, dilations = var_2900, groups = var_2871, pad = var_2902_pad_0, pad_type = var_2902_pad_type_0, strides = var_2898, weight = layers_11_attention_q_proj_weight, x = var_2866_cast_fp16)[name = tensor("op_2902")]; - tensor var_2903 = const()[name = tensor("op_2903"), val = tensor([1, 64, 12, 512])]; - tensor var_2904 = reshape(shape = var_2903, x = var_2902)[name = tensor("op_2904")]; - tensor var_2907 = const()[name = tensor("op_2907"), val = tensor([1, 1])]; - tensor var_2909 = const()[name = tensor("op_2909"), val = tensor([1, 1])]; - tensor var_2911_pad_type_0 = const()[name = tensor("op_2911_pad_type_0"), val = tensor("custom")]; - tensor var_2911_pad_0 = const()[name = tensor("op_2911_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2911 = conv(bias = layers_11_attention_k_proj_bias, dilations = var_2909, groups = var_2871, pad = var_2911_pad_0, pad_type = var_2911_pad_type_0, strides = var_2907, weight = layers_11_attention_k_proj_weight, x = var_2866_cast_fp16)[name = tensor("op_2911")]; - tensor var_2912 = const()[name = tensor("op_2912"), val = tensor([1, 64, 12, 512])]; - tensor ks = reshape(shape = var_2912, x = var_2911)[name = tensor("ks")]; - tensor var_2916 = const()[name = tensor("op_2916"), val = tensor([1, 1])]; - tensor var_2918 = const()[name = tensor("op_2918"), val = tensor([1, 1])]; - tensor var_2920_pad_type_0 = const()[name = tensor("op_2920_pad_type_0"), val = tensor("custom")]; - tensor var_2920_pad_0 = const()[name = tensor("op_2920_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2920 = conv(bias = layers_11_attention_v_proj_bias, dilations = var_2918, groups = var_2871, pad = var_2920_pad_0, pad_type = var_2920_pad_type_0, strides = var_2916, weight = layers_11_attention_v_proj_weight, x = var_2866_cast_fp16)[name = tensor("op_2920")]; - tensor var_2921 = const()[name = tensor("op_2921"), val = tensor([1, 64, 12, 512])]; - tensor var_2922 = reshape(shape = var_2921, x = var_2920)[name = tensor("op_2922")]; - tensor tile_57 = const()[name = tensor("tile_57"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2923_axis_0 = const()[name = tensor("op_2923_axis_0"), val = tensor(2)]; - tensor var_2923_0, tensor var_2923_1, tensor var_2923_2, tensor var_2923_3, tensor var_2923_4, tensor var_2923_5, tensor var_2923_6, tensor var_2923_7, tensor var_2923_8, tensor var_2923_9, tensor var_2923_10, tensor var_2923_11 = split(axis = var_2923_axis_0, split_sizes = tile_57, x = var_2904)[name = tensor("op_2923")]; - tensor var_2936_perm_0 = const()[name = tensor("op_2936_perm_0"), val = tensor([0, 3, 2, 1])]; - tensor tile_58 = const()[name = tensor("tile_58"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2937_axis_0 = const()[name = tensor("op_2937_axis_0"), val = tensor(2)]; - tensor transpose_0 = transpose(perm = var_2936_perm_0, x = ks)[name = tensor("transpose_0")]; - tensor var_2937_0, tensor var_2937_1, tensor var_2937_2, tensor var_2937_3, tensor var_2937_4, tensor var_2937_5, tensor var_2937_6, tensor var_2937_7, tensor var_2937_8, tensor var_2937_9, tensor var_2937_10, tensor var_2937_11 = split(axis = var_2937_axis_0, split_sizes = tile_58, x = transpose_0)[name = tensor("op_2937")]; - tensor tile_59 = const()[name = tensor("tile_59"), val = tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])]; - tensor var_2950_axis_0 = const()[name = tensor("op_2950_axis_0"), val = tensor(2)]; - tensor var_2950_0, tensor var_2950_1, tensor var_2950_2, tensor var_2950_3, tensor var_2950_4, tensor var_2950_5, tensor var_2950_6, tensor var_2950_7, tensor var_2950_8, tensor var_2950_9, tensor var_2950_10, tensor var_2950_11 = split(axis = var_2950_axis_0, split_sizes = tile_59, x = var_2922)[name = tensor("op_2950")]; - tensor var_2964_equation_0 = const()[name = tensor("op_2964_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2964 = einsum(equation = var_2964_equation_0, values = (var_2937_0, var_2923_0))[name = tensor("op_2964")]; - tensor var_2965_to_fp16 = const()[name = tensor("op_2965_to_fp16"), val = tensor(0x1p-3)]; - tensor w_265_cast_fp16 = mul(x = var_2964, y = var_2965_to_fp16)[name = tensor("w_265_cast_fp16")]; - tensor var_2968_equation_0 = const()[name = tensor("op_2968_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2968 = einsum(equation = var_2968_equation_0, values = (var_2937_1, var_2923_1))[name = tensor("op_2968")]; - tensor var_2969_to_fp16 = const()[name = tensor("op_2969_to_fp16"), val = tensor(0x1p-3)]; - tensor w_267_cast_fp16 = mul(x = var_2968, y = var_2969_to_fp16)[name = tensor("w_267_cast_fp16")]; - tensor var_2972_equation_0 = const()[name = tensor("op_2972_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2972 = einsum(equation = var_2972_equation_0, values = (var_2937_2, var_2923_2))[name = tensor("op_2972")]; - tensor var_2973_to_fp16 = const()[name = tensor("op_2973_to_fp16"), val = tensor(0x1p-3)]; - tensor w_269_cast_fp16 = mul(x = var_2972, y = var_2973_to_fp16)[name = tensor("w_269_cast_fp16")]; - tensor var_2976_equation_0 = const()[name = tensor("op_2976_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2976 = einsum(equation = var_2976_equation_0, values = (var_2937_3, var_2923_3))[name = tensor("op_2976")]; - tensor var_2977_to_fp16 = const()[name = tensor("op_2977_to_fp16"), val = tensor(0x1p-3)]; - tensor w_271_cast_fp16 = mul(x = var_2976, y = var_2977_to_fp16)[name = tensor("w_271_cast_fp16")]; - tensor var_2980_equation_0 = const()[name = tensor("op_2980_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2980 = einsum(equation = var_2980_equation_0, values = (var_2937_4, var_2923_4))[name = tensor("op_2980")]; - tensor var_2981_to_fp16 = const()[name = tensor("op_2981_to_fp16"), val = tensor(0x1p-3)]; - tensor w_273_cast_fp16 = mul(x = var_2980, y = var_2981_to_fp16)[name = tensor("w_273_cast_fp16")]; - tensor var_2984_equation_0 = const()[name = tensor("op_2984_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2984 = einsum(equation = var_2984_equation_0, values = (var_2937_5, var_2923_5))[name = tensor("op_2984")]; - tensor var_2985_to_fp16 = const()[name = tensor("op_2985_to_fp16"), val = tensor(0x1p-3)]; - tensor w_275_cast_fp16 = mul(x = var_2984, y = var_2985_to_fp16)[name = tensor("w_275_cast_fp16")]; - tensor var_2988_equation_0 = const()[name = tensor("op_2988_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2988 = einsum(equation = var_2988_equation_0, values = (var_2937_6, var_2923_6))[name = tensor("op_2988")]; - tensor var_2989_to_fp16 = const()[name = tensor("op_2989_to_fp16"), val = tensor(0x1p-3)]; - tensor w_277_cast_fp16 = mul(x = var_2988, y = var_2989_to_fp16)[name = tensor("w_277_cast_fp16")]; - tensor var_2992_equation_0 = const()[name = tensor("op_2992_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2992 = einsum(equation = var_2992_equation_0, values = (var_2937_7, var_2923_7))[name = tensor("op_2992")]; - tensor var_2993_to_fp16 = const()[name = tensor("op_2993_to_fp16"), val = tensor(0x1p-3)]; - tensor w_279_cast_fp16 = mul(x = var_2992, y = var_2993_to_fp16)[name = tensor("w_279_cast_fp16")]; - tensor var_2996_equation_0 = const()[name = tensor("op_2996_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2996 = einsum(equation = var_2996_equation_0, values = (var_2937_8, var_2923_8))[name = tensor("op_2996")]; - tensor var_2997_to_fp16 = const()[name = tensor("op_2997_to_fp16"), val = tensor(0x1p-3)]; - tensor w_281_cast_fp16 = mul(x = var_2996, y = var_2997_to_fp16)[name = tensor("w_281_cast_fp16")]; - tensor var_3000_equation_0 = const()[name = tensor("op_3000_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_3000 = einsum(equation = var_3000_equation_0, values = (var_2937_9, var_2923_9))[name = tensor("op_3000")]; - tensor var_3001_to_fp16 = const()[name = tensor("op_3001_to_fp16"), val = tensor(0x1p-3)]; - tensor w_283_cast_fp16 = mul(x = var_3000, y = var_3001_to_fp16)[name = tensor("w_283_cast_fp16")]; - tensor var_3004_equation_0 = const()[name = tensor("op_3004_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_3004 = einsum(equation = var_3004_equation_0, values = (var_2937_10, var_2923_10))[name = tensor("op_3004")]; - tensor var_3005_to_fp16 = const()[name = tensor("op_3005_to_fp16"), val = tensor(0x1p-3)]; - tensor w_285_cast_fp16 = mul(x = var_3004, y = var_3005_to_fp16)[name = tensor("w_285_cast_fp16")]; - tensor var_3008_equation_0 = const()[name = tensor("op_3008_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_3008 = einsum(equation = var_3008_equation_0, values = (var_2937_11, var_2923_11))[name = tensor("op_3008")]; - tensor var_3009_to_fp16 = const()[name = tensor("op_3009_to_fp16"), val = tensor(0x1p-3)]; - tensor w_cast_fp16 = mul(x = var_3008, y = var_3009_to_fp16)[name = tensor("w_cast_fp16")]; - tensor var_3011_cast_fp16 = add(x = w_265_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3011_cast_fp16")]; - tensor var_3012_cast_fp16 = softmax(axis = var_2871, x = var_3011_cast_fp16)[name = tensor("op_3012_cast_fp16")]; - tensor var_3013_cast_fp16 = add(x = w_267_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3013_cast_fp16")]; - tensor var_3014_cast_fp16 = softmax(axis = var_2871, x = var_3013_cast_fp16)[name = tensor("op_3014_cast_fp16")]; - tensor var_3015_cast_fp16 = add(x = w_269_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3015_cast_fp16")]; - tensor var_3016_cast_fp16 = softmax(axis = var_2871, x = var_3015_cast_fp16)[name = tensor("op_3016_cast_fp16")]; - tensor var_3017_cast_fp16 = add(x = w_271_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3017_cast_fp16")]; - tensor var_3018_cast_fp16 = softmax(axis = var_2871, x = var_3017_cast_fp16)[name = tensor("op_3018_cast_fp16")]; - tensor var_3019_cast_fp16 = add(x = w_273_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3019_cast_fp16")]; - tensor var_3020_cast_fp16 = softmax(axis = var_2871, x = var_3019_cast_fp16)[name = tensor("op_3020_cast_fp16")]; - tensor var_3021_cast_fp16 = add(x = w_275_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3021_cast_fp16")]; - tensor var_3022_cast_fp16 = softmax(axis = var_2871, x = var_3021_cast_fp16)[name = tensor("op_3022_cast_fp16")]; - tensor var_3023_cast_fp16 = add(x = w_277_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3023_cast_fp16")]; - tensor var_3024_cast_fp16 = softmax(axis = var_2871, x = var_3023_cast_fp16)[name = tensor("op_3024_cast_fp16")]; - tensor var_3025_cast_fp16 = add(x = w_279_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3025_cast_fp16")]; - tensor var_3026_cast_fp16 = softmax(axis = var_2871, x = var_3025_cast_fp16)[name = tensor("op_3026_cast_fp16")]; - tensor var_3027_cast_fp16 = add(x = w_281_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3027_cast_fp16")]; - tensor var_3028_cast_fp16 = softmax(axis = var_2871, x = var_3027_cast_fp16)[name = tensor("op_3028_cast_fp16")]; - tensor var_3029_cast_fp16 = add(x = w_283_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3029_cast_fp16")]; - tensor var_3030_cast_fp16 = softmax(axis = var_2871, x = var_3029_cast_fp16)[name = tensor("op_3030_cast_fp16")]; - tensor var_3031_cast_fp16 = add(x = w_285_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3031_cast_fp16")]; - tensor var_3032_cast_fp16 = softmax(axis = var_2871, x = var_3031_cast_fp16)[name = tensor("op_3032_cast_fp16")]; - tensor var_3033_cast_fp16 = add(x = w_cast_fp16, y = var_73_cast_fp16)[name = tensor("op_3033_cast_fp16")]; - tensor var_3034_cast_fp16 = softmax(axis = var_2871, x = var_3033_cast_fp16)[name = tensor("op_3034_cast_fp16")]; - tensor var_3036_equation_0 = const()[name = tensor("op_3036_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3036_cast_fp16 = einsum(equation = var_3036_equation_0, values = (var_2950_0, var_3012_cast_fp16))[name = tensor("op_3036_cast_fp16")]; - tensor var_3038_equation_0 = const()[name = tensor("op_3038_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3038_cast_fp16 = einsum(equation = var_3038_equation_0, values = (var_2950_1, var_3014_cast_fp16))[name = tensor("op_3038_cast_fp16")]; - tensor var_3040_equation_0 = const()[name = tensor("op_3040_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3040_cast_fp16 = einsum(equation = var_3040_equation_0, values = (var_2950_2, var_3016_cast_fp16))[name = tensor("op_3040_cast_fp16")]; - tensor var_3042_equation_0 = const()[name = tensor("op_3042_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3042_cast_fp16 = einsum(equation = var_3042_equation_0, values = (var_2950_3, var_3018_cast_fp16))[name = tensor("op_3042_cast_fp16")]; - tensor var_3044_equation_0 = const()[name = tensor("op_3044_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3044_cast_fp16 = einsum(equation = var_3044_equation_0, values = (var_2950_4, var_3020_cast_fp16))[name = tensor("op_3044_cast_fp16")]; - tensor var_3046_equation_0 = const()[name = tensor("op_3046_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3046_cast_fp16 = einsum(equation = var_3046_equation_0, values = (var_2950_5, var_3022_cast_fp16))[name = tensor("op_3046_cast_fp16")]; - tensor var_3048_equation_0 = const()[name = tensor("op_3048_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3048_cast_fp16 = einsum(equation = var_3048_equation_0, values = (var_2950_6, var_3024_cast_fp16))[name = tensor("op_3048_cast_fp16")]; - tensor var_3050_equation_0 = const()[name = tensor("op_3050_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3050_cast_fp16 = einsum(equation = var_3050_equation_0, values = (var_2950_7, var_3026_cast_fp16))[name = tensor("op_3050_cast_fp16")]; - tensor var_3052_equation_0 = const()[name = tensor("op_3052_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3052_cast_fp16 = einsum(equation = var_3052_equation_0, values = (var_2950_8, var_3028_cast_fp16))[name = tensor("op_3052_cast_fp16")]; - tensor var_3054_equation_0 = const()[name = tensor("op_3054_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3054_cast_fp16 = einsum(equation = var_3054_equation_0, values = (var_2950_9, var_3030_cast_fp16))[name = tensor("op_3054_cast_fp16")]; - tensor var_3056_equation_0 = const()[name = tensor("op_3056_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3056_cast_fp16 = einsum(equation = var_3056_equation_0, values = (var_2950_10, var_3032_cast_fp16))[name = tensor("op_3056_cast_fp16")]; - tensor var_3058_equation_0 = const()[name = tensor("op_3058_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_3058_cast_fp16 = einsum(equation = var_3058_equation_0, values = (var_2950_11, var_3034_cast_fp16))[name = tensor("op_3058_cast_fp16")]; - tensor var_3060_interleave_0 = const()[name = tensor("op_3060_interleave_0"), val = tensor(false)]; - tensor var_3060_cast_fp16 = concat(axis = var_2871, interleave = var_3060_interleave_0, values = (var_3036_cast_fp16, var_3038_cast_fp16, var_3040_cast_fp16, var_3042_cast_fp16, var_3044_cast_fp16, var_3046_cast_fp16, var_3048_cast_fp16, var_3050_cast_fp16, var_3052_cast_fp16, var_3054_cast_fp16, var_3056_cast_fp16, var_3058_cast_fp16))[name = tensor("op_3060_cast_fp16")]; - tensor var_3064 = const()[name = tensor("op_3064"), val = tensor([1, 1])]; - tensor var_3066 = const()[name = tensor("op_3066"), val = tensor([1, 1])]; - tensor var_3068_pad_type_0 = const()[name = tensor("op_3068_pad_type_0"), val = tensor("custom")]; - tensor var_3068_pad_0 = const()[name = tensor("op_3068_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3068 = conv(bias = layers_11_attention_o_proj_bias, dilations = var_3066, groups = var_2871, pad = var_3068_pad_0, pad_type = var_3068_pad_type_0, strides = var_3064, weight = layers_11_attention_o_proj_weight, x = var_3060_cast_fp16)[name = tensor("op_3068")]; - tensor var_3070_interleave_0 = const()[name = tensor("op_3070_interleave_0"), val = tensor(false)]; - tensor var_3070 = concat(axis = var_2872, interleave = var_3070_interleave_0, values = var_3068)[name = tensor("op_3070")]; - tensor x_93 = add(x = var_2866_cast_fp16, y = var_3070)[name = tensor("x_93")]; - tensor var_2869_promoted = const()[name = tensor("op_2869_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2870_promoted = const()[name = tensor("op_2870_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_95 = clip(alpha = var_2869_promoted, beta = var_2870_promoted, x = x_93)[name = tensor("x_95")]; - tensor var_3075 = const()[name = tensor("op_3075"), val = tensor([1])]; - tensor mean_47 = reduce_mean(axes = var_3075, keep_dims = var_2873, x = x_95)[name = tensor("mean_47")]; + tensor denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0, x = var_2784_cast_fp16)[name = tensor("denom_45_cast_fp16")]; + tensor var_2786_cast_fp16 = mul(x = zero_mean_45, y = denom_45_cast_fp16)[name = tensor("op_2786_cast_fp16")]; + tensor var_2788_gamma_0_to_fp16 = const()[name = tensor("op_2788_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218572928)))]; + tensor var_2788_beta_0_to_fp16 = const()[name = tensor("op_2788_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218574528)))]; + tensor var_2788_epsilon_0_to_fp16 = const()[name = tensor("op_2788_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_2788_cast_fp16 = batch_norm(beta = var_2788_beta_0_to_fp16, epsilon = var_2788_epsilon_0_to_fp16, gamma = var_2788_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2786_cast_fp16)[name = tensor("op_2788_cast_fp16")]; + tensor var_2794 = const()[name = tensor("op_2794"), val = tensor(1)]; + tensor var_2795 = const()[name = tensor("op_2795"), val = tensor(0)]; + tensor var_2796 = const()[name = tensor("op_2796"), val = tensor(true)]; + tensor var_2818 = const()[name = tensor("op_2818"), val = tensor([1, 1])]; + tensor var_2820 = const()[name = tensor("op_2820"), val = tensor([1, 1])]; + tensor var_2822_pad_type_0 = const()[name = tensor("op_2822_pad_type_0"), val = tensor("custom")]; + tensor var_2822_pad_0 = const()[name = tensor("op_2822_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2822 = conv(bias = layers_11_attention_q_proj_bias, dilations = var_2820, groups = var_2794, pad = var_2822_pad_0, pad_type = var_2822_pad_type_0, strides = var_2818, weight = layers_11_attention_q_proj_weight, x = var_2788_cast_fp16)[name = tensor("op_2822")]; + tensor var_2825 = const()[name = tensor("op_2825"), val = tensor([1, 1])]; + tensor var_2827 = const()[name = tensor("op_2827"), val = tensor([1, 1])]; + tensor ks_pad_type_0 = const()[name = tensor("ks_pad_type_0"), val = tensor("custom")]; + tensor ks_pad_0 = const()[name = tensor("ks_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor ks = conv(bias = layers_11_attention_k_proj_bias, dilations = var_2827, groups = var_2794, pad = ks_pad_0, pad_type = ks_pad_type_0, strides = var_2825, weight = layers_11_attention_k_proj_weight, x = var_2788_cast_fp16)[name = tensor("ks")]; + tensor var_2832 = const()[name = tensor("op_2832"), val = tensor([1, 1])]; + tensor var_2834 = const()[name = tensor("op_2834"), val = tensor([1, 1])]; + tensor var_2836_pad_type_0 = const()[name = tensor("op_2836_pad_type_0"), val = tensor("custom")]; + tensor var_2836_pad_0 = const()[name = tensor("op_2836_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2836 = conv(bias = layers_11_attention_v_proj_bias, dilations = var_2834, groups = var_2794, pad = var_2836_pad_0, pad_type = var_2836_pad_type_0, strides = var_2832, weight = layers_11_attention_v_proj_weight, x = var_2788_cast_fp16)[name = tensor("op_2836")]; + tensor tile_57 = const()[name = tensor("tile_57"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2837_axis_0 = const()[name = tensor("op_2837_axis_0"), val = tensor(1)]; + tensor var_2837_0, tensor var_2837_1, tensor var_2837_2, tensor var_2837_3, tensor var_2837_4, tensor var_2837_5, tensor var_2837_6, tensor var_2837_7, tensor var_2837_8, tensor var_2837_9, tensor var_2837_10, tensor var_2837_11 = split(axis = var_2837_axis_0, split_sizes = tile_57, x = var_2822)[name = tensor("op_2837")]; + tensor var_2850_perm_0 = const()[name = tensor("op_2850_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor tile_58 = const()[name = tensor("tile_58"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2851_axis_0 = const()[name = tensor("op_2851_axis_0"), val = tensor(3)]; + tensor transpose_0 = transpose(perm = var_2850_perm_0, x = ks)[name = tensor("transpose_0")]; + tensor var_2851_0, tensor var_2851_1, tensor var_2851_2, tensor var_2851_3, tensor var_2851_4, tensor var_2851_5, tensor var_2851_6, tensor var_2851_7, tensor var_2851_8, tensor var_2851_9, tensor var_2851_10, tensor var_2851_11 = split(axis = var_2851_axis_0, split_sizes = tile_58, x = transpose_0)[name = tensor("op_2851")]; + tensor tile_59 = const()[name = tensor("tile_59"), val = tensor([64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64])]; + tensor var_2864_axis_0 = const()[name = tensor("op_2864_axis_0"), val = tensor(1)]; + tensor var_2864_0, tensor var_2864_1, tensor var_2864_2, tensor var_2864_3, tensor var_2864_4, tensor var_2864_5, tensor var_2864_6, tensor var_2864_7, tensor var_2864_8, tensor var_2864_9, tensor var_2864_10, tensor var_2864_11 = split(axis = var_2864_axis_0, split_sizes = tile_59, x = var_2836)[name = tensor("op_2864")]; + tensor var_2878_equation_0 = const()[name = tensor("op_2878_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2878 = einsum(equation = var_2878_equation_0, values = (var_2851_0, var_2837_0))[name = tensor("op_2878")]; + tensor var_2879_to_fp16 = const()[name = tensor("op_2879_to_fp16"), val = tensor(0x1p-3)]; + tensor w_265_cast_fp16 = mul(x = var_2878, y = var_2879_to_fp16)[name = tensor("w_265_cast_fp16")]; + tensor var_2882_equation_0 = const()[name = tensor("op_2882_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2882 = einsum(equation = var_2882_equation_0, values = (var_2851_1, var_2837_1))[name = tensor("op_2882")]; + tensor var_2883_to_fp16 = const()[name = tensor("op_2883_to_fp16"), val = tensor(0x1p-3)]; + tensor w_267_cast_fp16 = mul(x = var_2882, y = var_2883_to_fp16)[name = tensor("w_267_cast_fp16")]; + tensor var_2886_equation_0 = const()[name = tensor("op_2886_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2886 = einsum(equation = var_2886_equation_0, values = (var_2851_2, var_2837_2))[name = tensor("op_2886")]; + tensor var_2887_to_fp16 = const()[name = tensor("op_2887_to_fp16"), val = tensor(0x1p-3)]; + tensor w_269_cast_fp16 = mul(x = var_2886, y = var_2887_to_fp16)[name = tensor("w_269_cast_fp16")]; + tensor var_2890_equation_0 = const()[name = tensor("op_2890_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2890 = einsum(equation = var_2890_equation_0, values = (var_2851_3, var_2837_3))[name = tensor("op_2890")]; + tensor var_2891_to_fp16 = const()[name = tensor("op_2891_to_fp16"), val = tensor(0x1p-3)]; + tensor w_271_cast_fp16 = mul(x = var_2890, y = var_2891_to_fp16)[name = tensor("w_271_cast_fp16")]; + tensor var_2894_equation_0 = const()[name = tensor("op_2894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2894 = einsum(equation = var_2894_equation_0, values = (var_2851_4, var_2837_4))[name = tensor("op_2894")]; + tensor var_2895_to_fp16 = const()[name = tensor("op_2895_to_fp16"), val = tensor(0x1p-3)]; + tensor w_273_cast_fp16 = mul(x = var_2894, y = var_2895_to_fp16)[name = tensor("w_273_cast_fp16")]; + tensor var_2898_equation_0 = const()[name = tensor("op_2898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2898 = einsum(equation = var_2898_equation_0, values = (var_2851_5, var_2837_5))[name = tensor("op_2898")]; + tensor var_2899_to_fp16 = const()[name = tensor("op_2899_to_fp16"), val = tensor(0x1p-3)]; + tensor w_275_cast_fp16 = mul(x = var_2898, y = var_2899_to_fp16)[name = tensor("w_275_cast_fp16")]; + tensor var_2902_equation_0 = const()[name = tensor("op_2902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2902 = einsum(equation = var_2902_equation_0, values = (var_2851_6, var_2837_6))[name = tensor("op_2902")]; + tensor var_2903_to_fp16 = const()[name = tensor("op_2903_to_fp16"), val = tensor(0x1p-3)]; + tensor w_277_cast_fp16 = mul(x = var_2902, y = var_2903_to_fp16)[name = tensor("w_277_cast_fp16")]; + tensor var_2906_equation_0 = const()[name = tensor("op_2906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2906 = einsum(equation = var_2906_equation_0, values = (var_2851_7, var_2837_7))[name = tensor("op_2906")]; + tensor var_2907_to_fp16 = const()[name = tensor("op_2907_to_fp16"), val = tensor(0x1p-3)]; + tensor w_279_cast_fp16 = mul(x = var_2906, y = var_2907_to_fp16)[name = tensor("w_279_cast_fp16")]; + tensor var_2910_equation_0 = const()[name = tensor("op_2910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2910 = einsum(equation = var_2910_equation_0, values = (var_2851_8, var_2837_8))[name = tensor("op_2910")]; + tensor var_2911_to_fp16 = const()[name = tensor("op_2911_to_fp16"), val = tensor(0x1p-3)]; + tensor w_281_cast_fp16 = mul(x = var_2910, y = var_2911_to_fp16)[name = tensor("w_281_cast_fp16")]; + tensor var_2914_equation_0 = const()[name = tensor("op_2914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2914 = einsum(equation = var_2914_equation_0, values = (var_2851_9, var_2837_9))[name = tensor("op_2914")]; + tensor var_2915_to_fp16 = const()[name = tensor("op_2915_to_fp16"), val = tensor(0x1p-3)]; + tensor w_283_cast_fp16 = mul(x = var_2914, y = var_2915_to_fp16)[name = tensor("w_283_cast_fp16")]; + tensor var_2918_equation_0 = const()[name = tensor("op_2918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2918 = einsum(equation = var_2918_equation_0, values = (var_2851_10, var_2837_10))[name = tensor("op_2918")]; + tensor var_2919_to_fp16 = const()[name = tensor("op_2919_to_fp16"), val = tensor(0x1p-3)]; + tensor w_285_cast_fp16 = mul(x = var_2918, y = var_2919_to_fp16)[name = tensor("w_285_cast_fp16")]; + tensor var_2922_equation_0 = const()[name = tensor("op_2922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2922 = einsum(equation = var_2922_equation_0, values = (var_2851_11, var_2837_11))[name = tensor("op_2922")]; + tensor var_2923_to_fp16 = const()[name = tensor("op_2923_to_fp16"), val = tensor(0x1p-3)]; + tensor w_cast_fp16 = mul(x = var_2922, y = var_2923_to_fp16)[name = tensor("w_cast_fp16")]; + tensor input_355_cast_fp16 = add(x = w_265_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_355_cast_fp16")]; + tensor var_2926_cast_fp16 = softmax(axis = var_2794, x = input_355_cast_fp16)[name = tensor("op_2926_cast_fp16")]; + tensor input_357_cast_fp16 = add(x = w_267_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_357_cast_fp16")]; + tensor var_2928_cast_fp16 = softmax(axis = var_2794, x = input_357_cast_fp16)[name = tensor("op_2928_cast_fp16")]; + tensor input_359_cast_fp16 = add(x = w_269_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_359_cast_fp16")]; + tensor var_2930_cast_fp16 = softmax(axis = var_2794, x = input_359_cast_fp16)[name = tensor("op_2930_cast_fp16")]; + tensor input_361_cast_fp16 = add(x = w_271_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_361_cast_fp16")]; + tensor var_2932_cast_fp16 = softmax(axis = var_2794, x = input_361_cast_fp16)[name = tensor("op_2932_cast_fp16")]; + tensor input_363_cast_fp16 = add(x = w_273_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_363_cast_fp16")]; + tensor var_2934_cast_fp16 = softmax(axis = var_2794, x = input_363_cast_fp16)[name = tensor("op_2934_cast_fp16")]; + tensor input_365_cast_fp16 = add(x = w_275_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_365_cast_fp16")]; + tensor var_2936_cast_fp16 = softmax(axis = var_2794, x = input_365_cast_fp16)[name = tensor("op_2936_cast_fp16")]; + tensor input_367_cast_fp16 = add(x = w_277_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_367_cast_fp16")]; + tensor var_2938_cast_fp16 = softmax(axis = var_2794, x = input_367_cast_fp16)[name = tensor("op_2938_cast_fp16")]; + tensor input_369_cast_fp16 = add(x = w_279_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_369_cast_fp16")]; + tensor var_2940_cast_fp16 = softmax(axis = var_2794, x = input_369_cast_fp16)[name = tensor("op_2940_cast_fp16")]; + tensor input_371_cast_fp16 = add(x = w_281_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_371_cast_fp16")]; + tensor var_2942_cast_fp16 = softmax(axis = var_2794, x = input_371_cast_fp16)[name = tensor("op_2942_cast_fp16")]; + tensor input_373_cast_fp16 = add(x = w_283_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_373_cast_fp16")]; + tensor var_2944_cast_fp16 = softmax(axis = var_2794, x = input_373_cast_fp16)[name = tensor("op_2944_cast_fp16")]; + tensor input_375_cast_fp16 = add(x = w_285_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_375_cast_fp16")]; + tensor var_2946_cast_fp16 = softmax(axis = var_2794, x = input_375_cast_fp16)[name = tensor("op_2946_cast_fp16")]; + tensor input_377_cast_fp16 = add(x = w_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_377_cast_fp16")]; + tensor var_2948_cast_fp16 = softmax(axis = var_2794, x = input_377_cast_fp16)[name = tensor("op_2948_cast_fp16")]; + tensor var_2950_equation_0 = const()[name = tensor("op_2950_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2950_cast_fp16 = einsum(equation = var_2950_equation_0, values = (var_2864_0, var_2926_cast_fp16))[name = tensor("op_2950_cast_fp16")]; + tensor var_2952_equation_0 = const()[name = tensor("op_2952_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2952_cast_fp16 = einsum(equation = var_2952_equation_0, values = (var_2864_1, var_2928_cast_fp16))[name = tensor("op_2952_cast_fp16")]; + tensor var_2954_equation_0 = const()[name = tensor("op_2954_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2954_cast_fp16 = einsum(equation = var_2954_equation_0, values = (var_2864_2, var_2930_cast_fp16))[name = tensor("op_2954_cast_fp16")]; + tensor var_2956_equation_0 = const()[name = tensor("op_2956_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2956_cast_fp16 = einsum(equation = var_2956_equation_0, values = (var_2864_3, var_2932_cast_fp16))[name = tensor("op_2956_cast_fp16")]; + tensor var_2958_equation_0 = const()[name = tensor("op_2958_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2958_cast_fp16 = einsum(equation = var_2958_equation_0, values = (var_2864_4, var_2934_cast_fp16))[name = tensor("op_2958_cast_fp16")]; + tensor var_2960_equation_0 = const()[name = tensor("op_2960_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2960_cast_fp16 = einsum(equation = var_2960_equation_0, values = (var_2864_5, var_2936_cast_fp16))[name = tensor("op_2960_cast_fp16")]; + tensor var_2962_equation_0 = const()[name = tensor("op_2962_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2962_cast_fp16 = einsum(equation = var_2962_equation_0, values = (var_2864_6, var_2938_cast_fp16))[name = tensor("op_2962_cast_fp16")]; + tensor var_2964_equation_0 = const()[name = tensor("op_2964_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2964_cast_fp16 = einsum(equation = var_2964_equation_0, values = (var_2864_7, var_2940_cast_fp16))[name = tensor("op_2964_cast_fp16")]; + tensor var_2966_equation_0 = const()[name = tensor("op_2966_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2966_cast_fp16 = einsum(equation = var_2966_equation_0, values = (var_2864_8, var_2942_cast_fp16))[name = tensor("op_2966_cast_fp16")]; + tensor var_2968_equation_0 = const()[name = tensor("op_2968_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2968_cast_fp16 = einsum(equation = var_2968_equation_0, values = (var_2864_9, var_2944_cast_fp16))[name = tensor("op_2968_cast_fp16")]; + tensor var_2970_equation_0 = const()[name = tensor("op_2970_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2970_cast_fp16 = einsum(equation = var_2970_equation_0, values = (var_2864_10, var_2946_cast_fp16))[name = tensor("op_2970_cast_fp16")]; + tensor var_2972_equation_0 = const()[name = tensor("op_2972_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2972_cast_fp16 = einsum(equation = var_2972_equation_0, values = (var_2864_11, var_2948_cast_fp16))[name = tensor("op_2972_cast_fp16")]; + tensor var_2974_interleave_0 = const()[name = tensor("op_2974_interleave_0"), val = tensor(false)]; + tensor var_2974_cast_fp16 = concat(axis = var_2794, interleave = var_2974_interleave_0, values = (var_2950_cast_fp16, var_2952_cast_fp16, var_2954_cast_fp16, var_2956_cast_fp16, var_2958_cast_fp16, var_2960_cast_fp16, var_2962_cast_fp16, var_2964_cast_fp16, var_2966_cast_fp16, var_2968_cast_fp16, var_2970_cast_fp16, var_2972_cast_fp16))[name = tensor("op_2974_cast_fp16")]; + tensor var_2978 = const()[name = tensor("op_2978"), val = tensor([1, 1])]; + tensor var_2980 = const()[name = tensor("op_2980"), val = tensor([1, 1])]; + tensor var_2982_pad_type_0 = const()[name = tensor("op_2982_pad_type_0"), val = tensor("custom")]; + tensor var_2982_pad_0 = const()[name = tensor("op_2982_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2982 = conv(bias = layers_11_attention_o_proj_bias, dilations = var_2980, groups = var_2794, pad = var_2982_pad_0, pad_type = var_2982_pad_type_0, strides = var_2978, weight = layers_11_attention_o_proj_weight, x = var_2974_cast_fp16)[name = tensor("op_2982")]; + tensor var_2984_interleave_0 = const()[name = tensor("op_2984_interleave_0"), val = tensor(false)]; + tensor var_2984 = concat(axis = var_2795, interleave = var_2984_interleave_0, values = var_2982)[name = tensor("op_2984")]; + tensor x_93 = add(x = var_2788_cast_fp16, y = var_2984)[name = tensor("x_93")]; + tensor var_2791_promoted = const()[name = tensor("op_2791_promoted"), val = tensor(-0x1.f4p+7)]; + tensor var_2792_promoted = const()[name = tensor("op_2792_promoted"), val = tensor(0x1.f4p+7)]; + tensor x_95 = clip(alpha = var_2791_promoted, beta = var_2792_promoted, x = x_93)[name = tensor("x_95")]; + tensor var_2989 = const()[name = tensor("op_2989"), val = tensor([1])]; + tensor mean_47 = reduce_mean(axes = var_2989, keep_dims = var_2796, x = x_95)[name = tensor("mean_47")]; tensor zero_mean_47 = sub(x = x_95, y = mean_47)[name = tensor("zero_mean_47")]; - tensor var_2878_promoted = const()[name = tensor("op_2878_promoted"), val = tensor(0x1p+1)]; - tensor var_3078 = pow(x = zero_mean_47, y = var_2878_promoted)[name = tensor("op_3078")]; - tensor var_3079 = const()[name = tensor("op_3079"), val = tensor([1])]; - tensor var_3080 = reduce_mean(axes = var_3079, keep_dims = var_2873, x = var_3078)[name = tensor("op_3080")]; - tensor var_3081_to_fp16 = const()[name = tensor("op_3081_to_fp16"), val = tensor(0x1p-24)]; - tensor var_3082_cast_fp16 = add(x = var_3080, y = var_3081_to_fp16)[name = tensor("op_3082_cast_fp16")]; + tensor var_2793_promoted = const()[name = tensor("op_2793_promoted"), val = tensor(0x1p+1)]; + tensor var_2992 = pow(x = zero_mean_47, y = var_2793_promoted)[name = tensor("op_2992")]; + tensor var_2993 = const()[name = tensor("op_2993"), val = tensor([1])]; + tensor var_2994 = reduce_mean(axes = var_2993, keep_dims = var_2796, x = var_2992)[name = tensor("op_2994")]; + tensor var_2995_to_fp16 = const()[name = tensor("op_2995_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2996_cast_fp16 = add(x = var_2994, y = var_2995_to_fp16)[name = tensor("op_2996_cast_fp16")]; tensor denom_47_epsilon_0 = const()[name = tensor("denom_47_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0, x = var_3082_cast_fp16)[name = tensor("denom_47_cast_fp16")]; - tensor var_3084_cast_fp16 = mul(x = zero_mean_47, y = denom_47_cast_fp16)[name = tensor("op_3084_cast_fp16")]; - tensor var_3086_gamma_0_to_fp16 = const()[name = tensor("op_3086_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218576128)))]; - tensor var_3086_beta_0_to_fp16 = const()[name = tensor("op_3086_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218577728)))]; - tensor var_3086_epsilon_0_to_fp16 = const()[name = tensor("op_3086_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_3086_cast_fp16 = batch_norm(beta = var_3086_beta_0_to_fp16, epsilon = var_3086_epsilon_0_to_fp16, gamma = var_3086_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3084_cast_fp16)[name = tensor("op_3086_cast_fp16")]; - tensor var_3092 = const()[name = tensor("op_3092"), val = tensor([1, 1])]; - tensor var_3094 = const()[name = tensor("op_3094"), val = tensor([1, 1])]; - tensor var_3096_pad_type_0 = const()[name = tensor("op_3096_pad_type_0"), val = tensor("custom")]; - tensor var_3096_pad_0 = const()[name = tensor("op_3096_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3096 = conv(bias = layers_11_mlp_fc1_bias, dilations = var_3094, groups = var_2871, pad = var_3096_pad_0, pad_type = var_3096_pad_type_0, strides = var_3092, weight = layers_11_mlp_fc1_weight, x = var_3086_cast_fp16)[name = tensor("op_3096")]; - tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; - tensor input_95 = gelu(mode = input_95_mode_0, x = var_3096)[name = tensor("input_95")]; - tensor var_3100 = const()[name = tensor("op_3100"), val = tensor([1, 1])]; - tensor var_3102 = const()[name = tensor("op_3102"), val = tensor([1, 1])]; - tensor var_3104_pad_type_0 = const()[name = tensor("op_3104_pad_type_0"), val = tensor("custom")]; - tensor var_3104_pad_0 = const()[name = tensor("op_3104_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3104 = conv(bias = layers_11_mlp_fc2_bias, dilations = var_3102, groups = var_2871, pad = var_3104_pad_0, pad_type = var_3104_pad_type_0, strides = var_3100, weight = layers_11_mlp_fc2_weight, x = input_95)[name = tensor("op_3104")]; - tensor x_97 = add(x = var_3086_cast_fp16, y = var_3104)[name = tensor("x_97")]; - tensor var_2869_promoted_1 = const()[name = tensor("op_2869_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2870_promoted_1 = const()[name = tensor("op_2870_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x = clip(alpha = var_2869_promoted_1, beta = var_2870_promoted_1, x = x_97)[name = tensor("x")]; - tensor var_3109 = const()[name = tensor("op_3109"), val = tensor([1])]; - tensor mean = reduce_mean(axes = var_3109, keep_dims = var_2873, x = x)[name = tensor("mean")]; + tensor denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0, x = var_2996_cast_fp16)[name = tensor("denom_47_cast_fp16")]; + tensor var_2998_cast_fp16 = mul(x = zero_mean_47, y = denom_47_cast_fp16)[name = tensor("op_2998_cast_fp16")]; + tensor var_3000_gamma_0_to_fp16 = const()[name = tensor("op_3000_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218576128)))]; + tensor var_3000_beta_0_to_fp16 = const()[name = tensor("op_3000_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218577728)))]; + tensor var_3000_epsilon_0_to_fp16 = const()[name = tensor("op_3000_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3000_cast_fp16 = batch_norm(beta = var_3000_beta_0_to_fp16, epsilon = var_3000_epsilon_0_to_fp16, gamma = var_3000_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2998_cast_fp16)[name = tensor("op_3000_cast_fp16")]; + tensor var_3006 = const()[name = tensor("op_3006"), val = tensor([1, 1])]; + tensor var_3008 = const()[name = tensor("op_3008"), val = tensor([1, 1])]; + tensor var_3010_pad_type_0 = const()[name = tensor("op_3010_pad_type_0"), val = tensor("custom")]; + tensor var_3010_pad_0 = const()[name = tensor("op_3010_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3010 = conv(bias = layers_11_mlp_fc1_bias, dilations = var_3008, groups = var_2794, pad = var_3010_pad_0, pad_type = var_3010_pad_type_0, strides = var_3006, weight = layers_11_mlp_fc1_weight, x = var_3000_cast_fp16)[name = tensor("op_3010")]; + tensor input_383_mode_0 = const()[name = tensor("input_383_mode_0"), val = tensor("EXACT")]; + tensor input_383 = gelu(mode = input_383_mode_0, x = var_3010)[name = tensor("input_383")]; + tensor var_3014 = const()[name = tensor("op_3014"), val = tensor([1, 1])]; + tensor var_3016 = const()[name = tensor("op_3016"), val = tensor([1, 1])]; + tensor var_3018_pad_type_0 = const()[name = tensor("op_3018_pad_type_0"), val = tensor("custom")]; + tensor var_3018_pad_0 = const()[name = tensor("op_3018_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3018 = conv(bias = layers_11_mlp_fc2_bias, dilations = var_3016, groups = var_2794, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3014, weight = layers_11_mlp_fc2_weight, x = input_383)[name = tensor("op_3018")]; + tensor x_97 = add(x = var_3000_cast_fp16, y = var_3018)[name = tensor("x_97")]; + tensor var_2791_promoted_1 = const()[name = tensor("op_2791_promoted_1"), val = tensor(-0x1.f4p+7)]; + tensor var_2792_promoted_1 = const()[name = tensor("op_2792_promoted_1"), val = tensor(0x1.f4p+7)]; + tensor x = clip(alpha = var_2791_promoted_1, beta = var_2792_promoted_1, x = x_97)[name = tensor("x")]; + tensor var_3023 = const()[name = tensor("op_3023"), val = tensor([1])]; + tensor mean = reduce_mean(axes = var_3023, keep_dims = var_2796, x = x)[name = tensor("mean")]; tensor zero_mean = sub(x = x, y = mean)[name = tensor("zero_mean")]; - tensor var_2878_promoted_1 = const()[name = tensor("op_2878_promoted_1"), val = tensor(0x1p+1)]; - tensor var_3112 = pow(x = zero_mean, y = var_2878_promoted_1)[name = tensor("op_3112")]; - tensor var_3113 = const()[name = tensor("op_3113"), val = tensor([1])]; - tensor var_3114 = reduce_mean(axes = var_3113, keep_dims = var_2873, x = var_3112)[name = tensor("op_3114")]; - tensor var_3115_to_fp16 = const()[name = tensor("op_3115_to_fp16"), val = tensor(0x1p-24)]; - tensor var_3116_cast_fp16 = add(x = var_3114, y = var_3115_to_fp16)[name = tensor("op_3116_cast_fp16")]; + tensor var_2793_promoted_1 = const()[name = tensor("op_2793_promoted_1"), val = tensor(0x1p+1)]; + tensor var_3026 = pow(x = zero_mean, y = var_2793_promoted_1)[name = tensor("op_3026")]; + tensor var_3027 = const()[name = tensor("op_3027"), val = tensor([1])]; + tensor var_3028 = reduce_mean(axes = var_3027, keep_dims = var_2796, x = var_3026)[name = tensor("op_3028")]; + tensor var_3029_to_fp16 = const()[name = tensor("op_3029_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3030_cast_fp16 = add(x = var_3028, y = var_3029_to_fp16)[name = tensor("op_3030_cast_fp16")]; tensor denom_49_epsilon_0 = const()[name = tensor("denom_49_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0, x = var_3116_cast_fp16)[name = tensor("denom_49_cast_fp16")]; - tensor var_3118_cast_fp16 = mul(x = zero_mean, y = denom_49_cast_fp16)[name = tensor("op_3118_cast_fp16")]; - tensor var_3120_gamma_0_to_fp16 = const()[name = tensor("op_3120_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218579328)))]; - tensor var_3120_beta_0_to_fp16 = const()[name = tensor("op_3120_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218580928)))]; - tensor var_3120_epsilon_0_to_fp16 = const()[name = tensor("op_3120_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_3120_cast_fp16 = batch_norm(beta = var_3120_beta_0_to_fp16, epsilon = var_3120_epsilon_0_to_fp16, gamma = var_3120_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3118_cast_fp16)[name = tensor("op_3120_cast_fp16")]; - tensor var_3134_begin_0 = const()[name = tensor("op_3134_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3134_end_0 = const()[name = tensor("op_3134_end_0"), val = tensor([1, 768, 1, 512])]; - tensor var_3134_end_mask_0 = const()[name = tensor("op_3134_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_3134_squeeze_mask_0 = const()[name = tensor("op_3134_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_3134 = slice_by_index(begin = var_3134_begin_0, end = var_3134_end_0, end_mask = var_3134_end_mask_0, squeeze_mask = var_3134_squeeze_mask_0, x = var_3120_cast_fp16)[name = tensor("op_3134")]; - tensor var_3137_begin_0 = const()[name = tensor("op_3137_begin_0"), val = tensor([0, 0, 0])]; - tensor var_3137_end_0 = const()[name = tensor("op_3137_end_0"), val = tensor([1, 768, 1])]; - tensor var_3137_end_mask_0 = const()[name = tensor("op_3137_end_mask_0"), val = tensor([true, true, false])]; - tensor var_3137_squeeze_mask_0 = const()[name = tensor("op_3137_squeeze_mask_0"), val = tensor([false, false, true])]; - tensor var_3137 = slice_by_index(begin = var_3137_begin_0, end = var_3137_end_0, end_mask = var_3137_end_mask_0, squeeze_mask = var_3137_squeeze_mask_0, x = var_3134)[name = tensor("op_3137")]; - tensor var_3145 = const()[name = tensor("op_3145"), val = tensor([1])]; - tensor var_3146 = const()[name = tensor("op_3146"), val = tensor(true)]; - tensor abs_0_cast_fp16 = abs(x = var_3137)[name = tensor("abs_0_cast_fp16")]; + tensor denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0, x = var_3030_cast_fp16)[name = tensor("denom_49_cast_fp16")]; + tensor var_3032_cast_fp16 = mul(x = zero_mean, y = denom_49_cast_fp16)[name = tensor("op_3032_cast_fp16")]; + tensor var_3034_gamma_0_to_fp16 = const()[name = tensor("op_3034_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218579328)))]; + tensor var_3034_beta_0_to_fp16 = const()[name = tensor("op_3034_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(218580928)))]; + tensor var_3034_epsilon_0_to_fp16 = const()[name = tensor("op_3034_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3034_cast_fp16 = batch_norm(beta = var_3034_beta_0_to_fp16, epsilon = var_3034_epsilon_0_to_fp16, gamma = var_3034_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3032_cast_fp16)[name = tensor("op_3034_cast_fp16")]; + tensor var_3048_begin_0 = const()[name = tensor("op_3048_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3048_end_0 = const()[name = tensor("op_3048_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_3048_end_mask_0 = const()[name = tensor("op_3048_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_3048_squeeze_mask_0 = const()[name = tensor("op_3048_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor var_3048 = slice_by_index(begin = var_3048_begin_0, end = var_3048_end_0, end_mask = var_3048_end_mask_0, squeeze_mask = var_3048_squeeze_mask_0, x = var_3034_cast_fp16)[name = tensor("op_3048")]; + tensor var_3051_begin_0 = const()[name = tensor("op_3051_begin_0"), val = tensor([0, 0, 0])]; + tensor var_3051_end_0 = const()[name = tensor("op_3051_end_0"), val = tensor([1, 768, 1])]; + tensor var_3051_end_mask_0 = const()[name = tensor("op_3051_end_mask_0"), val = tensor([true, true, false])]; + tensor var_3051_squeeze_mask_0 = const()[name = tensor("op_3051_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor var_3051 = slice_by_index(begin = var_3051_begin_0, end = var_3051_end_0, end_mask = var_3051_end_mask_0, squeeze_mask = var_3051_squeeze_mask_0, x = var_3048)[name = tensor("op_3051")]; + tensor var_3059 = const()[name = tensor("op_3059"), val = tensor([1])]; + tensor var_3060 = const()[name = tensor("op_3060"), val = tensor(true)]; + tensor abs_0_cast_fp16 = abs(x = var_3051)[name = tensor("abs_0_cast_fp16")]; tensor const_120_promoted_to_fp16 = const()[name = tensor("const_120_promoted_to_fp16"), val = tensor(0x1p+1)]; tensor pow_0_cast_fp16 = pow(x = abs_0_cast_fp16, y = const_120_promoted_to_fp16)[name = tensor("pow_0_cast_fp16")]; - tensor reduce_sum_0_cast_fp16 = reduce_sum(axes = var_3145, keep_dims = var_3146, x = pow_0_cast_fp16)[name = tensor("reduce_sum_0_cast_fp16")]; - tensor var_3147_y_0_to_fp16 = const()[name = tensor("op_3147_y_0_to_fp16"), val = tensor(0x1p-1)]; - tensor var_3147_cast_fp16 = pow(x = reduce_sum_0_cast_fp16, y = var_3147_y_0_to_fp16)[name = tensor("op_3147_cast_fp16")]; - tensor var_3148_to_fp16 = const()[name = tensor("op_3148_to_fp16"), val = tensor(0x1p-24)]; - tensor var_3149_cast_fp16 = maximum(x = var_3147_cast_fp16, y = var_3148_to_fp16)[name = tensor("op_3149_cast_fp16")]; + tensor reduce_sum_0_cast_fp16 = reduce_sum(axes = var_3059, keep_dims = var_3060, x = pow_0_cast_fp16)[name = tensor("reduce_sum_0_cast_fp16")]; + tensor var_3061_y_0_to_fp16 = const()[name = tensor("op_3061_y_0_to_fp16"), val = tensor(0x1p-1)]; + tensor var_3061_cast_fp16 = pow(x = reduce_sum_0_cast_fp16, y = var_3061_y_0_to_fp16)[name = tensor("op_3061_cast_fp16")]; + tensor var_3062_to_fp16 = const()[name = tensor("op_3062_to_fp16"), val = tensor(0x1p-24)]; + tensor var_3063_cast_fp16 = maximum(x = var_3061_cast_fp16, y = var_3062_to_fp16)[name = tensor("op_3063_cast_fp16")]; tensor denom_reps_0 = const()[name = tensor("denom_reps_0"), val = tensor([1, 768])]; - tensor denom_cast_fp16 = tile(reps = denom_reps_0, x = var_3149_cast_fp16)[name = tensor("denom_cast_fp16")]; - tensor outputs = real_div(x = var_3137, y = denom_cast_fp16)[name = tensor("op_3151_cast_fp16")]; + tensor denom_cast_fp16 = tile(reps = denom_reps_0, x = var_3063_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor outputs = real_div(x = var_3051, y = denom_cast_fp16)[name = tensor("op_3065_cast_fp16")]; } -> (outputs); } \ No newline at end of file