diff --git "a/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil" "b/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil" --- "a/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil" +++ "b/ane-snowflake-arctic-embed-s/model.mlmodelc/model.mil" @@ -1,5 +1,5 @@ program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.6.1"}, {"coremlc-version", "3304.7.1"}, {"coremltools-component-torch", "2.1.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.2"}})] +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.6.1"}, {"coremlc-version", "3304.7.1"}})] { func main(tensor input_ids, tensor mask) { tensor embeddings_position_embeddings_weight = const()[name = tensor("embeddings_position_embeddings_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; @@ -149,2302 +149,2025 @@ program(1.0) tensor layers_11_mlp_fc1_weight = const()[name = tensor("layers_11_mlp_fc1_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64426816)))]; tensor layers_11_mlp_fc2_bias = const()[name = tensor("layers_11_mlp_fc2_bias"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65606528)))]; tensor layers_11_mlp_fc2_weight = const()[name = tensor("layers_11_mlp_fc2_weight"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65607360)))]; - tensor var_30 = const()[name = tensor("op_30"), val = tensor(true)]; tensor inputs_embeds_axis_0 = const()[name = tensor("inputs_embeds_axis_0"), val = tensor(0)]; tensor inputs_embeds_batch_dims_0 = const()[name = tensor("inputs_embeds_batch_dims_0"), val = tensor(0)]; tensor inputs_embeds_validate_indices_0 = const()[name = tensor("inputs_embeds_validate_indices_0"), val = tensor(false)]; tensor input_ids_to_int16_dtype_0 = const()[name = tensor("input_ids_to_int16_dtype_0"), val = tensor("int16")]; - tensor cast_5 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = tensor("cast_5")]; - tensor inputs_embeds_cast_uint16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = cast_5, validate_indices = inputs_embeds_validate_indices_0, x = embeddings_word_embeddings_weight)[name = tensor("inputs_embeds_cast_uint16")]; - tensor var_45 = add(x = inputs_embeds_cast_uint16, y = embeddings_token_type_embeddings_weight)[name = tensor("op_45")]; - tensor embeddings_1 = add(x = var_45, y = embeddings_position_embeddings_weight)[name = tensor("embeddings_1")]; - tensor var_47_perm_0 = const()[name = tensor("op_47_perm_0"), val = tensor([0, 2, 1])]; + tensor cast_55 = cast(dtype = input_ids_to_int16_dtype_0, x = input_ids)[name = tensor("cast_55")]; + tensor inputs_embeds_cast_uint16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = cast_55, validate_indices = inputs_embeds_validate_indices_0, x = embeddings_word_embeddings_weight)[name = tensor("inputs_embeds_cast_uint16")]; + tensor var_44 = add(x = inputs_embeds_cast_uint16, y = embeddings_token_type_embeddings_weight)[name = tensor("op_44")]; + tensor embeddings_1 = add(x = var_44, y = embeddings_position_embeddings_weight)[name = tensor("embeddings_1")]; + tensor var_46_perm_0 = const()[name = tensor("op_46_perm_0"), val = tensor([0, 2, 1])]; tensor x_1_axes_0 = const()[name = tensor("x_1_axes_0"), val = tensor([2])]; - tensor transpose_12 = transpose(perm = var_47_perm_0, x = embeddings_1)[name = tensor("transpose_12")]; - tensor x_1 = expand_dims(axes = x_1_axes_0, x = transpose_12)[name = tensor("x_1")]; - tensor var_28_promoted = const()[name = tensor("op_28_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_29_promoted = const()[name = tensor("op_29_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_3 = clip(alpha = var_28_promoted, beta = var_29_promoted, x = x_1)[name = tensor("x_3")]; - tensor var_52 = const()[name = tensor("op_52"), val = tensor([1])]; - tensor mean_1 = reduce_mean(axes = var_52, keep_dims = var_30, x = x_3)[name = tensor("mean_1")]; - tensor zero_mean_1 = sub(x = x_3, y = mean_1)[name = tensor("zero_mean_1")]; - tensor var_33_promoted = const()[name = tensor("op_33_promoted"), val = tensor(0x1p+1)]; - tensor var_55 = pow(x = zero_mean_1, y = var_33_promoted)[name = tensor("op_55")]; - tensor var_56 = const()[name = tensor("op_56"), val = tensor([1])]; - tensor var_57 = reduce_mean(axes = var_56, keep_dims = var_30, x = var_55)[name = tensor("op_57")]; - tensor var_58_to_fp16 = const()[name = tensor("op_58_to_fp16"), val = tensor(0x1p-24)]; - tensor var_59_cast_fp16 = add(x = var_57, y = var_58_to_fp16)[name = tensor("op_59_cast_fp16")]; - tensor denom_1_epsilon_0 = const()[name = tensor("denom_1_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0, x = var_59_cast_fp16)[name = tensor("denom_1_cast_fp16")]; - tensor var_61_cast_fp16 = mul(x = zero_mean_1, y = denom_1_cast_fp16)[name = tensor("op_61_cast_fp16")]; - tensor var_63_mean_0_to_fp16 = const()[name = tensor("op_63_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66787072)))]; - tensor var_63_variance_0_to_fp16 = const()[name = tensor("op_63_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66787904)))]; - tensor var_63_gamma_0_to_fp16 = const()[name = tensor("op_63_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66788736)))]; - tensor var_63_beta_0_to_fp16 = const()[name = tensor("op_63_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66789568)))]; - tensor var_63_epsilon_0_to_fp16 = const()[name = tensor("op_63_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_63_cast_fp16 = batch_norm(beta = var_63_beta_0_to_fp16, epsilon = var_63_epsilon_0_to_fp16, gamma = var_63_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_61_cast_fp16)[name = tensor("op_63_cast_fp16")]; - tensor var_76_axes_0 = const()[name = tensor("op_76_axes_0"), val = tensor([2])]; - tensor var_76_cast_fp16 = expand_dims(axes = var_76_axes_0, x = mask)[name = tensor("op_76_cast_fp16")]; - tensor var_78_axes_0 = const()[name = tensor("op_78_axes_0"), val = tensor([3])]; - tensor var_78_cast_fp16 = expand_dims(axes = var_78_axes_0, x = var_76_cast_fp16)[name = tensor("op_78_cast_fp16")]; - tensor var_80_to_fp16 = const()[name = tensor("op_80_to_fp16"), val = tensor(0x1p+0)]; - tensor var_81_cast_fp16 = sub(x = var_78_cast_fp16, y = var_80_to_fp16)[name = tensor("op_81_cast_fp16")]; - tensor var_82_to_fp16 = const()[name = tensor("op_82_to_fp16"), val = tensor(0x1.388p+13)]; - tensor var_83_cast_fp16 = mul(x = var_81_cast_fp16, y = var_82_to_fp16)[name = tensor("op_83_cast_fp16")]; - tensor var_88 = const()[name = tensor("op_88"), val = tensor(1)]; - tensor var_89 = const()[name = tensor("op_89"), val = tensor(0)]; - tensor var_90 = const()[name = tensor("op_90"), val = tensor(true)]; + tensor transpose_62 = transpose(perm = var_46_perm_0, x = embeddings_1)[name = tensor("transpose_62")]; + tensor x_1 = expand_dims(axes = x_1_axes_0, x = transpose_62)[name = tensor("x_1")]; + tensor input_1_perm_0 = const()[name = tensor("input_1_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_3 = const()[name = tensor("weight_3"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66787072)))]; + tensor bias_1 = const()[name = tensor("bias_1"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66787904)))]; + tensor o_1_axes_0 = const()[name = tensor("o_1_axes_0"), val = tensor([-1])]; + tensor var_30_to_fp16 = const()[name = tensor("op_30_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_61 = transpose(perm = input_1_perm_0, x = x_1)[name = tensor("transpose_61")]; + tensor o_1_cast_fp16 = layer_norm(axes = o_1_axes_0, beta = bias_1, epsilon = var_30_to_fp16, gamma = weight_3, x = transpose_61)[name = tensor("o_1_cast_fp16")]; + tensor hidden_states_1_perm_0 = const()[name = tensor("hidden_states_1_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_69_axes_0 = const()[name = tensor("op_69_axes_0"), val = tensor([2])]; + tensor var_69_cast_fp16 = expand_dims(axes = var_69_axes_0, x = mask)[name = tensor("op_69_cast_fp16")]; + tensor var_71_axes_0 = const()[name = tensor("op_71_axes_0"), val = tensor([3])]; + tensor var_71_cast_fp16 = expand_dims(axes = var_71_axes_0, x = var_69_cast_fp16)[name = tensor("op_71_cast_fp16")]; + tensor var_73_to_fp16 = const()[name = tensor("op_73_to_fp16"), val = tensor(0x1p+0)]; + tensor var_74_cast_fp16 = sub(x = var_71_cast_fp16, y = var_73_to_fp16)[name = tensor("op_74_cast_fp16")]; + tensor var_75_to_fp16 = const()[name = tensor("op_75_to_fp16"), val = tensor(0x1.388p+13)]; + tensor var_76_cast_fp16 = mul(x = var_74_cast_fp16, y = var_75_to_fp16)[name = tensor("op_76_cast_fp16")]; + tensor var_81 = const()[name = tensor("op_81"), val = tensor(1)]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor(0)]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor var_107 = const()[name = tensor("op_107"), val = tensor([1, 1])]; + tensor var_109_pad_type_0 = const()[name = tensor("op_109_pad_type_0"), val = tensor("custom")]; + tensor var_109_pad_0 = const()[name = tensor("op_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_60 = transpose(perm = hidden_states_1_perm_0, x = o_1_cast_fp16)[name = tensor("transpose_60")]; + tensor var_109 = conv(bias = layers_0_attention_q_proj_bias, dilations = var_107, groups = var_81, pad = var_109_pad_0, pad_type = var_109_pad_type_0, strides = var_105, weight = layers_0_attention_q_proj_weight, x = transpose_60)[name = tensor("op_109")]; tensor var_112 = const()[name = tensor("op_112"), val = tensor([1, 1])]; tensor var_114 = const()[name = tensor("op_114"), val = tensor([1, 1])]; - tensor var_116_pad_type_0 = const()[name = tensor("op_116_pad_type_0"), val = tensor("custom")]; - tensor var_116_pad_0 = const()[name = tensor("op_116_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_116 = conv(bias = layers_0_attention_q_proj_bias, dilations = var_114, groups = var_88, pad = var_116_pad_0, pad_type = var_116_pad_type_0, strides = var_112, weight = layers_0_attention_q_proj_weight, x = var_63_cast_fp16)[name = tensor("op_116")]; - tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; - tensor var_121 = const()[name = tensor("op_121"), val = tensor([1, 1])]; tensor ks_1_pad_type_0 = const()[name = tensor("ks_1_pad_type_0"), val = tensor("custom")]; tensor ks_1_pad_0 = const()[name = tensor("ks_1_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_1 = conv(bias = layers_0_attention_k_proj_bias, dilations = var_121, groups = var_88, pad = ks_1_pad_0, pad_type = ks_1_pad_type_0, strides = var_119, weight = layers_0_attention_k_proj_weight, x = var_63_cast_fp16)[name = tensor("ks_1")]; - tensor var_126 = const()[name = tensor("op_126"), val = tensor([1, 1])]; - tensor var_128 = const()[name = tensor("op_128"), val = tensor([1, 1])]; - tensor var_130_pad_type_0 = const()[name = tensor("op_130_pad_type_0"), val = tensor("custom")]; - tensor var_130_pad_0 = const()[name = tensor("op_130_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_130 = conv(bias = layers_0_attention_v_proj_bias, dilations = var_128, groups = var_88, pad = var_130_pad_0, pad_type = var_130_pad_type_0, strides = var_126, weight = layers_0_attention_v_proj_weight, x = var_63_cast_fp16)[name = tensor("op_130")]; + tensor ks_1 = conv(bias = layers_0_attention_k_proj_bias, dilations = var_114, groups = var_81, pad = ks_1_pad_0, pad_type = ks_1_pad_type_0, strides = var_112, weight = layers_0_attention_k_proj_weight, x = transpose_60)[name = tensor("ks_1")]; + tensor var_119 = const()[name = tensor("op_119"), val = tensor([1, 1])]; + tensor var_121 = const()[name = tensor("op_121"), val = tensor([1, 1])]; + tensor var_123_pad_type_0 = const()[name = tensor("op_123_pad_type_0"), val = tensor("custom")]; + tensor var_123_pad_0 = const()[name = tensor("op_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_123 = conv(bias = layers_0_attention_v_proj_bias, dilations = var_121, groups = var_81, pad = var_123_pad_0, pad_type = var_123_pad_type_0, strides = var_119, weight = layers_0_attention_v_proj_weight, x = transpose_60)[name = tensor("op_123")]; tensor tile_2 = const()[name = tensor("tile_2"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_131_axis_0 = const()[name = tensor("op_131_axis_0"), val = tensor(1)]; - tensor var_131_0, tensor var_131_1, tensor var_131_2, tensor var_131_3, tensor var_131_4, tensor var_131_5, tensor var_131_6, tensor var_131_7, tensor var_131_8, tensor var_131_9, tensor var_131_10, tensor var_131_11 = split(axis = var_131_axis_0, split_sizes = tile_2, x = var_116)[name = tensor("op_131")]; - tensor var_144_perm_0 = const()[name = tensor("op_144_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_124_axis_0 = const()[name = tensor("op_124_axis_0"), val = tensor(1)]; + tensor var_124_0, tensor var_124_1, tensor var_124_2, tensor var_124_3, tensor var_124_4, tensor var_124_5, tensor var_124_6, tensor var_124_7, tensor var_124_8, tensor var_124_9, tensor var_124_10, tensor var_124_11 = split(axis = var_124_axis_0, split_sizes = tile_2, x = var_109)[name = tensor("op_124")]; + tensor var_137_perm_0 = const()[name = tensor("op_137_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_3 = const()[name = tensor("tile_3"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_145_axis_0 = const()[name = tensor("op_145_axis_0"), val = tensor(3)]; - tensor transpose_11 = transpose(perm = var_144_perm_0, x = ks_1)[name = tensor("transpose_11")]; - tensor var_145_0, tensor var_145_1, tensor var_145_2, tensor var_145_3, tensor var_145_4, tensor var_145_5, tensor var_145_6, tensor var_145_7, tensor var_145_8, tensor var_145_9, tensor var_145_10, tensor var_145_11 = split(axis = var_145_axis_0, split_sizes = tile_3, x = transpose_11)[name = tensor("op_145")]; + tensor var_138_axis_0 = const()[name = tensor("op_138_axis_0"), val = tensor(3)]; + tensor transpose_59 = transpose(perm = var_137_perm_0, x = ks_1)[name = tensor("transpose_59")]; + tensor var_138_0, tensor var_138_1, tensor var_138_2, tensor var_138_3, tensor var_138_4, tensor var_138_5, tensor var_138_6, tensor var_138_7, tensor var_138_8, tensor var_138_9, tensor var_138_10, tensor var_138_11 = split(axis = var_138_axis_0, split_sizes = tile_3, x = transpose_59)[name = tensor("op_138")]; tensor tile_4 = const()[name = tensor("tile_4"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_158_axis_0 = const()[name = tensor("op_158_axis_0"), val = tensor(1)]; - tensor var_158_0, tensor var_158_1, tensor var_158_2, tensor var_158_3, tensor var_158_4, tensor var_158_5, tensor var_158_6, tensor var_158_7, tensor var_158_8, tensor var_158_9, tensor var_158_10, tensor var_158_11 = split(axis = var_158_axis_0, split_sizes = tile_4, x = var_130)[name = tensor("op_158")]; - tensor var_172_equation_0 = const()[name = tensor("op_172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_172 = einsum(equation = var_172_equation_0, values = (var_145_0, var_131_0))[name = tensor("op_172")]; - tensor var_173_to_fp16 = const()[name = tensor("op_173_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_1_cast_fp16 = mul(x = var_172, y = var_173_to_fp16)[name = tensor("w_1_cast_fp16")]; - tensor var_176_equation_0 = const()[name = tensor("op_176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_176 = einsum(equation = var_176_equation_0, values = (var_145_1, var_131_1))[name = tensor("op_176")]; - tensor var_177_to_fp16 = const()[name = tensor("op_177_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_3_cast_fp16 = mul(x = var_176, y = var_177_to_fp16)[name = tensor("w_3_cast_fp16")]; - tensor var_180_equation_0 = const()[name = tensor("op_180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_180 = einsum(equation = var_180_equation_0, values = (var_145_2, var_131_2))[name = tensor("op_180")]; - tensor var_181_to_fp16 = const()[name = tensor("op_181_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_5_cast_fp16 = mul(x = var_180, y = var_181_to_fp16)[name = tensor("w_5_cast_fp16")]; - tensor var_184_equation_0 = const()[name = tensor("op_184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_184 = einsum(equation = var_184_equation_0, values = (var_145_3, var_131_3))[name = tensor("op_184")]; - tensor var_185_to_fp16 = const()[name = tensor("op_185_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_7_cast_fp16 = mul(x = var_184, y = var_185_to_fp16)[name = tensor("w_7_cast_fp16")]; - tensor var_188_equation_0 = const()[name = tensor("op_188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_188 = einsum(equation = var_188_equation_0, values = (var_145_4, var_131_4))[name = tensor("op_188")]; - tensor var_189_to_fp16 = const()[name = tensor("op_189_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_9_cast_fp16 = mul(x = var_188, y = var_189_to_fp16)[name = tensor("w_9_cast_fp16")]; - tensor var_192_equation_0 = const()[name = tensor("op_192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_192 = einsum(equation = var_192_equation_0, values = (var_145_5, var_131_5))[name = tensor("op_192")]; - tensor var_193_to_fp16 = const()[name = tensor("op_193_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_11_cast_fp16 = mul(x = var_192, y = var_193_to_fp16)[name = tensor("w_11_cast_fp16")]; - tensor var_196_equation_0 = const()[name = tensor("op_196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_196 = einsum(equation = var_196_equation_0, values = (var_145_6, var_131_6))[name = tensor("op_196")]; - tensor var_197_to_fp16 = const()[name = tensor("op_197_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_13_cast_fp16 = mul(x = var_196, y = var_197_to_fp16)[name = tensor("w_13_cast_fp16")]; - tensor var_200_equation_0 = const()[name = tensor("op_200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_200 = einsum(equation = var_200_equation_0, values = (var_145_7, var_131_7))[name = tensor("op_200")]; - tensor var_201_to_fp16 = const()[name = tensor("op_201_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_15_cast_fp16 = mul(x = var_200, y = var_201_to_fp16)[name = tensor("w_15_cast_fp16")]; - tensor var_204_equation_0 = const()[name = tensor("op_204_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_204 = einsum(equation = var_204_equation_0, values = (var_145_8, var_131_8))[name = tensor("op_204")]; - tensor var_205_to_fp16 = const()[name = tensor("op_205_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_17_cast_fp16 = mul(x = var_204, y = var_205_to_fp16)[name = tensor("w_17_cast_fp16")]; - tensor var_208_equation_0 = const()[name = tensor("op_208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_208 = einsum(equation = var_208_equation_0, values = (var_145_9, var_131_9))[name = tensor("op_208")]; - tensor var_209_to_fp16 = const()[name = tensor("op_209_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_19_cast_fp16 = mul(x = var_208, y = var_209_to_fp16)[name = tensor("w_19_cast_fp16")]; - tensor var_212_equation_0 = const()[name = tensor("op_212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_212 = einsum(equation = var_212_equation_0, values = (var_145_10, var_131_10))[name = tensor("op_212")]; - tensor var_213_to_fp16 = const()[name = tensor("op_213_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_21_cast_fp16 = mul(x = var_212, y = var_213_to_fp16)[name = tensor("w_21_cast_fp16")]; - tensor var_216_equation_0 = const()[name = tensor("op_216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_216 = einsum(equation = var_216_equation_0, values = (var_145_11, var_131_11))[name = tensor("op_216")]; - tensor var_217_to_fp16 = const()[name = tensor("op_217_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_23_cast_fp16 = mul(x = var_216, y = var_217_to_fp16)[name = tensor("w_23_cast_fp16")]; - tensor input_3_cast_fp16 = add(x = w_1_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_3_cast_fp16")]; - tensor var_220_cast_fp16 = softmax(axis = var_88, x = input_3_cast_fp16)[name = tensor("op_220_cast_fp16")]; - tensor input_5_cast_fp16 = add(x = w_3_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_5_cast_fp16")]; - tensor var_222_cast_fp16 = softmax(axis = var_88, x = input_5_cast_fp16)[name = tensor("op_222_cast_fp16")]; - tensor input_7_cast_fp16 = add(x = w_5_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_7_cast_fp16")]; - tensor var_224_cast_fp16 = softmax(axis = var_88, x = input_7_cast_fp16)[name = tensor("op_224_cast_fp16")]; - tensor input_9_cast_fp16 = add(x = w_7_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_9_cast_fp16")]; - tensor var_226_cast_fp16 = softmax(axis = var_88, x = input_9_cast_fp16)[name = tensor("op_226_cast_fp16")]; - tensor input_11_cast_fp16 = add(x = w_9_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_11_cast_fp16")]; - tensor var_228_cast_fp16 = softmax(axis = var_88, x = input_11_cast_fp16)[name = tensor("op_228_cast_fp16")]; - tensor input_13_cast_fp16 = add(x = w_11_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_13_cast_fp16")]; - tensor var_230_cast_fp16 = softmax(axis = var_88, x = input_13_cast_fp16)[name = tensor("op_230_cast_fp16")]; - tensor input_15_cast_fp16 = add(x = w_13_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_15_cast_fp16")]; - tensor var_232_cast_fp16 = softmax(axis = var_88, x = input_15_cast_fp16)[name = tensor("op_232_cast_fp16")]; - tensor input_17_cast_fp16 = add(x = w_15_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_17_cast_fp16")]; - tensor var_234_cast_fp16 = softmax(axis = var_88, x = input_17_cast_fp16)[name = tensor("op_234_cast_fp16")]; - tensor input_19_cast_fp16 = add(x = w_17_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_19_cast_fp16")]; - tensor var_236_cast_fp16 = softmax(axis = var_88, x = input_19_cast_fp16)[name = tensor("op_236_cast_fp16")]; - tensor input_21_cast_fp16 = add(x = w_19_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_21_cast_fp16")]; - tensor var_238_cast_fp16 = softmax(axis = var_88, x = input_21_cast_fp16)[name = tensor("op_238_cast_fp16")]; - tensor input_23_cast_fp16 = add(x = w_21_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_23_cast_fp16")]; - tensor var_240_cast_fp16 = softmax(axis = var_88, x = input_23_cast_fp16)[name = tensor("op_240_cast_fp16")]; - tensor input_25_cast_fp16 = add(x = w_23_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_25_cast_fp16")]; - tensor var_242_cast_fp16 = softmax(axis = var_88, x = input_25_cast_fp16)[name = tensor("op_242_cast_fp16")]; - tensor var_244_equation_0 = const()[name = tensor("op_244_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_244_cast_fp16 = einsum(equation = var_244_equation_0, values = (var_158_0, var_220_cast_fp16))[name = tensor("op_244_cast_fp16")]; - tensor var_246_equation_0 = const()[name = tensor("op_246_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_246_cast_fp16 = einsum(equation = var_246_equation_0, values = (var_158_1, var_222_cast_fp16))[name = tensor("op_246_cast_fp16")]; - tensor var_248_equation_0 = const()[name = tensor("op_248_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_248_cast_fp16 = einsum(equation = var_248_equation_0, values = (var_158_2, var_224_cast_fp16))[name = tensor("op_248_cast_fp16")]; - tensor var_250_equation_0 = const()[name = tensor("op_250_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_250_cast_fp16 = einsum(equation = var_250_equation_0, values = (var_158_3, var_226_cast_fp16))[name = tensor("op_250_cast_fp16")]; - tensor var_252_equation_0 = const()[name = tensor("op_252_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_252_cast_fp16 = einsum(equation = var_252_equation_0, values = (var_158_4, var_228_cast_fp16))[name = tensor("op_252_cast_fp16")]; - tensor var_254_equation_0 = const()[name = tensor("op_254_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_254_cast_fp16 = einsum(equation = var_254_equation_0, values = (var_158_5, var_230_cast_fp16))[name = tensor("op_254_cast_fp16")]; - tensor var_256_equation_0 = const()[name = tensor("op_256_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_256_cast_fp16 = einsum(equation = var_256_equation_0, values = (var_158_6, var_232_cast_fp16))[name = tensor("op_256_cast_fp16")]; - tensor var_258_equation_0 = const()[name = tensor("op_258_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_258_cast_fp16 = einsum(equation = var_258_equation_0, values = (var_158_7, var_234_cast_fp16))[name = tensor("op_258_cast_fp16")]; - tensor var_260_equation_0 = const()[name = tensor("op_260_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_260_cast_fp16 = einsum(equation = var_260_equation_0, values = (var_158_8, var_236_cast_fp16))[name = tensor("op_260_cast_fp16")]; - tensor var_262_equation_0 = const()[name = tensor("op_262_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_262_cast_fp16 = einsum(equation = var_262_equation_0, values = (var_158_9, var_238_cast_fp16))[name = tensor("op_262_cast_fp16")]; - tensor var_264_equation_0 = const()[name = tensor("op_264_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_264_cast_fp16 = einsum(equation = var_264_equation_0, values = (var_158_10, var_240_cast_fp16))[name = tensor("op_264_cast_fp16")]; - tensor var_266_equation_0 = const()[name = tensor("op_266_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_266_cast_fp16 = einsum(equation = var_266_equation_0, values = (var_158_11, var_242_cast_fp16))[name = tensor("op_266_cast_fp16")]; - tensor var_268_interleave_0 = const()[name = tensor("op_268_interleave_0"), val = tensor(false)]; - tensor var_268_cast_fp16 = concat(axis = var_88, interleave = var_268_interleave_0, values = (var_244_cast_fp16, var_246_cast_fp16, var_248_cast_fp16, var_250_cast_fp16, var_252_cast_fp16, var_254_cast_fp16, var_256_cast_fp16, var_258_cast_fp16, var_260_cast_fp16, var_262_cast_fp16, var_264_cast_fp16, var_266_cast_fp16))[name = tensor("op_268_cast_fp16")]; - tensor var_272 = const()[name = tensor("op_272"), val = tensor([1, 1])]; - tensor var_274 = const()[name = tensor("op_274"), val = tensor([1, 1])]; - tensor var_276_pad_type_0 = const()[name = tensor("op_276_pad_type_0"), val = tensor("custom")]; - tensor var_276_pad_0 = const()[name = tensor("op_276_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_276 = conv(bias = layers_0_attention_o_proj_bias, dilations = var_274, groups = var_88, pad = var_276_pad_0, pad_type = var_276_pad_type_0, strides = var_272, weight = layers_0_attention_o_proj_weight, x = var_268_cast_fp16)[name = tensor("op_276")]; - tensor var_278_interleave_0 = const()[name = tensor("op_278_interleave_0"), val = tensor(false)]; - tensor var_278 = concat(axis = var_89, interleave = var_278_interleave_0, values = var_276)[name = tensor("op_278")]; - tensor x_5 = add(x = var_63_cast_fp16, y = var_278)[name = tensor("x_5")]; - tensor var_85_promoted = const()[name = tensor("op_85_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_86_promoted = const()[name = tensor("op_86_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_7 = clip(alpha = var_85_promoted, beta = var_86_promoted, x = x_5)[name = tensor("x_7")]; - tensor var_283 = const()[name = tensor("op_283"), val = tensor([1])]; - tensor mean_3 = reduce_mean(axes = var_283, keep_dims = var_90, x = x_7)[name = tensor("mean_3")]; - tensor zero_mean_3 = sub(x = x_7, y = mean_3)[name = tensor("zero_mean_3")]; - tensor var_87_promoted = const()[name = tensor("op_87_promoted"), val = tensor(0x1p+1)]; - tensor var_286 = pow(x = zero_mean_3, y = var_87_promoted)[name = tensor("op_286")]; - tensor var_287 = const()[name = tensor("op_287"), val = tensor([1])]; - tensor var_288 = reduce_mean(axes = var_287, keep_dims = var_90, x = var_286)[name = tensor("op_288")]; - tensor var_289_to_fp16 = const()[name = tensor("op_289_to_fp16"), val = tensor(0x1p-24)]; - tensor var_290_cast_fp16 = add(x = var_288, y = var_289_to_fp16)[name = tensor("op_290_cast_fp16")]; - tensor denom_3_epsilon_0 = const()[name = tensor("denom_3_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0, x = var_290_cast_fp16)[name = tensor("denom_3_cast_fp16")]; - tensor var_292_cast_fp16 = mul(x = zero_mean_3, y = denom_3_cast_fp16)[name = tensor("op_292_cast_fp16")]; - tensor var_294_gamma_0_to_fp16 = const()[name = tensor("op_294_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66790400)))]; - tensor var_294_beta_0_to_fp16 = const()[name = tensor("op_294_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66791232)))]; - tensor var_294_epsilon_0_to_fp16 = const()[name = tensor("op_294_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_294_cast_fp16 = batch_norm(beta = var_294_beta_0_to_fp16, epsilon = var_294_epsilon_0_to_fp16, gamma = var_294_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_292_cast_fp16)[name = tensor("op_294_cast_fp16")]; - tensor var_300 = const()[name = tensor("op_300"), val = tensor([1, 1])]; - tensor var_302 = const()[name = tensor("op_302"), val = tensor([1, 1])]; - tensor var_304_pad_type_0 = const()[name = tensor("op_304_pad_type_0"), val = tensor("custom")]; - tensor var_304_pad_0 = const()[name = tensor("op_304_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_304 = conv(bias = layers_0_mlp_fc1_bias, dilations = var_302, groups = var_88, pad = var_304_pad_0, pad_type = var_304_pad_type_0, strides = var_300, weight = layers_0_mlp_fc1_weight, x = var_294_cast_fp16)[name = tensor("op_304")]; - tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; - tensor input_31 = gelu(mode = input_31_mode_0, x = var_304)[name = tensor("input_31")]; - tensor var_308 = const()[name = tensor("op_308"), val = tensor([1, 1])]; - tensor var_310 = const()[name = tensor("op_310"), val = tensor([1, 1])]; - tensor var_312_pad_type_0 = const()[name = tensor("op_312_pad_type_0"), val = tensor("custom")]; - tensor var_312_pad_0 = const()[name = tensor("op_312_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_312 = conv(bias = layers_0_mlp_fc2_bias, dilations = var_310, groups = var_88, pad = var_312_pad_0, pad_type = var_312_pad_type_0, strides = var_308, weight = layers_0_mlp_fc2_weight, x = input_31)[name = tensor("op_312")]; - tensor x_9 = add(x = var_294_cast_fp16, y = var_312)[name = tensor("x_9")]; - tensor var_85_promoted_1 = const()[name = tensor("op_85_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_86_promoted_1 = const()[name = tensor("op_86_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_11 = clip(alpha = var_85_promoted_1, beta = var_86_promoted_1, x = x_9)[name = tensor("x_11")]; - tensor var_317 = const()[name = tensor("op_317"), val = tensor([1])]; - tensor mean_5 = reduce_mean(axes = var_317, keep_dims = var_90, x = x_11)[name = tensor("mean_5")]; - tensor zero_mean_5 = sub(x = x_11, y = mean_5)[name = tensor("zero_mean_5")]; - tensor var_87_promoted_1 = const()[name = tensor("op_87_promoted_1"), val = tensor(0x1p+1)]; - tensor var_320 = pow(x = zero_mean_5, y = var_87_promoted_1)[name = tensor("op_320")]; - tensor var_321 = const()[name = tensor("op_321"), val = tensor([1])]; - tensor var_322 = reduce_mean(axes = var_321, keep_dims = var_90, x = var_320)[name = tensor("op_322")]; - tensor var_323_to_fp16 = const()[name = tensor("op_323_to_fp16"), val = tensor(0x1p-24)]; - tensor var_324_cast_fp16 = add(x = var_322, y = var_323_to_fp16)[name = tensor("op_324_cast_fp16")]; - tensor denom_5_epsilon_0 = const()[name = tensor("denom_5_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0, x = var_324_cast_fp16)[name = tensor("denom_5_cast_fp16")]; - tensor var_326_cast_fp16 = mul(x = zero_mean_5, y = denom_5_cast_fp16)[name = tensor("op_326_cast_fp16")]; - tensor var_328_gamma_0_to_fp16 = const()[name = tensor("op_328_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66792064)))]; - tensor var_328_beta_0_to_fp16 = const()[name = tensor("op_328_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66792896)))]; - tensor var_328_epsilon_0_to_fp16 = const()[name = tensor("op_328_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_328_cast_fp16 = batch_norm(beta = var_328_beta_0_to_fp16, epsilon = var_328_epsilon_0_to_fp16, gamma = var_328_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_326_cast_fp16)[name = tensor("op_328_cast_fp16")]; - tensor var_334 = const()[name = tensor("op_334"), val = tensor(1)]; - tensor var_335 = const()[name = tensor("op_335"), val = tensor(0)]; - tensor var_336 = const()[name = tensor("op_336"), val = tensor(true)]; - tensor var_358 = const()[name = tensor("op_358"), val = tensor([1, 1])]; - tensor var_360 = const()[name = tensor("op_360"), val = tensor([1, 1])]; - tensor var_362_pad_type_0 = const()[name = tensor("op_362_pad_type_0"), val = tensor("custom")]; - tensor var_362_pad_0 = const()[name = tensor("op_362_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_362 = conv(bias = layers_1_attention_q_proj_bias, dilations = var_360, groups = var_334, pad = var_362_pad_0, pad_type = var_362_pad_type_0, strides = var_358, weight = layers_1_attention_q_proj_weight, x = var_328_cast_fp16)[name = tensor("op_362")]; - tensor var_365 = const()[name = tensor("op_365"), val = tensor([1, 1])]; - tensor var_367 = const()[name = tensor("op_367"), val = tensor([1, 1])]; + tensor var_151_axis_0 = const()[name = tensor("op_151_axis_0"), val = tensor(1)]; + tensor var_151_0, tensor var_151_1, tensor var_151_2, tensor var_151_3, tensor var_151_4, tensor var_151_5, tensor var_151_6, tensor var_151_7, tensor var_151_8, tensor var_151_9, tensor var_151_10, tensor var_151_11 = split(axis = var_151_axis_0, split_sizes = tile_4, x = var_123)[name = tensor("op_151")]; + tensor var_165_equation_0 = const()[name = tensor("op_165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_165 = einsum(equation = var_165_equation_0, values = (var_138_0, var_124_0))[name = tensor("op_165")]; + tensor var_166_to_fp16 = const()[name = tensor("op_166_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_1_cast_fp16 = mul(x = var_165, y = var_166_to_fp16)[name = tensor("w_1_cast_fp16")]; + tensor var_169_equation_0 = const()[name = tensor("op_169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_169 = einsum(equation = var_169_equation_0, values = (var_138_1, var_124_1))[name = tensor("op_169")]; + tensor var_170_to_fp16 = const()[name = tensor("op_170_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_3_cast_fp16 = mul(x = var_169, y = var_170_to_fp16)[name = tensor("w_3_cast_fp16")]; + tensor var_173_equation_0 = const()[name = tensor("op_173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_173 = einsum(equation = var_173_equation_0, values = (var_138_2, var_124_2))[name = tensor("op_173")]; + tensor var_174_to_fp16 = const()[name = tensor("op_174_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_5_cast_fp16 = mul(x = var_173, y = var_174_to_fp16)[name = tensor("w_5_cast_fp16")]; + tensor var_177_equation_0 = const()[name = tensor("op_177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_177 = einsum(equation = var_177_equation_0, values = (var_138_3, var_124_3))[name = tensor("op_177")]; + tensor var_178_to_fp16 = const()[name = tensor("op_178_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_7_cast_fp16 = mul(x = var_177, y = var_178_to_fp16)[name = tensor("w_7_cast_fp16")]; + tensor var_181_equation_0 = const()[name = tensor("op_181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_181 = einsum(equation = var_181_equation_0, values = (var_138_4, var_124_4))[name = tensor("op_181")]; + tensor var_182_to_fp16 = const()[name = tensor("op_182_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_9_cast_fp16 = mul(x = var_181, y = var_182_to_fp16)[name = tensor("w_9_cast_fp16")]; + tensor var_185_equation_0 = const()[name = tensor("op_185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_185 = einsum(equation = var_185_equation_0, values = (var_138_5, var_124_5))[name = tensor("op_185")]; + tensor var_186_to_fp16 = const()[name = tensor("op_186_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_11_cast_fp16 = mul(x = var_185, y = var_186_to_fp16)[name = tensor("w_11_cast_fp16")]; + tensor var_189_equation_0 = const()[name = tensor("op_189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_189 = einsum(equation = var_189_equation_0, values = (var_138_6, var_124_6))[name = tensor("op_189")]; + tensor var_190_to_fp16 = const()[name = tensor("op_190_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_13_cast_fp16 = mul(x = var_189, y = var_190_to_fp16)[name = tensor("w_13_cast_fp16")]; + tensor var_193_equation_0 = const()[name = tensor("op_193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_193 = einsum(equation = var_193_equation_0, values = (var_138_7, var_124_7))[name = tensor("op_193")]; + tensor var_194_to_fp16 = const()[name = tensor("op_194_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_15_cast_fp16 = mul(x = var_193, y = var_194_to_fp16)[name = tensor("w_15_cast_fp16")]; + tensor var_197_equation_0 = const()[name = tensor("op_197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_197 = einsum(equation = var_197_equation_0, values = (var_138_8, var_124_8))[name = tensor("op_197")]; + tensor var_198_to_fp16 = const()[name = tensor("op_198_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_17_cast_fp16 = mul(x = var_197, y = var_198_to_fp16)[name = tensor("w_17_cast_fp16")]; + tensor var_201_equation_0 = const()[name = tensor("op_201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_201 = einsum(equation = var_201_equation_0, values = (var_138_9, var_124_9))[name = tensor("op_201")]; + tensor var_202_to_fp16 = const()[name = tensor("op_202_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_19_cast_fp16 = mul(x = var_201, y = var_202_to_fp16)[name = tensor("w_19_cast_fp16")]; + tensor var_205_equation_0 = const()[name = tensor("op_205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_205 = einsum(equation = var_205_equation_0, values = (var_138_10, var_124_10))[name = tensor("op_205")]; + tensor var_206_to_fp16 = const()[name = tensor("op_206_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_21_cast_fp16 = mul(x = var_205, y = var_206_to_fp16)[name = tensor("w_21_cast_fp16")]; + tensor var_209_equation_0 = const()[name = tensor("op_209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_209 = einsum(equation = var_209_equation_0, values = (var_138_11, var_124_11))[name = tensor("op_209")]; + tensor var_210_to_fp16 = const()[name = tensor("op_210_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_23_cast_fp16 = mul(x = var_209, y = var_210_to_fp16)[name = tensor("w_23_cast_fp16")]; + tensor input_5_cast_fp16 = add(x = w_1_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_213_cast_fp16 = softmax(axis = var_81, x = input_5_cast_fp16)[name = tensor("op_213_cast_fp16")]; + tensor input_7_cast_fp16 = add(x = w_3_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_215_cast_fp16 = softmax(axis = var_81, x = input_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor input_9_cast_fp16 = add(x = w_5_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_217_cast_fp16 = softmax(axis = var_81, x = input_9_cast_fp16)[name = tensor("op_217_cast_fp16")]; + tensor input_11_cast_fp16 = add(x = w_7_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_219_cast_fp16 = softmax(axis = var_81, x = input_11_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor input_13_cast_fp16 = add(x = w_9_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_221_cast_fp16 = softmax(axis = var_81, x = input_13_cast_fp16)[name = tensor("op_221_cast_fp16")]; + tensor input_15_cast_fp16 = add(x = w_11_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_223_cast_fp16 = softmax(axis = var_81, x = input_15_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor input_17_cast_fp16 = add(x = w_13_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_225_cast_fp16 = softmax(axis = var_81, x = input_17_cast_fp16)[name = tensor("op_225_cast_fp16")]; + tensor input_19_cast_fp16 = add(x = w_15_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_227_cast_fp16 = softmax(axis = var_81, x = input_19_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor input_21_cast_fp16 = add(x = w_17_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor var_229_cast_fp16 = softmax(axis = var_81, x = input_21_cast_fp16)[name = tensor("op_229_cast_fp16")]; + tensor input_23_cast_fp16 = add(x = w_19_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_231_cast_fp16 = softmax(axis = var_81, x = input_23_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor input_25_cast_fp16 = add(x = w_21_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor var_233_cast_fp16 = softmax(axis = var_81, x = input_25_cast_fp16)[name = tensor("op_233_cast_fp16")]; + tensor input_27_cast_fp16 = add(x = w_23_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_235_cast_fp16 = softmax(axis = var_81, x = input_27_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_237_equation_0 = const()[name = tensor("op_237_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_237_cast_fp16 = einsum(equation = var_237_equation_0, values = (var_151_0, var_213_cast_fp16))[name = tensor("op_237_cast_fp16")]; + tensor var_239_equation_0 = const()[name = tensor("op_239_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_239_cast_fp16 = einsum(equation = var_239_equation_0, values = (var_151_1, var_215_cast_fp16))[name = tensor("op_239_cast_fp16")]; + tensor var_241_equation_0 = const()[name = tensor("op_241_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_241_cast_fp16 = einsum(equation = var_241_equation_0, values = (var_151_2, var_217_cast_fp16))[name = tensor("op_241_cast_fp16")]; + tensor var_243_equation_0 = const()[name = tensor("op_243_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_243_cast_fp16 = einsum(equation = var_243_equation_0, values = (var_151_3, var_219_cast_fp16))[name = tensor("op_243_cast_fp16")]; + tensor var_245_equation_0 = const()[name = tensor("op_245_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_245_cast_fp16 = einsum(equation = var_245_equation_0, values = (var_151_4, var_221_cast_fp16))[name = tensor("op_245_cast_fp16")]; + tensor var_247_equation_0 = const()[name = tensor("op_247_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_247_cast_fp16 = einsum(equation = var_247_equation_0, values = (var_151_5, var_223_cast_fp16))[name = tensor("op_247_cast_fp16")]; + tensor var_249_equation_0 = const()[name = tensor("op_249_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_249_cast_fp16 = einsum(equation = var_249_equation_0, values = (var_151_6, var_225_cast_fp16))[name = tensor("op_249_cast_fp16")]; + tensor var_251_equation_0 = const()[name = tensor("op_251_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_251_cast_fp16 = einsum(equation = var_251_equation_0, values = (var_151_7, var_227_cast_fp16))[name = tensor("op_251_cast_fp16")]; + tensor var_253_equation_0 = const()[name = tensor("op_253_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_253_cast_fp16 = einsum(equation = var_253_equation_0, values = (var_151_8, var_229_cast_fp16))[name = tensor("op_253_cast_fp16")]; + tensor var_255_equation_0 = const()[name = tensor("op_255_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_255_cast_fp16 = einsum(equation = var_255_equation_0, values = (var_151_9, var_231_cast_fp16))[name = tensor("op_255_cast_fp16")]; + tensor var_257_equation_0 = const()[name = tensor("op_257_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_257_cast_fp16 = einsum(equation = var_257_equation_0, values = (var_151_10, var_233_cast_fp16))[name = tensor("op_257_cast_fp16")]; + tensor var_259_equation_0 = const()[name = tensor("op_259_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_259_cast_fp16 = einsum(equation = var_259_equation_0, values = (var_151_11, var_235_cast_fp16))[name = tensor("op_259_cast_fp16")]; + tensor var_261_interleave_0 = const()[name = tensor("op_261_interleave_0"), val = tensor(false)]; + tensor var_261_cast_fp16 = concat(axis = var_81, interleave = var_261_interleave_0, values = (var_237_cast_fp16, var_239_cast_fp16, var_241_cast_fp16, var_243_cast_fp16, var_245_cast_fp16, var_247_cast_fp16, var_249_cast_fp16, var_251_cast_fp16, var_253_cast_fp16, var_255_cast_fp16, var_257_cast_fp16, var_259_cast_fp16))[name = tensor("op_261_cast_fp16")]; + tensor var_265 = const()[name = tensor("op_265"), val = tensor([1, 1])]; + tensor var_267 = const()[name = tensor("op_267"), val = tensor([1, 1])]; + tensor var_269_pad_type_0 = const()[name = tensor("op_269_pad_type_0"), val = tensor("custom")]; + tensor var_269_pad_0 = const()[name = tensor("op_269_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_269 = conv(bias = layers_0_attention_o_proj_bias, dilations = var_267, groups = var_81, pad = var_269_pad_0, pad_type = var_269_pad_type_0, strides = var_265, weight = layers_0_attention_o_proj_weight, x = var_261_cast_fp16)[name = tensor("op_269")]; + tensor var_271_interleave_0 = const()[name = tensor("op_271_interleave_0"), val = tensor(false)]; + tensor var_271 = concat(axis = var_82, interleave = var_271_interleave_0, values = var_269)[name = tensor("op_271")]; + tensor x_3 = add(x = transpose_60, y = var_271)[name = tensor("x_3")]; + tensor input_31_perm_0 = const()[name = tensor("input_31_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_13 = const()[name = tensor("weight_13"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66788736)))]; + tensor bias_11 = const()[name = tensor("bias_11"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66789568)))]; + tensor o_3_axes_0 = const()[name = tensor("o_3_axes_0"), val = tensor([-1])]; + tensor var_80_to_fp16 = const()[name = tensor("op_80_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_58 = transpose(perm = input_31_perm_0, x = x_3)[name = tensor("transpose_58")]; + tensor o_3_cast_fp16 = layer_norm(axes = o_3_axes_0, beta = bias_11, epsilon = var_80_to_fp16, gamma = weight_13, x = transpose_58)[name = tensor("o_3_cast_fp16")]; + tensor input_33_perm_0 = const()[name = tensor("input_33_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_287 = const()[name = tensor("op_287"), val = tensor([1, 1])]; + tensor var_289 = const()[name = tensor("op_289"), val = tensor([1, 1])]; + tensor var_291_pad_type_0 = const()[name = tensor("op_291_pad_type_0"), val = tensor("custom")]; + tensor var_291_pad_0 = const()[name = tensor("op_291_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_57 = transpose(perm = input_33_perm_0, x = o_3_cast_fp16)[name = tensor("transpose_57")]; + tensor var_291 = conv(bias = layers_0_mlp_fc1_bias, dilations = var_289, groups = var_81, pad = var_291_pad_0, pad_type = var_291_pad_type_0, strides = var_287, weight = layers_0_mlp_fc1_weight, x = transpose_57)[name = tensor("op_291")]; + tensor input_35_mode_0 = const()[name = tensor("input_35_mode_0"), val = tensor("EXACT")]; + tensor input_35 = gelu(mode = input_35_mode_0, x = var_291)[name = tensor("input_35")]; + tensor var_295 = const()[name = tensor("op_295"), val = tensor([1, 1])]; + tensor var_297 = const()[name = tensor("op_297"), val = tensor([1, 1])]; + tensor var_299_pad_type_0 = const()[name = tensor("op_299_pad_type_0"), val = tensor("custom")]; + tensor var_299_pad_0 = const()[name = tensor("op_299_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_299 = conv(bias = layers_0_mlp_fc2_bias, dilations = var_297, groups = var_81, pad = var_299_pad_0, pad_type = var_299_pad_type_0, strides = var_295, weight = layers_0_mlp_fc2_weight, x = input_35)[name = tensor("op_299")]; + tensor x_5 = add(x = transpose_57, y = var_299)[name = tensor("x_5")]; + tensor input_37_perm_0 = const()[name = tensor("input_37_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_19 = const()[name = tensor("weight_19"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66790400)))]; + tensor bias_17 = const()[name = tensor("bias_17"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66791232)))]; + tensor o_5_axes_0 = const()[name = tensor("o_5_axes_0"), val = tensor([-1])]; + tensor transpose_56 = transpose(perm = input_37_perm_0, x = x_5)[name = tensor("transpose_56")]; + tensor o_5_cast_fp16 = layer_norm(axes = o_5_axes_0, beta = bias_17, epsilon = var_80_to_fp16, gamma = weight_19, x = transpose_56)[name = tensor("o_5_cast_fp16")]; + tensor hidden_states_3_perm_0 = const()[name = tensor("hidden_states_3_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_315 = const()[name = tensor("op_315"), val = tensor(1)]; + tensor var_316 = const()[name = tensor("op_316"), val = tensor(0)]; + tensor var_339 = const()[name = tensor("op_339"), val = tensor([1, 1])]; + tensor var_341 = const()[name = tensor("op_341"), val = tensor([1, 1])]; + tensor var_343_pad_type_0 = const()[name = tensor("op_343_pad_type_0"), val = tensor("custom")]; + tensor var_343_pad_0 = const()[name = tensor("op_343_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_55 = transpose(perm = hidden_states_3_perm_0, x = o_5_cast_fp16)[name = tensor("transpose_55")]; + tensor var_343 = conv(bias = layers_1_attention_q_proj_bias, dilations = var_341, groups = var_315, pad = var_343_pad_0, pad_type = var_343_pad_type_0, strides = var_339, weight = layers_1_attention_q_proj_weight, x = transpose_55)[name = tensor("op_343")]; + tensor var_346 = const()[name = tensor("op_346"), val = tensor([1, 1])]; + tensor var_348 = const()[name = tensor("op_348"), val = tensor([1, 1])]; tensor ks_3_pad_type_0 = const()[name = tensor("ks_3_pad_type_0"), val = tensor("custom")]; tensor ks_3_pad_0 = const()[name = tensor("ks_3_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_3 = conv(bias = layers_1_attention_k_proj_bias, dilations = var_367, groups = var_334, pad = ks_3_pad_0, pad_type = ks_3_pad_type_0, strides = var_365, weight = layers_1_attention_k_proj_weight, x = var_328_cast_fp16)[name = tensor("ks_3")]; - tensor var_372 = const()[name = tensor("op_372"), val = tensor([1, 1])]; - tensor var_374 = const()[name = tensor("op_374"), val = tensor([1, 1])]; - tensor var_376_pad_type_0 = const()[name = tensor("op_376_pad_type_0"), val = tensor("custom")]; - tensor var_376_pad_0 = const()[name = tensor("op_376_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_376 = conv(bias = layers_1_attention_v_proj_bias, dilations = var_374, groups = var_334, pad = var_376_pad_0, pad_type = var_376_pad_type_0, strides = var_372, weight = layers_1_attention_v_proj_weight, x = var_328_cast_fp16)[name = tensor("op_376")]; + tensor ks_3 = conv(bias = layers_1_attention_k_proj_bias, dilations = var_348, groups = var_315, pad = ks_3_pad_0, pad_type = ks_3_pad_type_0, strides = var_346, weight = layers_1_attention_k_proj_weight, x = transpose_55)[name = tensor("ks_3")]; + tensor var_353 = const()[name = tensor("op_353"), val = tensor([1, 1])]; + tensor var_355 = const()[name = tensor("op_355"), val = tensor([1, 1])]; + tensor var_357_pad_type_0 = const()[name = tensor("op_357_pad_type_0"), val = tensor("custom")]; + tensor var_357_pad_0 = const()[name = tensor("op_357_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_357 = conv(bias = layers_1_attention_v_proj_bias, dilations = var_355, groups = var_315, pad = var_357_pad_0, pad_type = var_357_pad_type_0, strides = var_353, weight = layers_1_attention_v_proj_weight, x = transpose_55)[name = tensor("op_357")]; tensor tile_7 = const()[name = tensor("tile_7"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_377_axis_0 = const()[name = tensor("op_377_axis_0"), val = tensor(1)]; - tensor var_377_0, tensor var_377_1, tensor var_377_2, tensor var_377_3, tensor var_377_4, tensor var_377_5, tensor var_377_6, tensor var_377_7, tensor var_377_8, tensor var_377_9, tensor var_377_10, tensor var_377_11 = split(axis = var_377_axis_0, split_sizes = tile_7, x = var_362)[name = tensor("op_377")]; - tensor var_390_perm_0 = const()[name = tensor("op_390_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_358_axis_0 = const()[name = tensor("op_358_axis_0"), val = tensor(1)]; + tensor var_358_0, tensor var_358_1, tensor var_358_2, tensor var_358_3, tensor var_358_4, tensor var_358_5, tensor var_358_6, tensor var_358_7, tensor var_358_8, tensor var_358_9, tensor var_358_10, tensor var_358_11 = split(axis = var_358_axis_0, split_sizes = tile_7, x = var_343)[name = tensor("op_358")]; + tensor var_371_perm_0 = const()[name = tensor("op_371_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_8 = const()[name = tensor("tile_8"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_391_axis_0 = const()[name = tensor("op_391_axis_0"), val = tensor(3)]; - tensor transpose_10 = transpose(perm = var_390_perm_0, x = ks_3)[name = tensor("transpose_10")]; - tensor var_391_0, tensor var_391_1, tensor var_391_2, tensor var_391_3, tensor var_391_4, tensor var_391_5, tensor var_391_6, tensor var_391_7, tensor var_391_8, tensor var_391_9, tensor var_391_10, tensor var_391_11 = split(axis = var_391_axis_0, split_sizes = tile_8, x = transpose_10)[name = tensor("op_391")]; + tensor var_372_axis_0 = const()[name = tensor("op_372_axis_0"), val = tensor(3)]; + tensor transpose_54 = transpose(perm = var_371_perm_0, x = ks_3)[name = tensor("transpose_54")]; + tensor var_372_0, tensor var_372_1, tensor var_372_2, tensor var_372_3, tensor var_372_4, tensor var_372_5, tensor var_372_6, tensor var_372_7, tensor var_372_8, tensor var_372_9, tensor var_372_10, tensor var_372_11 = split(axis = var_372_axis_0, split_sizes = tile_8, x = transpose_54)[name = tensor("op_372")]; tensor tile_9 = const()[name = tensor("tile_9"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_404_axis_0 = const()[name = tensor("op_404_axis_0"), val = tensor(1)]; - tensor var_404_0, tensor var_404_1, tensor var_404_2, tensor var_404_3, tensor var_404_4, tensor var_404_5, tensor var_404_6, tensor var_404_7, tensor var_404_8, tensor var_404_9, tensor var_404_10, tensor var_404_11 = split(axis = var_404_axis_0, split_sizes = tile_9, x = var_376)[name = tensor("op_404")]; - tensor var_418_equation_0 = const()[name = tensor("op_418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_418 = einsum(equation = var_418_equation_0, values = (var_391_0, var_377_0))[name = tensor("op_418")]; - tensor var_419_to_fp16 = const()[name = tensor("op_419_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_25_cast_fp16 = mul(x = var_418, y = var_419_to_fp16)[name = tensor("w_25_cast_fp16")]; - tensor var_422_equation_0 = const()[name = tensor("op_422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_422 = einsum(equation = var_422_equation_0, values = (var_391_1, var_377_1))[name = tensor("op_422")]; - tensor var_423_to_fp16 = const()[name = tensor("op_423_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_27_cast_fp16 = mul(x = var_422, y = var_423_to_fp16)[name = tensor("w_27_cast_fp16")]; - tensor var_426_equation_0 = const()[name = tensor("op_426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_426 = einsum(equation = var_426_equation_0, values = (var_391_2, var_377_2))[name = tensor("op_426")]; - tensor var_427_to_fp16 = const()[name = tensor("op_427_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_29_cast_fp16 = mul(x = var_426, y = var_427_to_fp16)[name = tensor("w_29_cast_fp16")]; - tensor var_430_equation_0 = const()[name = tensor("op_430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_430 = einsum(equation = var_430_equation_0, values = (var_391_3, var_377_3))[name = tensor("op_430")]; - tensor var_431_to_fp16 = const()[name = tensor("op_431_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_31_cast_fp16 = mul(x = var_430, y = var_431_to_fp16)[name = tensor("w_31_cast_fp16")]; - tensor var_434_equation_0 = const()[name = tensor("op_434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_434 = einsum(equation = var_434_equation_0, values = (var_391_4, var_377_4))[name = tensor("op_434")]; - tensor var_435_to_fp16 = const()[name = tensor("op_435_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_33_cast_fp16 = mul(x = var_434, y = var_435_to_fp16)[name = tensor("w_33_cast_fp16")]; - tensor var_438_equation_0 = const()[name = tensor("op_438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_438 = einsum(equation = var_438_equation_0, values = (var_391_5, var_377_5))[name = tensor("op_438")]; - tensor var_439_to_fp16 = const()[name = tensor("op_439_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_35_cast_fp16 = mul(x = var_438, y = var_439_to_fp16)[name = tensor("w_35_cast_fp16")]; - tensor var_442_equation_0 = const()[name = tensor("op_442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_442 = einsum(equation = var_442_equation_0, values = (var_391_6, var_377_6))[name = tensor("op_442")]; - tensor var_443_to_fp16 = const()[name = tensor("op_443_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_37_cast_fp16 = mul(x = var_442, y = var_443_to_fp16)[name = tensor("w_37_cast_fp16")]; - tensor var_446_equation_0 = const()[name = tensor("op_446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_446 = einsum(equation = var_446_equation_0, values = (var_391_7, var_377_7))[name = tensor("op_446")]; - tensor var_447_to_fp16 = const()[name = tensor("op_447_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_39_cast_fp16 = mul(x = var_446, y = var_447_to_fp16)[name = tensor("w_39_cast_fp16")]; - tensor var_450_equation_0 = const()[name = tensor("op_450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_450 = einsum(equation = var_450_equation_0, values = (var_391_8, var_377_8))[name = tensor("op_450")]; - tensor var_451_to_fp16 = const()[name = tensor("op_451_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_41_cast_fp16 = mul(x = var_450, y = var_451_to_fp16)[name = tensor("w_41_cast_fp16")]; - tensor var_454_equation_0 = const()[name = tensor("op_454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_454 = einsum(equation = var_454_equation_0, values = (var_391_9, var_377_9))[name = tensor("op_454")]; - tensor var_455_to_fp16 = const()[name = tensor("op_455_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_43_cast_fp16 = mul(x = var_454, y = var_455_to_fp16)[name = tensor("w_43_cast_fp16")]; - tensor var_458_equation_0 = const()[name = tensor("op_458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_458 = einsum(equation = var_458_equation_0, values = (var_391_10, var_377_10))[name = tensor("op_458")]; - tensor var_459_to_fp16 = const()[name = tensor("op_459_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_45_cast_fp16 = mul(x = var_458, y = var_459_to_fp16)[name = tensor("w_45_cast_fp16")]; - tensor var_462_equation_0 = const()[name = tensor("op_462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_462 = einsum(equation = var_462_equation_0, values = (var_391_11, var_377_11))[name = tensor("op_462")]; - tensor var_463_to_fp16 = const()[name = tensor("op_463_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_47_cast_fp16 = mul(x = var_462, y = var_463_to_fp16)[name = tensor("w_47_cast_fp16")]; - tensor input_35_cast_fp16 = add(x = w_25_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_35_cast_fp16")]; - tensor var_466_cast_fp16 = softmax(axis = var_334, x = input_35_cast_fp16)[name = tensor("op_466_cast_fp16")]; - tensor input_37_cast_fp16 = add(x = w_27_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_37_cast_fp16")]; - tensor var_468_cast_fp16 = softmax(axis = var_334, x = input_37_cast_fp16)[name = tensor("op_468_cast_fp16")]; - tensor input_39_cast_fp16 = add(x = w_29_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_39_cast_fp16")]; - tensor var_470_cast_fp16 = softmax(axis = var_334, x = input_39_cast_fp16)[name = tensor("op_470_cast_fp16")]; - tensor input_41_cast_fp16 = add(x = w_31_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_41_cast_fp16")]; - tensor var_472_cast_fp16 = softmax(axis = var_334, x = input_41_cast_fp16)[name = tensor("op_472_cast_fp16")]; - tensor input_43_cast_fp16 = add(x = w_33_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_43_cast_fp16")]; - tensor var_474_cast_fp16 = softmax(axis = var_334, x = input_43_cast_fp16)[name = tensor("op_474_cast_fp16")]; - tensor input_45_cast_fp16 = add(x = w_35_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_45_cast_fp16")]; - tensor var_476_cast_fp16 = softmax(axis = var_334, x = input_45_cast_fp16)[name = tensor("op_476_cast_fp16")]; - tensor input_47_cast_fp16 = add(x = w_37_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_47_cast_fp16")]; - tensor var_478_cast_fp16 = softmax(axis = var_334, x = input_47_cast_fp16)[name = tensor("op_478_cast_fp16")]; - tensor input_49_cast_fp16 = add(x = w_39_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_49_cast_fp16")]; - tensor var_480_cast_fp16 = softmax(axis = var_334, x = input_49_cast_fp16)[name = tensor("op_480_cast_fp16")]; - tensor input_51_cast_fp16 = add(x = w_41_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_51_cast_fp16")]; - tensor var_482_cast_fp16 = softmax(axis = var_334, x = input_51_cast_fp16)[name = tensor("op_482_cast_fp16")]; - tensor input_53_cast_fp16 = add(x = w_43_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_53_cast_fp16")]; - tensor var_484_cast_fp16 = softmax(axis = var_334, x = input_53_cast_fp16)[name = tensor("op_484_cast_fp16")]; - tensor input_55_cast_fp16 = add(x = w_45_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_55_cast_fp16")]; - tensor var_486_cast_fp16 = softmax(axis = var_334, x = input_55_cast_fp16)[name = tensor("op_486_cast_fp16")]; - tensor input_57_cast_fp16 = add(x = w_47_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_57_cast_fp16")]; - tensor var_488_cast_fp16 = softmax(axis = var_334, x = input_57_cast_fp16)[name = tensor("op_488_cast_fp16")]; - tensor var_490_equation_0 = const()[name = tensor("op_490_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_490_cast_fp16 = einsum(equation = var_490_equation_0, values = (var_404_0, var_466_cast_fp16))[name = tensor("op_490_cast_fp16")]; - tensor var_492_equation_0 = const()[name = tensor("op_492_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_492_cast_fp16 = einsum(equation = var_492_equation_0, values = (var_404_1, var_468_cast_fp16))[name = tensor("op_492_cast_fp16")]; - tensor var_494_equation_0 = const()[name = tensor("op_494_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_494_cast_fp16 = einsum(equation = var_494_equation_0, values = (var_404_2, var_470_cast_fp16))[name = tensor("op_494_cast_fp16")]; - tensor var_496_equation_0 = const()[name = tensor("op_496_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_496_cast_fp16 = einsum(equation = var_496_equation_0, values = (var_404_3, var_472_cast_fp16))[name = tensor("op_496_cast_fp16")]; - tensor var_498_equation_0 = const()[name = tensor("op_498_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_498_cast_fp16 = einsum(equation = var_498_equation_0, values = (var_404_4, var_474_cast_fp16))[name = tensor("op_498_cast_fp16")]; - tensor var_500_equation_0 = const()[name = tensor("op_500_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_500_cast_fp16 = einsum(equation = var_500_equation_0, values = (var_404_5, var_476_cast_fp16))[name = tensor("op_500_cast_fp16")]; - tensor var_502_equation_0 = const()[name = tensor("op_502_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_502_cast_fp16 = einsum(equation = var_502_equation_0, values = (var_404_6, var_478_cast_fp16))[name = tensor("op_502_cast_fp16")]; - tensor var_504_equation_0 = const()[name = tensor("op_504_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_504_cast_fp16 = einsum(equation = var_504_equation_0, values = (var_404_7, var_480_cast_fp16))[name = tensor("op_504_cast_fp16")]; - tensor var_506_equation_0 = const()[name = tensor("op_506_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_506_cast_fp16 = einsum(equation = var_506_equation_0, values = (var_404_8, var_482_cast_fp16))[name = tensor("op_506_cast_fp16")]; - tensor var_508_equation_0 = const()[name = tensor("op_508_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_508_cast_fp16 = einsum(equation = var_508_equation_0, values = (var_404_9, var_484_cast_fp16))[name = tensor("op_508_cast_fp16")]; - tensor var_510_equation_0 = const()[name = tensor("op_510_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_510_cast_fp16 = einsum(equation = var_510_equation_0, values = (var_404_10, var_486_cast_fp16))[name = tensor("op_510_cast_fp16")]; - tensor var_512_equation_0 = const()[name = tensor("op_512_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_512_cast_fp16 = einsum(equation = var_512_equation_0, values = (var_404_11, var_488_cast_fp16))[name = tensor("op_512_cast_fp16")]; - tensor var_514_interleave_0 = const()[name = tensor("op_514_interleave_0"), val = tensor(false)]; - tensor var_514_cast_fp16 = concat(axis = var_334, interleave = var_514_interleave_0, values = (var_490_cast_fp16, var_492_cast_fp16, var_494_cast_fp16, var_496_cast_fp16, var_498_cast_fp16, var_500_cast_fp16, var_502_cast_fp16, var_504_cast_fp16, var_506_cast_fp16, var_508_cast_fp16, var_510_cast_fp16, var_512_cast_fp16))[name = tensor("op_514_cast_fp16")]; - tensor var_518 = const()[name = tensor("op_518"), val = tensor([1, 1])]; - tensor var_520 = const()[name = tensor("op_520"), val = tensor([1, 1])]; - tensor var_522_pad_type_0 = const()[name = tensor("op_522_pad_type_0"), val = tensor("custom")]; - tensor var_522_pad_0 = const()[name = tensor("op_522_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_522 = conv(bias = layers_1_attention_o_proj_bias, dilations = var_520, groups = var_334, pad = var_522_pad_0, pad_type = var_522_pad_type_0, strides = var_518, weight = layers_1_attention_o_proj_weight, x = var_514_cast_fp16)[name = tensor("op_522")]; - tensor var_524_interleave_0 = const()[name = tensor("op_524_interleave_0"), val = tensor(false)]; - tensor var_524 = concat(axis = var_335, interleave = var_524_interleave_0, values = var_522)[name = tensor("op_524")]; - tensor x_13 = add(x = var_328_cast_fp16, y = var_524)[name = tensor("x_13")]; - tensor var_331_promoted = const()[name = tensor("op_331_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_332_promoted = const()[name = tensor("op_332_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_15 = clip(alpha = var_331_promoted, beta = var_332_promoted, x = x_13)[name = tensor("x_15")]; - tensor var_529 = const()[name = tensor("op_529"), val = tensor([1])]; - tensor mean_7 = reduce_mean(axes = var_529, keep_dims = var_336, x = x_15)[name = tensor("mean_7")]; - tensor zero_mean_7 = sub(x = x_15, y = mean_7)[name = tensor("zero_mean_7")]; - tensor var_333_promoted = const()[name = tensor("op_333_promoted"), val = tensor(0x1p+1)]; - tensor var_532 = pow(x = zero_mean_7, y = var_333_promoted)[name = tensor("op_532")]; - tensor var_533 = const()[name = tensor("op_533"), val = tensor([1])]; - tensor var_534 = reduce_mean(axes = var_533, keep_dims = var_336, x = var_532)[name = tensor("op_534")]; - tensor var_535_to_fp16 = const()[name = tensor("op_535_to_fp16"), val = tensor(0x1p-24)]; - tensor var_536_cast_fp16 = add(x = var_534, y = var_535_to_fp16)[name = tensor("op_536_cast_fp16")]; - tensor denom_7_epsilon_0 = const()[name = tensor("denom_7_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0, x = var_536_cast_fp16)[name = tensor("denom_7_cast_fp16")]; - tensor var_538_cast_fp16 = mul(x = zero_mean_7, y = denom_7_cast_fp16)[name = tensor("op_538_cast_fp16")]; - tensor var_540_gamma_0_to_fp16 = const()[name = tensor("op_540_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66793728)))]; - tensor var_540_beta_0_to_fp16 = const()[name = tensor("op_540_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66794560)))]; - tensor var_540_epsilon_0_to_fp16 = const()[name = tensor("op_540_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_540_cast_fp16 = batch_norm(beta = var_540_beta_0_to_fp16, epsilon = var_540_epsilon_0_to_fp16, gamma = var_540_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_538_cast_fp16)[name = tensor("op_540_cast_fp16")]; - tensor var_546 = const()[name = tensor("op_546"), val = tensor([1, 1])]; - tensor var_548 = const()[name = tensor("op_548"), val = tensor([1, 1])]; - tensor var_550_pad_type_0 = const()[name = tensor("op_550_pad_type_0"), val = tensor("custom")]; - tensor var_550_pad_0 = const()[name = tensor("op_550_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_550 = conv(bias = layers_1_mlp_fc1_bias, dilations = var_548, groups = var_334, pad = var_550_pad_0, pad_type = var_550_pad_type_0, strides = var_546, weight = layers_1_mlp_fc1_weight, x = var_540_cast_fp16)[name = tensor("op_550")]; - tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; - tensor input_63 = gelu(mode = input_63_mode_0, x = var_550)[name = tensor("input_63")]; - tensor var_554 = const()[name = tensor("op_554"), val = tensor([1, 1])]; - tensor var_556 = const()[name = tensor("op_556"), val = tensor([1, 1])]; - tensor var_558_pad_type_0 = const()[name = tensor("op_558_pad_type_0"), val = tensor("custom")]; - tensor var_558_pad_0 = const()[name = tensor("op_558_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_558 = conv(bias = layers_1_mlp_fc2_bias, dilations = var_556, groups = var_334, pad = var_558_pad_0, pad_type = var_558_pad_type_0, strides = var_554, weight = layers_1_mlp_fc2_weight, x = input_63)[name = tensor("op_558")]; - tensor x_17 = add(x = var_540_cast_fp16, y = var_558)[name = tensor("x_17")]; - tensor var_331_promoted_1 = const()[name = tensor("op_331_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_332_promoted_1 = const()[name = tensor("op_332_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_19 = clip(alpha = var_331_promoted_1, beta = var_332_promoted_1, x = x_17)[name = tensor("x_19")]; - tensor var_563 = const()[name = tensor("op_563"), val = tensor([1])]; - tensor mean_9 = reduce_mean(axes = var_563, keep_dims = var_336, x = x_19)[name = tensor("mean_9")]; - tensor zero_mean_9 = sub(x = x_19, y = mean_9)[name = tensor("zero_mean_9")]; - tensor var_333_promoted_1 = const()[name = tensor("op_333_promoted_1"), val = tensor(0x1p+1)]; - tensor var_566 = pow(x = zero_mean_9, y = var_333_promoted_1)[name = tensor("op_566")]; - tensor var_567 = const()[name = tensor("op_567"), val = tensor([1])]; - tensor var_568 = reduce_mean(axes = var_567, keep_dims = var_336, x = var_566)[name = tensor("op_568")]; - tensor var_569_to_fp16 = const()[name = tensor("op_569_to_fp16"), val = tensor(0x1p-24)]; - tensor var_570_cast_fp16 = add(x = var_568, y = var_569_to_fp16)[name = tensor("op_570_cast_fp16")]; - tensor denom_9_epsilon_0 = const()[name = tensor("denom_9_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0, x = var_570_cast_fp16)[name = tensor("denom_9_cast_fp16")]; - tensor var_572_cast_fp16 = mul(x = zero_mean_9, y = denom_9_cast_fp16)[name = tensor("op_572_cast_fp16")]; - tensor var_574_gamma_0_to_fp16 = const()[name = tensor("op_574_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66795392)))]; - tensor var_574_beta_0_to_fp16 = const()[name = tensor("op_574_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66796224)))]; - tensor var_574_epsilon_0_to_fp16 = const()[name = tensor("op_574_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_574_cast_fp16 = batch_norm(beta = var_574_beta_0_to_fp16, epsilon = var_574_epsilon_0_to_fp16, gamma = var_574_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_572_cast_fp16)[name = tensor("op_574_cast_fp16")]; - tensor var_580 = const()[name = tensor("op_580"), val = tensor(1)]; - tensor var_581 = const()[name = tensor("op_581"), val = tensor(0)]; - tensor var_582 = const()[name = tensor("op_582"), val = tensor(true)]; - tensor var_604 = const()[name = tensor("op_604"), val = tensor([1, 1])]; - tensor var_606 = const()[name = tensor("op_606"), val = tensor([1, 1])]; - tensor var_608_pad_type_0 = const()[name = tensor("op_608_pad_type_0"), val = tensor("custom")]; - tensor var_608_pad_0 = const()[name = tensor("op_608_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_608 = conv(bias = layers_2_attention_q_proj_bias, dilations = var_606, groups = var_580, pad = var_608_pad_0, pad_type = var_608_pad_type_0, strides = var_604, weight = layers_2_attention_q_proj_weight, x = var_574_cast_fp16)[name = tensor("op_608")]; - tensor var_611 = const()[name = tensor("op_611"), val = tensor([1, 1])]; - tensor var_613 = const()[name = tensor("op_613"), val = tensor([1, 1])]; + tensor var_385_axis_0 = const()[name = tensor("op_385_axis_0"), val = tensor(1)]; + tensor var_385_0, tensor var_385_1, tensor var_385_2, tensor var_385_3, tensor var_385_4, tensor var_385_5, tensor var_385_6, tensor var_385_7, tensor var_385_8, tensor var_385_9, tensor var_385_10, tensor var_385_11 = split(axis = var_385_axis_0, split_sizes = tile_9, x = var_357)[name = tensor("op_385")]; + tensor var_399_equation_0 = const()[name = tensor("op_399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_399 = einsum(equation = var_399_equation_0, values = (var_372_0, var_358_0))[name = tensor("op_399")]; + tensor var_400_to_fp16 = const()[name = tensor("op_400_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_25_cast_fp16 = mul(x = var_399, y = var_400_to_fp16)[name = tensor("w_25_cast_fp16")]; + tensor var_403_equation_0 = const()[name = tensor("op_403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_403 = einsum(equation = var_403_equation_0, values = (var_372_1, var_358_1))[name = tensor("op_403")]; + tensor var_404_to_fp16 = const()[name = tensor("op_404_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_27_cast_fp16 = mul(x = var_403, y = var_404_to_fp16)[name = tensor("w_27_cast_fp16")]; + tensor var_407_equation_0 = const()[name = tensor("op_407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_407 = einsum(equation = var_407_equation_0, values = (var_372_2, var_358_2))[name = tensor("op_407")]; + tensor var_408_to_fp16 = const()[name = tensor("op_408_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_29_cast_fp16 = mul(x = var_407, y = var_408_to_fp16)[name = tensor("w_29_cast_fp16")]; + tensor var_411_equation_0 = const()[name = tensor("op_411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_411 = einsum(equation = var_411_equation_0, values = (var_372_3, var_358_3))[name = tensor("op_411")]; + tensor var_412_to_fp16 = const()[name = tensor("op_412_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_31_cast_fp16 = mul(x = var_411, y = var_412_to_fp16)[name = tensor("w_31_cast_fp16")]; + tensor var_415_equation_0 = const()[name = tensor("op_415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_415 = einsum(equation = var_415_equation_0, values = (var_372_4, var_358_4))[name = tensor("op_415")]; + tensor var_416_to_fp16 = const()[name = tensor("op_416_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_33_cast_fp16 = mul(x = var_415, y = var_416_to_fp16)[name = tensor("w_33_cast_fp16")]; + tensor var_419_equation_0 = const()[name = tensor("op_419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_419 = einsum(equation = var_419_equation_0, values = (var_372_5, var_358_5))[name = tensor("op_419")]; + tensor var_420_to_fp16 = const()[name = tensor("op_420_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_35_cast_fp16 = mul(x = var_419, y = var_420_to_fp16)[name = tensor("w_35_cast_fp16")]; + tensor var_423_equation_0 = const()[name = tensor("op_423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_423 = einsum(equation = var_423_equation_0, values = (var_372_6, var_358_6))[name = tensor("op_423")]; + tensor var_424_to_fp16 = const()[name = tensor("op_424_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_37_cast_fp16 = mul(x = var_423, y = var_424_to_fp16)[name = tensor("w_37_cast_fp16")]; + tensor var_427_equation_0 = const()[name = tensor("op_427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_427 = einsum(equation = var_427_equation_0, values = (var_372_7, var_358_7))[name = tensor("op_427")]; + tensor var_428_to_fp16 = const()[name = tensor("op_428_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_39_cast_fp16 = mul(x = var_427, y = var_428_to_fp16)[name = tensor("w_39_cast_fp16")]; + tensor var_431_equation_0 = const()[name = tensor("op_431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_431 = einsum(equation = var_431_equation_0, values = (var_372_8, var_358_8))[name = tensor("op_431")]; + tensor var_432_to_fp16 = const()[name = tensor("op_432_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_41_cast_fp16 = mul(x = var_431, y = var_432_to_fp16)[name = tensor("w_41_cast_fp16")]; + tensor var_435_equation_0 = const()[name = tensor("op_435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_435 = einsum(equation = var_435_equation_0, values = (var_372_9, var_358_9))[name = tensor("op_435")]; + tensor var_436_to_fp16 = const()[name = tensor("op_436_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_43_cast_fp16 = mul(x = var_435, y = var_436_to_fp16)[name = tensor("w_43_cast_fp16")]; + tensor var_439_equation_0 = const()[name = tensor("op_439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_439 = einsum(equation = var_439_equation_0, values = (var_372_10, var_358_10))[name = tensor("op_439")]; + tensor var_440_to_fp16 = const()[name = tensor("op_440_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_45_cast_fp16 = mul(x = var_439, y = var_440_to_fp16)[name = tensor("w_45_cast_fp16")]; + tensor var_443_equation_0 = const()[name = tensor("op_443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_443 = einsum(equation = var_443_equation_0, values = (var_372_11, var_358_11))[name = tensor("op_443")]; + tensor var_444_to_fp16 = const()[name = tensor("op_444_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_47_cast_fp16 = mul(x = var_443, y = var_444_to_fp16)[name = tensor("w_47_cast_fp16")]; + tensor input_41_cast_fp16 = add(x = w_25_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor var_447_cast_fp16 = softmax(axis = var_315, x = input_41_cast_fp16)[name = tensor("op_447_cast_fp16")]; + tensor input_43_cast_fp16 = add(x = w_27_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_449_cast_fp16 = softmax(axis = var_315, x = input_43_cast_fp16)[name = tensor("op_449_cast_fp16")]; + tensor input_45_cast_fp16 = add(x = w_29_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor var_451_cast_fp16 = softmax(axis = var_315, x = input_45_cast_fp16)[name = tensor("op_451_cast_fp16")]; + tensor input_47_cast_fp16 = add(x = w_31_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_453_cast_fp16 = softmax(axis = var_315, x = input_47_cast_fp16)[name = tensor("op_453_cast_fp16")]; + tensor input_49_cast_fp16 = add(x = w_33_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_49_cast_fp16")]; + tensor var_455_cast_fp16 = softmax(axis = var_315, x = input_49_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor input_51_cast_fp16 = add(x = w_35_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_457_cast_fp16 = softmax(axis = var_315, x = input_51_cast_fp16)[name = tensor("op_457_cast_fp16")]; + tensor input_53_cast_fp16 = add(x = w_37_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor var_459_cast_fp16 = softmax(axis = var_315, x = input_53_cast_fp16)[name = tensor("op_459_cast_fp16")]; + tensor input_55_cast_fp16 = add(x = w_39_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_461_cast_fp16 = softmax(axis = var_315, x = input_55_cast_fp16)[name = tensor("op_461_cast_fp16")]; + tensor input_57_cast_fp16 = add(x = w_41_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_57_cast_fp16")]; + tensor var_463_cast_fp16 = softmax(axis = var_315, x = input_57_cast_fp16)[name = tensor("op_463_cast_fp16")]; + tensor input_59_cast_fp16 = add(x = w_43_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_465_cast_fp16 = softmax(axis = var_315, x = input_59_cast_fp16)[name = tensor("op_465_cast_fp16")]; + tensor input_61_cast_fp16 = add(x = w_45_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor var_467_cast_fp16 = softmax(axis = var_315, x = input_61_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor input_63_cast_fp16 = add(x = w_47_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_469_cast_fp16 = softmax(axis = var_315, x = input_63_cast_fp16)[name = tensor("op_469_cast_fp16")]; + tensor var_471_equation_0 = const()[name = tensor("op_471_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_471_cast_fp16 = einsum(equation = var_471_equation_0, values = (var_385_0, var_447_cast_fp16))[name = tensor("op_471_cast_fp16")]; + tensor var_473_equation_0 = const()[name = tensor("op_473_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_473_cast_fp16 = einsum(equation = var_473_equation_0, values = (var_385_1, var_449_cast_fp16))[name = tensor("op_473_cast_fp16")]; + tensor var_475_equation_0 = const()[name = tensor("op_475_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_475_cast_fp16 = einsum(equation = var_475_equation_0, values = (var_385_2, var_451_cast_fp16))[name = tensor("op_475_cast_fp16")]; + tensor var_477_equation_0 = const()[name = tensor("op_477_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_477_cast_fp16 = einsum(equation = var_477_equation_0, values = (var_385_3, var_453_cast_fp16))[name = tensor("op_477_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_385_4, var_455_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_481_equation_0 = const()[name = tensor("op_481_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_481_cast_fp16 = einsum(equation = var_481_equation_0, values = (var_385_5, var_457_cast_fp16))[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_385_6, var_459_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_485_equation_0 = const()[name = tensor("op_485_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_485_cast_fp16 = einsum(equation = var_485_equation_0, values = (var_385_7, var_461_cast_fp16))[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_385_8, var_463_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_489_equation_0 = const()[name = tensor("op_489_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_489_cast_fp16 = einsum(equation = var_489_equation_0, values = (var_385_9, var_465_cast_fp16))[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_385_10, var_467_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_493_equation_0 = const()[name = tensor("op_493_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_493_cast_fp16 = einsum(equation = var_493_equation_0, values = (var_385_11, var_469_cast_fp16))[name = tensor("op_493_cast_fp16")]; + tensor var_495_interleave_0 = const()[name = tensor("op_495_interleave_0"), val = tensor(false)]; + tensor var_495_cast_fp16 = concat(axis = var_315, interleave = var_495_interleave_0, values = (var_471_cast_fp16, var_473_cast_fp16, var_475_cast_fp16, var_477_cast_fp16, var_479_cast_fp16, var_481_cast_fp16, var_483_cast_fp16, var_485_cast_fp16, var_487_cast_fp16, var_489_cast_fp16, var_491_cast_fp16, var_493_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_499 = const()[name = tensor("op_499"), val = tensor([1, 1])]; + tensor var_501 = const()[name = tensor("op_501"), val = tensor([1, 1])]; + tensor var_503_pad_type_0 = const()[name = tensor("op_503_pad_type_0"), val = tensor("custom")]; + tensor var_503_pad_0 = const()[name = tensor("op_503_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_503 = conv(bias = layers_1_attention_o_proj_bias, dilations = var_501, groups = var_315, pad = var_503_pad_0, pad_type = var_503_pad_type_0, strides = var_499, weight = layers_1_attention_o_proj_weight, x = var_495_cast_fp16)[name = tensor("op_503")]; + tensor var_505_interleave_0 = const()[name = tensor("op_505_interleave_0"), val = tensor(false)]; + tensor var_505 = concat(axis = var_316, interleave = var_505_interleave_0, values = var_503)[name = tensor("op_505")]; + tensor x_7 = add(x = transpose_55, y = var_505)[name = tensor("x_7")]; + tensor input_67_perm_0 = const()[name = tensor("input_67_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_29 = const()[name = tensor("weight_29"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66792064)))]; + tensor bias_27 = const()[name = tensor("bias_27"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66792896)))]; + tensor o_7_axes_0 = const()[name = tensor("o_7_axes_0"), val = tensor([-1])]; + tensor var_314_to_fp16 = const()[name = tensor("op_314_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_53 = transpose(perm = input_67_perm_0, x = x_7)[name = tensor("transpose_53")]; + tensor o_7_cast_fp16 = layer_norm(axes = o_7_axes_0, beta = bias_27, epsilon = var_314_to_fp16, gamma = weight_29, x = transpose_53)[name = tensor("o_7_cast_fp16")]; + tensor input_69_perm_0 = const()[name = tensor("input_69_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_521 = const()[name = tensor("op_521"), val = tensor([1, 1])]; + tensor var_523 = const()[name = tensor("op_523"), val = tensor([1, 1])]; + tensor var_525_pad_type_0 = const()[name = tensor("op_525_pad_type_0"), val = tensor("custom")]; + tensor var_525_pad_0 = const()[name = tensor("op_525_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_52 = transpose(perm = input_69_perm_0, x = o_7_cast_fp16)[name = tensor("transpose_52")]; + tensor var_525 = conv(bias = layers_1_mlp_fc1_bias, dilations = var_523, groups = var_315, pad = var_525_pad_0, pad_type = var_525_pad_type_0, strides = var_521, weight = layers_1_mlp_fc1_weight, x = transpose_52)[name = tensor("op_525")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71 = gelu(mode = input_71_mode_0, x = var_525)[name = tensor("input_71")]; + tensor var_529 = const()[name = tensor("op_529"), val = tensor([1, 1])]; + tensor var_531 = const()[name = tensor("op_531"), val = tensor([1, 1])]; + tensor var_533_pad_type_0 = const()[name = tensor("op_533_pad_type_0"), val = tensor("custom")]; + tensor var_533_pad_0 = const()[name = tensor("op_533_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_533 = conv(bias = layers_1_mlp_fc2_bias, dilations = var_531, groups = var_315, pad = var_533_pad_0, pad_type = var_533_pad_type_0, strides = var_529, weight = layers_1_mlp_fc2_weight, x = input_71)[name = tensor("op_533")]; + tensor x_9 = add(x = transpose_52, y = var_533)[name = tensor("x_9")]; + tensor input_73_perm_0 = const()[name = tensor("input_73_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_35 = const()[name = tensor("weight_35"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66793728)))]; + tensor bias_33 = const()[name = tensor("bias_33"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66794560)))]; + tensor o_9_axes_0 = const()[name = tensor("o_9_axes_0"), val = tensor([-1])]; + tensor transpose_51 = transpose(perm = input_73_perm_0, x = x_9)[name = tensor("transpose_51")]; + tensor o_9_cast_fp16 = layer_norm(axes = o_9_axes_0, beta = bias_33, epsilon = var_314_to_fp16, gamma = weight_35, x = transpose_51)[name = tensor("o_9_cast_fp16")]; + tensor hidden_states_5_perm_0 = const()[name = tensor("hidden_states_5_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_549 = const()[name = tensor("op_549"), val = tensor(1)]; + tensor var_550 = const()[name = tensor("op_550"), val = tensor(0)]; + tensor var_573 = const()[name = tensor("op_573"), val = tensor([1, 1])]; + tensor var_575 = const()[name = tensor("op_575"), val = tensor([1, 1])]; + tensor var_577_pad_type_0 = const()[name = tensor("op_577_pad_type_0"), val = tensor("custom")]; + tensor var_577_pad_0 = const()[name = tensor("op_577_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_50 = transpose(perm = hidden_states_5_perm_0, x = o_9_cast_fp16)[name = tensor("transpose_50")]; + tensor var_577 = conv(bias = layers_2_attention_q_proj_bias, dilations = var_575, groups = var_549, pad = var_577_pad_0, pad_type = var_577_pad_type_0, strides = var_573, weight = layers_2_attention_q_proj_weight, x = transpose_50)[name = tensor("op_577")]; + tensor var_580 = const()[name = tensor("op_580"), val = tensor([1, 1])]; + tensor var_582 = const()[name = tensor("op_582"), val = tensor([1, 1])]; tensor ks_5_pad_type_0 = const()[name = tensor("ks_5_pad_type_0"), val = tensor("custom")]; tensor ks_5_pad_0 = const()[name = tensor("ks_5_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_5 = conv(bias = layers_2_attention_k_proj_bias, dilations = var_613, groups = var_580, pad = ks_5_pad_0, pad_type = ks_5_pad_type_0, strides = var_611, weight = layers_2_attention_k_proj_weight, x = var_574_cast_fp16)[name = tensor("ks_5")]; - tensor var_618 = const()[name = tensor("op_618"), val = tensor([1, 1])]; - tensor var_620 = const()[name = tensor("op_620"), val = tensor([1, 1])]; - tensor var_622_pad_type_0 = const()[name = tensor("op_622_pad_type_0"), val = tensor("custom")]; - tensor var_622_pad_0 = const()[name = tensor("op_622_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_622 = conv(bias = layers_2_attention_v_proj_bias, dilations = var_620, groups = var_580, pad = var_622_pad_0, pad_type = var_622_pad_type_0, strides = var_618, weight = layers_2_attention_v_proj_weight, x = var_574_cast_fp16)[name = tensor("op_622")]; + tensor ks_5 = conv(bias = layers_2_attention_k_proj_bias, dilations = var_582, groups = var_549, pad = ks_5_pad_0, pad_type = ks_5_pad_type_0, strides = var_580, weight = layers_2_attention_k_proj_weight, x = transpose_50)[name = tensor("ks_5")]; + tensor var_587 = const()[name = tensor("op_587"), val = tensor([1, 1])]; + tensor var_589 = const()[name = tensor("op_589"), val = tensor([1, 1])]; + tensor var_591_pad_type_0 = const()[name = tensor("op_591_pad_type_0"), val = tensor("custom")]; + tensor var_591_pad_0 = const()[name = tensor("op_591_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_591 = conv(bias = layers_2_attention_v_proj_bias, dilations = var_589, groups = var_549, pad = var_591_pad_0, pad_type = var_591_pad_type_0, strides = var_587, weight = layers_2_attention_v_proj_weight, x = transpose_50)[name = tensor("op_591")]; tensor tile_12 = const()[name = tensor("tile_12"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_623_axis_0 = const()[name = tensor("op_623_axis_0"), val = tensor(1)]; - tensor var_623_0, tensor var_623_1, tensor var_623_2, tensor var_623_3, tensor var_623_4, tensor var_623_5, tensor var_623_6, tensor var_623_7, tensor var_623_8, tensor var_623_9, tensor var_623_10, tensor var_623_11 = split(axis = var_623_axis_0, split_sizes = tile_12, x = var_608)[name = tensor("op_623")]; - tensor var_636_perm_0 = const()[name = tensor("op_636_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_592_axis_0 = const()[name = tensor("op_592_axis_0"), val = tensor(1)]; + tensor var_592_0, tensor var_592_1, tensor var_592_2, tensor var_592_3, tensor var_592_4, tensor var_592_5, tensor var_592_6, tensor var_592_7, tensor var_592_8, tensor var_592_9, tensor var_592_10, tensor var_592_11 = split(axis = var_592_axis_0, split_sizes = tile_12, x = var_577)[name = tensor("op_592")]; + tensor var_605_perm_0 = const()[name = tensor("op_605_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_13 = const()[name = tensor("tile_13"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_637_axis_0 = const()[name = tensor("op_637_axis_0"), val = tensor(3)]; - tensor transpose_9 = transpose(perm = var_636_perm_0, x = ks_5)[name = tensor("transpose_9")]; - tensor var_637_0, tensor var_637_1, tensor var_637_2, tensor var_637_3, tensor var_637_4, tensor var_637_5, tensor var_637_6, tensor var_637_7, tensor var_637_8, tensor var_637_9, tensor var_637_10, tensor var_637_11 = split(axis = var_637_axis_0, split_sizes = tile_13, x = transpose_9)[name = tensor("op_637")]; + tensor var_606_axis_0 = const()[name = tensor("op_606_axis_0"), val = tensor(3)]; + tensor transpose_49 = transpose(perm = var_605_perm_0, x = ks_5)[name = tensor("transpose_49")]; + tensor var_606_0, tensor var_606_1, tensor var_606_2, tensor var_606_3, tensor var_606_4, tensor var_606_5, tensor var_606_6, tensor var_606_7, tensor var_606_8, tensor var_606_9, tensor var_606_10, tensor var_606_11 = split(axis = var_606_axis_0, split_sizes = tile_13, x = transpose_49)[name = tensor("op_606")]; tensor tile_14 = const()[name = tensor("tile_14"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_650_axis_0 = const()[name = tensor("op_650_axis_0"), val = tensor(1)]; - tensor var_650_0, tensor var_650_1, tensor var_650_2, tensor var_650_3, tensor var_650_4, tensor var_650_5, tensor var_650_6, tensor var_650_7, tensor var_650_8, tensor var_650_9, tensor var_650_10, tensor var_650_11 = split(axis = var_650_axis_0, split_sizes = tile_14, x = var_622)[name = tensor("op_650")]; - tensor var_664_equation_0 = const()[name = tensor("op_664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_664 = einsum(equation = var_664_equation_0, values = (var_637_0, var_623_0))[name = tensor("op_664")]; - tensor var_665_to_fp16 = const()[name = tensor("op_665_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_49_cast_fp16 = mul(x = var_664, y = var_665_to_fp16)[name = tensor("w_49_cast_fp16")]; - tensor var_668_equation_0 = const()[name = tensor("op_668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_668 = einsum(equation = var_668_equation_0, values = (var_637_1, var_623_1))[name = tensor("op_668")]; - tensor var_669_to_fp16 = const()[name = tensor("op_669_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_51_cast_fp16 = mul(x = var_668, y = var_669_to_fp16)[name = tensor("w_51_cast_fp16")]; - tensor var_672_equation_0 = const()[name = tensor("op_672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_672 = einsum(equation = var_672_equation_0, values = (var_637_2, var_623_2))[name = tensor("op_672")]; - tensor var_673_to_fp16 = const()[name = tensor("op_673_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_53_cast_fp16 = mul(x = var_672, y = var_673_to_fp16)[name = tensor("w_53_cast_fp16")]; - tensor var_676_equation_0 = const()[name = tensor("op_676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_676 = einsum(equation = var_676_equation_0, values = (var_637_3, var_623_3))[name = tensor("op_676")]; - tensor var_677_to_fp16 = const()[name = tensor("op_677_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_55_cast_fp16 = mul(x = var_676, y = var_677_to_fp16)[name = tensor("w_55_cast_fp16")]; - tensor var_680_equation_0 = const()[name = tensor("op_680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_680 = einsum(equation = var_680_equation_0, values = (var_637_4, var_623_4))[name = tensor("op_680")]; - tensor var_681_to_fp16 = const()[name = tensor("op_681_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_57_cast_fp16 = mul(x = var_680, y = var_681_to_fp16)[name = tensor("w_57_cast_fp16")]; - tensor var_684_equation_0 = const()[name = tensor("op_684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_684 = einsum(equation = var_684_equation_0, values = (var_637_5, var_623_5))[name = tensor("op_684")]; - tensor var_685_to_fp16 = const()[name = tensor("op_685_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_59_cast_fp16 = mul(x = var_684, y = var_685_to_fp16)[name = tensor("w_59_cast_fp16")]; - tensor var_688_equation_0 = const()[name = tensor("op_688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_688 = einsum(equation = var_688_equation_0, values = (var_637_6, var_623_6))[name = tensor("op_688")]; - tensor var_689_to_fp16 = const()[name = tensor("op_689_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_61_cast_fp16 = mul(x = var_688, y = var_689_to_fp16)[name = tensor("w_61_cast_fp16")]; - tensor var_692_equation_0 = const()[name = tensor("op_692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_692 = einsum(equation = var_692_equation_0, values = (var_637_7, var_623_7))[name = tensor("op_692")]; - tensor var_693_to_fp16 = const()[name = tensor("op_693_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_63_cast_fp16 = mul(x = var_692, y = var_693_to_fp16)[name = tensor("w_63_cast_fp16")]; - tensor var_696_equation_0 = const()[name = tensor("op_696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_696 = einsum(equation = var_696_equation_0, values = (var_637_8, var_623_8))[name = tensor("op_696")]; - tensor var_697_to_fp16 = const()[name = tensor("op_697_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_65_cast_fp16 = mul(x = var_696, y = var_697_to_fp16)[name = tensor("w_65_cast_fp16")]; - tensor var_700_equation_0 = const()[name = tensor("op_700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_700 = einsum(equation = var_700_equation_0, values = (var_637_9, var_623_9))[name = tensor("op_700")]; - tensor var_701_to_fp16 = const()[name = tensor("op_701_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_67_cast_fp16 = mul(x = var_700, y = var_701_to_fp16)[name = tensor("w_67_cast_fp16")]; - tensor var_704_equation_0 = const()[name = tensor("op_704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_704 = einsum(equation = var_704_equation_0, values = (var_637_10, var_623_10))[name = tensor("op_704")]; - tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_69_cast_fp16 = mul(x = var_704, y = var_705_to_fp16)[name = tensor("w_69_cast_fp16")]; - tensor var_708_equation_0 = const()[name = tensor("op_708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_708 = einsum(equation = var_708_equation_0, values = (var_637_11, var_623_11))[name = tensor("op_708")]; - tensor var_709_to_fp16 = const()[name = tensor("op_709_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_71_cast_fp16 = mul(x = var_708, y = var_709_to_fp16)[name = tensor("w_71_cast_fp16")]; - tensor input_67_cast_fp16 = add(x = w_49_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_67_cast_fp16")]; - tensor var_712_cast_fp16 = softmax(axis = var_580, x = input_67_cast_fp16)[name = tensor("op_712_cast_fp16")]; - tensor input_69_cast_fp16 = add(x = w_51_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_69_cast_fp16")]; - tensor var_714_cast_fp16 = softmax(axis = var_580, x = input_69_cast_fp16)[name = tensor("op_714_cast_fp16")]; - tensor input_71_cast_fp16 = add(x = w_53_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_71_cast_fp16")]; - tensor var_716_cast_fp16 = softmax(axis = var_580, x = input_71_cast_fp16)[name = tensor("op_716_cast_fp16")]; - tensor input_73_cast_fp16 = add(x = w_55_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_73_cast_fp16")]; - tensor var_718_cast_fp16 = softmax(axis = var_580, x = input_73_cast_fp16)[name = tensor("op_718_cast_fp16")]; - tensor input_75_cast_fp16 = add(x = w_57_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_75_cast_fp16")]; - tensor var_720_cast_fp16 = softmax(axis = var_580, x = input_75_cast_fp16)[name = tensor("op_720_cast_fp16")]; - tensor input_77_cast_fp16 = add(x = w_59_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_77_cast_fp16")]; - tensor var_722_cast_fp16 = softmax(axis = var_580, x = input_77_cast_fp16)[name = tensor("op_722_cast_fp16")]; - tensor input_79_cast_fp16 = add(x = w_61_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_79_cast_fp16")]; - tensor var_724_cast_fp16 = softmax(axis = var_580, x = input_79_cast_fp16)[name = tensor("op_724_cast_fp16")]; - tensor input_81_cast_fp16 = add(x = w_63_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_81_cast_fp16")]; - tensor var_726_cast_fp16 = softmax(axis = var_580, x = input_81_cast_fp16)[name = tensor("op_726_cast_fp16")]; - tensor input_83_cast_fp16 = add(x = w_65_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_83_cast_fp16")]; - tensor var_728_cast_fp16 = softmax(axis = var_580, x = input_83_cast_fp16)[name = tensor("op_728_cast_fp16")]; - tensor input_85_cast_fp16 = add(x = w_67_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_85_cast_fp16")]; - tensor var_730_cast_fp16 = softmax(axis = var_580, x = input_85_cast_fp16)[name = tensor("op_730_cast_fp16")]; - tensor input_87_cast_fp16 = add(x = w_69_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_87_cast_fp16")]; - tensor var_732_cast_fp16 = softmax(axis = var_580, x = input_87_cast_fp16)[name = tensor("op_732_cast_fp16")]; - tensor input_89_cast_fp16 = add(x = w_71_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_89_cast_fp16")]; - tensor var_734_cast_fp16 = softmax(axis = var_580, x = input_89_cast_fp16)[name = tensor("op_734_cast_fp16")]; - tensor var_736_equation_0 = const()[name = tensor("op_736_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_736_cast_fp16 = einsum(equation = var_736_equation_0, values = (var_650_0, var_712_cast_fp16))[name = tensor("op_736_cast_fp16")]; - tensor var_738_equation_0 = const()[name = tensor("op_738_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_738_cast_fp16 = einsum(equation = var_738_equation_0, values = (var_650_1, var_714_cast_fp16))[name = tensor("op_738_cast_fp16")]; - tensor var_740_equation_0 = const()[name = tensor("op_740_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_740_cast_fp16 = einsum(equation = var_740_equation_0, values = (var_650_2, var_716_cast_fp16))[name = tensor("op_740_cast_fp16")]; - tensor var_742_equation_0 = const()[name = tensor("op_742_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_742_cast_fp16 = einsum(equation = var_742_equation_0, values = (var_650_3, var_718_cast_fp16))[name = tensor("op_742_cast_fp16")]; - tensor var_744_equation_0 = const()[name = tensor("op_744_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_744_cast_fp16 = einsum(equation = var_744_equation_0, values = (var_650_4, var_720_cast_fp16))[name = tensor("op_744_cast_fp16")]; - tensor var_746_equation_0 = const()[name = tensor("op_746_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_746_cast_fp16 = einsum(equation = var_746_equation_0, values = (var_650_5, var_722_cast_fp16))[name = tensor("op_746_cast_fp16")]; - tensor var_748_equation_0 = const()[name = tensor("op_748_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_748_cast_fp16 = einsum(equation = var_748_equation_0, values = (var_650_6, var_724_cast_fp16))[name = tensor("op_748_cast_fp16")]; - tensor var_750_equation_0 = const()[name = tensor("op_750_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_750_cast_fp16 = einsum(equation = var_750_equation_0, values = (var_650_7, var_726_cast_fp16))[name = tensor("op_750_cast_fp16")]; - tensor var_752_equation_0 = const()[name = tensor("op_752_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_752_cast_fp16 = einsum(equation = var_752_equation_0, values = (var_650_8, var_728_cast_fp16))[name = tensor("op_752_cast_fp16")]; - tensor var_754_equation_0 = const()[name = tensor("op_754_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_754_cast_fp16 = einsum(equation = var_754_equation_0, values = (var_650_9, var_730_cast_fp16))[name = tensor("op_754_cast_fp16")]; - tensor var_756_equation_0 = const()[name = tensor("op_756_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_756_cast_fp16 = einsum(equation = var_756_equation_0, values = (var_650_10, var_732_cast_fp16))[name = tensor("op_756_cast_fp16")]; - tensor var_758_equation_0 = const()[name = tensor("op_758_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_758_cast_fp16 = einsum(equation = var_758_equation_0, values = (var_650_11, var_734_cast_fp16))[name = tensor("op_758_cast_fp16")]; - tensor var_760_interleave_0 = const()[name = tensor("op_760_interleave_0"), val = tensor(false)]; - tensor var_760_cast_fp16 = concat(axis = var_580, interleave = var_760_interleave_0, values = (var_736_cast_fp16, var_738_cast_fp16, var_740_cast_fp16, var_742_cast_fp16, var_744_cast_fp16, var_746_cast_fp16, var_748_cast_fp16, var_750_cast_fp16, var_752_cast_fp16, var_754_cast_fp16, var_756_cast_fp16, var_758_cast_fp16))[name = tensor("op_760_cast_fp16")]; - tensor var_764 = const()[name = tensor("op_764"), val = tensor([1, 1])]; - tensor var_766 = const()[name = tensor("op_766"), val = tensor([1, 1])]; - tensor var_768_pad_type_0 = const()[name = tensor("op_768_pad_type_0"), val = tensor("custom")]; - tensor var_768_pad_0 = const()[name = tensor("op_768_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_768 = conv(bias = layers_2_attention_o_proj_bias, dilations = var_766, groups = var_580, pad = var_768_pad_0, pad_type = var_768_pad_type_0, strides = var_764, weight = layers_2_attention_o_proj_weight, x = var_760_cast_fp16)[name = tensor("op_768")]; - tensor var_770_interleave_0 = const()[name = tensor("op_770_interleave_0"), val = tensor(false)]; - tensor var_770 = concat(axis = var_581, interleave = var_770_interleave_0, values = var_768)[name = tensor("op_770")]; - tensor x_21 = add(x = var_574_cast_fp16, y = var_770)[name = tensor("x_21")]; - tensor var_577_promoted = const()[name = tensor("op_577_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_578_promoted = const()[name = tensor("op_578_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_23 = clip(alpha = var_577_promoted, beta = var_578_promoted, x = x_21)[name = tensor("x_23")]; - tensor var_775 = const()[name = tensor("op_775"), val = tensor([1])]; - tensor mean_11 = reduce_mean(axes = var_775, keep_dims = var_582, x = x_23)[name = tensor("mean_11")]; - tensor zero_mean_11 = sub(x = x_23, y = mean_11)[name = tensor("zero_mean_11")]; - tensor var_579_promoted = const()[name = tensor("op_579_promoted"), val = tensor(0x1p+1)]; - tensor var_778 = pow(x = zero_mean_11, y = var_579_promoted)[name = tensor("op_778")]; - tensor var_779 = const()[name = tensor("op_779"), val = tensor([1])]; - tensor var_780 = reduce_mean(axes = var_779, keep_dims = var_582, x = var_778)[name = tensor("op_780")]; - tensor var_781_to_fp16 = const()[name = tensor("op_781_to_fp16"), val = tensor(0x1p-24)]; - tensor var_782_cast_fp16 = add(x = var_780, y = var_781_to_fp16)[name = tensor("op_782_cast_fp16")]; - tensor denom_11_epsilon_0 = const()[name = tensor("denom_11_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0, x = var_782_cast_fp16)[name = tensor("denom_11_cast_fp16")]; - tensor var_784_cast_fp16 = mul(x = zero_mean_11, y = denom_11_cast_fp16)[name = tensor("op_784_cast_fp16")]; - tensor var_786_gamma_0_to_fp16 = const()[name = tensor("op_786_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66797056)))]; - tensor var_786_beta_0_to_fp16 = const()[name = tensor("op_786_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66797888)))]; - tensor var_786_epsilon_0_to_fp16 = const()[name = tensor("op_786_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_786_cast_fp16 = batch_norm(beta = var_786_beta_0_to_fp16, epsilon = var_786_epsilon_0_to_fp16, gamma = var_786_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_784_cast_fp16)[name = tensor("op_786_cast_fp16")]; - tensor var_792 = const()[name = tensor("op_792"), val = tensor([1, 1])]; - tensor var_794 = const()[name = tensor("op_794"), val = tensor([1, 1])]; - tensor var_796_pad_type_0 = const()[name = tensor("op_796_pad_type_0"), val = tensor("custom")]; - tensor var_796_pad_0 = const()[name = tensor("op_796_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_796 = conv(bias = layers_2_mlp_fc1_bias, dilations = var_794, groups = var_580, pad = var_796_pad_0, pad_type = var_796_pad_type_0, strides = var_792, weight = layers_2_mlp_fc1_weight, x = var_786_cast_fp16)[name = tensor("op_796")]; - tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; - tensor input_95 = gelu(mode = input_95_mode_0, x = var_796)[name = tensor("input_95")]; - tensor var_800 = const()[name = tensor("op_800"), val = tensor([1, 1])]; - tensor var_802 = const()[name = tensor("op_802"), val = tensor([1, 1])]; - tensor var_804_pad_type_0 = const()[name = tensor("op_804_pad_type_0"), val = tensor("custom")]; - tensor var_804_pad_0 = const()[name = tensor("op_804_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_804 = conv(bias = layers_2_mlp_fc2_bias, dilations = var_802, groups = var_580, pad = var_804_pad_0, pad_type = var_804_pad_type_0, strides = var_800, weight = layers_2_mlp_fc2_weight, x = input_95)[name = tensor("op_804")]; - tensor x_25 = add(x = var_786_cast_fp16, y = var_804)[name = tensor("x_25")]; - tensor var_577_promoted_1 = const()[name = tensor("op_577_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_578_promoted_1 = const()[name = tensor("op_578_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_27 = clip(alpha = var_577_promoted_1, beta = var_578_promoted_1, x = x_25)[name = tensor("x_27")]; - tensor var_809 = const()[name = tensor("op_809"), val = tensor([1])]; - tensor mean_13 = reduce_mean(axes = var_809, keep_dims = var_582, x = x_27)[name = tensor("mean_13")]; - tensor zero_mean_13 = sub(x = x_27, y = mean_13)[name = tensor("zero_mean_13")]; - tensor var_579_promoted_1 = const()[name = tensor("op_579_promoted_1"), val = tensor(0x1p+1)]; - tensor var_812 = pow(x = zero_mean_13, y = var_579_promoted_1)[name = tensor("op_812")]; - tensor var_813 = const()[name = tensor("op_813"), val = tensor([1])]; - tensor var_814 = reduce_mean(axes = var_813, keep_dims = var_582, x = var_812)[name = tensor("op_814")]; - tensor var_815_to_fp16 = const()[name = tensor("op_815_to_fp16"), val = tensor(0x1p-24)]; - tensor var_816_cast_fp16 = add(x = var_814, y = var_815_to_fp16)[name = tensor("op_816_cast_fp16")]; - tensor denom_13_epsilon_0 = const()[name = tensor("denom_13_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0, x = var_816_cast_fp16)[name = tensor("denom_13_cast_fp16")]; - tensor var_818_cast_fp16 = mul(x = zero_mean_13, y = denom_13_cast_fp16)[name = tensor("op_818_cast_fp16")]; - tensor var_820_gamma_0_to_fp16 = const()[name = tensor("op_820_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66798720)))]; - tensor var_820_beta_0_to_fp16 = const()[name = tensor("op_820_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66799552)))]; - tensor var_820_epsilon_0_to_fp16 = const()[name = tensor("op_820_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_820_cast_fp16 = batch_norm(beta = var_820_beta_0_to_fp16, epsilon = var_820_epsilon_0_to_fp16, gamma = var_820_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_818_cast_fp16)[name = tensor("op_820_cast_fp16")]; - tensor var_826 = const()[name = tensor("op_826"), val = tensor(1)]; - tensor var_827 = const()[name = tensor("op_827"), val = tensor(0)]; - tensor var_828 = const()[name = tensor("op_828"), val = tensor(true)]; - tensor var_850 = const()[name = tensor("op_850"), val = tensor([1, 1])]; - tensor var_852 = const()[name = tensor("op_852"), val = tensor([1, 1])]; - tensor var_854_pad_type_0 = const()[name = tensor("op_854_pad_type_0"), val = tensor("custom")]; - tensor var_854_pad_0 = const()[name = tensor("op_854_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_854 = conv(bias = layers_3_attention_q_proj_bias, dilations = var_852, groups = var_826, pad = var_854_pad_0, pad_type = var_854_pad_type_0, strides = var_850, weight = layers_3_attention_q_proj_weight, x = var_820_cast_fp16)[name = tensor("op_854")]; - tensor var_857 = const()[name = tensor("op_857"), val = tensor([1, 1])]; - tensor var_859 = const()[name = tensor("op_859"), val = tensor([1, 1])]; + tensor var_619_axis_0 = const()[name = tensor("op_619_axis_0"), val = tensor(1)]; + tensor var_619_0, tensor var_619_1, tensor var_619_2, tensor var_619_3, tensor var_619_4, tensor var_619_5, tensor var_619_6, tensor var_619_7, tensor var_619_8, tensor var_619_9, tensor var_619_10, tensor var_619_11 = split(axis = var_619_axis_0, split_sizes = tile_14, x = var_591)[name = tensor("op_619")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_633 = einsum(equation = var_633_equation_0, values = (var_606_0, var_592_0))[name = tensor("op_633")]; + tensor var_634_to_fp16 = const()[name = tensor("op_634_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_49_cast_fp16 = mul(x = var_633, y = var_634_to_fp16)[name = tensor("w_49_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_637 = einsum(equation = var_637_equation_0, values = (var_606_1, var_592_1))[name = tensor("op_637")]; + tensor var_638_to_fp16 = const()[name = tensor("op_638_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_51_cast_fp16 = mul(x = var_637, y = var_638_to_fp16)[name = tensor("w_51_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_641 = einsum(equation = var_641_equation_0, values = (var_606_2, var_592_2))[name = tensor("op_641")]; + tensor var_642_to_fp16 = const()[name = tensor("op_642_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_53_cast_fp16 = mul(x = var_641, y = var_642_to_fp16)[name = tensor("w_53_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_645 = einsum(equation = var_645_equation_0, values = (var_606_3, var_592_3))[name = tensor("op_645")]; + tensor var_646_to_fp16 = const()[name = tensor("op_646_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_55_cast_fp16 = mul(x = var_645, y = var_646_to_fp16)[name = tensor("w_55_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_649 = einsum(equation = var_649_equation_0, values = (var_606_4, var_592_4))[name = tensor("op_649")]; + tensor var_650_to_fp16 = const()[name = tensor("op_650_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_57_cast_fp16 = mul(x = var_649, y = var_650_to_fp16)[name = tensor("w_57_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_653 = einsum(equation = var_653_equation_0, values = (var_606_5, var_592_5))[name = tensor("op_653")]; + tensor var_654_to_fp16 = const()[name = tensor("op_654_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_59_cast_fp16 = mul(x = var_653, y = var_654_to_fp16)[name = tensor("w_59_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_657 = einsum(equation = var_657_equation_0, values = (var_606_6, var_592_6))[name = tensor("op_657")]; + tensor var_658_to_fp16 = const()[name = tensor("op_658_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_61_cast_fp16 = mul(x = var_657, y = var_658_to_fp16)[name = tensor("w_61_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_661 = einsum(equation = var_661_equation_0, values = (var_606_7, var_592_7))[name = tensor("op_661")]; + tensor var_662_to_fp16 = const()[name = tensor("op_662_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_63_cast_fp16 = mul(x = var_661, y = var_662_to_fp16)[name = tensor("w_63_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_665 = einsum(equation = var_665_equation_0, values = (var_606_8, var_592_8))[name = tensor("op_665")]; + tensor var_666_to_fp16 = const()[name = tensor("op_666_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_65_cast_fp16 = mul(x = var_665, y = var_666_to_fp16)[name = tensor("w_65_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_669 = einsum(equation = var_669_equation_0, values = (var_606_9, var_592_9))[name = tensor("op_669")]; + tensor var_670_to_fp16 = const()[name = tensor("op_670_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_67_cast_fp16 = mul(x = var_669, y = var_670_to_fp16)[name = tensor("w_67_cast_fp16")]; + tensor var_673_equation_0 = const()[name = tensor("op_673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_673 = einsum(equation = var_673_equation_0, values = (var_606_10, var_592_10))[name = tensor("op_673")]; + tensor var_674_to_fp16 = const()[name = tensor("op_674_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_69_cast_fp16 = mul(x = var_673, y = var_674_to_fp16)[name = tensor("w_69_cast_fp16")]; + tensor var_677_equation_0 = const()[name = tensor("op_677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_677 = einsum(equation = var_677_equation_0, values = (var_606_11, var_592_11))[name = tensor("op_677")]; + tensor var_678_to_fp16 = const()[name = tensor("op_678_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_71_cast_fp16 = mul(x = var_677, y = var_678_to_fp16)[name = tensor("w_71_cast_fp16")]; + tensor input_77_cast_fp16 = add(x = w_49_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor var_681_cast_fp16 = softmax(axis = var_549, x = input_77_cast_fp16)[name = tensor("op_681_cast_fp16")]; + tensor input_79_cast_fp16 = add(x = w_51_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_683_cast_fp16 = softmax(axis = var_549, x = input_79_cast_fp16)[name = tensor("op_683_cast_fp16")]; + tensor input_81_cast_fp16 = add(x = w_53_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor var_685_cast_fp16 = softmax(axis = var_549, x = input_81_cast_fp16)[name = tensor("op_685_cast_fp16")]; + tensor input_83_cast_fp16 = add(x = w_55_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_687_cast_fp16 = softmax(axis = var_549, x = input_83_cast_fp16)[name = tensor("op_687_cast_fp16")]; + tensor input_85_cast_fp16 = add(x = w_57_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor var_689_cast_fp16 = softmax(axis = var_549, x = input_85_cast_fp16)[name = tensor("op_689_cast_fp16")]; + tensor input_87_cast_fp16 = add(x = w_59_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_691_cast_fp16 = softmax(axis = var_549, x = input_87_cast_fp16)[name = tensor("op_691_cast_fp16")]; + tensor input_89_cast_fp16 = add(x = w_61_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor var_693_cast_fp16 = softmax(axis = var_549, x = input_89_cast_fp16)[name = tensor("op_693_cast_fp16")]; + tensor input_91_cast_fp16 = add(x = w_63_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_695_cast_fp16 = softmax(axis = var_549, x = input_91_cast_fp16)[name = tensor("op_695_cast_fp16")]; + tensor input_93_cast_fp16 = add(x = w_65_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor var_697_cast_fp16 = softmax(axis = var_549, x = input_93_cast_fp16)[name = tensor("op_697_cast_fp16")]; + tensor input_95_cast_fp16 = add(x = w_67_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_699_cast_fp16 = softmax(axis = var_549, x = input_95_cast_fp16)[name = tensor("op_699_cast_fp16")]; + tensor input_97_cast_fp16 = add(x = w_69_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor var_701_cast_fp16 = softmax(axis = var_549, x = input_97_cast_fp16)[name = tensor("op_701_cast_fp16")]; + tensor input_99_cast_fp16 = add(x = w_71_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_703_cast_fp16 = softmax(axis = var_549, x = input_99_cast_fp16)[name = tensor("op_703_cast_fp16")]; + tensor var_705_equation_0 = const()[name = tensor("op_705_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_705_cast_fp16 = einsum(equation = var_705_equation_0, values = (var_619_0, var_681_cast_fp16))[name = tensor("op_705_cast_fp16")]; + tensor var_707_equation_0 = const()[name = tensor("op_707_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_707_cast_fp16 = einsum(equation = var_707_equation_0, values = (var_619_1, var_683_cast_fp16))[name = tensor("op_707_cast_fp16")]; + tensor var_709_equation_0 = const()[name = tensor("op_709_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_709_cast_fp16 = einsum(equation = var_709_equation_0, values = (var_619_2, var_685_cast_fp16))[name = tensor("op_709_cast_fp16")]; + tensor var_711_equation_0 = const()[name = tensor("op_711_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_711_cast_fp16 = einsum(equation = var_711_equation_0, values = (var_619_3, var_687_cast_fp16))[name = tensor("op_711_cast_fp16")]; + tensor var_713_equation_0 = const()[name = tensor("op_713_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_713_cast_fp16 = einsum(equation = var_713_equation_0, values = (var_619_4, var_689_cast_fp16))[name = tensor("op_713_cast_fp16")]; + tensor var_715_equation_0 = const()[name = tensor("op_715_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_715_cast_fp16 = einsum(equation = var_715_equation_0, values = (var_619_5, var_691_cast_fp16))[name = tensor("op_715_cast_fp16")]; + tensor var_717_equation_0 = const()[name = tensor("op_717_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_717_cast_fp16 = einsum(equation = var_717_equation_0, values = (var_619_6, var_693_cast_fp16))[name = tensor("op_717_cast_fp16")]; + tensor var_719_equation_0 = const()[name = tensor("op_719_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_719_cast_fp16 = einsum(equation = var_719_equation_0, values = (var_619_7, var_695_cast_fp16))[name = tensor("op_719_cast_fp16")]; + tensor var_721_equation_0 = const()[name = tensor("op_721_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_721_cast_fp16 = einsum(equation = var_721_equation_0, values = (var_619_8, var_697_cast_fp16))[name = tensor("op_721_cast_fp16")]; + tensor var_723_equation_0 = const()[name = tensor("op_723_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_723_cast_fp16 = einsum(equation = var_723_equation_0, values = (var_619_9, var_699_cast_fp16))[name = tensor("op_723_cast_fp16")]; + tensor var_725_equation_0 = const()[name = tensor("op_725_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_725_cast_fp16 = einsum(equation = var_725_equation_0, values = (var_619_10, var_701_cast_fp16))[name = tensor("op_725_cast_fp16")]; + tensor var_727_equation_0 = const()[name = tensor("op_727_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_727_cast_fp16 = einsum(equation = var_727_equation_0, values = (var_619_11, var_703_cast_fp16))[name = tensor("op_727_cast_fp16")]; + tensor var_729_interleave_0 = const()[name = tensor("op_729_interleave_0"), val = tensor(false)]; + tensor var_729_cast_fp16 = concat(axis = var_549, interleave = var_729_interleave_0, values = (var_705_cast_fp16, var_707_cast_fp16, var_709_cast_fp16, var_711_cast_fp16, var_713_cast_fp16, var_715_cast_fp16, var_717_cast_fp16, var_719_cast_fp16, var_721_cast_fp16, var_723_cast_fp16, var_725_cast_fp16, var_727_cast_fp16))[name = tensor("op_729_cast_fp16")]; + tensor var_733 = const()[name = tensor("op_733"), val = tensor([1, 1])]; + tensor var_735 = const()[name = tensor("op_735"), val = tensor([1, 1])]; + tensor var_737_pad_type_0 = const()[name = tensor("op_737_pad_type_0"), val = tensor("custom")]; + tensor var_737_pad_0 = const()[name = tensor("op_737_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_737 = conv(bias = layers_2_attention_o_proj_bias, dilations = var_735, groups = var_549, pad = var_737_pad_0, pad_type = var_737_pad_type_0, strides = var_733, weight = layers_2_attention_o_proj_weight, x = var_729_cast_fp16)[name = tensor("op_737")]; + tensor var_739_interleave_0 = const()[name = tensor("op_739_interleave_0"), val = tensor(false)]; + tensor var_739 = concat(axis = var_550, interleave = var_739_interleave_0, values = var_737)[name = tensor("op_739")]; + tensor x_11 = add(x = transpose_50, y = var_739)[name = tensor("x_11")]; + tensor input_103_perm_0 = const()[name = tensor("input_103_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_45 = const()[name = tensor("weight_45"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66795392)))]; + tensor bias_43 = const()[name = tensor("bias_43"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66796224)))]; + tensor o_11_axes_0 = const()[name = tensor("o_11_axes_0"), val = tensor([-1])]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_48 = transpose(perm = input_103_perm_0, x = x_11)[name = tensor("transpose_48")]; + tensor o_11_cast_fp16 = layer_norm(axes = o_11_axes_0, beta = bias_43, epsilon = var_548_to_fp16, gamma = weight_45, x = transpose_48)[name = tensor("o_11_cast_fp16")]; + tensor input_105_perm_0 = const()[name = tensor("input_105_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_755 = const()[name = tensor("op_755"), val = tensor([1, 1])]; + tensor var_757 = const()[name = tensor("op_757"), val = tensor([1, 1])]; + tensor var_759_pad_type_0 = const()[name = tensor("op_759_pad_type_0"), val = tensor("custom")]; + tensor var_759_pad_0 = const()[name = tensor("op_759_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_47 = transpose(perm = input_105_perm_0, x = o_11_cast_fp16)[name = tensor("transpose_47")]; + tensor var_759 = conv(bias = layers_2_mlp_fc1_bias, dilations = var_757, groups = var_549, pad = var_759_pad_0, pad_type = var_759_pad_type_0, strides = var_755, weight = layers_2_mlp_fc1_weight, x = transpose_47)[name = tensor("op_759")]; + tensor input_107_mode_0 = const()[name = tensor("input_107_mode_0"), val = tensor("EXACT")]; + tensor input_107 = gelu(mode = input_107_mode_0, x = var_759)[name = tensor("input_107")]; + tensor var_763 = const()[name = tensor("op_763"), val = tensor([1, 1])]; + tensor var_765 = const()[name = tensor("op_765"), val = tensor([1, 1])]; + tensor var_767_pad_type_0 = const()[name = tensor("op_767_pad_type_0"), val = tensor("custom")]; + tensor var_767_pad_0 = const()[name = tensor("op_767_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_767 = conv(bias = layers_2_mlp_fc2_bias, dilations = var_765, groups = var_549, pad = var_767_pad_0, pad_type = var_767_pad_type_0, strides = var_763, weight = layers_2_mlp_fc2_weight, x = input_107)[name = tensor("op_767")]; + tensor x_13 = add(x = transpose_47, y = var_767)[name = tensor("x_13")]; + tensor input_109_perm_0 = const()[name = tensor("input_109_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_51 = const()[name = tensor("weight_51"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66797056)))]; + tensor bias_49 = const()[name = tensor("bias_49"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66797888)))]; + tensor o_13_axes_0 = const()[name = tensor("o_13_axes_0"), val = tensor([-1])]; + tensor transpose_46 = transpose(perm = input_109_perm_0, x = x_13)[name = tensor("transpose_46")]; + tensor o_13_cast_fp16 = layer_norm(axes = o_13_axes_0, beta = bias_49, epsilon = var_548_to_fp16, gamma = weight_51, x = transpose_46)[name = tensor("o_13_cast_fp16")]; + tensor hidden_states_7_perm_0 = const()[name = tensor("hidden_states_7_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_783 = const()[name = tensor("op_783"), val = tensor(1)]; + tensor var_784 = const()[name = tensor("op_784"), val = tensor(0)]; + tensor var_807 = const()[name = tensor("op_807"), val = tensor([1, 1])]; + tensor var_809 = const()[name = tensor("op_809"), val = tensor([1, 1])]; + tensor var_811_pad_type_0 = const()[name = tensor("op_811_pad_type_0"), val = tensor("custom")]; + tensor var_811_pad_0 = const()[name = tensor("op_811_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_45 = transpose(perm = hidden_states_7_perm_0, x = o_13_cast_fp16)[name = tensor("transpose_45")]; + tensor var_811 = conv(bias = layers_3_attention_q_proj_bias, dilations = var_809, groups = var_783, pad = var_811_pad_0, pad_type = var_811_pad_type_0, strides = var_807, weight = layers_3_attention_q_proj_weight, x = transpose_45)[name = tensor("op_811")]; + tensor var_814 = const()[name = tensor("op_814"), val = tensor([1, 1])]; + tensor var_816 = const()[name = tensor("op_816"), val = tensor([1, 1])]; tensor ks_7_pad_type_0 = const()[name = tensor("ks_7_pad_type_0"), val = tensor("custom")]; tensor ks_7_pad_0 = const()[name = tensor("ks_7_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_7 = conv(bias = layers_3_attention_k_proj_bias, dilations = var_859, groups = var_826, pad = ks_7_pad_0, pad_type = ks_7_pad_type_0, strides = var_857, weight = layers_3_attention_k_proj_weight, x = var_820_cast_fp16)[name = tensor("ks_7")]; - tensor var_864 = const()[name = tensor("op_864"), val = tensor([1, 1])]; - tensor var_866 = const()[name = tensor("op_866"), val = tensor([1, 1])]; - tensor var_868_pad_type_0 = const()[name = tensor("op_868_pad_type_0"), val = tensor("custom")]; - tensor var_868_pad_0 = const()[name = tensor("op_868_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_868 = conv(bias = layers_3_attention_v_proj_bias, dilations = var_866, groups = var_826, pad = var_868_pad_0, pad_type = var_868_pad_type_0, strides = var_864, weight = layers_3_attention_v_proj_weight, x = var_820_cast_fp16)[name = tensor("op_868")]; + tensor ks_7 = conv(bias = layers_3_attention_k_proj_bias, dilations = var_816, groups = var_783, pad = ks_7_pad_0, pad_type = ks_7_pad_type_0, strides = var_814, weight = layers_3_attention_k_proj_weight, x = transpose_45)[name = tensor("ks_7")]; + tensor var_821 = const()[name = tensor("op_821"), val = tensor([1, 1])]; + tensor var_823 = const()[name = tensor("op_823"), val = tensor([1, 1])]; + tensor var_825_pad_type_0 = const()[name = tensor("op_825_pad_type_0"), val = tensor("custom")]; + tensor var_825_pad_0 = const()[name = tensor("op_825_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_825 = conv(bias = layers_3_attention_v_proj_bias, dilations = var_823, groups = var_783, pad = var_825_pad_0, pad_type = var_825_pad_type_0, strides = var_821, weight = layers_3_attention_v_proj_weight, x = transpose_45)[name = tensor("op_825")]; tensor tile_17 = const()[name = tensor("tile_17"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_869_axis_0 = const()[name = tensor("op_869_axis_0"), val = tensor(1)]; - tensor var_869_0, tensor var_869_1, tensor var_869_2, tensor var_869_3, tensor var_869_4, tensor var_869_5, tensor var_869_6, tensor var_869_7, tensor var_869_8, tensor var_869_9, tensor var_869_10, tensor var_869_11 = split(axis = var_869_axis_0, split_sizes = tile_17, x = var_854)[name = tensor("op_869")]; - tensor var_882_perm_0 = const()[name = tensor("op_882_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_826_axis_0 = const()[name = tensor("op_826_axis_0"), val = tensor(1)]; + tensor var_826_0, tensor var_826_1, tensor var_826_2, tensor var_826_3, tensor var_826_4, tensor var_826_5, tensor var_826_6, tensor var_826_7, tensor var_826_8, tensor var_826_9, tensor var_826_10, tensor var_826_11 = split(axis = var_826_axis_0, split_sizes = tile_17, x = var_811)[name = tensor("op_826")]; + tensor var_839_perm_0 = const()[name = tensor("op_839_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_18 = const()[name = tensor("tile_18"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_883_axis_0 = const()[name = tensor("op_883_axis_0"), val = tensor(3)]; - tensor transpose_8 = transpose(perm = var_882_perm_0, x = ks_7)[name = tensor("transpose_8")]; - tensor var_883_0, tensor var_883_1, tensor var_883_2, tensor var_883_3, tensor var_883_4, tensor var_883_5, tensor var_883_6, tensor var_883_7, tensor var_883_8, tensor var_883_9, tensor var_883_10, tensor var_883_11 = split(axis = var_883_axis_0, split_sizes = tile_18, x = transpose_8)[name = tensor("op_883")]; + tensor var_840_axis_0 = const()[name = tensor("op_840_axis_0"), val = tensor(3)]; + tensor transpose_44 = transpose(perm = var_839_perm_0, x = ks_7)[name = tensor("transpose_44")]; + tensor var_840_0, tensor var_840_1, tensor var_840_2, tensor var_840_3, tensor var_840_4, tensor var_840_5, tensor var_840_6, tensor var_840_7, tensor var_840_8, tensor var_840_9, tensor var_840_10, tensor var_840_11 = split(axis = var_840_axis_0, split_sizes = tile_18, x = transpose_44)[name = tensor("op_840")]; tensor tile_19 = const()[name = tensor("tile_19"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_896_axis_0 = const()[name = tensor("op_896_axis_0"), val = tensor(1)]; - tensor var_896_0, tensor var_896_1, tensor var_896_2, tensor var_896_3, tensor var_896_4, tensor var_896_5, tensor var_896_6, tensor var_896_7, tensor var_896_8, tensor var_896_9, tensor var_896_10, tensor var_896_11 = split(axis = var_896_axis_0, split_sizes = tile_19, x = var_868)[name = tensor("op_896")]; - tensor var_910_equation_0 = const()[name = tensor("op_910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_910 = einsum(equation = var_910_equation_0, values = (var_883_0, var_869_0))[name = tensor("op_910")]; - tensor var_911_to_fp16 = const()[name = tensor("op_911_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_73_cast_fp16 = mul(x = var_910, y = var_911_to_fp16)[name = tensor("w_73_cast_fp16")]; - tensor var_914_equation_0 = const()[name = tensor("op_914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_914 = einsum(equation = var_914_equation_0, values = (var_883_1, var_869_1))[name = tensor("op_914")]; - tensor var_915_to_fp16 = const()[name = tensor("op_915_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_75_cast_fp16 = mul(x = var_914, y = var_915_to_fp16)[name = tensor("w_75_cast_fp16")]; - tensor var_918_equation_0 = const()[name = tensor("op_918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_918 = einsum(equation = var_918_equation_0, values = (var_883_2, var_869_2))[name = tensor("op_918")]; - tensor var_919_to_fp16 = const()[name = tensor("op_919_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_77_cast_fp16 = mul(x = var_918, y = var_919_to_fp16)[name = tensor("w_77_cast_fp16")]; - tensor var_922_equation_0 = const()[name = tensor("op_922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_922 = einsum(equation = var_922_equation_0, values = (var_883_3, var_869_3))[name = tensor("op_922")]; - tensor var_923_to_fp16 = const()[name = tensor("op_923_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_79_cast_fp16 = mul(x = var_922, y = var_923_to_fp16)[name = tensor("w_79_cast_fp16")]; - tensor var_926_equation_0 = const()[name = tensor("op_926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_926 = einsum(equation = var_926_equation_0, values = (var_883_4, var_869_4))[name = tensor("op_926")]; - tensor var_927_to_fp16 = const()[name = tensor("op_927_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_81_cast_fp16 = mul(x = var_926, y = var_927_to_fp16)[name = tensor("w_81_cast_fp16")]; - tensor var_930_equation_0 = const()[name = tensor("op_930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_930 = einsum(equation = var_930_equation_0, values = (var_883_5, var_869_5))[name = tensor("op_930")]; - tensor var_931_to_fp16 = const()[name = tensor("op_931_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_83_cast_fp16 = mul(x = var_930, y = var_931_to_fp16)[name = tensor("w_83_cast_fp16")]; - tensor var_934_equation_0 = const()[name = tensor("op_934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_934 = einsum(equation = var_934_equation_0, values = (var_883_6, var_869_6))[name = tensor("op_934")]; - tensor var_935_to_fp16 = const()[name = tensor("op_935_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_85_cast_fp16 = mul(x = var_934, y = var_935_to_fp16)[name = tensor("w_85_cast_fp16")]; - tensor var_938_equation_0 = const()[name = tensor("op_938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_938 = einsum(equation = var_938_equation_0, values = (var_883_7, var_869_7))[name = tensor("op_938")]; - tensor var_939_to_fp16 = const()[name = tensor("op_939_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_87_cast_fp16 = mul(x = var_938, y = var_939_to_fp16)[name = tensor("w_87_cast_fp16")]; - tensor var_942_equation_0 = const()[name = tensor("op_942_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_942 = einsum(equation = var_942_equation_0, values = (var_883_8, var_869_8))[name = tensor("op_942")]; - tensor var_943_to_fp16 = const()[name = tensor("op_943_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_89_cast_fp16 = mul(x = var_942, y = var_943_to_fp16)[name = tensor("w_89_cast_fp16")]; - tensor var_946_equation_0 = const()[name = tensor("op_946_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_946 = einsum(equation = var_946_equation_0, values = (var_883_9, var_869_9))[name = tensor("op_946")]; - tensor var_947_to_fp16 = const()[name = tensor("op_947_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_91_cast_fp16 = mul(x = var_946, y = var_947_to_fp16)[name = tensor("w_91_cast_fp16")]; - tensor var_950_equation_0 = const()[name = tensor("op_950_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_950 = einsum(equation = var_950_equation_0, values = (var_883_10, var_869_10))[name = tensor("op_950")]; - tensor var_951_to_fp16 = const()[name = tensor("op_951_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_93_cast_fp16 = mul(x = var_950, y = var_951_to_fp16)[name = tensor("w_93_cast_fp16")]; - tensor var_954_equation_0 = const()[name = tensor("op_954_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_954 = einsum(equation = var_954_equation_0, values = (var_883_11, var_869_11))[name = tensor("op_954")]; - tensor var_955_to_fp16 = const()[name = tensor("op_955_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_95_cast_fp16 = mul(x = var_954, y = var_955_to_fp16)[name = tensor("w_95_cast_fp16")]; - tensor input_99_cast_fp16 = add(x = w_73_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_99_cast_fp16")]; - tensor var_958_cast_fp16 = softmax(axis = var_826, x = input_99_cast_fp16)[name = tensor("op_958_cast_fp16")]; - tensor input_101_cast_fp16 = add(x = w_75_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_101_cast_fp16")]; - tensor var_960_cast_fp16 = softmax(axis = var_826, x = input_101_cast_fp16)[name = tensor("op_960_cast_fp16")]; - tensor input_103_cast_fp16 = add(x = w_77_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_103_cast_fp16")]; - tensor var_962_cast_fp16 = softmax(axis = var_826, x = input_103_cast_fp16)[name = tensor("op_962_cast_fp16")]; - tensor input_105_cast_fp16 = add(x = w_79_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_105_cast_fp16")]; - tensor var_964_cast_fp16 = softmax(axis = var_826, x = input_105_cast_fp16)[name = tensor("op_964_cast_fp16")]; - tensor input_107_cast_fp16 = add(x = w_81_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_107_cast_fp16")]; - tensor var_966_cast_fp16 = softmax(axis = var_826, x = input_107_cast_fp16)[name = tensor("op_966_cast_fp16")]; - tensor input_109_cast_fp16 = add(x = w_83_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_109_cast_fp16")]; - tensor var_968_cast_fp16 = softmax(axis = var_826, x = input_109_cast_fp16)[name = tensor("op_968_cast_fp16")]; - tensor input_111_cast_fp16 = add(x = w_85_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_111_cast_fp16")]; - tensor var_970_cast_fp16 = softmax(axis = var_826, x = input_111_cast_fp16)[name = tensor("op_970_cast_fp16")]; - tensor input_113_cast_fp16 = add(x = w_87_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_113_cast_fp16")]; - tensor var_972_cast_fp16 = softmax(axis = var_826, x = input_113_cast_fp16)[name = tensor("op_972_cast_fp16")]; - tensor input_115_cast_fp16 = add(x = w_89_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_115_cast_fp16")]; - tensor var_974_cast_fp16 = softmax(axis = var_826, x = input_115_cast_fp16)[name = tensor("op_974_cast_fp16")]; - tensor input_117_cast_fp16 = add(x = w_91_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_117_cast_fp16")]; - tensor var_976_cast_fp16 = softmax(axis = var_826, x = input_117_cast_fp16)[name = tensor("op_976_cast_fp16")]; - tensor input_119_cast_fp16 = add(x = w_93_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_119_cast_fp16")]; - tensor var_978_cast_fp16 = softmax(axis = var_826, x = input_119_cast_fp16)[name = tensor("op_978_cast_fp16")]; - tensor input_121_cast_fp16 = add(x = w_95_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_121_cast_fp16")]; - tensor var_980_cast_fp16 = softmax(axis = var_826, x = input_121_cast_fp16)[name = tensor("op_980_cast_fp16")]; - tensor var_982_equation_0 = const()[name = tensor("op_982_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_982_cast_fp16 = einsum(equation = var_982_equation_0, values = (var_896_0, var_958_cast_fp16))[name = tensor("op_982_cast_fp16")]; - tensor var_984_equation_0 = const()[name = tensor("op_984_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_984_cast_fp16 = einsum(equation = var_984_equation_0, values = (var_896_1, var_960_cast_fp16))[name = tensor("op_984_cast_fp16")]; - tensor var_986_equation_0 = const()[name = tensor("op_986_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_986_cast_fp16 = einsum(equation = var_986_equation_0, values = (var_896_2, var_962_cast_fp16))[name = tensor("op_986_cast_fp16")]; - tensor var_988_equation_0 = const()[name = tensor("op_988_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_988_cast_fp16 = einsum(equation = var_988_equation_0, values = (var_896_3, var_964_cast_fp16))[name = tensor("op_988_cast_fp16")]; - tensor var_990_equation_0 = const()[name = tensor("op_990_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_990_cast_fp16 = einsum(equation = var_990_equation_0, values = (var_896_4, var_966_cast_fp16))[name = tensor("op_990_cast_fp16")]; - tensor var_992_equation_0 = const()[name = tensor("op_992_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_992_cast_fp16 = einsum(equation = var_992_equation_0, values = (var_896_5, var_968_cast_fp16))[name = tensor("op_992_cast_fp16")]; - tensor var_994_equation_0 = const()[name = tensor("op_994_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_994_cast_fp16 = einsum(equation = var_994_equation_0, values = (var_896_6, var_970_cast_fp16))[name = tensor("op_994_cast_fp16")]; - tensor var_996_equation_0 = const()[name = tensor("op_996_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_996_cast_fp16 = einsum(equation = var_996_equation_0, values = (var_896_7, var_972_cast_fp16))[name = tensor("op_996_cast_fp16")]; - tensor var_998_equation_0 = const()[name = tensor("op_998_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_998_cast_fp16 = einsum(equation = var_998_equation_0, values = (var_896_8, var_974_cast_fp16))[name = tensor("op_998_cast_fp16")]; - tensor var_1000_equation_0 = const()[name = tensor("op_1000_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1000_cast_fp16 = einsum(equation = var_1000_equation_0, values = (var_896_9, var_976_cast_fp16))[name = tensor("op_1000_cast_fp16")]; - tensor var_1002_equation_0 = const()[name = tensor("op_1002_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1002_cast_fp16 = einsum(equation = var_1002_equation_0, values = (var_896_10, var_978_cast_fp16))[name = tensor("op_1002_cast_fp16")]; - tensor var_1004_equation_0 = const()[name = tensor("op_1004_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1004_cast_fp16 = einsum(equation = var_1004_equation_0, values = (var_896_11, var_980_cast_fp16))[name = tensor("op_1004_cast_fp16")]; - tensor var_1006_interleave_0 = const()[name = tensor("op_1006_interleave_0"), val = tensor(false)]; - tensor var_1006_cast_fp16 = concat(axis = var_826, interleave = var_1006_interleave_0, values = (var_982_cast_fp16, var_984_cast_fp16, var_986_cast_fp16, var_988_cast_fp16, var_990_cast_fp16, var_992_cast_fp16, var_994_cast_fp16, var_996_cast_fp16, var_998_cast_fp16, var_1000_cast_fp16, var_1002_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1006_cast_fp16")]; - tensor var_1010 = const()[name = tensor("op_1010"), val = tensor([1, 1])]; - tensor var_1012 = const()[name = tensor("op_1012"), val = tensor([1, 1])]; - tensor var_1014_pad_type_0 = const()[name = tensor("op_1014_pad_type_0"), val = tensor("custom")]; - tensor var_1014_pad_0 = const()[name = tensor("op_1014_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1014 = conv(bias = layers_3_attention_o_proj_bias, dilations = var_1012, groups = var_826, pad = var_1014_pad_0, pad_type = var_1014_pad_type_0, strides = var_1010, weight = layers_3_attention_o_proj_weight, x = var_1006_cast_fp16)[name = tensor("op_1014")]; - tensor var_1016_interleave_0 = const()[name = tensor("op_1016_interleave_0"), val = tensor(false)]; - tensor var_1016 = concat(axis = var_827, interleave = var_1016_interleave_0, values = var_1014)[name = tensor("op_1016")]; - tensor x_29 = add(x = var_820_cast_fp16, y = var_1016)[name = tensor("x_29")]; - tensor var_823_promoted = const()[name = tensor("op_823_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_824_promoted = const()[name = tensor("op_824_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_31 = clip(alpha = var_823_promoted, beta = var_824_promoted, x = x_29)[name = tensor("x_31")]; - tensor var_1021 = const()[name = tensor("op_1021"), val = tensor([1])]; - tensor mean_15 = reduce_mean(axes = var_1021, keep_dims = var_828, x = x_31)[name = tensor("mean_15")]; - tensor zero_mean_15 = sub(x = x_31, y = mean_15)[name = tensor("zero_mean_15")]; - tensor var_825_promoted = const()[name = tensor("op_825_promoted"), val = tensor(0x1p+1)]; - tensor var_1024 = pow(x = zero_mean_15, y = var_825_promoted)[name = tensor("op_1024")]; - tensor var_1025 = const()[name = tensor("op_1025"), val = tensor([1])]; - tensor var_1026 = reduce_mean(axes = var_1025, keep_dims = var_828, x = var_1024)[name = tensor("op_1026")]; - tensor var_1027_to_fp16 = const()[name = tensor("op_1027_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1028_cast_fp16 = add(x = var_1026, y = var_1027_to_fp16)[name = tensor("op_1028_cast_fp16")]; - tensor denom_15_epsilon_0 = const()[name = tensor("denom_15_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0, x = var_1028_cast_fp16)[name = tensor("denom_15_cast_fp16")]; - tensor var_1030_cast_fp16 = mul(x = zero_mean_15, y = denom_15_cast_fp16)[name = tensor("op_1030_cast_fp16")]; - tensor var_1032_gamma_0_to_fp16 = const()[name = tensor("op_1032_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66800384)))]; - tensor var_1032_beta_0_to_fp16 = const()[name = tensor("op_1032_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66801216)))]; - tensor var_1032_epsilon_0_to_fp16 = const()[name = tensor("op_1032_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1032_cast_fp16 = batch_norm(beta = var_1032_beta_0_to_fp16, epsilon = var_1032_epsilon_0_to_fp16, gamma = var_1032_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1030_cast_fp16)[name = tensor("op_1032_cast_fp16")]; - tensor var_1038 = const()[name = tensor("op_1038"), val = tensor([1, 1])]; - tensor var_1040 = const()[name = tensor("op_1040"), val = tensor([1, 1])]; - tensor var_1042_pad_type_0 = const()[name = tensor("op_1042_pad_type_0"), val = tensor("custom")]; - tensor var_1042_pad_0 = const()[name = tensor("op_1042_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1042 = conv(bias = layers_3_mlp_fc1_bias, dilations = var_1040, groups = var_826, pad = var_1042_pad_0, pad_type = var_1042_pad_type_0, strides = var_1038, weight = layers_3_mlp_fc1_weight, x = var_1032_cast_fp16)[name = tensor("op_1042")]; - tensor input_127_mode_0 = const()[name = tensor("input_127_mode_0"), val = tensor("EXACT")]; - tensor input_127 = gelu(mode = input_127_mode_0, x = var_1042)[name = tensor("input_127")]; - tensor var_1046 = const()[name = tensor("op_1046"), val = tensor([1, 1])]; + tensor var_853_axis_0 = const()[name = tensor("op_853_axis_0"), val = tensor(1)]; + tensor var_853_0, tensor var_853_1, tensor var_853_2, tensor var_853_3, tensor var_853_4, tensor var_853_5, tensor var_853_6, tensor var_853_7, tensor var_853_8, tensor var_853_9, tensor var_853_10, tensor var_853_11 = split(axis = var_853_axis_0, split_sizes = tile_19, x = var_825)[name = tensor("op_853")]; + tensor var_867_equation_0 = const()[name = tensor("op_867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_867 = einsum(equation = var_867_equation_0, values = (var_840_0, var_826_0))[name = tensor("op_867")]; + tensor var_868_to_fp16 = const()[name = tensor("op_868_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_73_cast_fp16 = mul(x = var_867, y = var_868_to_fp16)[name = tensor("w_73_cast_fp16")]; + tensor var_871_equation_0 = const()[name = tensor("op_871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_871 = einsum(equation = var_871_equation_0, values = (var_840_1, var_826_1))[name = tensor("op_871")]; + tensor var_872_to_fp16 = const()[name = tensor("op_872_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_75_cast_fp16 = mul(x = var_871, y = var_872_to_fp16)[name = tensor("w_75_cast_fp16")]; + tensor var_875_equation_0 = const()[name = tensor("op_875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_875 = einsum(equation = var_875_equation_0, values = (var_840_2, var_826_2))[name = tensor("op_875")]; + tensor var_876_to_fp16 = const()[name = tensor("op_876_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_77_cast_fp16 = mul(x = var_875, y = var_876_to_fp16)[name = tensor("w_77_cast_fp16")]; + tensor var_879_equation_0 = const()[name = tensor("op_879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_879 = einsum(equation = var_879_equation_0, values = (var_840_3, var_826_3))[name = tensor("op_879")]; + tensor var_880_to_fp16 = const()[name = tensor("op_880_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_79_cast_fp16 = mul(x = var_879, y = var_880_to_fp16)[name = tensor("w_79_cast_fp16")]; + tensor var_883_equation_0 = const()[name = tensor("op_883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_883 = einsum(equation = var_883_equation_0, values = (var_840_4, var_826_4))[name = tensor("op_883")]; + tensor var_884_to_fp16 = const()[name = tensor("op_884_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_81_cast_fp16 = mul(x = var_883, y = var_884_to_fp16)[name = tensor("w_81_cast_fp16")]; + tensor var_887_equation_0 = const()[name = tensor("op_887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_887 = einsum(equation = var_887_equation_0, values = (var_840_5, var_826_5))[name = tensor("op_887")]; + tensor var_888_to_fp16 = const()[name = tensor("op_888_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_83_cast_fp16 = mul(x = var_887, y = var_888_to_fp16)[name = tensor("w_83_cast_fp16")]; + tensor var_891_equation_0 = const()[name = tensor("op_891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_891 = einsum(equation = var_891_equation_0, values = (var_840_6, var_826_6))[name = tensor("op_891")]; + tensor var_892_to_fp16 = const()[name = tensor("op_892_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_85_cast_fp16 = mul(x = var_891, y = var_892_to_fp16)[name = tensor("w_85_cast_fp16")]; + tensor var_895_equation_0 = const()[name = tensor("op_895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_895 = einsum(equation = var_895_equation_0, values = (var_840_7, var_826_7))[name = tensor("op_895")]; + tensor var_896_to_fp16 = const()[name = tensor("op_896_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_87_cast_fp16 = mul(x = var_895, y = var_896_to_fp16)[name = tensor("w_87_cast_fp16")]; + tensor var_899_equation_0 = const()[name = tensor("op_899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_899 = einsum(equation = var_899_equation_0, values = (var_840_8, var_826_8))[name = tensor("op_899")]; + tensor var_900_to_fp16 = const()[name = tensor("op_900_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_89_cast_fp16 = mul(x = var_899, y = var_900_to_fp16)[name = tensor("w_89_cast_fp16")]; + tensor var_903_equation_0 = const()[name = tensor("op_903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_903 = einsum(equation = var_903_equation_0, values = (var_840_9, var_826_9))[name = tensor("op_903")]; + tensor var_904_to_fp16 = const()[name = tensor("op_904_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_91_cast_fp16 = mul(x = var_903, y = var_904_to_fp16)[name = tensor("w_91_cast_fp16")]; + tensor var_907_equation_0 = const()[name = tensor("op_907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_907 = einsum(equation = var_907_equation_0, values = (var_840_10, var_826_10))[name = tensor("op_907")]; + tensor var_908_to_fp16 = const()[name = tensor("op_908_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_93_cast_fp16 = mul(x = var_907, y = var_908_to_fp16)[name = tensor("w_93_cast_fp16")]; + tensor var_911_equation_0 = const()[name = tensor("op_911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_911 = einsum(equation = var_911_equation_0, values = (var_840_11, var_826_11))[name = tensor("op_911")]; + tensor var_912_to_fp16 = const()[name = tensor("op_912_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_95_cast_fp16 = mul(x = var_911, y = var_912_to_fp16)[name = tensor("w_95_cast_fp16")]; + tensor input_113_cast_fp16 = add(x = w_73_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_113_cast_fp16")]; + tensor var_915_cast_fp16 = softmax(axis = var_783, x = input_113_cast_fp16)[name = tensor("op_915_cast_fp16")]; + tensor input_115_cast_fp16 = add(x = w_75_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_917_cast_fp16 = softmax(axis = var_783, x = input_115_cast_fp16)[name = tensor("op_917_cast_fp16")]; + tensor input_117_cast_fp16 = add(x = w_77_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor var_919_cast_fp16 = softmax(axis = var_783, x = input_117_cast_fp16)[name = tensor("op_919_cast_fp16")]; + tensor input_119_cast_fp16 = add(x = w_79_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor var_921_cast_fp16 = softmax(axis = var_783, x = input_119_cast_fp16)[name = tensor("op_921_cast_fp16")]; + tensor input_121_cast_fp16 = add(x = w_81_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor var_923_cast_fp16 = softmax(axis = var_783, x = input_121_cast_fp16)[name = tensor("op_923_cast_fp16")]; + tensor input_123_cast_fp16 = add(x = w_83_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_123_cast_fp16")]; + tensor var_925_cast_fp16 = softmax(axis = var_783, x = input_123_cast_fp16)[name = tensor("op_925_cast_fp16")]; + tensor input_125_cast_fp16 = add(x = w_85_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor var_927_cast_fp16 = softmax(axis = var_783, x = input_125_cast_fp16)[name = tensor("op_927_cast_fp16")]; + tensor input_127_cast_fp16 = add(x = w_87_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_127_cast_fp16")]; + tensor var_929_cast_fp16 = softmax(axis = var_783, x = input_127_cast_fp16)[name = tensor("op_929_cast_fp16")]; + tensor input_129_cast_fp16 = add(x = w_89_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_129_cast_fp16")]; + tensor var_931_cast_fp16 = softmax(axis = var_783, x = input_129_cast_fp16)[name = tensor("op_931_cast_fp16")]; + tensor input_131_cast_fp16 = add(x = w_91_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_131_cast_fp16")]; + tensor var_933_cast_fp16 = softmax(axis = var_783, x = input_131_cast_fp16)[name = tensor("op_933_cast_fp16")]; + tensor input_133_cast_fp16 = add(x = w_93_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor var_935_cast_fp16 = softmax(axis = var_783, x = input_133_cast_fp16)[name = tensor("op_935_cast_fp16")]; + tensor input_135_cast_fp16 = add(x = w_95_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor var_937_cast_fp16 = softmax(axis = var_783, x = input_135_cast_fp16)[name = tensor("op_937_cast_fp16")]; + tensor var_939_equation_0 = const()[name = tensor("op_939_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_939_cast_fp16 = einsum(equation = var_939_equation_0, values = (var_853_0, var_915_cast_fp16))[name = tensor("op_939_cast_fp16")]; + tensor var_941_equation_0 = const()[name = tensor("op_941_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_941_cast_fp16 = einsum(equation = var_941_equation_0, values = (var_853_1, var_917_cast_fp16))[name = tensor("op_941_cast_fp16")]; + tensor var_943_equation_0 = const()[name = tensor("op_943_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_943_cast_fp16 = einsum(equation = var_943_equation_0, values = (var_853_2, var_919_cast_fp16))[name = tensor("op_943_cast_fp16")]; + tensor var_945_equation_0 = const()[name = tensor("op_945_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_945_cast_fp16 = einsum(equation = var_945_equation_0, values = (var_853_3, var_921_cast_fp16))[name = tensor("op_945_cast_fp16")]; + tensor var_947_equation_0 = const()[name = tensor("op_947_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_947_cast_fp16 = einsum(equation = var_947_equation_0, values = (var_853_4, var_923_cast_fp16))[name = tensor("op_947_cast_fp16")]; + tensor var_949_equation_0 = const()[name = tensor("op_949_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_949_cast_fp16 = einsum(equation = var_949_equation_0, values = (var_853_5, var_925_cast_fp16))[name = tensor("op_949_cast_fp16")]; + tensor var_951_equation_0 = const()[name = tensor("op_951_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_951_cast_fp16 = einsum(equation = var_951_equation_0, values = (var_853_6, var_927_cast_fp16))[name = tensor("op_951_cast_fp16")]; + tensor var_953_equation_0 = const()[name = tensor("op_953_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_953_cast_fp16 = einsum(equation = var_953_equation_0, values = (var_853_7, var_929_cast_fp16))[name = tensor("op_953_cast_fp16")]; + tensor var_955_equation_0 = const()[name = tensor("op_955_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_955_cast_fp16 = einsum(equation = var_955_equation_0, values = (var_853_8, var_931_cast_fp16))[name = tensor("op_955_cast_fp16")]; + tensor var_957_equation_0 = const()[name = tensor("op_957_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_957_cast_fp16 = einsum(equation = var_957_equation_0, values = (var_853_9, var_933_cast_fp16))[name = tensor("op_957_cast_fp16")]; + tensor var_959_equation_0 = const()[name = tensor("op_959_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_959_cast_fp16 = einsum(equation = var_959_equation_0, values = (var_853_10, var_935_cast_fp16))[name = tensor("op_959_cast_fp16")]; + tensor var_961_equation_0 = const()[name = tensor("op_961_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_961_cast_fp16 = einsum(equation = var_961_equation_0, values = (var_853_11, var_937_cast_fp16))[name = tensor("op_961_cast_fp16")]; + tensor var_963_interleave_0 = const()[name = tensor("op_963_interleave_0"), val = tensor(false)]; + tensor var_963_cast_fp16 = concat(axis = var_783, interleave = var_963_interleave_0, values = (var_939_cast_fp16, var_941_cast_fp16, var_943_cast_fp16, var_945_cast_fp16, var_947_cast_fp16, var_949_cast_fp16, var_951_cast_fp16, var_953_cast_fp16, var_955_cast_fp16, var_957_cast_fp16, var_959_cast_fp16, var_961_cast_fp16))[name = tensor("op_963_cast_fp16")]; + tensor var_967 = const()[name = tensor("op_967"), val = tensor([1, 1])]; + tensor var_969 = const()[name = tensor("op_969"), val = tensor([1, 1])]; + tensor var_971_pad_type_0 = const()[name = tensor("op_971_pad_type_0"), val = tensor("custom")]; + tensor var_971_pad_0 = const()[name = tensor("op_971_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_971 = conv(bias = layers_3_attention_o_proj_bias, dilations = var_969, groups = var_783, pad = var_971_pad_0, pad_type = var_971_pad_type_0, strides = var_967, weight = layers_3_attention_o_proj_weight, x = var_963_cast_fp16)[name = tensor("op_971")]; + tensor var_973_interleave_0 = const()[name = tensor("op_973_interleave_0"), val = tensor(false)]; + tensor var_973 = concat(axis = var_784, interleave = var_973_interleave_0, values = var_971)[name = tensor("op_973")]; + tensor x_15 = add(x = transpose_45, y = var_973)[name = tensor("x_15")]; + tensor input_139_perm_0 = const()[name = tensor("input_139_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_61 = const()[name = tensor("weight_61"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66798720)))]; + tensor bias_59 = const()[name = tensor("bias_59"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66799552)))]; + tensor o_15_axes_0 = const()[name = tensor("o_15_axes_0"), val = tensor([-1])]; + tensor var_782_to_fp16 = const()[name = tensor("op_782_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_43 = transpose(perm = input_139_perm_0, x = x_15)[name = tensor("transpose_43")]; + tensor o_15_cast_fp16 = layer_norm(axes = o_15_axes_0, beta = bias_59, epsilon = var_782_to_fp16, gamma = weight_61, x = transpose_43)[name = tensor("o_15_cast_fp16")]; + tensor input_141_perm_0 = const()[name = tensor("input_141_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_989 = const()[name = tensor("op_989"), val = tensor([1, 1])]; + tensor var_991 = const()[name = tensor("op_991"), val = tensor([1, 1])]; + tensor var_993_pad_type_0 = const()[name = tensor("op_993_pad_type_0"), val = tensor("custom")]; + tensor var_993_pad_0 = const()[name = tensor("op_993_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_42 = transpose(perm = input_141_perm_0, x = o_15_cast_fp16)[name = tensor("transpose_42")]; + tensor var_993 = conv(bias = layers_3_mlp_fc1_bias, dilations = var_991, groups = var_783, pad = var_993_pad_0, pad_type = var_993_pad_type_0, strides = var_989, weight = layers_3_mlp_fc1_weight, x = transpose_42)[name = tensor("op_993")]; + tensor input_143_mode_0 = const()[name = tensor("input_143_mode_0"), val = tensor("EXACT")]; + tensor input_143 = gelu(mode = input_143_mode_0, x = var_993)[name = tensor("input_143")]; + tensor var_997 = const()[name = tensor("op_997"), val = tensor([1, 1])]; + tensor var_999 = const()[name = tensor("op_999"), val = tensor([1, 1])]; + tensor var_1001_pad_type_0 = const()[name = tensor("op_1001_pad_type_0"), val = tensor("custom")]; + tensor var_1001_pad_0 = const()[name = tensor("op_1001_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1001 = conv(bias = layers_3_mlp_fc2_bias, dilations = var_999, groups = var_783, pad = var_1001_pad_0, pad_type = var_1001_pad_type_0, strides = var_997, weight = layers_3_mlp_fc2_weight, x = input_143)[name = tensor("op_1001")]; + tensor x_17 = add(x = transpose_42, y = var_1001)[name = tensor("x_17")]; + tensor input_145_perm_0 = const()[name = tensor("input_145_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_67 = const()[name = tensor("weight_67"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66800384)))]; + tensor bias_65 = const()[name = tensor("bias_65"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66801216)))]; + tensor o_17_axes_0 = const()[name = tensor("o_17_axes_0"), val = tensor([-1])]; + tensor transpose_41 = transpose(perm = input_145_perm_0, x = x_17)[name = tensor("transpose_41")]; + tensor o_17_cast_fp16 = layer_norm(axes = o_17_axes_0, beta = bias_65, epsilon = var_782_to_fp16, gamma = weight_67, x = transpose_41)[name = tensor("o_17_cast_fp16")]; + tensor hidden_states_9_perm_0 = const()[name = tensor("hidden_states_9_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1017 = const()[name = tensor("op_1017"), val = tensor(1)]; + tensor var_1018 = const()[name = tensor("op_1018"), val = tensor(0)]; + tensor var_1041 = const()[name = tensor("op_1041"), val = tensor([1, 1])]; + tensor var_1043 = const()[name = tensor("op_1043"), val = tensor([1, 1])]; + tensor var_1045_pad_type_0 = const()[name = tensor("op_1045_pad_type_0"), val = tensor("custom")]; + tensor var_1045_pad_0 = const()[name = tensor("op_1045_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_40 = transpose(perm = hidden_states_9_perm_0, x = o_17_cast_fp16)[name = tensor("transpose_40")]; + tensor var_1045 = conv(bias = layers_4_attention_q_proj_bias, dilations = var_1043, groups = var_1017, pad = var_1045_pad_0, pad_type = var_1045_pad_type_0, strides = var_1041, weight = layers_4_attention_q_proj_weight, x = transpose_40)[name = tensor("op_1045")]; tensor var_1048 = const()[name = tensor("op_1048"), val = tensor([1, 1])]; - tensor var_1050_pad_type_0 = const()[name = tensor("op_1050_pad_type_0"), val = tensor("custom")]; - tensor var_1050_pad_0 = const()[name = tensor("op_1050_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1050 = conv(bias = layers_3_mlp_fc2_bias, dilations = var_1048, groups = var_826, pad = var_1050_pad_0, pad_type = var_1050_pad_type_0, strides = var_1046, weight = layers_3_mlp_fc2_weight, x = input_127)[name = tensor("op_1050")]; - tensor x_33 = add(x = var_1032_cast_fp16, y = var_1050)[name = tensor("x_33")]; - tensor var_823_promoted_1 = const()[name = tensor("op_823_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_824_promoted_1 = const()[name = tensor("op_824_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_35 = clip(alpha = var_823_promoted_1, beta = var_824_promoted_1, x = x_33)[name = tensor("x_35")]; - tensor var_1055 = const()[name = tensor("op_1055"), val = tensor([1])]; - tensor mean_17 = reduce_mean(axes = var_1055, keep_dims = var_828, x = x_35)[name = tensor("mean_17")]; - tensor zero_mean_17 = sub(x = x_35, y = mean_17)[name = tensor("zero_mean_17")]; - tensor var_825_promoted_1 = const()[name = tensor("op_825_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1058 = pow(x = zero_mean_17, y = var_825_promoted_1)[name = tensor("op_1058")]; - tensor var_1059 = const()[name = tensor("op_1059"), val = tensor([1])]; - tensor var_1060 = reduce_mean(axes = var_1059, keep_dims = var_828, x = var_1058)[name = tensor("op_1060")]; - tensor var_1061_to_fp16 = const()[name = tensor("op_1061_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1062_cast_fp16 = add(x = var_1060, y = var_1061_to_fp16)[name = tensor("op_1062_cast_fp16")]; - tensor denom_17_epsilon_0 = const()[name = tensor("denom_17_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0, x = var_1062_cast_fp16)[name = tensor("denom_17_cast_fp16")]; - tensor var_1064_cast_fp16 = mul(x = zero_mean_17, y = denom_17_cast_fp16)[name = tensor("op_1064_cast_fp16")]; - tensor var_1066_gamma_0_to_fp16 = const()[name = tensor("op_1066_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66802048)))]; - tensor var_1066_beta_0_to_fp16 = const()[name = tensor("op_1066_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66802880)))]; - tensor var_1066_epsilon_0_to_fp16 = const()[name = tensor("op_1066_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1066_cast_fp16 = batch_norm(beta = var_1066_beta_0_to_fp16, epsilon = var_1066_epsilon_0_to_fp16, gamma = var_1066_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1064_cast_fp16)[name = tensor("op_1066_cast_fp16")]; - tensor var_1072 = const()[name = tensor("op_1072"), val = tensor(1)]; - tensor var_1073 = const()[name = tensor("op_1073"), val = tensor(0)]; - tensor var_1074 = const()[name = tensor("op_1074"), val = tensor(true)]; - tensor var_1096 = const()[name = tensor("op_1096"), val = tensor([1, 1])]; - tensor var_1098 = const()[name = tensor("op_1098"), val = tensor([1, 1])]; - tensor var_1100_pad_type_0 = const()[name = tensor("op_1100_pad_type_0"), val = tensor("custom")]; - tensor var_1100_pad_0 = const()[name = tensor("op_1100_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1100 = conv(bias = layers_4_attention_q_proj_bias, dilations = var_1098, groups = var_1072, pad = var_1100_pad_0, pad_type = var_1100_pad_type_0, strides = var_1096, weight = layers_4_attention_q_proj_weight, x = var_1066_cast_fp16)[name = tensor("op_1100")]; - tensor var_1103 = const()[name = tensor("op_1103"), val = tensor([1, 1])]; - tensor var_1105 = const()[name = tensor("op_1105"), val = tensor([1, 1])]; + tensor var_1050 = const()[name = tensor("op_1050"), val = tensor([1, 1])]; tensor ks_9_pad_type_0 = const()[name = tensor("ks_9_pad_type_0"), val = tensor("custom")]; tensor ks_9_pad_0 = const()[name = tensor("ks_9_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_9 = conv(bias = layers_4_attention_k_proj_bias, dilations = var_1105, groups = var_1072, pad = ks_9_pad_0, pad_type = ks_9_pad_type_0, strides = var_1103, weight = layers_4_attention_k_proj_weight, x = var_1066_cast_fp16)[name = tensor("ks_9")]; - tensor var_1110 = const()[name = tensor("op_1110"), val = tensor([1, 1])]; - tensor var_1112 = const()[name = tensor("op_1112"), val = tensor([1, 1])]; - tensor var_1114_pad_type_0 = const()[name = tensor("op_1114_pad_type_0"), val = tensor("custom")]; - tensor var_1114_pad_0 = const()[name = tensor("op_1114_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1114 = conv(bias = layers_4_attention_v_proj_bias, dilations = var_1112, groups = var_1072, pad = var_1114_pad_0, pad_type = var_1114_pad_type_0, strides = var_1110, weight = layers_4_attention_v_proj_weight, x = var_1066_cast_fp16)[name = tensor("op_1114")]; + tensor ks_9 = conv(bias = layers_4_attention_k_proj_bias, dilations = var_1050, groups = var_1017, pad = ks_9_pad_0, pad_type = ks_9_pad_type_0, strides = var_1048, weight = layers_4_attention_k_proj_weight, x = transpose_40)[name = tensor("ks_9")]; + tensor var_1055 = const()[name = tensor("op_1055"), val = tensor([1, 1])]; + tensor var_1057 = const()[name = tensor("op_1057"), val = tensor([1, 1])]; + tensor var_1059_pad_type_0 = const()[name = tensor("op_1059_pad_type_0"), val = tensor("custom")]; + tensor var_1059_pad_0 = const()[name = tensor("op_1059_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1059 = conv(bias = layers_4_attention_v_proj_bias, dilations = var_1057, groups = var_1017, pad = var_1059_pad_0, pad_type = var_1059_pad_type_0, strides = var_1055, weight = layers_4_attention_v_proj_weight, x = transpose_40)[name = tensor("op_1059")]; tensor tile_22 = const()[name = tensor("tile_22"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1115_axis_0 = const()[name = tensor("op_1115_axis_0"), val = tensor(1)]; - tensor var_1115_0, tensor var_1115_1, tensor var_1115_2, tensor var_1115_3, tensor var_1115_4, tensor var_1115_5, tensor var_1115_6, tensor var_1115_7, tensor var_1115_8, tensor var_1115_9, tensor var_1115_10, tensor var_1115_11 = split(axis = var_1115_axis_0, split_sizes = tile_22, x = var_1100)[name = tensor("op_1115")]; - tensor var_1128_perm_0 = const()[name = tensor("op_1128_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1060_axis_0 = const()[name = tensor("op_1060_axis_0"), val = tensor(1)]; + tensor var_1060_0, tensor var_1060_1, tensor var_1060_2, tensor var_1060_3, tensor var_1060_4, tensor var_1060_5, tensor var_1060_6, tensor var_1060_7, tensor var_1060_8, tensor var_1060_9, tensor var_1060_10, tensor var_1060_11 = split(axis = var_1060_axis_0, split_sizes = tile_22, x = var_1045)[name = tensor("op_1060")]; + tensor var_1073_perm_0 = const()[name = tensor("op_1073_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_23 = const()[name = tensor("tile_23"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1129_axis_0 = const()[name = tensor("op_1129_axis_0"), val = tensor(3)]; - tensor transpose_7 = transpose(perm = var_1128_perm_0, x = ks_9)[name = tensor("transpose_7")]; - tensor var_1129_0, tensor var_1129_1, tensor var_1129_2, tensor var_1129_3, tensor var_1129_4, tensor var_1129_5, tensor var_1129_6, tensor var_1129_7, tensor var_1129_8, tensor var_1129_9, tensor var_1129_10, tensor var_1129_11 = split(axis = var_1129_axis_0, split_sizes = tile_23, x = transpose_7)[name = tensor("op_1129")]; + tensor var_1074_axis_0 = const()[name = tensor("op_1074_axis_0"), val = tensor(3)]; + tensor transpose_39 = transpose(perm = var_1073_perm_0, x = ks_9)[name = tensor("transpose_39")]; + tensor var_1074_0, tensor var_1074_1, tensor var_1074_2, tensor var_1074_3, tensor var_1074_4, tensor var_1074_5, tensor var_1074_6, tensor var_1074_7, tensor var_1074_8, tensor var_1074_9, tensor var_1074_10, tensor var_1074_11 = split(axis = var_1074_axis_0, split_sizes = tile_23, x = transpose_39)[name = tensor("op_1074")]; tensor tile_24 = const()[name = tensor("tile_24"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1142_axis_0 = const()[name = tensor("op_1142_axis_0"), val = tensor(1)]; - tensor var_1142_0, tensor var_1142_1, tensor var_1142_2, tensor var_1142_3, tensor var_1142_4, tensor var_1142_5, tensor var_1142_6, tensor var_1142_7, tensor var_1142_8, tensor var_1142_9, tensor var_1142_10, tensor var_1142_11 = split(axis = var_1142_axis_0, split_sizes = tile_24, x = var_1114)[name = tensor("op_1142")]; - tensor var_1156_equation_0 = const()[name = tensor("op_1156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1156 = einsum(equation = var_1156_equation_0, values = (var_1129_0, var_1115_0))[name = tensor("op_1156")]; - tensor var_1157_to_fp16 = const()[name = tensor("op_1157_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_97_cast_fp16 = mul(x = var_1156, y = var_1157_to_fp16)[name = tensor("w_97_cast_fp16")]; - tensor var_1160_equation_0 = const()[name = tensor("op_1160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1160 = einsum(equation = var_1160_equation_0, values = (var_1129_1, var_1115_1))[name = tensor("op_1160")]; - tensor var_1161_to_fp16 = const()[name = tensor("op_1161_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_99_cast_fp16 = mul(x = var_1160, y = var_1161_to_fp16)[name = tensor("w_99_cast_fp16")]; - tensor var_1164_equation_0 = const()[name = tensor("op_1164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1164 = einsum(equation = var_1164_equation_0, values = (var_1129_2, var_1115_2))[name = tensor("op_1164")]; - tensor var_1165_to_fp16 = const()[name = tensor("op_1165_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_101_cast_fp16 = mul(x = var_1164, y = var_1165_to_fp16)[name = tensor("w_101_cast_fp16")]; - tensor var_1168_equation_0 = const()[name = tensor("op_1168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1168 = einsum(equation = var_1168_equation_0, values = (var_1129_3, var_1115_3))[name = tensor("op_1168")]; - tensor var_1169_to_fp16 = const()[name = tensor("op_1169_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_103_cast_fp16 = mul(x = var_1168, y = var_1169_to_fp16)[name = tensor("w_103_cast_fp16")]; - tensor var_1172_equation_0 = const()[name = tensor("op_1172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1172 = einsum(equation = var_1172_equation_0, values = (var_1129_4, var_1115_4))[name = tensor("op_1172")]; - tensor var_1173_to_fp16 = const()[name = tensor("op_1173_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_105_cast_fp16 = mul(x = var_1172, y = var_1173_to_fp16)[name = tensor("w_105_cast_fp16")]; - tensor var_1176_equation_0 = const()[name = tensor("op_1176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1176 = einsum(equation = var_1176_equation_0, values = (var_1129_5, var_1115_5))[name = tensor("op_1176")]; - tensor var_1177_to_fp16 = const()[name = tensor("op_1177_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_107_cast_fp16 = mul(x = var_1176, y = var_1177_to_fp16)[name = tensor("w_107_cast_fp16")]; - tensor var_1180_equation_0 = const()[name = tensor("op_1180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1180 = einsum(equation = var_1180_equation_0, values = (var_1129_6, var_1115_6))[name = tensor("op_1180")]; - tensor var_1181_to_fp16 = const()[name = tensor("op_1181_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_109_cast_fp16 = mul(x = var_1180, y = var_1181_to_fp16)[name = tensor("w_109_cast_fp16")]; - tensor var_1184_equation_0 = const()[name = tensor("op_1184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1184 = einsum(equation = var_1184_equation_0, values = (var_1129_7, var_1115_7))[name = tensor("op_1184")]; - tensor var_1185_to_fp16 = const()[name = tensor("op_1185_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_111_cast_fp16 = mul(x = var_1184, y = var_1185_to_fp16)[name = tensor("w_111_cast_fp16")]; - tensor var_1188_equation_0 = const()[name = tensor("op_1188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1188 = einsum(equation = var_1188_equation_0, values = (var_1129_8, var_1115_8))[name = tensor("op_1188")]; - tensor var_1189_to_fp16 = const()[name = tensor("op_1189_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_113_cast_fp16 = mul(x = var_1188, y = var_1189_to_fp16)[name = tensor("w_113_cast_fp16")]; - tensor var_1192_equation_0 = const()[name = tensor("op_1192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1192 = einsum(equation = var_1192_equation_0, values = (var_1129_9, var_1115_9))[name = tensor("op_1192")]; - tensor var_1193_to_fp16 = const()[name = tensor("op_1193_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_115_cast_fp16 = mul(x = var_1192, y = var_1193_to_fp16)[name = tensor("w_115_cast_fp16")]; - tensor var_1196_equation_0 = const()[name = tensor("op_1196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1196 = einsum(equation = var_1196_equation_0, values = (var_1129_10, var_1115_10))[name = tensor("op_1196")]; - tensor var_1197_to_fp16 = const()[name = tensor("op_1197_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_117_cast_fp16 = mul(x = var_1196, y = var_1197_to_fp16)[name = tensor("w_117_cast_fp16")]; - tensor var_1200_equation_0 = const()[name = tensor("op_1200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1200 = einsum(equation = var_1200_equation_0, values = (var_1129_11, var_1115_11))[name = tensor("op_1200")]; - tensor var_1201_to_fp16 = const()[name = tensor("op_1201_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_119_cast_fp16 = mul(x = var_1200, y = var_1201_to_fp16)[name = tensor("w_119_cast_fp16")]; - tensor input_131_cast_fp16 = add(x = w_97_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_131_cast_fp16")]; - tensor var_1204_cast_fp16 = softmax(axis = var_1072, x = input_131_cast_fp16)[name = tensor("op_1204_cast_fp16")]; - tensor input_133_cast_fp16 = add(x = w_99_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_133_cast_fp16")]; - tensor var_1206_cast_fp16 = softmax(axis = var_1072, x = input_133_cast_fp16)[name = tensor("op_1206_cast_fp16")]; - tensor input_135_cast_fp16 = add(x = w_101_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_135_cast_fp16")]; - tensor var_1208_cast_fp16 = softmax(axis = var_1072, x = input_135_cast_fp16)[name = tensor("op_1208_cast_fp16")]; - tensor input_137_cast_fp16 = add(x = w_103_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_137_cast_fp16")]; - tensor var_1210_cast_fp16 = softmax(axis = var_1072, x = input_137_cast_fp16)[name = tensor("op_1210_cast_fp16")]; - tensor input_139_cast_fp16 = add(x = w_105_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_139_cast_fp16")]; - tensor var_1212_cast_fp16 = softmax(axis = var_1072, x = input_139_cast_fp16)[name = tensor("op_1212_cast_fp16")]; - tensor input_141_cast_fp16 = add(x = w_107_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_141_cast_fp16")]; - tensor var_1214_cast_fp16 = softmax(axis = var_1072, x = input_141_cast_fp16)[name = tensor("op_1214_cast_fp16")]; - tensor input_143_cast_fp16 = add(x = w_109_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_143_cast_fp16")]; - tensor var_1216_cast_fp16 = softmax(axis = var_1072, x = input_143_cast_fp16)[name = tensor("op_1216_cast_fp16")]; - tensor input_145_cast_fp16 = add(x = w_111_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_145_cast_fp16")]; - tensor var_1218_cast_fp16 = softmax(axis = var_1072, x = input_145_cast_fp16)[name = tensor("op_1218_cast_fp16")]; - tensor input_147_cast_fp16 = add(x = w_113_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_147_cast_fp16")]; - tensor var_1220_cast_fp16 = softmax(axis = var_1072, x = input_147_cast_fp16)[name = tensor("op_1220_cast_fp16")]; - tensor input_149_cast_fp16 = add(x = w_115_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_149_cast_fp16")]; - tensor var_1222_cast_fp16 = softmax(axis = var_1072, x = input_149_cast_fp16)[name = tensor("op_1222_cast_fp16")]; - tensor input_151_cast_fp16 = add(x = w_117_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_151_cast_fp16")]; - tensor var_1224_cast_fp16 = softmax(axis = var_1072, x = input_151_cast_fp16)[name = tensor("op_1224_cast_fp16")]; - tensor input_153_cast_fp16 = add(x = w_119_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_153_cast_fp16")]; - tensor var_1226_cast_fp16 = softmax(axis = var_1072, x = input_153_cast_fp16)[name = tensor("op_1226_cast_fp16")]; - tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1142_0, var_1204_cast_fp16))[name = tensor("op_1228_cast_fp16")]; - tensor var_1230_equation_0 = const()[name = tensor("op_1230_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1230_cast_fp16 = einsum(equation = var_1230_equation_0, values = (var_1142_1, var_1206_cast_fp16))[name = tensor("op_1230_cast_fp16")]; - tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1142_2, var_1208_cast_fp16))[name = tensor("op_1232_cast_fp16")]; - tensor var_1234_equation_0 = const()[name = tensor("op_1234_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1234_cast_fp16 = einsum(equation = var_1234_equation_0, values = (var_1142_3, var_1210_cast_fp16))[name = tensor("op_1234_cast_fp16")]; - tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1142_4, var_1212_cast_fp16))[name = tensor("op_1236_cast_fp16")]; - tensor var_1238_equation_0 = const()[name = tensor("op_1238_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1238_cast_fp16 = einsum(equation = var_1238_equation_0, values = (var_1142_5, var_1214_cast_fp16))[name = tensor("op_1238_cast_fp16")]; - tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1142_6, var_1216_cast_fp16))[name = tensor("op_1240_cast_fp16")]; - tensor var_1242_equation_0 = const()[name = tensor("op_1242_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1242_cast_fp16 = einsum(equation = var_1242_equation_0, values = (var_1142_7, var_1218_cast_fp16))[name = tensor("op_1242_cast_fp16")]; - tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1142_8, var_1220_cast_fp16))[name = tensor("op_1244_cast_fp16")]; - tensor var_1246_equation_0 = const()[name = tensor("op_1246_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1246_cast_fp16 = einsum(equation = var_1246_equation_0, values = (var_1142_9, var_1222_cast_fp16))[name = tensor("op_1246_cast_fp16")]; - tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1142_10, var_1224_cast_fp16))[name = tensor("op_1248_cast_fp16")]; - tensor var_1250_equation_0 = const()[name = tensor("op_1250_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1250_cast_fp16 = einsum(equation = var_1250_equation_0, values = (var_1142_11, var_1226_cast_fp16))[name = tensor("op_1250_cast_fp16")]; - tensor var_1252_interleave_0 = const()[name = tensor("op_1252_interleave_0"), val = tensor(false)]; - tensor var_1252_cast_fp16 = concat(axis = var_1072, interleave = var_1252_interleave_0, values = (var_1228_cast_fp16, var_1230_cast_fp16, var_1232_cast_fp16, var_1234_cast_fp16, var_1236_cast_fp16, var_1238_cast_fp16, var_1240_cast_fp16, var_1242_cast_fp16, var_1244_cast_fp16, var_1246_cast_fp16, var_1248_cast_fp16, var_1250_cast_fp16))[name = tensor("op_1252_cast_fp16")]; - tensor var_1256 = const()[name = tensor("op_1256"), val = tensor([1, 1])]; - tensor var_1258 = const()[name = tensor("op_1258"), val = tensor([1, 1])]; - tensor var_1260_pad_type_0 = const()[name = tensor("op_1260_pad_type_0"), val = tensor("custom")]; - tensor var_1260_pad_0 = const()[name = tensor("op_1260_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1260 = conv(bias = layers_4_attention_o_proj_bias, dilations = var_1258, groups = var_1072, pad = var_1260_pad_0, pad_type = var_1260_pad_type_0, strides = var_1256, weight = layers_4_attention_o_proj_weight, x = var_1252_cast_fp16)[name = tensor("op_1260")]; - tensor var_1262_interleave_0 = const()[name = tensor("op_1262_interleave_0"), val = tensor(false)]; - tensor var_1262 = concat(axis = var_1073, interleave = var_1262_interleave_0, values = var_1260)[name = tensor("op_1262")]; - tensor x_37 = add(x = var_1066_cast_fp16, y = var_1262)[name = tensor("x_37")]; - tensor var_1069_promoted = const()[name = tensor("op_1069_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1070_promoted = const()[name = tensor("op_1070_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_39 = clip(alpha = var_1069_promoted, beta = var_1070_promoted, x = x_37)[name = tensor("x_39")]; - tensor var_1267 = const()[name = tensor("op_1267"), val = tensor([1])]; - tensor mean_19 = reduce_mean(axes = var_1267, keep_dims = var_1074, x = x_39)[name = tensor("mean_19")]; - tensor zero_mean_19 = sub(x = x_39, y = mean_19)[name = tensor("zero_mean_19")]; - tensor var_1071_promoted = const()[name = tensor("op_1071_promoted"), val = tensor(0x1p+1)]; - tensor var_1270 = pow(x = zero_mean_19, y = var_1071_promoted)[name = tensor("op_1270")]; - tensor var_1271 = const()[name = tensor("op_1271"), val = tensor([1])]; - tensor var_1272 = reduce_mean(axes = var_1271, keep_dims = var_1074, x = var_1270)[name = tensor("op_1272")]; - tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1274_cast_fp16 = add(x = var_1272, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; - tensor denom_19_epsilon_0 = const()[name = tensor("denom_19_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0, x = var_1274_cast_fp16)[name = tensor("denom_19_cast_fp16")]; - tensor var_1276_cast_fp16 = mul(x = zero_mean_19, y = denom_19_cast_fp16)[name = tensor("op_1276_cast_fp16")]; - tensor var_1278_gamma_0_to_fp16 = const()[name = tensor("op_1278_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66803712)))]; - tensor var_1278_beta_0_to_fp16 = const()[name = tensor("op_1278_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66804544)))]; - tensor var_1278_epsilon_0_to_fp16 = const()[name = tensor("op_1278_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1278_cast_fp16 = batch_norm(beta = var_1278_beta_0_to_fp16, epsilon = var_1278_epsilon_0_to_fp16, gamma = var_1278_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1276_cast_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1087_axis_0 = const()[name = tensor("op_1087_axis_0"), val = tensor(1)]; + tensor var_1087_0, tensor var_1087_1, tensor var_1087_2, tensor var_1087_3, tensor var_1087_4, tensor var_1087_5, tensor var_1087_6, tensor var_1087_7, tensor var_1087_8, tensor var_1087_9, tensor var_1087_10, tensor var_1087_11 = split(axis = var_1087_axis_0, split_sizes = tile_24, x = var_1059)[name = tensor("op_1087")]; + tensor var_1101_equation_0 = const()[name = tensor("op_1101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1101 = einsum(equation = var_1101_equation_0, values = (var_1074_0, var_1060_0))[name = tensor("op_1101")]; + tensor var_1102_to_fp16 = const()[name = tensor("op_1102_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_97_cast_fp16 = mul(x = var_1101, y = var_1102_to_fp16)[name = tensor("w_97_cast_fp16")]; + tensor var_1105_equation_0 = const()[name = tensor("op_1105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1105 = einsum(equation = var_1105_equation_0, values = (var_1074_1, var_1060_1))[name = tensor("op_1105")]; + tensor var_1106_to_fp16 = const()[name = tensor("op_1106_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_99_cast_fp16 = mul(x = var_1105, y = var_1106_to_fp16)[name = tensor("w_99_cast_fp16")]; + tensor var_1109_equation_0 = const()[name = tensor("op_1109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1109 = einsum(equation = var_1109_equation_0, values = (var_1074_2, var_1060_2))[name = tensor("op_1109")]; + tensor var_1110_to_fp16 = const()[name = tensor("op_1110_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_101_cast_fp16 = mul(x = var_1109, y = var_1110_to_fp16)[name = tensor("w_101_cast_fp16")]; + tensor var_1113_equation_0 = const()[name = tensor("op_1113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1113 = einsum(equation = var_1113_equation_0, values = (var_1074_3, var_1060_3))[name = tensor("op_1113")]; + tensor var_1114_to_fp16 = const()[name = tensor("op_1114_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_103_cast_fp16 = mul(x = var_1113, y = var_1114_to_fp16)[name = tensor("w_103_cast_fp16")]; + tensor var_1117_equation_0 = const()[name = tensor("op_1117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1117 = einsum(equation = var_1117_equation_0, values = (var_1074_4, var_1060_4))[name = tensor("op_1117")]; + tensor var_1118_to_fp16 = const()[name = tensor("op_1118_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_105_cast_fp16 = mul(x = var_1117, y = var_1118_to_fp16)[name = tensor("w_105_cast_fp16")]; + tensor var_1121_equation_0 = const()[name = tensor("op_1121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1121 = einsum(equation = var_1121_equation_0, values = (var_1074_5, var_1060_5))[name = tensor("op_1121")]; + tensor var_1122_to_fp16 = const()[name = tensor("op_1122_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_107_cast_fp16 = mul(x = var_1121, y = var_1122_to_fp16)[name = tensor("w_107_cast_fp16")]; + tensor var_1125_equation_0 = const()[name = tensor("op_1125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1125 = einsum(equation = var_1125_equation_0, values = (var_1074_6, var_1060_6))[name = tensor("op_1125")]; + tensor var_1126_to_fp16 = const()[name = tensor("op_1126_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_109_cast_fp16 = mul(x = var_1125, y = var_1126_to_fp16)[name = tensor("w_109_cast_fp16")]; + tensor var_1129_equation_0 = const()[name = tensor("op_1129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1129 = einsum(equation = var_1129_equation_0, values = (var_1074_7, var_1060_7))[name = tensor("op_1129")]; + tensor var_1130_to_fp16 = const()[name = tensor("op_1130_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_111_cast_fp16 = mul(x = var_1129, y = var_1130_to_fp16)[name = tensor("w_111_cast_fp16")]; + tensor var_1133_equation_0 = const()[name = tensor("op_1133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1133 = einsum(equation = var_1133_equation_0, values = (var_1074_8, var_1060_8))[name = tensor("op_1133")]; + tensor var_1134_to_fp16 = const()[name = tensor("op_1134_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_113_cast_fp16 = mul(x = var_1133, y = var_1134_to_fp16)[name = tensor("w_113_cast_fp16")]; + tensor var_1137_equation_0 = const()[name = tensor("op_1137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1137 = einsum(equation = var_1137_equation_0, values = (var_1074_9, var_1060_9))[name = tensor("op_1137")]; + tensor var_1138_to_fp16 = const()[name = tensor("op_1138_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_115_cast_fp16 = mul(x = var_1137, y = var_1138_to_fp16)[name = tensor("w_115_cast_fp16")]; + tensor var_1141_equation_0 = const()[name = tensor("op_1141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1141 = einsum(equation = var_1141_equation_0, values = (var_1074_10, var_1060_10))[name = tensor("op_1141")]; + tensor var_1142_to_fp16 = const()[name = tensor("op_1142_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_117_cast_fp16 = mul(x = var_1141, y = var_1142_to_fp16)[name = tensor("w_117_cast_fp16")]; + tensor var_1145_equation_0 = const()[name = tensor("op_1145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1145 = einsum(equation = var_1145_equation_0, values = (var_1074_11, var_1060_11))[name = tensor("op_1145")]; + tensor var_1146_to_fp16 = const()[name = tensor("op_1146_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_119_cast_fp16 = mul(x = var_1145, y = var_1146_to_fp16)[name = tensor("w_119_cast_fp16")]; + tensor input_149_cast_fp16 = add(x = w_97_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor var_1149_cast_fp16 = softmax(axis = var_1017, x = input_149_cast_fp16)[name = tensor("op_1149_cast_fp16")]; + tensor input_151_cast_fp16 = add(x = w_99_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_151_cast_fp16")]; + tensor var_1151_cast_fp16 = softmax(axis = var_1017, x = input_151_cast_fp16)[name = tensor("op_1151_cast_fp16")]; + tensor input_153_cast_fp16 = add(x = w_101_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_153_cast_fp16")]; + tensor var_1153_cast_fp16 = softmax(axis = var_1017, x = input_153_cast_fp16)[name = tensor("op_1153_cast_fp16")]; + tensor input_155_cast_fp16 = add(x = w_103_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor var_1155_cast_fp16 = softmax(axis = var_1017, x = input_155_cast_fp16)[name = tensor("op_1155_cast_fp16")]; + tensor input_157_cast_fp16 = add(x = w_105_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_157_cast_fp16")]; + tensor var_1157_cast_fp16 = softmax(axis = var_1017, x = input_157_cast_fp16)[name = tensor("op_1157_cast_fp16")]; + tensor input_159_cast_fp16 = add(x = w_107_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_159_cast_fp16")]; + tensor var_1159_cast_fp16 = softmax(axis = var_1017, x = input_159_cast_fp16)[name = tensor("op_1159_cast_fp16")]; + tensor input_161_cast_fp16 = add(x = w_109_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_161_cast_fp16")]; + tensor var_1161_cast_fp16 = softmax(axis = var_1017, x = input_161_cast_fp16)[name = tensor("op_1161_cast_fp16")]; + tensor input_163_cast_fp16 = add(x = w_111_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor var_1163_cast_fp16 = softmax(axis = var_1017, x = input_163_cast_fp16)[name = tensor("op_1163_cast_fp16")]; + tensor input_165_cast_fp16 = add(x = w_113_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor var_1165_cast_fp16 = softmax(axis = var_1017, x = input_165_cast_fp16)[name = tensor("op_1165_cast_fp16")]; + tensor input_167_cast_fp16 = add(x = w_115_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor var_1167_cast_fp16 = softmax(axis = var_1017, x = input_167_cast_fp16)[name = tensor("op_1167_cast_fp16")]; + tensor input_169_cast_fp16 = add(x = w_117_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_169_cast_fp16")]; + tensor var_1169_cast_fp16 = softmax(axis = var_1017, x = input_169_cast_fp16)[name = tensor("op_1169_cast_fp16")]; + tensor input_171_cast_fp16 = add(x = w_119_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor var_1171_cast_fp16 = softmax(axis = var_1017, x = input_171_cast_fp16)[name = tensor("op_1171_cast_fp16")]; + tensor var_1173_equation_0 = const()[name = tensor("op_1173_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1173_cast_fp16 = einsum(equation = var_1173_equation_0, values = (var_1087_0, var_1149_cast_fp16))[name = tensor("op_1173_cast_fp16")]; + tensor var_1175_equation_0 = const()[name = tensor("op_1175_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1175_cast_fp16 = einsum(equation = var_1175_equation_0, values = (var_1087_1, var_1151_cast_fp16))[name = tensor("op_1175_cast_fp16")]; + tensor var_1177_equation_0 = const()[name = tensor("op_1177_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1177_cast_fp16 = einsum(equation = var_1177_equation_0, values = (var_1087_2, var_1153_cast_fp16))[name = tensor("op_1177_cast_fp16")]; + tensor var_1179_equation_0 = const()[name = tensor("op_1179_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1179_cast_fp16 = einsum(equation = var_1179_equation_0, values = (var_1087_3, var_1155_cast_fp16))[name = tensor("op_1179_cast_fp16")]; + tensor var_1181_equation_0 = const()[name = tensor("op_1181_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1181_cast_fp16 = einsum(equation = var_1181_equation_0, values = (var_1087_4, var_1157_cast_fp16))[name = tensor("op_1181_cast_fp16")]; + tensor var_1183_equation_0 = const()[name = tensor("op_1183_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1183_cast_fp16 = einsum(equation = var_1183_equation_0, values = (var_1087_5, var_1159_cast_fp16))[name = tensor("op_1183_cast_fp16")]; + tensor var_1185_equation_0 = const()[name = tensor("op_1185_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1185_cast_fp16 = einsum(equation = var_1185_equation_0, values = (var_1087_6, var_1161_cast_fp16))[name = tensor("op_1185_cast_fp16")]; + tensor var_1187_equation_0 = const()[name = tensor("op_1187_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1187_cast_fp16 = einsum(equation = var_1187_equation_0, values = (var_1087_7, var_1163_cast_fp16))[name = tensor("op_1187_cast_fp16")]; + tensor var_1189_equation_0 = const()[name = tensor("op_1189_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1189_cast_fp16 = einsum(equation = var_1189_equation_0, values = (var_1087_8, var_1165_cast_fp16))[name = tensor("op_1189_cast_fp16")]; + tensor var_1191_equation_0 = const()[name = tensor("op_1191_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1191_cast_fp16 = einsum(equation = var_1191_equation_0, values = (var_1087_9, var_1167_cast_fp16))[name = tensor("op_1191_cast_fp16")]; + tensor var_1193_equation_0 = const()[name = tensor("op_1193_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1193_cast_fp16 = einsum(equation = var_1193_equation_0, values = (var_1087_10, var_1169_cast_fp16))[name = tensor("op_1193_cast_fp16")]; + tensor var_1195_equation_0 = const()[name = tensor("op_1195_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1195_cast_fp16 = einsum(equation = var_1195_equation_0, values = (var_1087_11, var_1171_cast_fp16))[name = tensor("op_1195_cast_fp16")]; + tensor var_1197_interleave_0 = const()[name = tensor("op_1197_interleave_0"), val = tensor(false)]; + tensor var_1197_cast_fp16 = concat(axis = var_1017, interleave = var_1197_interleave_0, values = (var_1173_cast_fp16, var_1175_cast_fp16, var_1177_cast_fp16, var_1179_cast_fp16, var_1181_cast_fp16, var_1183_cast_fp16, var_1185_cast_fp16, var_1187_cast_fp16, var_1189_cast_fp16, var_1191_cast_fp16, var_1193_cast_fp16, var_1195_cast_fp16))[name = tensor("op_1197_cast_fp16")]; + tensor var_1201 = const()[name = tensor("op_1201"), val = tensor([1, 1])]; + tensor var_1203 = const()[name = tensor("op_1203"), val = tensor([1, 1])]; + tensor var_1205_pad_type_0 = const()[name = tensor("op_1205_pad_type_0"), val = tensor("custom")]; + tensor var_1205_pad_0 = const()[name = tensor("op_1205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1205 = conv(bias = layers_4_attention_o_proj_bias, dilations = var_1203, groups = var_1017, pad = var_1205_pad_0, pad_type = var_1205_pad_type_0, strides = var_1201, weight = layers_4_attention_o_proj_weight, x = var_1197_cast_fp16)[name = tensor("op_1205")]; + tensor var_1207_interleave_0 = const()[name = tensor("op_1207_interleave_0"), val = tensor(false)]; + tensor var_1207 = concat(axis = var_1018, interleave = var_1207_interleave_0, values = var_1205)[name = tensor("op_1207")]; + tensor x_19 = add(x = transpose_40, y = var_1207)[name = tensor("x_19")]; + tensor input_175_perm_0 = const()[name = tensor("input_175_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_77 = const()[name = tensor("weight_77"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66802048)))]; + tensor bias_75 = const()[name = tensor("bias_75"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66802880)))]; + tensor o_19_axes_0 = const()[name = tensor("o_19_axes_0"), val = tensor([-1])]; + tensor var_1016_to_fp16 = const()[name = tensor("op_1016_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_38 = transpose(perm = input_175_perm_0, x = x_19)[name = tensor("transpose_38")]; + tensor o_19_cast_fp16 = layer_norm(axes = o_19_axes_0, beta = bias_75, epsilon = var_1016_to_fp16, gamma = weight_77, x = transpose_38)[name = tensor("o_19_cast_fp16")]; + tensor input_177_perm_0 = const()[name = tensor("input_177_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1223 = const()[name = tensor("op_1223"), val = tensor([1, 1])]; + tensor var_1225 = const()[name = tensor("op_1225"), val = tensor([1, 1])]; + tensor var_1227_pad_type_0 = const()[name = tensor("op_1227_pad_type_0"), val = tensor("custom")]; + tensor var_1227_pad_0 = const()[name = tensor("op_1227_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_37 = transpose(perm = input_177_perm_0, x = o_19_cast_fp16)[name = tensor("transpose_37")]; + tensor var_1227 = conv(bias = layers_4_mlp_fc1_bias, dilations = var_1225, groups = var_1017, pad = var_1227_pad_0, pad_type = var_1227_pad_type_0, strides = var_1223, weight = layers_4_mlp_fc1_weight, x = transpose_37)[name = tensor("op_1227")]; + tensor input_179_mode_0 = const()[name = tensor("input_179_mode_0"), val = tensor("EXACT")]; + tensor input_179 = gelu(mode = input_179_mode_0, x = var_1227)[name = tensor("input_179")]; + tensor var_1231 = const()[name = tensor("op_1231"), val = tensor([1, 1])]; + tensor var_1233 = const()[name = tensor("op_1233"), val = tensor([1, 1])]; + tensor var_1235_pad_type_0 = const()[name = tensor("op_1235_pad_type_0"), val = tensor("custom")]; + tensor var_1235_pad_0 = const()[name = tensor("op_1235_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1235 = conv(bias = layers_4_mlp_fc2_bias, dilations = var_1233, groups = var_1017, pad = var_1235_pad_0, pad_type = var_1235_pad_type_0, strides = var_1231, weight = layers_4_mlp_fc2_weight, x = input_179)[name = tensor("op_1235")]; + tensor x_21 = add(x = transpose_37, y = var_1235)[name = tensor("x_21")]; + tensor input_181_perm_0 = const()[name = tensor("input_181_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_83 = const()[name = tensor("weight_83"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66803712)))]; + tensor bias_81 = const()[name = tensor("bias_81"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66804544)))]; + tensor o_21_axes_0 = const()[name = tensor("o_21_axes_0"), val = tensor([-1])]; + tensor transpose_36 = transpose(perm = input_181_perm_0, x = x_21)[name = tensor("transpose_36")]; + tensor o_21_cast_fp16 = layer_norm(axes = o_21_axes_0, beta = bias_81, epsilon = var_1016_to_fp16, gamma = weight_83, x = transpose_36)[name = tensor("o_21_cast_fp16")]; + tensor hidden_states_11_perm_0 = const()[name = tensor("hidden_states_11_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1251 = const()[name = tensor("op_1251"), val = tensor(1)]; + tensor var_1252 = const()[name = tensor("op_1252"), val = tensor(0)]; + tensor var_1275 = const()[name = tensor("op_1275"), val = tensor([1, 1])]; + tensor var_1277 = const()[name = tensor("op_1277"), val = tensor([1, 1])]; + tensor var_1279_pad_type_0 = const()[name = tensor("op_1279_pad_type_0"), val = tensor("custom")]; + tensor var_1279_pad_0 = const()[name = tensor("op_1279_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_35 = transpose(perm = hidden_states_11_perm_0, x = o_21_cast_fp16)[name = tensor("transpose_35")]; + tensor var_1279 = conv(bias = layers_5_attention_q_proj_bias, dilations = var_1277, groups = var_1251, pad = var_1279_pad_0, pad_type = var_1279_pad_type_0, strides = var_1275, weight = layers_5_attention_q_proj_weight, x = transpose_35)[name = tensor("op_1279")]; + tensor var_1282 = const()[name = tensor("op_1282"), val = tensor([1, 1])]; tensor var_1284 = const()[name = tensor("op_1284"), val = tensor([1, 1])]; - tensor var_1286 = const()[name = tensor("op_1286"), val = tensor([1, 1])]; - tensor var_1288_pad_type_0 = const()[name = tensor("op_1288_pad_type_0"), val = tensor("custom")]; - tensor var_1288_pad_0 = const()[name = tensor("op_1288_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1288 = conv(bias = layers_4_mlp_fc1_bias, dilations = var_1286, groups = var_1072, pad = var_1288_pad_0, pad_type = var_1288_pad_type_0, strides = var_1284, weight = layers_4_mlp_fc1_weight, x = var_1278_cast_fp16)[name = tensor("op_1288")]; - tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; - tensor input_159 = gelu(mode = input_159_mode_0, x = var_1288)[name = tensor("input_159")]; - tensor var_1292 = const()[name = tensor("op_1292"), val = tensor([1, 1])]; - tensor var_1294 = const()[name = tensor("op_1294"), val = tensor([1, 1])]; - tensor var_1296_pad_type_0 = const()[name = tensor("op_1296_pad_type_0"), val = tensor("custom")]; - tensor var_1296_pad_0 = const()[name = tensor("op_1296_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1296 = conv(bias = layers_4_mlp_fc2_bias, dilations = var_1294, groups = var_1072, pad = var_1296_pad_0, pad_type = var_1296_pad_type_0, strides = var_1292, weight = layers_4_mlp_fc2_weight, x = input_159)[name = tensor("op_1296")]; - tensor x_41 = add(x = var_1278_cast_fp16, y = var_1296)[name = tensor("x_41")]; - tensor var_1069_promoted_1 = const()[name = tensor("op_1069_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1070_promoted_1 = const()[name = tensor("op_1070_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_43 = clip(alpha = var_1069_promoted_1, beta = var_1070_promoted_1, x = x_41)[name = tensor("x_43")]; - tensor var_1301 = const()[name = tensor("op_1301"), val = tensor([1])]; - tensor mean_21 = reduce_mean(axes = var_1301, keep_dims = var_1074, x = x_43)[name = tensor("mean_21")]; - tensor zero_mean_21 = sub(x = x_43, y = mean_21)[name = tensor("zero_mean_21")]; - tensor var_1071_promoted_1 = const()[name = tensor("op_1071_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1304 = pow(x = zero_mean_21, y = var_1071_promoted_1)[name = tensor("op_1304")]; - tensor var_1305 = const()[name = tensor("op_1305"), val = tensor([1])]; - tensor var_1306 = reduce_mean(axes = var_1305, keep_dims = var_1074, x = var_1304)[name = tensor("op_1306")]; - tensor var_1307_to_fp16 = const()[name = tensor("op_1307_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1308_cast_fp16 = add(x = var_1306, y = var_1307_to_fp16)[name = tensor("op_1308_cast_fp16")]; - tensor denom_21_epsilon_0 = const()[name = tensor("denom_21_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0, x = var_1308_cast_fp16)[name = tensor("denom_21_cast_fp16")]; - tensor var_1310_cast_fp16 = mul(x = zero_mean_21, y = denom_21_cast_fp16)[name = tensor("op_1310_cast_fp16")]; - tensor var_1312_gamma_0_to_fp16 = const()[name = tensor("op_1312_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66805376)))]; - tensor var_1312_beta_0_to_fp16 = const()[name = tensor("op_1312_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66806208)))]; - tensor var_1312_epsilon_0_to_fp16 = const()[name = tensor("op_1312_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1312_cast_fp16 = batch_norm(beta = var_1312_beta_0_to_fp16, epsilon = var_1312_epsilon_0_to_fp16, gamma = var_1312_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1310_cast_fp16)[name = tensor("op_1312_cast_fp16")]; - tensor var_1318 = const()[name = tensor("op_1318"), val = tensor(1)]; - tensor var_1319 = const()[name = tensor("op_1319"), val = tensor(0)]; - tensor var_1320 = const()[name = tensor("op_1320"), val = tensor(true)]; - tensor var_1342 = const()[name = tensor("op_1342"), val = tensor([1, 1])]; - tensor var_1344 = const()[name = tensor("op_1344"), val = tensor([1, 1])]; - tensor var_1346_pad_type_0 = const()[name = tensor("op_1346_pad_type_0"), val = tensor("custom")]; - tensor var_1346_pad_0 = const()[name = tensor("op_1346_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1346 = conv(bias = layers_5_attention_q_proj_bias, dilations = var_1344, groups = var_1318, pad = var_1346_pad_0, pad_type = var_1346_pad_type_0, strides = var_1342, weight = layers_5_attention_q_proj_weight, x = var_1312_cast_fp16)[name = tensor("op_1346")]; - tensor var_1349 = const()[name = tensor("op_1349"), val = tensor([1, 1])]; - tensor var_1351 = const()[name = tensor("op_1351"), val = tensor([1, 1])]; tensor ks_11_pad_type_0 = const()[name = tensor("ks_11_pad_type_0"), val = tensor("custom")]; tensor ks_11_pad_0 = const()[name = tensor("ks_11_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_11 = conv(bias = layers_5_attention_k_proj_bias, dilations = var_1351, groups = var_1318, pad = ks_11_pad_0, pad_type = ks_11_pad_type_0, strides = var_1349, weight = layers_5_attention_k_proj_weight, x = var_1312_cast_fp16)[name = tensor("ks_11")]; - tensor var_1356 = const()[name = tensor("op_1356"), val = tensor([1, 1])]; - tensor var_1358 = const()[name = tensor("op_1358"), val = tensor([1, 1])]; - tensor var_1360_pad_type_0 = const()[name = tensor("op_1360_pad_type_0"), val = tensor("custom")]; - tensor var_1360_pad_0 = const()[name = tensor("op_1360_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1360 = conv(bias = layers_5_attention_v_proj_bias, dilations = var_1358, groups = var_1318, pad = var_1360_pad_0, pad_type = var_1360_pad_type_0, strides = var_1356, weight = layers_5_attention_v_proj_weight, x = var_1312_cast_fp16)[name = tensor("op_1360")]; + tensor ks_11 = conv(bias = layers_5_attention_k_proj_bias, dilations = var_1284, groups = var_1251, pad = ks_11_pad_0, pad_type = ks_11_pad_type_0, strides = var_1282, weight = layers_5_attention_k_proj_weight, x = transpose_35)[name = tensor("ks_11")]; + tensor var_1289 = const()[name = tensor("op_1289"), val = tensor([1, 1])]; + tensor var_1291 = const()[name = tensor("op_1291"), val = tensor([1, 1])]; + tensor var_1293_pad_type_0 = const()[name = tensor("op_1293_pad_type_0"), val = tensor("custom")]; + tensor var_1293_pad_0 = const()[name = tensor("op_1293_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1293 = conv(bias = layers_5_attention_v_proj_bias, dilations = var_1291, groups = var_1251, pad = var_1293_pad_0, pad_type = var_1293_pad_type_0, strides = var_1289, weight = layers_5_attention_v_proj_weight, x = transpose_35)[name = tensor("op_1293")]; tensor tile_27 = const()[name = tensor("tile_27"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1361_axis_0 = const()[name = tensor("op_1361_axis_0"), val = tensor(1)]; - tensor var_1361_0, tensor var_1361_1, tensor var_1361_2, tensor var_1361_3, tensor var_1361_4, tensor var_1361_5, tensor var_1361_6, tensor var_1361_7, tensor var_1361_8, tensor var_1361_9, tensor var_1361_10, tensor var_1361_11 = split(axis = var_1361_axis_0, split_sizes = tile_27, x = var_1346)[name = tensor("op_1361")]; - tensor var_1374_perm_0 = const()[name = tensor("op_1374_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1294_axis_0 = const()[name = tensor("op_1294_axis_0"), val = tensor(1)]; + tensor var_1294_0, tensor var_1294_1, tensor var_1294_2, tensor var_1294_3, tensor var_1294_4, tensor var_1294_5, tensor var_1294_6, tensor var_1294_7, tensor var_1294_8, tensor var_1294_9, tensor var_1294_10, tensor var_1294_11 = split(axis = var_1294_axis_0, split_sizes = tile_27, x = var_1279)[name = tensor("op_1294")]; + tensor var_1307_perm_0 = const()[name = tensor("op_1307_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_28 = const()[name = tensor("tile_28"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1375_axis_0 = const()[name = tensor("op_1375_axis_0"), val = tensor(3)]; - tensor transpose_6 = transpose(perm = var_1374_perm_0, x = ks_11)[name = tensor("transpose_6")]; - tensor var_1375_0, tensor var_1375_1, tensor var_1375_2, tensor var_1375_3, tensor var_1375_4, tensor var_1375_5, tensor var_1375_6, tensor var_1375_7, tensor var_1375_8, tensor var_1375_9, tensor var_1375_10, tensor var_1375_11 = split(axis = var_1375_axis_0, split_sizes = tile_28, x = transpose_6)[name = tensor("op_1375")]; + tensor var_1308_axis_0 = const()[name = tensor("op_1308_axis_0"), val = tensor(3)]; + tensor transpose_34 = transpose(perm = var_1307_perm_0, x = ks_11)[name = tensor("transpose_34")]; + tensor var_1308_0, tensor var_1308_1, tensor var_1308_2, tensor var_1308_3, tensor var_1308_4, tensor var_1308_5, tensor var_1308_6, tensor var_1308_7, tensor var_1308_8, tensor var_1308_9, tensor var_1308_10, tensor var_1308_11 = split(axis = var_1308_axis_0, split_sizes = tile_28, x = transpose_34)[name = tensor("op_1308")]; tensor tile_29 = const()[name = tensor("tile_29"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1388_axis_0 = const()[name = tensor("op_1388_axis_0"), val = tensor(1)]; - tensor var_1388_0, tensor var_1388_1, tensor var_1388_2, tensor var_1388_3, tensor var_1388_4, tensor var_1388_5, tensor var_1388_6, tensor var_1388_7, tensor var_1388_8, tensor var_1388_9, tensor var_1388_10, tensor var_1388_11 = split(axis = var_1388_axis_0, split_sizes = tile_29, x = var_1360)[name = tensor("op_1388")]; - tensor var_1402_equation_0 = const()[name = tensor("op_1402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1402 = einsum(equation = var_1402_equation_0, values = (var_1375_0, var_1361_0))[name = tensor("op_1402")]; - tensor var_1403_to_fp16 = const()[name = tensor("op_1403_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_121_cast_fp16 = mul(x = var_1402, y = var_1403_to_fp16)[name = tensor("w_121_cast_fp16")]; - tensor var_1406_equation_0 = const()[name = tensor("op_1406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1406 = einsum(equation = var_1406_equation_0, values = (var_1375_1, var_1361_1))[name = tensor("op_1406")]; - tensor var_1407_to_fp16 = const()[name = tensor("op_1407_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_123_cast_fp16 = mul(x = var_1406, y = var_1407_to_fp16)[name = tensor("w_123_cast_fp16")]; - tensor var_1410_equation_0 = const()[name = tensor("op_1410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1410 = einsum(equation = var_1410_equation_0, values = (var_1375_2, var_1361_2))[name = tensor("op_1410")]; - tensor var_1411_to_fp16 = const()[name = tensor("op_1411_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_125_cast_fp16 = mul(x = var_1410, y = var_1411_to_fp16)[name = tensor("w_125_cast_fp16")]; - tensor var_1414_equation_0 = const()[name = tensor("op_1414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1414 = einsum(equation = var_1414_equation_0, values = (var_1375_3, var_1361_3))[name = tensor("op_1414")]; - tensor var_1415_to_fp16 = const()[name = tensor("op_1415_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_127_cast_fp16 = mul(x = var_1414, y = var_1415_to_fp16)[name = tensor("w_127_cast_fp16")]; - tensor var_1418_equation_0 = const()[name = tensor("op_1418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1418 = einsum(equation = var_1418_equation_0, values = (var_1375_4, var_1361_4))[name = tensor("op_1418")]; - tensor var_1419_to_fp16 = const()[name = tensor("op_1419_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_129_cast_fp16 = mul(x = var_1418, y = var_1419_to_fp16)[name = tensor("w_129_cast_fp16")]; - tensor var_1422_equation_0 = const()[name = tensor("op_1422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1422 = einsum(equation = var_1422_equation_0, values = (var_1375_5, var_1361_5))[name = tensor("op_1422")]; - tensor var_1423_to_fp16 = const()[name = tensor("op_1423_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_131_cast_fp16 = mul(x = var_1422, y = var_1423_to_fp16)[name = tensor("w_131_cast_fp16")]; - tensor var_1426_equation_0 = const()[name = tensor("op_1426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1426 = einsum(equation = var_1426_equation_0, values = (var_1375_6, var_1361_6))[name = tensor("op_1426")]; - tensor var_1427_to_fp16 = const()[name = tensor("op_1427_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_133_cast_fp16 = mul(x = var_1426, y = var_1427_to_fp16)[name = tensor("w_133_cast_fp16")]; - tensor var_1430_equation_0 = const()[name = tensor("op_1430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1430 = einsum(equation = var_1430_equation_0, values = (var_1375_7, var_1361_7))[name = tensor("op_1430")]; - tensor var_1431_to_fp16 = const()[name = tensor("op_1431_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_135_cast_fp16 = mul(x = var_1430, y = var_1431_to_fp16)[name = tensor("w_135_cast_fp16")]; - tensor var_1434_equation_0 = const()[name = tensor("op_1434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1434 = einsum(equation = var_1434_equation_0, values = (var_1375_8, var_1361_8))[name = tensor("op_1434")]; - tensor var_1435_to_fp16 = const()[name = tensor("op_1435_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_137_cast_fp16 = mul(x = var_1434, y = var_1435_to_fp16)[name = tensor("w_137_cast_fp16")]; - tensor var_1438_equation_0 = const()[name = tensor("op_1438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1438 = einsum(equation = var_1438_equation_0, values = (var_1375_9, var_1361_9))[name = tensor("op_1438")]; - tensor var_1439_to_fp16 = const()[name = tensor("op_1439_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_139_cast_fp16 = mul(x = var_1438, y = var_1439_to_fp16)[name = tensor("w_139_cast_fp16")]; - tensor var_1442_equation_0 = const()[name = tensor("op_1442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1442 = einsum(equation = var_1442_equation_0, values = (var_1375_10, var_1361_10))[name = tensor("op_1442")]; - tensor var_1443_to_fp16 = const()[name = tensor("op_1443_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_141_cast_fp16 = mul(x = var_1442, y = var_1443_to_fp16)[name = tensor("w_141_cast_fp16")]; - tensor var_1446_equation_0 = const()[name = tensor("op_1446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1446 = einsum(equation = var_1446_equation_0, values = (var_1375_11, var_1361_11))[name = tensor("op_1446")]; - tensor var_1447_to_fp16 = const()[name = tensor("op_1447_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_143_cast_fp16 = mul(x = var_1446, y = var_1447_to_fp16)[name = tensor("w_143_cast_fp16")]; - tensor input_163_cast_fp16 = add(x = w_121_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_163_cast_fp16")]; - tensor var_1450_cast_fp16 = softmax(axis = var_1318, x = input_163_cast_fp16)[name = tensor("op_1450_cast_fp16")]; - tensor input_165_cast_fp16 = add(x = w_123_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_165_cast_fp16")]; - tensor var_1452_cast_fp16 = softmax(axis = var_1318, x = input_165_cast_fp16)[name = tensor("op_1452_cast_fp16")]; - tensor input_167_cast_fp16 = add(x = w_125_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_167_cast_fp16")]; - tensor var_1454_cast_fp16 = softmax(axis = var_1318, x = input_167_cast_fp16)[name = tensor("op_1454_cast_fp16")]; - tensor input_169_cast_fp16 = add(x = w_127_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_169_cast_fp16")]; - tensor var_1456_cast_fp16 = softmax(axis = var_1318, x = input_169_cast_fp16)[name = tensor("op_1456_cast_fp16")]; - tensor input_171_cast_fp16 = add(x = w_129_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_171_cast_fp16")]; - tensor var_1458_cast_fp16 = softmax(axis = var_1318, x = input_171_cast_fp16)[name = tensor("op_1458_cast_fp16")]; - tensor input_173_cast_fp16 = add(x = w_131_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_173_cast_fp16")]; - tensor var_1460_cast_fp16 = softmax(axis = var_1318, x = input_173_cast_fp16)[name = tensor("op_1460_cast_fp16")]; - tensor input_175_cast_fp16 = add(x = w_133_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_175_cast_fp16")]; - tensor var_1462_cast_fp16 = softmax(axis = var_1318, x = input_175_cast_fp16)[name = tensor("op_1462_cast_fp16")]; - tensor input_177_cast_fp16 = add(x = w_135_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_177_cast_fp16")]; - tensor var_1464_cast_fp16 = softmax(axis = var_1318, x = input_177_cast_fp16)[name = tensor("op_1464_cast_fp16")]; - tensor input_179_cast_fp16 = add(x = w_137_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_179_cast_fp16")]; - tensor var_1466_cast_fp16 = softmax(axis = var_1318, x = input_179_cast_fp16)[name = tensor("op_1466_cast_fp16")]; - tensor input_181_cast_fp16 = add(x = w_139_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_181_cast_fp16")]; - tensor var_1468_cast_fp16 = softmax(axis = var_1318, x = input_181_cast_fp16)[name = tensor("op_1468_cast_fp16")]; - tensor input_183_cast_fp16 = add(x = w_141_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_183_cast_fp16")]; - tensor var_1470_cast_fp16 = softmax(axis = var_1318, x = input_183_cast_fp16)[name = tensor("op_1470_cast_fp16")]; - tensor input_185_cast_fp16 = add(x = w_143_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_185_cast_fp16")]; - tensor var_1472_cast_fp16 = softmax(axis = var_1318, x = input_185_cast_fp16)[name = tensor("op_1472_cast_fp16")]; - tensor var_1474_equation_0 = const()[name = tensor("op_1474_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1474_cast_fp16 = einsum(equation = var_1474_equation_0, values = (var_1388_0, var_1450_cast_fp16))[name = tensor("op_1474_cast_fp16")]; - tensor var_1476_equation_0 = const()[name = tensor("op_1476_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1476_cast_fp16 = einsum(equation = var_1476_equation_0, values = (var_1388_1, var_1452_cast_fp16))[name = tensor("op_1476_cast_fp16")]; - tensor var_1478_equation_0 = const()[name = tensor("op_1478_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1478_cast_fp16 = einsum(equation = var_1478_equation_0, values = (var_1388_2, var_1454_cast_fp16))[name = tensor("op_1478_cast_fp16")]; - tensor var_1480_equation_0 = const()[name = tensor("op_1480_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1480_cast_fp16 = einsum(equation = var_1480_equation_0, values = (var_1388_3, var_1456_cast_fp16))[name = tensor("op_1480_cast_fp16")]; - tensor var_1482_equation_0 = const()[name = tensor("op_1482_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1482_cast_fp16 = einsum(equation = var_1482_equation_0, values = (var_1388_4, var_1458_cast_fp16))[name = tensor("op_1482_cast_fp16")]; - tensor var_1484_equation_0 = const()[name = tensor("op_1484_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1484_cast_fp16 = einsum(equation = var_1484_equation_0, values = (var_1388_5, var_1460_cast_fp16))[name = tensor("op_1484_cast_fp16")]; - tensor var_1486_equation_0 = const()[name = tensor("op_1486_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1486_cast_fp16 = einsum(equation = var_1486_equation_0, values = (var_1388_6, var_1462_cast_fp16))[name = tensor("op_1486_cast_fp16")]; - tensor var_1488_equation_0 = const()[name = tensor("op_1488_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1488_cast_fp16 = einsum(equation = var_1488_equation_0, values = (var_1388_7, var_1464_cast_fp16))[name = tensor("op_1488_cast_fp16")]; - tensor var_1490_equation_0 = const()[name = tensor("op_1490_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1490_cast_fp16 = einsum(equation = var_1490_equation_0, values = (var_1388_8, var_1466_cast_fp16))[name = tensor("op_1490_cast_fp16")]; - tensor var_1492_equation_0 = const()[name = tensor("op_1492_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1492_cast_fp16 = einsum(equation = var_1492_equation_0, values = (var_1388_9, var_1468_cast_fp16))[name = tensor("op_1492_cast_fp16")]; - tensor var_1494_equation_0 = const()[name = tensor("op_1494_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1494_cast_fp16 = einsum(equation = var_1494_equation_0, values = (var_1388_10, var_1470_cast_fp16))[name = tensor("op_1494_cast_fp16")]; - tensor var_1496_equation_0 = const()[name = tensor("op_1496_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1496_cast_fp16 = einsum(equation = var_1496_equation_0, values = (var_1388_11, var_1472_cast_fp16))[name = tensor("op_1496_cast_fp16")]; - tensor var_1498_interleave_0 = const()[name = tensor("op_1498_interleave_0"), val = tensor(false)]; - tensor var_1498_cast_fp16 = concat(axis = var_1318, interleave = var_1498_interleave_0, values = (var_1474_cast_fp16, var_1476_cast_fp16, var_1478_cast_fp16, var_1480_cast_fp16, var_1482_cast_fp16, var_1484_cast_fp16, var_1486_cast_fp16, var_1488_cast_fp16, var_1490_cast_fp16, var_1492_cast_fp16, var_1494_cast_fp16, var_1496_cast_fp16))[name = tensor("op_1498_cast_fp16")]; - tensor var_1502 = const()[name = tensor("op_1502"), val = tensor([1, 1])]; - tensor var_1504 = const()[name = tensor("op_1504"), val = tensor([1, 1])]; - tensor var_1506_pad_type_0 = const()[name = tensor("op_1506_pad_type_0"), val = tensor("custom")]; - tensor var_1506_pad_0 = const()[name = tensor("op_1506_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1506 = conv(bias = layers_5_attention_o_proj_bias, dilations = var_1504, groups = var_1318, pad = var_1506_pad_0, pad_type = var_1506_pad_type_0, strides = var_1502, weight = layers_5_attention_o_proj_weight, x = var_1498_cast_fp16)[name = tensor("op_1506")]; - tensor var_1508_interleave_0 = const()[name = tensor("op_1508_interleave_0"), val = tensor(false)]; - tensor var_1508 = concat(axis = var_1319, interleave = var_1508_interleave_0, values = var_1506)[name = tensor("op_1508")]; - tensor x_45 = add(x = var_1312_cast_fp16, y = var_1508)[name = tensor("x_45")]; - tensor var_1315_promoted = const()[name = tensor("op_1315_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1316_promoted = const()[name = tensor("op_1316_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_47 = clip(alpha = var_1315_promoted, beta = var_1316_promoted, x = x_45)[name = tensor("x_47")]; - tensor var_1513 = const()[name = tensor("op_1513"), val = tensor([1])]; - tensor mean_23 = reduce_mean(axes = var_1513, keep_dims = var_1320, x = x_47)[name = tensor("mean_23")]; - tensor zero_mean_23 = sub(x = x_47, y = mean_23)[name = tensor("zero_mean_23")]; - tensor var_1317_promoted = const()[name = tensor("op_1317_promoted"), val = tensor(0x1p+1)]; - tensor var_1516 = pow(x = zero_mean_23, y = var_1317_promoted)[name = tensor("op_1516")]; - tensor var_1517 = const()[name = tensor("op_1517"), val = tensor([1])]; - tensor var_1518 = reduce_mean(axes = var_1517, keep_dims = var_1320, x = var_1516)[name = tensor("op_1518")]; - tensor var_1519_to_fp16 = const()[name = tensor("op_1519_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1520_cast_fp16 = add(x = var_1518, y = var_1519_to_fp16)[name = tensor("op_1520_cast_fp16")]; - tensor denom_23_epsilon_0 = const()[name = tensor("denom_23_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0, x = var_1520_cast_fp16)[name = tensor("denom_23_cast_fp16")]; - tensor var_1522_cast_fp16 = mul(x = zero_mean_23, y = denom_23_cast_fp16)[name = tensor("op_1522_cast_fp16")]; - tensor var_1524_gamma_0_to_fp16 = const()[name = tensor("op_1524_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66807040)))]; - tensor var_1524_beta_0_to_fp16 = const()[name = tensor("op_1524_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66807872)))]; - tensor var_1524_epsilon_0_to_fp16 = const()[name = tensor("op_1524_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1524_cast_fp16 = batch_norm(beta = var_1524_beta_0_to_fp16, epsilon = var_1524_epsilon_0_to_fp16, gamma = var_1524_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1522_cast_fp16)[name = tensor("op_1524_cast_fp16")]; - tensor var_1530 = const()[name = tensor("op_1530"), val = tensor([1, 1])]; - tensor var_1532 = const()[name = tensor("op_1532"), val = tensor([1, 1])]; - tensor var_1534_pad_type_0 = const()[name = tensor("op_1534_pad_type_0"), val = tensor("custom")]; - tensor var_1534_pad_0 = const()[name = tensor("op_1534_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1534 = conv(bias = layers_5_mlp_fc1_bias, dilations = var_1532, groups = var_1318, pad = var_1534_pad_0, pad_type = var_1534_pad_type_0, strides = var_1530, weight = layers_5_mlp_fc1_weight, x = var_1524_cast_fp16)[name = tensor("op_1534")]; - tensor input_191_mode_0 = const()[name = tensor("input_191_mode_0"), val = tensor("EXACT")]; - tensor input_191 = gelu(mode = input_191_mode_0, x = var_1534)[name = tensor("input_191")]; - tensor var_1538 = const()[name = tensor("op_1538"), val = tensor([1, 1])]; - tensor var_1540 = const()[name = tensor("op_1540"), val = tensor([1, 1])]; - tensor var_1542_pad_type_0 = const()[name = tensor("op_1542_pad_type_0"), val = tensor("custom")]; - tensor var_1542_pad_0 = const()[name = tensor("op_1542_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1542 = conv(bias = layers_5_mlp_fc2_bias, dilations = var_1540, groups = var_1318, pad = var_1542_pad_0, pad_type = var_1542_pad_type_0, strides = var_1538, weight = layers_5_mlp_fc2_weight, x = input_191)[name = tensor("op_1542")]; - tensor x_49 = add(x = var_1524_cast_fp16, y = var_1542)[name = tensor("x_49")]; - tensor var_1315_promoted_1 = const()[name = tensor("op_1315_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1316_promoted_1 = const()[name = tensor("op_1316_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_51 = clip(alpha = var_1315_promoted_1, beta = var_1316_promoted_1, x = x_49)[name = tensor("x_51")]; - tensor var_1547 = const()[name = tensor("op_1547"), val = tensor([1])]; - tensor mean_25 = reduce_mean(axes = var_1547, keep_dims = var_1320, x = x_51)[name = tensor("mean_25")]; - tensor zero_mean_25 = sub(x = x_51, y = mean_25)[name = tensor("zero_mean_25")]; - tensor var_1317_promoted_1 = const()[name = tensor("op_1317_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1550 = pow(x = zero_mean_25, y = var_1317_promoted_1)[name = tensor("op_1550")]; - tensor var_1551 = const()[name = tensor("op_1551"), val = tensor([1])]; - tensor var_1552 = reduce_mean(axes = var_1551, keep_dims = var_1320, x = var_1550)[name = tensor("op_1552")]; - tensor var_1553_to_fp16 = const()[name = tensor("op_1553_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1554_cast_fp16 = add(x = var_1552, y = var_1553_to_fp16)[name = tensor("op_1554_cast_fp16")]; - tensor denom_25_epsilon_0 = const()[name = tensor("denom_25_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0, x = var_1554_cast_fp16)[name = tensor("denom_25_cast_fp16")]; - tensor var_1556_cast_fp16 = mul(x = zero_mean_25, y = denom_25_cast_fp16)[name = tensor("op_1556_cast_fp16")]; - tensor var_1558_gamma_0_to_fp16 = const()[name = tensor("op_1558_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66808704)))]; - tensor var_1558_beta_0_to_fp16 = const()[name = tensor("op_1558_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66809536)))]; - tensor var_1558_epsilon_0_to_fp16 = const()[name = tensor("op_1558_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1558_cast_fp16 = batch_norm(beta = var_1558_beta_0_to_fp16, epsilon = var_1558_epsilon_0_to_fp16, gamma = var_1558_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1556_cast_fp16)[name = tensor("op_1558_cast_fp16")]; - tensor var_1564 = const()[name = tensor("op_1564"), val = tensor(1)]; - tensor var_1565 = const()[name = tensor("op_1565"), val = tensor(0)]; - tensor var_1566 = const()[name = tensor("op_1566"), val = tensor(true)]; - tensor var_1588 = const()[name = tensor("op_1588"), val = tensor([1, 1])]; - tensor var_1590 = const()[name = tensor("op_1590"), val = tensor([1, 1])]; - tensor var_1592_pad_type_0 = const()[name = tensor("op_1592_pad_type_0"), val = tensor("custom")]; - tensor var_1592_pad_0 = const()[name = tensor("op_1592_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1592 = conv(bias = layers_6_attention_q_proj_bias, dilations = var_1590, groups = var_1564, pad = var_1592_pad_0, pad_type = var_1592_pad_type_0, strides = var_1588, weight = layers_6_attention_q_proj_weight, x = var_1558_cast_fp16)[name = tensor("op_1592")]; - tensor var_1595 = const()[name = tensor("op_1595"), val = tensor([1, 1])]; - tensor var_1597 = const()[name = tensor("op_1597"), val = tensor([1, 1])]; + tensor var_1321_axis_0 = const()[name = tensor("op_1321_axis_0"), val = tensor(1)]; + tensor var_1321_0, tensor var_1321_1, tensor var_1321_2, tensor var_1321_3, tensor var_1321_4, tensor var_1321_5, tensor var_1321_6, tensor var_1321_7, tensor var_1321_8, tensor var_1321_9, tensor var_1321_10, tensor var_1321_11 = split(axis = var_1321_axis_0, split_sizes = tile_29, x = var_1293)[name = tensor("op_1321")]; + tensor var_1335_equation_0 = const()[name = tensor("op_1335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1335 = einsum(equation = var_1335_equation_0, values = (var_1308_0, var_1294_0))[name = tensor("op_1335")]; + tensor var_1336_to_fp16 = const()[name = tensor("op_1336_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_121_cast_fp16 = mul(x = var_1335, y = var_1336_to_fp16)[name = tensor("w_121_cast_fp16")]; + tensor var_1339_equation_0 = const()[name = tensor("op_1339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1339 = einsum(equation = var_1339_equation_0, values = (var_1308_1, var_1294_1))[name = tensor("op_1339")]; + tensor var_1340_to_fp16 = const()[name = tensor("op_1340_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_123_cast_fp16 = mul(x = var_1339, y = var_1340_to_fp16)[name = tensor("w_123_cast_fp16")]; + tensor var_1343_equation_0 = const()[name = tensor("op_1343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1343 = einsum(equation = var_1343_equation_0, values = (var_1308_2, var_1294_2))[name = tensor("op_1343")]; + tensor var_1344_to_fp16 = const()[name = tensor("op_1344_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_125_cast_fp16 = mul(x = var_1343, y = var_1344_to_fp16)[name = tensor("w_125_cast_fp16")]; + tensor var_1347_equation_0 = const()[name = tensor("op_1347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1347 = einsum(equation = var_1347_equation_0, values = (var_1308_3, var_1294_3))[name = tensor("op_1347")]; + tensor var_1348_to_fp16 = const()[name = tensor("op_1348_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_127_cast_fp16 = mul(x = var_1347, y = var_1348_to_fp16)[name = tensor("w_127_cast_fp16")]; + tensor var_1351_equation_0 = const()[name = tensor("op_1351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1351 = einsum(equation = var_1351_equation_0, values = (var_1308_4, var_1294_4))[name = tensor("op_1351")]; + tensor var_1352_to_fp16 = const()[name = tensor("op_1352_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_129_cast_fp16 = mul(x = var_1351, y = var_1352_to_fp16)[name = tensor("w_129_cast_fp16")]; + tensor var_1355_equation_0 = const()[name = tensor("op_1355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1355 = einsum(equation = var_1355_equation_0, values = (var_1308_5, var_1294_5))[name = tensor("op_1355")]; + tensor var_1356_to_fp16 = const()[name = tensor("op_1356_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_131_cast_fp16 = mul(x = var_1355, y = var_1356_to_fp16)[name = tensor("w_131_cast_fp16")]; + tensor var_1359_equation_0 = const()[name = tensor("op_1359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1359 = einsum(equation = var_1359_equation_0, values = (var_1308_6, var_1294_6))[name = tensor("op_1359")]; + tensor var_1360_to_fp16 = const()[name = tensor("op_1360_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_133_cast_fp16 = mul(x = var_1359, y = var_1360_to_fp16)[name = tensor("w_133_cast_fp16")]; + tensor var_1363_equation_0 = const()[name = tensor("op_1363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1363 = einsum(equation = var_1363_equation_0, values = (var_1308_7, var_1294_7))[name = tensor("op_1363")]; + tensor var_1364_to_fp16 = const()[name = tensor("op_1364_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_135_cast_fp16 = mul(x = var_1363, y = var_1364_to_fp16)[name = tensor("w_135_cast_fp16")]; + tensor var_1367_equation_0 = const()[name = tensor("op_1367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1367 = einsum(equation = var_1367_equation_0, values = (var_1308_8, var_1294_8))[name = tensor("op_1367")]; + tensor var_1368_to_fp16 = const()[name = tensor("op_1368_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_137_cast_fp16 = mul(x = var_1367, y = var_1368_to_fp16)[name = tensor("w_137_cast_fp16")]; + tensor var_1371_equation_0 = const()[name = tensor("op_1371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1371 = einsum(equation = var_1371_equation_0, values = (var_1308_9, var_1294_9))[name = tensor("op_1371")]; + tensor var_1372_to_fp16 = const()[name = tensor("op_1372_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_139_cast_fp16 = mul(x = var_1371, y = var_1372_to_fp16)[name = tensor("w_139_cast_fp16")]; + tensor var_1375_equation_0 = const()[name = tensor("op_1375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1375 = einsum(equation = var_1375_equation_0, values = (var_1308_10, var_1294_10))[name = tensor("op_1375")]; + tensor var_1376_to_fp16 = const()[name = tensor("op_1376_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_141_cast_fp16 = mul(x = var_1375, y = var_1376_to_fp16)[name = tensor("w_141_cast_fp16")]; + tensor var_1379_equation_0 = const()[name = tensor("op_1379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1379 = einsum(equation = var_1379_equation_0, values = (var_1308_11, var_1294_11))[name = tensor("op_1379")]; + tensor var_1380_to_fp16 = const()[name = tensor("op_1380_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_143_cast_fp16 = mul(x = var_1379, y = var_1380_to_fp16)[name = tensor("w_143_cast_fp16")]; + tensor input_185_cast_fp16 = add(x = w_121_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_185_cast_fp16")]; + tensor var_1383_cast_fp16 = softmax(axis = var_1251, x = input_185_cast_fp16)[name = tensor("op_1383_cast_fp16")]; + tensor input_187_cast_fp16 = add(x = w_123_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_187_cast_fp16")]; + tensor var_1385_cast_fp16 = softmax(axis = var_1251, x = input_187_cast_fp16)[name = tensor("op_1385_cast_fp16")]; + tensor input_189_cast_fp16 = add(x = w_125_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_189_cast_fp16")]; + tensor var_1387_cast_fp16 = softmax(axis = var_1251, x = input_189_cast_fp16)[name = tensor("op_1387_cast_fp16")]; + tensor input_191_cast_fp16 = add(x = w_127_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_191_cast_fp16")]; + tensor var_1389_cast_fp16 = softmax(axis = var_1251, x = input_191_cast_fp16)[name = tensor("op_1389_cast_fp16")]; + tensor input_193_cast_fp16 = add(x = w_129_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_193_cast_fp16")]; + tensor var_1391_cast_fp16 = softmax(axis = var_1251, x = input_193_cast_fp16)[name = tensor("op_1391_cast_fp16")]; + tensor input_195_cast_fp16 = add(x = w_131_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_195_cast_fp16")]; + tensor var_1393_cast_fp16 = softmax(axis = var_1251, x = input_195_cast_fp16)[name = tensor("op_1393_cast_fp16")]; + tensor input_197_cast_fp16 = add(x = w_133_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_197_cast_fp16")]; + tensor var_1395_cast_fp16 = softmax(axis = var_1251, x = input_197_cast_fp16)[name = tensor("op_1395_cast_fp16")]; + tensor input_199_cast_fp16 = add(x = w_135_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_199_cast_fp16")]; + tensor var_1397_cast_fp16 = softmax(axis = var_1251, x = input_199_cast_fp16)[name = tensor("op_1397_cast_fp16")]; + tensor input_201_cast_fp16 = add(x = w_137_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_201_cast_fp16")]; + tensor var_1399_cast_fp16 = softmax(axis = var_1251, x = input_201_cast_fp16)[name = tensor("op_1399_cast_fp16")]; + tensor input_203_cast_fp16 = add(x = w_139_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_203_cast_fp16")]; + tensor var_1401_cast_fp16 = softmax(axis = var_1251, x = input_203_cast_fp16)[name = tensor("op_1401_cast_fp16")]; + tensor input_205_cast_fp16 = add(x = w_141_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_205_cast_fp16")]; + tensor var_1403_cast_fp16 = softmax(axis = var_1251, x = input_205_cast_fp16)[name = tensor("op_1403_cast_fp16")]; + tensor input_207_cast_fp16 = add(x = w_143_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor var_1405_cast_fp16 = softmax(axis = var_1251, x = input_207_cast_fp16)[name = tensor("op_1405_cast_fp16")]; + tensor var_1407_equation_0 = const()[name = tensor("op_1407_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1407_cast_fp16 = einsum(equation = var_1407_equation_0, values = (var_1321_0, var_1383_cast_fp16))[name = tensor("op_1407_cast_fp16")]; + tensor var_1409_equation_0 = const()[name = tensor("op_1409_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1409_cast_fp16 = einsum(equation = var_1409_equation_0, values = (var_1321_1, var_1385_cast_fp16))[name = tensor("op_1409_cast_fp16")]; + tensor var_1411_equation_0 = const()[name = tensor("op_1411_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1411_cast_fp16 = einsum(equation = var_1411_equation_0, values = (var_1321_2, var_1387_cast_fp16))[name = tensor("op_1411_cast_fp16")]; + tensor var_1413_equation_0 = const()[name = tensor("op_1413_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1413_cast_fp16 = einsum(equation = var_1413_equation_0, values = (var_1321_3, var_1389_cast_fp16))[name = tensor("op_1413_cast_fp16")]; + tensor var_1415_equation_0 = const()[name = tensor("op_1415_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1415_cast_fp16 = einsum(equation = var_1415_equation_0, values = (var_1321_4, var_1391_cast_fp16))[name = tensor("op_1415_cast_fp16")]; + tensor var_1417_equation_0 = const()[name = tensor("op_1417_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1417_cast_fp16 = einsum(equation = var_1417_equation_0, values = (var_1321_5, var_1393_cast_fp16))[name = tensor("op_1417_cast_fp16")]; + tensor var_1419_equation_0 = const()[name = tensor("op_1419_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1419_cast_fp16 = einsum(equation = var_1419_equation_0, values = (var_1321_6, var_1395_cast_fp16))[name = tensor("op_1419_cast_fp16")]; + tensor var_1421_equation_0 = const()[name = tensor("op_1421_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1421_cast_fp16 = einsum(equation = var_1421_equation_0, values = (var_1321_7, var_1397_cast_fp16))[name = tensor("op_1421_cast_fp16")]; + tensor var_1423_equation_0 = const()[name = tensor("op_1423_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1423_cast_fp16 = einsum(equation = var_1423_equation_0, values = (var_1321_8, var_1399_cast_fp16))[name = tensor("op_1423_cast_fp16")]; + tensor var_1425_equation_0 = const()[name = tensor("op_1425_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1425_cast_fp16 = einsum(equation = var_1425_equation_0, values = (var_1321_9, var_1401_cast_fp16))[name = tensor("op_1425_cast_fp16")]; + tensor var_1427_equation_0 = const()[name = tensor("op_1427_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1427_cast_fp16 = einsum(equation = var_1427_equation_0, values = (var_1321_10, var_1403_cast_fp16))[name = tensor("op_1427_cast_fp16")]; + tensor var_1429_equation_0 = const()[name = tensor("op_1429_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1429_cast_fp16 = einsum(equation = var_1429_equation_0, values = (var_1321_11, var_1405_cast_fp16))[name = tensor("op_1429_cast_fp16")]; + tensor var_1431_interleave_0 = const()[name = tensor("op_1431_interleave_0"), val = tensor(false)]; + tensor var_1431_cast_fp16 = concat(axis = var_1251, interleave = var_1431_interleave_0, values = (var_1407_cast_fp16, var_1409_cast_fp16, var_1411_cast_fp16, var_1413_cast_fp16, var_1415_cast_fp16, var_1417_cast_fp16, var_1419_cast_fp16, var_1421_cast_fp16, var_1423_cast_fp16, var_1425_cast_fp16, var_1427_cast_fp16, var_1429_cast_fp16))[name = tensor("op_1431_cast_fp16")]; + tensor var_1435 = const()[name = tensor("op_1435"), val = tensor([1, 1])]; + tensor var_1437 = const()[name = tensor("op_1437"), val = tensor([1, 1])]; + tensor var_1439_pad_type_0 = const()[name = tensor("op_1439_pad_type_0"), val = tensor("custom")]; + tensor var_1439_pad_0 = const()[name = tensor("op_1439_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1439 = conv(bias = layers_5_attention_o_proj_bias, dilations = var_1437, groups = var_1251, pad = var_1439_pad_0, pad_type = var_1439_pad_type_0, strides = var_1435, weight = layers_5_attention_o_proj_weight, x = var_1431_cast_fp16)[name = tensor("op_1439")]; + tensor var_1441_interleave_0 = const()[name = tensor("op_1441_interleave_0"), val = tensor(false)]; + tensor var_1441 = concat(axis = var_1252, interleave = var_1441_interleave_0, values = var_1439)[name = tensor("op_1441")]; + tensor x_23 = add(x = transpose_35, y = var_1441)[name = tensor("x_23")]; + tensor input_211_perm_0 = const()[name = tensor("input_211_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_93 = const()[name = tensor("weight_93"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66805376)))]; + tensor bias_91 = const()[name = tensor("bias_91"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66806208)))]; + tensor o_23_axes_0 = const()[name = tensor("o_23_axes_0"), val = tensor([-1])]; + tensor var_1250_to_fp16 = const()[name = tensor("op_1250_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_33 = transpose(perm = input_211_perm_0, x = x_23)[name = tensor("transpose_33")]; + tensor o_23_cast_fp16 = layer_norm(axes = o_23_axes_0, beta = bias_91, epsilon = var_1250_to_fp16, gamma = weight_93, x = transpose_33)[name = tensor("o_23_cast_fp16")]; + tensor input_213_perm_0 = const()[name = tensor("input_213_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1457 = const()[name = tensor("op_1457"), val = tensor([1, 1])]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461_pad_type_0 = const()[name = tensor("op_1461_pad_type_0"), val = tensor("custom")]; + tensor var_1461_pad_0 = const()[name = tensor("op_1461_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_32 = transpose(perm = input_213_perm_0, x = o_23_cast_fp16)[name = tensor("transpose_32")]; + tensor var_1461 = conv(bias = layers_5_mlp_fc1_bias, dilations = var_1459, groups = var_1251, pad = var_1461_pad_0, pad_type = var_1461_pad_type_0, strides = var_1457, weight = layers_5_mlp_fc1_weight, x = transpose_32)[name = tensor("op_1461")]; + tensor input_215_mode_0 = const()[name = tensor("input_215_mode_0"), val = tensor("EXACT")]; + tensor input_215 = gelu(mode = input_215_mode_0, x = var_1461)[name = tensor("input_215")]; + tensor var_1465 = const()[name = tensor("op_1465"), val = tensor([1, 1])]; + tensor var_1467 = const()[name = tensor("op_1467"), val = tensor([1, 1])]; + tensor var_1469_pad_type_0 = const()[name = tensor("op_1469_pad_type_0"), val = tensor("custom")]; + tensor var_1469_pad_0 = const()[name = tensor("op_1469_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1469 = conv(bias = layers_5_mlp_fc2_bias, dilations = var_1467, groups = var_1251, pad = var_1469_pad_0, pad_type = var_1469_pad_type_0, strides = var_1465, weight = layers_5_mlp_fc2_weight, x = input_215)[name = tensor("op_1469")]; + tensor x_25 = add(x = transpose_32, y = var_1469)[name = tensor("x_25")]; + tensor input_217_perm_0 = const()[name = tensor("input_217_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_99 = const()[name = tensor("weight_99"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66807040)))]; + tensor bias_97 = const()[name = tensor("bias_97"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66807872)))]; + tensor o_25_axes_0 = const()[name = tensor("o_25_axes_0"), val = tensor([-1])]; + tensor transpose_31 = transpose(perm = input_217_perm_0, x = x_25)[name = tensor("transpose_31")]; + tensor o_25_cast_fp16 = layer_norm(axes = o_25_axes_0, beta = bias_97, epsilon = var_1250_to_fp16, gamma = weight_99, x = transpose_31)[name = tensor("o_25_cast_fp16")]; + tensor hidden_states_13_perm_0 = const()[name = tensor("hidden_states_13_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1485 = const()[name = tensor("op_1485"), val = tensor(1)]; + tensor var_1486 = const()[name = tensor("op_1486"), val = tensor(0)]; + tensor var_1509 = const()[name = tensor("op_1509"), val = tensor([1, 1])]; + tensor var_1511 = const()[name = tensor("op_1511"), val = tensor([1, 1])]; + tensor var_1513_pad_type_0 = const()[name = tensor("op_1513_pad_type_0"), val = tensor("custom")]; + tensor var_1513_pad_0 = const()[name = tensor("op_1513_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_30 = transpose(perm = hidden_states_13_perm_0, x = o_25_cast_fp16)[name = tensor("transpose_30")]; + tensor var_1513 = conv(bias = layers_6_attention_q_proj_bias, dilations = var_1511, groups = var_1485, pad = var_1513_pad_0, pad_type = var_1513_pad_type_0, strides = var_1509, weight = layers_6_attention_q_proj_weight, x = transpose_30)[name = tensor("op_1513")]; + tensor var_1516 = const()[name = tensor("op_1516"), val = tensor([1, 1])]; + tensor var_1518 = const()[name = tensor("op_1518"), val = tensor([1, 1])]; tensor ks_13_pad_type_0 = const()[name = tensor("ks_13_pad_type_0"), val = tensor("custom")]; tensor ks_13_pad_0 = const()[name = tensor("ks_13_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_13 = conv(bias = layers_6_attention_k_proj_bias, dilations = var_1597, groups = var_1564, pad = ks_13_pad_0, pad_type = ks_13_pad_type_0, strides = var_1595, weight = layers_6_attention_k_proj_weight, x = var_1558_cast_fp16)[name = tensor("ks_13")]; - tensor var_1602 = const()[name = tensor("op_1602"), val = tensor([1, 1])]; - tensor var_1604 = const()[name = tensor("op_1604"), val = tensor([1, 1])]; - tensor var_1606_pad_type_0 = const()[name = tensor("op_1606_pad_type_0"), val = tensor("custom")]; - tensor var_1606_pad_0 = const()[name = tensor("op_1606_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1606 = conv(bias = layers_6_attention_v_proj_bias, dilations = var_1604, groups = var_1564, pad = var_1606_pad_0, pad_type = var_1606_pad_type_0, strides = var_1602, weight = layers_6_attention_v_proj_weight, x = var_1558_cast_fp16)[name = tensor("op_1606")]; + tensor ks_13 = conv(bias = layers_6_attention_k_proj_bias, dilations = var_1518, groups = var_1485, pad = ks_13_pad_0, pad_type = ks_13_pad_type_0, strides = var_1516, weight = layers_6_attention_k_proj_weight, x = transpose_30)[name = tensor("ks_13")]; + tensor var_1523 = const()[name = tensor("op_1523"), val = tensor([1, 1])]; + tensor var_1525 = const()[name = tensor("op_1525"), val = tensor([1, 1])]; + tensor var_1527_pad_type_0 = const()[name = tensor("op_1527_pad_type_0"), val = tensor("custom")]; + tensor var_1527_pad_0 = const()[name = tensor("op_1527_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1527 = conv(bias = layers_6_attention_v_proj_bias, dilations = var_1525, groups = var_1485, pad = var_1527_pad_0, pad_type = var_1527_pad_type_0, strides = var_1523, weight = layers_6_attention_v_proj_weight, x = transpose_30)[name = tensor("op_1527")]; tensor tile_32 = const()[name = tensor("tile_32"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1607_axis_0 = const()[name = tensor("op_1607_axis_0"), val = tensor(1)]; - tensor var_1607_0, tensor var_1607_1, tensor var_1607_2, tensor var_1607_3, tensor var_1607_4, tensor var_1607_5, tensor var_1607_6, tensor var_1607_7, tensor var_1607_8, tensor var_1607_9, tensor var_1607_10, tensor var_1607_11 = split(axis = var_1607_axis_0, split_sizes = tile_32, x = var_1592)[name = tensor("op_1607")]; - tensor var_1620_perm_0 = const()[name = tensor("op_1620_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1528_axis_0 = const()[name = tensor("op_1528_axis_0"), val = tensor(1)]; + tensor var_1528_0, tensor var_1528_1, tensor var_1528_2, tensor var_1528_3, tensor var_1528_4, tensor var_1528_5, tensor var_1528_6, tensor var_1528_7, tensor var_1528_8, tensor var_1528_9, tensor var_1528_10, tensor var_1528_11 = split(axis = var_1528_axis_0, split_sizes = tile_32, x = var_1513)[name = tensor("op_1528")]; + tensor var_1541_perm_0 = const()[name = tensor("op_1541_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_33 = const()[name = tensor("tile_33"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1621_axis_0 = const()[name = tensor("op_1621_axis_0"), val = tensor(3)]; - tensor transpose_5 = transpose(perm = var_1620_perm_0, x = ks_13)[name = tensor("transpose_5")]; - tensor var_1621_0, tensor var_1621_1, tensor var_1621_2, tensor var_1621_3, tensor var_1621_4, tensor var_1621_5, tensor var_1621_6, tensor var_1621_7, tensor var_1621_8, tensor var_1621_9, tensor var_1621_10, tensor var_1621_11 = split(axis = var_1621_axis_0, split_sizes = tile_33, x = transpose_5)[name = tensor("op_1621")]; + tensor var_1542_axis_0 = const()[name = tensor("op_1542_axis_0"), val = tensor(3)]; + tensor transpose_29 = transpose(perm = var_1541_perm_0, x = ks_13)[name = tensor("transpose_29")]; + tensor var_1542_0, tensor var_1542_1, tensor var_1542_2, tensor var_1542_3, tensor var_1542_4, tensor var_1542_5, tensor var_1542_6, tensor var_1542_7, tensor var_1542_8, tensor var_1542_9, tensor var_1542_10, tensor var_1542_11 = split(axis = var_1542_axis_0, split_sizes = tile_33, x = transpose_29)[name = tensor("op_1542")]; tensor tile_34 = const()[name = tensor("tile_34"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1634_axis_0 = const()[name = tensor("op_1634_axis_0"), val = tensor(1)]; - tensor var_1634_0, tensor var_1634_1, tensor var_1634_2, tensor var_1634_3, tensor var_1634_4, tensor var_1634_5, tensor var_1634_6, tensor var_1634_7, tensor var_1634_8, tensor var_1634_9, tensor var_1634_10, tensor var_1634_11 = split(axis = var_1634_axis_0, split_sizes = tile_34, x = var_1606)[name = tensor("op_1634")]; - tensor var_1648_equation_0 = const()[name = tensor("op_1648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1648 = einsum(equation = var_1648_equation_0, values = (var_1621_0, var_1607_0))[name = tensor("op_1648")]; - tensor var_1649_to_fp16 = const()[name = tensor("op_1649_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_145_cast_fp16 = mul(x = var_1648, y = var_1649_to_fp16)[name = tensor("w_145_cast_fp16")]; - tensor var_1652_equation_0 = const()[name = tensor("op_1652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1652 = einsum(equation = var_1652_equation_0, values = (var_1621_1, var_1607_1))[name = tensor("op_1652")]; - tensor var_1653_to_fp16 = const()[name = tensor("op_1653_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_147_cast_fp16 = mul(x = var_1652, y = var_1653_to_fp16)[name = tensor("w_147_cast_fp16")]; - tensor var_1656_equation_0 = const()[name = tensor("op_1656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1656 = einsum(equation = var_1656_equation_0, values = (var_1621_2, var_1607_2))[name = tensor("op_1656")]; - tensor var_1657_to_fp16 = const()[name = tensor("op_1657_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_149_cast_fp16 = mul(x = var_1656, y = var_1657_to_fp16)[name = tensor("w_149_cast_fp16")]; - tensor var_1660_equation_0 = const()[name = tensor("op_1660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1660 = einsum(equation = var_1660_equation_0, values = (var_1621_3, var_1607_3))[name = tensor("op_1660")]; - tensor var_1661_to_fp16 = const()[name = tensor("op_1661_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_151_cast_fp16 = mul(x = var_1660, y = var_1661_to_fp16)[name = tensor("w_151_cast_fp16")]; - tensor var_1664_equation_0 = const()[name = tensor("op_1664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1664 = einsum(equation = var_1664_equation_0, values = (var_1621_4, var_1607_4))[name = tensor("op_1664")]; - tensor var_1665_to_fp16 = const()[name = tensor("op_1665_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_153_cast_fp16 = mul(x = var_1664, y = var_1665_to_fp16)[name = tensor("w_153_cast_fp16")]; - tensor var_1668_equation_0 = const()[name = tensor("op_1668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1668 = einsum(equation = var_1668_equation_0, values = (var_1621_5, var_1607_5))[name = tensor("op_1668")]; - tensor var_1669_to_fp16 = const()[name = tensor("op_1669_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_155_cast_fp16 = mul(x = var_1668, y = var_1669_to_fp16)[name = tensor("w_155_cast_fp16")]; - tensor var_1672_equation_0 = const()[name = tensor("op_1672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1672 = einsum(equation = var_1672_equation_0, values = (var_1621_6, var_1607_6))[name = tensor("op_1672")]; - tensor var_1673_to_fp16 = const()[name = tensor("op_1673_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_157_cast_fp16 = mul(x = var_1672, y = var_1673_to_fp16)[name = tensor("w_157_cast_fp16")]; - tensor var_1676_equation_0 = const()[name = tensor("op_1676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1676 = einsum(equation = var_1676_equation_0, values = (var_1621_7, var_1607_7))[name = tensor("op_1676")]; - tensor var_1677_to_fp16 = const()[name = tensor("op_1677_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_159_cast_fp16 = mul(x = var_1676, y = var_1677_to_fp16)[name = tensor("w_159_cast_fp16")]; - tensor var_1680_equation_0 = const()[name = tensor("op_1680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1680 = einsum(equation = var_1680_equation_0, values = (var_1621_8, var_1607_8))[name = tensor("op_1680")]; - tensor var_1681_to_fp16 = const()[name = tensor("op_1681_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_161_cast_fp16 = mul(x = var_1680, y = var_1681_to_fp16)[name = tensor("w_161_cast_fp16")]; - tensor var_1684_equation_0 = const()[name = tensor("op_1684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1684 = einsum(equation = var_1684_equation_0, values = (var_1621_9, var_1607_9))[name = tensor("op_1684")]; - tensor var_1685_to_fp16 = const()[name = tensor("op_1685_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_163_cast_fp16 = mul(x = var_1684, y = var_1685_to_fp16)[name = tensor("w_163_cast_fp16")]; - tensor var_1688_equation_0 = const()[name = tensor("op_1688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1688 = einsum(equation = var_1688_equation_0, values = (var_1621_10, var_1607_10))[name = tensor("op_1688")]; - tensor var_1689_to_fp16 = const()[name = tensor("op_1689_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_165_cast_fp16 = mul(x = var_1688, y = var_1689_to_fp16)[name = tensor("w_165_cast_fp16")]; - tensor var_1692_equation_0 = const()[name = tensor("op_1692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1692 = einsum(equation = var_1692_equation_0, values = (var_1621_11, var_1607_11))[name = tensor("op_1692")]; - tensor var_1693_to_fp16 = const()[name = tensor("op_1693_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_167_cast_fp16 = mul(x = var_1692, y = var_1693_to_fp16)[name = tensor("w_167_cast_fp16")]; - tensor input_195_cast_fp16 = add(x = w_145_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_195_cast_fp16")]; - tensor var_1696_cast_fp16 = softmax(axis = var_1564, x = input_195_cast_fp16)[name = tensor("op_1696_cast_fp16")]; - tensor input_197_cast_fp16 = add(x = w_147_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_197_cast_fp16")]; - tensor var_1698_cast_fp16 = softmax(axis = var_1564, x = input_197_cast_fp16)[name = tensor("op_1698_cast_fp16")]; - tensor input_199_cast_fp16 = add(x = w_149_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_199_cast_fp16")]; - tensor var_1700_cast_fp16 = softmax(axis = var_1564, x = input_199_cast_fp16)[name = tensor("op_1700_cast_fp16")]; - tensor input_201_cast_fp16 = add(x = w_151_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_201_cast_fp16")]; - tensor var_1702_cast_fp16 = softmax(axis = var_1564, x = input_201_cast_fp16)[name = tensor("op_1702_cast_fp16")]; - tensor input_203_cast_fp16 = add(x = w_153_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_203_cast_fp16")]; - tensor var_1704_cast_fp16 = softmax(axis = var_1564, x = input_203_cast_fp16)[name = tensor("op_1704_cast_fp16")]; - tensor input_205_cast_fp16 = add(x = w_155_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_205_cast_fp16")]; - tensor var_1706_cast_fp16 = softmax(axis = var_1564, x = input_205_cast_fp16)[name = tensor("op_1706_cast_fp16")]; - tensor input_207_cast_fp16 = add(x = w_157_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_207_cast_fp16")]; - tensor var_1708_cast_fp16 = softmax(axis = var_1564, x = input_207_cast_fp16)[name = tensor("op_1708_cast_fp16")]; - tensor input_209_cast_fp16 = add(x = w_159_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_209_cast_fp16")]; - tensor var_1710_cast_fp16 = softmax(axis = var_1564, x = input_209_cast_fp16)[name = tensor("op_1710_cast_fp16")]; - tensor input_211_cast_fp16 = add(x = w_161_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_211_cast_fp16")]; - tensor var_1712_cast_fp16 = softmax(axis = var_1564, x = input_211_cast_fp16)[name = tensor("op_1712_cast_fp16")]; - tensor input_213_cast_fp16 = add(x = w_163_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_213_cast_fp16")]; - tensor var_1714_cast_fp16 = softmax(axis = var_1564, x = input_213_cast_fp16)[name = tensor("op_1714_cast_fp16")]; - tensor input_215_cast_fp16 = add(x = w_165_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_215_cast_fp16")]; - tensor var_1716_cast_fp16 = softmax(axis = var_1564, x = input_215_cast_fp16)[name = tensor("op_1716_cast_fp16")]; - tensor input_217_cast_fp16 = add(x = w_167_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_217_cast_fp16")]; - tensor var_1718_cast_fp16 = softmax(axis = var_1564, x = input_217_cast_fp16)[name = tensor("op_1718_cast_fp16")]; - tensor var_1720_equation_0 = const()[name = tensor("op_1720_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1720_cast_fp16 = einsum(equation = var_1720_equation_0, values = (var_1634_0, var_1696_cast_fp16))[name = tensor("op_1720_cast_fp16")]; - tensor var_1722_equation_0 = const()[name = tensor("op_1722_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1722_cast_fp16 = einsum(equation = var_1722_equation_0, values = (var_1634_1, var_1698_cast_fp16))[name = tensor("op_1722_cast_fp16")]; - tensor var_1724_equation_0 = const()[name = tensor("op_1724_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1724_cast_fp16 = einsum(equation = var_1724_equation_0, values = (var_1634_2, var_1700_cast_fp16))[name = tensor("op_1724_cast_fp16")]; - tensor var_1726_equation_0 = const()[name = tensor("op_1726_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1726_cast_fp16 = einsum(equation = var_1726_equation_0, values = (var_1634_3, var_1702_cast_fp16))[name = tensor("op_1726_cast_fp16")]; - tensor var_1728_equation_0 = const()[name = tensor("op_1728_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1728_cast_fp16 = einsum(equation = var_1728_equation_0, values = (var_1634_4, var_1704_cast_fp16))[name = tensor("op_1728_cast_fp16")]; - tensor var_1730_equation_0 = const()[name = tensor("op_1730_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1730_cast_fp16 = einsum(equation = var_1730_equation_0, values = (var_1634_5, var_1706_cast_fp16))[name = tensor("op_1730_cast_fp16")]; - tensor var_1732_equation_0 = const()[name = tensor("op_1732_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1732_cast_fp16 = einsum(equation = var_1732_equation_0, values = (var_1634_6, var_1708_cast_fp16))[name = tensor("op_1732_cast_fp16")]; - tensor var_1734_equation_0 = const()[name = tensor("op_1734_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1734_cast_fp16 = einsum(equation = var_1734_equation_0, values = (var_1634_7, var_1710_cast_fp16))[name = tensor("op_1734_cast_fp16")]; - tensor var_1736_equation_0 = const()[name = tensor("op_1736_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1736_cast_fp16 = einsum(equation = var_1736_equation_0, values = (var_1634_8, var_1712_cast_fp16))[name = tensor("op_1736_cast_fp16")]; - tensor var_1738_equation_0 = const()[name = tensor("op_1738_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1738_cast_fp16 = einsum(equation = var_1738_equation_0, values = (var_1634_9, var_1714_cast_fp16))[name = tensor("op_1738_cast_fp16")]; - tensor var_1740_equation_0 = const()[name = tensor("op_1740_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1740_cast_fp16 = einsum(equation = var_1740_equation_0, values = (var_1634_10, var_1716_cast_fp16))[name = tensor("op_1740_cast_fp16")]; - tensor var_1742_equation_0 = const()[name = tensor("op_1742_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1742_cast_fp16 = einsum(equation = var_1742_equation_0, values = (var_1634_11, var_1718_cast_fp16))[name = tensor("op_1742_cast_fp16")]; - tensor var_1744_interleave_0 = const()[name = tensor("op_1744_interleave_0"), val = tensor(false)]; - tensor var_1744_cast_fp16 = concat(axis = var_1564, interleave = var_1744_interleave_0, values = (var_1720_cast_fp16, var_1722_cast_fp16, var_1724_cast_fp16, var_1726_cast_fp16, var_1728_cast_fp16, var_1730_cast_fp16, var_1732_cast_fp16, var_1734_cast_fp16, var_1736_cast_fp16, var_1738_cast_fp16, var_1740_cast_fp16, var_1742_cast_fp16))[name = tensor("op_1744_cast_fp16")]; - tensor var_1748 = const()[name = tensor("op_1748"), val = tensor([1, 1])]; + tensor var_1555_axis_0 = const()[name = tensor("op_1555_axis_0"), val = tensor(1)]; + tensor var_1555_0, tensor var_1555_1, tensor var_1555_2, tensor var_1555_3, tensor var_1555_4, tensor var_1555_5, tensor var_1555_6, tensor var_1555_7, tensor var_1555_8, tensor var_1555_9, tensor var_1555_10, tensor var_1555_11 = split(axis = var_1555_axis_0, split_sizes = tile_34, x = var_1527)[name = tensor("op_1555")]; + tensor var_1569_equation_0 = const()[name = tensor("op_1569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1569 = einsum(equation = var_1569_equation_0, values = (var_1542_0, var_1528_0))[name = tensor("op_1569")]; + tensor var_1570_to_fp16 = const()[name = tensor("op_1570_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_145_cast_fp16 = mul(x = var_1569, y = var_1570_to_fp16)[name = tensor("w_145_cast_fp16")]; + tensor var_1573_equation_0 = const()[name = tensor("op_1573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1573 = einsum(equation = var_1573_equation_0, values = (var_1542_1, var_1528_1))[name = tensor("op_1573")]; + tensor var_1574_to_fp16 = const()[name = tensor("op_1574_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_147_cast_fp16 = mul(x = var_1573, y = var_1574_to_fp16)[name = tensor("w_147_cast_fp16")]; + tensor var_1577_equation_0 = const()[name = tensor("op_1577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1577 = einsum(equation = var_1577_equation_0, values = (var_1542_2, var_1528_2))[name = tensor("op_1577")]; + tensor var_1578_to_fp16 = const()[name = tensor("op_1578_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_149_cast_fp16 = mul(x = var_1577, y = var_1578_to_fp16)[name = tensor("w_149_cast_fp16")]; + tensor var_1581_equation_0 = const()[name = tensor("op_1581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1581 = einsum(equation = var_1581_equation_0, values = (var_1542_3, var_1528_3))[name = tensor("op_1581")]; + tensor var_1582_to_fp16 = const()[name = tensor("op_1582_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_151_cast_fp16 = mul(x = var_1581, y = var_1582_to_fp16)[name = tensor("w_151_cast_fp16")]; + tensor var_1585_equation_0 = const()[name = tensor("op_1585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1585 = einsum(equation = var_1585_equation_0, values = (var_1542_4, var_1528_4))[name = tensor("op_1585")]; + tensor var_1586_to_fp16 = const()[name = tensor("op_1586_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_153_cast_fp16 = mul(x = var_1585, y = var_1586_to_fp16)[name = tensor("w_153_cast_fp16")]; + tensor var_1589_equation_0 = const()[name = tensor("op_1589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1589 = einsum(equation = var_1589_equation_0, values = (var_1542_5, var_1528_5))[name = tensor("op_1589")]; + tensor var_1590_to_fp16 = const()[name = tensor("op_1590_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_155_cast_fp16 = mul(x = var_1589, y = var_1590_to_fp16)[name = tensor("w_155_cast_fp16")]; + tensor var_1593_equation_0 = const()[name = tensor("op_1593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1593 = einsum(equation = var_1593_equation_0, values = (var_1542_6, var_1528_6))[name = tensor("op_1593")]; + tensor var_1594_to_fp16 = const()[name = tensor("op_1594_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_157_cast_fp16 = mul(x = var_1593, y = var_1594_to_fp16)[name = tensor("w_157_cast_fp16")]; + tensor var_1597_equation_0 = const()[name = tensor("op_1597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1597 = einsum(equation = var_1597_equation_0, values = (var_1542_7, var_1528_7))[name = tensor("op_1597")]; + tensor var_1598_to_fp16 = const()[name = tensor("op_1598_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_159_cast_fp16 = mul(x = var_1597, y = var_1598_to_fp16)[name = tensor("w_159_cast_fp16")]; + tensor var_1601_equation_0 = const()[name = tensor("op_1601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1601 = einsum(equation = var_1601_equation_0, values = (var_1542_8, var_1528_8))[name = tensor("op_1601")]; + tensor var_1602_to_fp16 = const()[name = tensor("op_1602_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_161_cast_fp16 = mul(x = var_1601, y = var_1602_to_fp16)[name = tensor("w_161_cast_fp16")]; + tensor var_1605_equation_0 = const()[name = tensor("op_1605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1605 = einsum(equation = var_1605_equation_0, values = (var_1542_9, var_1528_9))[name = tensor("op_1605")]; + tensor var_1606_to_fp16 = const()[name = tensor("op_1606_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_163_cast_fp16 = mul(x = var_1605, y = var_1606_to_fp16)[name = tensor("w_163_cast_fp16")]; + tensor var_1609_equation_0 = const()[name = tensor("op_1609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1609 = einsum(equation = var_1609_equation_0, values = (var_1542_10, var_1528_10))[name = tensor("op_1609")]; + tensor var_1610_to_fp16 = const()[name = tensor("op_1610_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_165_cast_fp16 = mul(x = var_1609, y = var_1610_to_fp16)[name = tensor("w_165_cast_fp16")]; + tensor var_1613_equation_0 = const()[name = tensor("op_1613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1613 = einsum(equation = var_1613_equation_0, values = (var_1542_11, var_1528_11))[name = tensor("op_1613")]; + tensor var_1614_to_fp16 = const()[name = tensor("op_1614_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_167_cast_fp16 = mul(x = var_1613, y = var_1614_to_fp16)[name = tensor("w_167_cast_fp16")]; + tensor input_221_cast_fp16 = add(x = w_145_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_221_cast_fp16")]; + tensor var_1617_cast_fp16 = softmax(axis = var_1485, x = input_221_cast_fp16)[name = tensor("op_1617_cast_fp16")]; + tensor input_223_cast_fp16 = add(x = w_147_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_223_cast_fp16")]; + tensor var_1619_cast_fp16 = softmax(axis = var_1485, x = input_223_cast_fp16)[name = tensor("op_1619_cast_fp16")]; + tensor input_225_cast_fp16 = add(x = w_149_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_225_cast_fp16")]; + tensor var_1621_cast_fp16 = softmax(axis = var_1485, x = input_225_cast_fp16)[name = tensor("op_1621_cast_fp16")]; + tensor input_227_cast_fp16 = add(x = w_151_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_227_cast_fp16")]; + tensor var_1623_cast_fp16 = softmax(axis = var_1485, x = input_227_cast_fp16)[name = tensor("op_1623_cast_fp16")]; + tensor input_229_cast_fp16 = add(x = w_153_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_229_cast_fp16")]; + tensor var_1625_cast_fp16 = softmax(axis = var_1485, x = input_229_cast_fp16)[name = tensor("op_1625_cast_fp16")]; + tensor input_231_cast_fp16 = add(x = w_155_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_231_cast_fp16")]; + tensor var_1627_cast_fp16 = softmax(axis = var_1485, x = input_231_cast_fp16)[name = tensor("op_1627_cast_fp16")]; + tensor input_233_cast_fp16 = add(x = w_157_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_233_cast_fp16")]; + tensor var_1629_cast_fp16 = softmax(axis = var_1485, x = input_233_cast_fp16)[name = tensor("op_1629_cast_fp16")]; + tensor input_235_cast_fp16 = add(x = w_159_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_235_cast_fp16")]; + tensor var_1631_cast_fp16 = softmax(axis = var_1485, x = input_235_cast_fp16)[name = tensor("op_1631_cast_fp16")]; + tensor input_237_cast_fp16 = add(x = w_161_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_237_cast_fp16")]; + tensor var_1633_cast_fp16 = softmax(axis = var_1485, x = input_237_cast_fp16)[name = tensor("op_1633_cast_fp16")]; + tensor input_239_cast_fp16 = add(x = w_163_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_239_cast_fp16")]; + tensor var_1635_cast_fp16 = softmax(axis = var_1485, x = input_239_cast_fp16)[name = tensor("op_1635_cast_fp16")]; + tensor input_241_cast_fp16 = add(x = w_165_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_241_cast_fp16")]; + tensor var_1637_cast_fp16 = softmax(axis = var_1485, x = input_241_cast_fp16)[name = tensor("op_1637_cast_fp16")]; + tensor input_243_cast_fp16 = add(x = w_167_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_243_cast_fp16")]; + tensor var_1639_cast_fp16 = softmax(axis = var_1485, x = input_243_cast_fp16)[name = tensor("op_1639_cast_fp16")]; + tensor var_1641_equation_0 = const()[name = tensor("op_1641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1641_cast_fp16 = einsum(equation = var_1641_equation_0, values = (var_1555_0, var_1617_cast_fp16))[name = tensor("op_1641_cast_fp16")]; + tensor var_1643_equation_0 = const()[name = tensor("op_1643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1643_cast_fp16 = einsum(equation = var_1643_equation_0, values = (var_1555_1, var_1619_cast_fp16))[name = tensor("op_1643_cast_fp16")]; + tensor var_1645_equation_0 = const()[name = tensor("op_1645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1645_cast_fp16 = einsum(equation = var_1645_equation_0, values = (var_1555_2, var_1621_cast_fp16))[name = tensor("op_1645_cast_fp16")]; + tensor var_1647_equation_0 = const()[name = tensor("op_1647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1647_cast_fp16 = einsum(equation = var_1647_equation_0, values = (var_1555_3, var_1623_cast_fp16))[name = tensor("op_1647_cast_fp16")]; + tensor var_1649_equation_0 = const()[name = tensor("op_1649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1649_cast_fp16 = einsum(equation = var_1649_equation_0, values = (var_1555_4, var_1625_cast_fp16))[name = tensor("op_1649_cast_fp16")]; + tensor var_1651_equation_0 = const()[name = tensor("op_1651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1651_cast_fp16 = einsum(equation = var_1651_equation_0, values = (var_1555_5, var_1627_cast_fp16))[name = tensor("op_1651_cast_fp16")]; + tensor var_1653_equation_0 = const()[name = tensor("op_1653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1653_cast_fp16 = einsum(equation = var_1653_equation_0, values = (var_1555_6, var_1629_cast_fp16))[name = tensor("op_1653_cast_fp16")]; + tensor var_1655_equation_0 = const()[name = tensor("op_1655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1655_cast_fp16 = einsum(equation = var_1655_equation_0, values = (var_1555_7, var_1631_cast_fp16))[name = tensor("op_1655_cast_fp16")]; + tensor var_1657_equation_0 = const()[name = tensor("op_1657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1657_cast_fp16 = einsum(equation = var_1657_equation_0, values = (var_1555_8, var_1633_cast_fp16))[name = tensor("op_1657_cast_fp16")]; + tensor var_1659_equation_0 = const()[name = tensor("op_1659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1659_cast_fp16 = einsum(equation = var_1659_equation_0, values = (var_1555_9, var_1635_cast_fp16))[name = tensor("op_1659_cast_fp16")]; + tensor var_1661_equation_0 = const()[name = tensor("op_1661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1661_cast_fp16 = einsum(equation = var_1661_equation_0, values = (var_1555_10, var_1637_cast_fp16))[name = tensor("op_1661_cast_fp16")]; + tensor var_1663_equation_0 = const()[name = tensor("op_1663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1663_cast_fp16 = einsum(equation = var_1663_equation_0, values = (var_1555_11, var_1639_cast_fp16))[name = tensor("op_1663_cast_fp16")]; + tensor var_1665_interleave_0 = const()[name = tensor("op_1665_interleave_0"), val = tensor(false)]; + tensor var_1665_cast_fp16 = concat(axis = var_1485, interleave = var_1665_interleave_0, values = (var_1641_cast_fp16, var_1643_cast_fp16, var_1645_cast_fp16, var_1647_cast_fp16, var_1649_cast_fp16, var_1651_cast_fp16, var_1653_cast_fp16, var_1655_cast_fp16, var_1657_cast_fp16, var_1659_cast_fp16, var_1661_cast_fp16, var_1663_cast_fp16))[name = tensor("op_1665_cast_fp16")]; + tensor var_1669 = const()[name = tensor("op_1669"), val = tensor([1, 1])]; + tensor var_1671 = const()[name = tensor("op_1671"), val = tensor([1, 1])]; + tensor var_1673_pad_type_0 = const()[name = tensor("op_1673_pad_type_0"), val = tensor("custom")]; + tensor var_1673_pad_0 = const()[name = tensor("op_1673_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1673 = conv(bias = layers_6_attention_o_proj_bias, dilations = var_1671, groups = var_1485, pad = var_1673_pad_0, pad_type = var_1673_pad_type_0, strides = var_1669, weight = layers_6_attention_o_proj_weight, x = var_1665_cast_fp16)[name = tensor("op_1673")]; + tensor var_1675_interleave_0 = const()[name = tensor("op_1675_interleave_0"), val = tensor(false)]; + tensor var_1675 = concat(axis = var_1486, interleave = var_1675_interleave_0, values = var_1673)[name = tensor("op_1675")]; + tensor x_27 = add(x = transpose_30, y = var_1675)[name = tensor("x_27")]; + tensor input_247_perm_0 = const()[name = tensor("input_247_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_109 = const()[name = tensor("weight_109"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66808704)))]; + tensor bias_107 = const()[name = tensor("bias_107"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66809536)))]; + tensor o_27_axes_0 = const()[name = tensor("o_27_axes_0"), val = tensor([-1])]; + tensor var_1484_to_fp16 = const()[name = tensor("op_1484_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_28 = transpose(perm = input_247_perm_0, x = x_27)[name = tensor("transpose_28")]; + tensor o_27_cast_fp16 = layer_norm(axes = o_27_axes_0, beta = bias_107, epsilon = var_1484_to_fp16, gamma = weight_109, x = transpose_28)[name = tensor("o_27_cast_fp16")]; + tensor input_249_perm_0 = const()[name = tensor("input_249_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1691 = const()[name = tensor("op_1691"), val = tensor([1, 1])]; + tensor var_1693 = const()[name = tensor("op_1693"), val = tensor([1, 1])]; + tensor var_1695_pad_type_0 = const()[name = tensor("op_1695_pad_type_0"), val = tensor("custom")]; + tensor var_1695_pad_0 = const()[name = tensor("op_1695_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_27 = transpose(perm = input_249_perm_0, x = o_27_cast_fp16)[name = tensor("transpose_27")]; + tensor var_1695 = conv(bias = layers_6_mlp_fc1_bias, dilations = var_1693, groups = var_1485, pad = var_1695_pad_0, pad_type = var_1695_pad_type_0, strides = var_1691, weight = layers_6_mlp_fc1_weight, x = transpose_27)[name = tensor("op_1695")]; + tensor input_251_mode_0 = const()[name = tensor("input_251_mode_0"), val = tensor("EXACT")]; + tensor input_251 = gelu(mode = input_251_mode_0, x = var_1695)[name = tensor("input_251")]; + tensor var_1699 = const()[name = tensor("op_1699"), val = tensor([1, 1])]; + tensor var_1701 = const()[name = tensor("op_1701"), val = tensor([1, 1])]; + tensor var_1703_pad_type_0 = const()[name = tensor("op_1703_pad_type_0"), val = tensor("custom")]; + tensor var_1703_pad_0 = const()[name = tensor("op_1703_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1703 = conv(bias = layers_6_mlp_fc2_bias, dilations = var_1701, groups = var_1485, pad = var_1703_pad_0, pad_type = var_1703_pad_type_0, strides = var_1699, weight = layers_6_mlp_fc2_weight, x = input_251)[name = tensor("op_1703")]; + tensor x_29 = add(x = transpose_27, y = var_1703)[name = tensor("x_29")]; + tensor input_253_perm_0 = const()[name = tensor("input_253_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_115 = const()[name = tensor("weight_115"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66810368)))]; + tensor bias_113 = const()[name = tensor("bias_113"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66811200)))]; + tensor o_29_axes_0 = const()[name = tensor("o_29_axes_0"), val = tensor([-1])]; + tensor transpose_26 = transpose(perm = input_253_perm_0, x = x_29)[name = tensor("transpose_26")]; + tensor o_29_cast_fp16 = layer_norm(axes = o_29_axes_0, beta = bias_113, epsilon = var_1484_to_fp16, gamma = weight_115, x = transpose_26)[name = tensor("o_29_cast_fp16")]; + tensor hidden_states_15_perm_0 = const()[name = tensor("hidden_states_15_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1719 = const()[name = tensor("op_1719"), val = tensor(1)]; + tensor var_1720 = const()[name = tensor("op_1720"), val = tensor(0)]; + tensor var_1743 = const()[name = tensor("op_1743"), val = tensor([1, 1])]; + tensor var_1745 = const()[name = tensor("op_1745"), val = tensor([1, 1])]; + tensor var_1747_pad_type_0 = const()[name = tensor("op_1747_pad_type_0"), val = tensor("custom")]; + tensor var_1747_pad_0 = const()[name = tensor("op_1747_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_25 = transpose(perm = hidden_states_15_perm_0, x = o_29_cast_fp16)[name = tensor("transpose_25")]; + tensor var_1747 = conv(bias = layers_7_attention_q_proj_bias, dilations = var_1745, groups = var_1719, pad = var_1747_pad_0, pad_type = var_1747_pad_type_0, strides = var_1743, weight = layers_7_attention_q_proj_weight, x = transpose_25)[name = tensor("op_1747")]; tensor var_1750 = const()[name = tensor("op_1750"), val = tensor([1, 1])]; - tensor var_1752_pad_type_0 = const()[name = tensor("op_1752_pad_type_0"), val = tensor("custom")]; - tensor var_1752_pad_0 = const()[name = tensor("op_1752_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1752 = conv(bias = layers_6_attention_o_proj_bias, dilations = var_1750, groups = var_1564, pad = var_1752_pad_0, pad_type = var_1752_pad_type_0, strides = var_1748, weight = layers_6_attention_o_proj_weight, x = var_1744_cast_fp16)[name = tensor("op_1752")]; - tensor var_1754_interleave_0 = const()[name = tensor("op_1754_interleave_0"), val = tensor(false)]; - tensor var_1754 = concat(axis = var_1565, interleave = var_1754_interleave_0, values = var_1752)[name = tensor("op_1754")]; - tensor x_53 = add(x = var_1558_cast_fp16, y = var_1754)[name = tensor("x_53")]; - tensor var_1561_promoted = const()[name = tensor("op_1561_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1562_promoted = const()[name = tensor("op_1562_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_55 = clip(alpha = var_1561_promoted, beta = var_1562_promoted, x = x_53)[name = tensor("x_55")]; - tensor var_1759 = const()[name = tensor("op_1759"), val = tensor([1])]; - tensor mean_27 = reduce_mean(axes = var_1759, keep_dims = var_1566, x = x_55)[name = tensor("mean_27")]; - tensor zero_mean_27 = sub(x = x_55, y = mean_27)[name = tensor("zero_mean_27")]; - tensor var_1563_promoted = const()[name = tensor("op_1563_promoted"), val = tensor(0x1p+1)]; - tensor var_1762 = pow(x = zero_mean_27, y = var_1563_promoted)[name = tensor("op_1762")]; - tensor var_1763 = const()[name = tensor("op_1763"), val = tensor([1])]; - tensor var_1764 = reduce_mean(axes = var_1763, keep_dims = var_1566, x = var_1762)[name = tensor("op_1764")]; - tensor var_1765_to_fp16 = const()[name = tensor("op_1765_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1766_cast_fp16 = add(x = var_1764, y = var_1765_to_fp16)[name = tensor("op_1766_cast_fp16")]; - tensor denom_27_epsilon_0 = const()[name = tensor("denom_27_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0, x = var_1766_cast_fp16)[name = tensor("denom_27_cast_fp16")]; - tensor var_1768_cast_fp16 = mul(x = zero_mean_27, y = denom_27_cast_fp16)[name = tensor("op_1768_cast_fp16")]; - tensor var_1770_gamma_0_to_fp16 = const()[name = tensor("op_1770_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66810368)))]; - tensor var_1770_beta_0_to_fp16 = const()[name = tensor("op_1770_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66811200)))]; - tensor var_1770_epsilon_0_to_fp16 = const()[name = tensor("op_1770_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1770_cast_fp16 = batch_norm(beta = var_1770_beta_0_to_fp16, epsilon = var_1770_epsilon_0_to_fp16, gamma = var_1770_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1768_cast_fp16)[name = tensor("op_1770_cast_fp16")]; - tensor var_1776 = const()[name = tensor("op_1776"), val = tensor([1, 1])]; - tensor var_1778 = const()[name = tensor("op_1778"), val = tensor([1, 1])]; - tensor var_1780_pad_type_0 = const()[name = tensor("op_1780_pad_type_0"), val = tensor("custom")]; - tensor var_1780_pad_0 = const()[name = tensor("op_1780_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1780 = conv(bias = layers_6_mlp_fc1_bias, dilations = var_1778, groups = var_1564, pad = var_1780_pad_0, pad_type = var_1780_pad_type_0, strides = var_1776, weight = layers_6_mlp_fc1_weight, x = var_1770_cast_fp16)[name = tensor("op_1780")]; - tensor input_223_mode_0 = const()[name = tensor("input_223_mode_0"), val = tensor("EXACT")]; - tensor input_223 = gelu(mode = input_223_mode_0, x = var_1780)[name = tensor("input_223")]; - tensor var_1784 = const()[name = tensor("op_1784"), val = tensor([1, 1])]; - tensor var_1786 = const()[name = tensor("op_1786"), val = tensor([1, 1])]; - tensor var_1788_pad_type_0 = const()[name = tensor("op_1788_pad_type_0"), val = tensor("custom")]; - tensor var_1788_pad_0 = const()[name = tensor("op_1788_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1788 = conv(bias = layers_6_mlp_fc2_bias, dilations = var_1786, groups = var_1564, pad = var_1788_pad_0, pad_type = var_1788_pad_type_0, strides = var_1784, weight = layers_6_mlp_fc2_weight, x = input_223)[name = tensor("op_1788")]; - tensor x_57 = add(x = var_1770_cast_fp16, y = var_1788)[name = tensor("x_57")]; - tensor var_1561_promoted_1 = const()[name = tensor("op_1561_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1562_promoted_1 = const()[name = tensor("op_1562_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_59 = clip(alpha = var_1561_promoted_1, beta = var_1562_promoted_1, x = x_57)[name = tensor("x_59")]; - tensor var_1793 = const()[name = tensor("op_1793"), val = tensor([1])]; - tensor mean_29 = reduce_mean(axes = var_1793, keep_dims = var_1566, x = x_59)[name = tensor("mean_29")]; - tensor zero_mean_29 = sub(x = x_59, y = mean_29)[name = tensor("zero_mean_29")]; - tensor var_1563_promoted_1 = const()[name = tensor("op_1563_promoted_1"), val = tensor(0x1p+1)]; - tensor var_1796 = pow(x = zero_mean_29, y = var_1563_promoted_1)[name = tensor("op_1796")]; - tensor var_1797 = const()[name = tensor("op_1797"), val = tensor([1])]; - tensor var_1798 = reduce_mean(axes = var_1797, keep_dims = var_1566, x = var_1796)[name = tensor("op_1798")]; - tensor var_1799_to_fp16 = const()[name = tensor("op_1799_to_fp16"), val = tensor(0x1p-24)]; - tensor var_1800_cast_fp16 = add(x = var_1798, y = var_1799_to_fp16)[name = tensor("op_1800_cast_fp16")]; - tensor denom_29_epsilon_0 = const()[name = tensor("denom_29_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0, x = var_1800_cast_fp16)[name = tensor("denom_29_cast_fp16")]; - tensor var_1802_cast_fp16 = mul(x = zero_mean_29, y = denom_29_cast_fp16)[name = tensor("op_1802_cast_fp16")]; - tensor var_1804_gamma_0_to_fp16 = const()[name = tensor("op_1804_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66812032)))]; - tensor var_1804_beta_0_to_fp16 = const()[name = tensor("op_1804_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66812864)))]; - tensor var_1804_epsilon_0_to_fp16 = const()[name = tensor("op_1804_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1804_cast_fp16 = batch_norm(beta = var_1804_beta_0_to_fp16, epsilon = var_1804_epsilon_0_to_fp16, gamma = var_1804_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_1802_cast_fp16)[name = tensor("op_1804_cast_fp16")]; - tensor var_1810 = const()[name = tensor("op_1810"), val = tensor(1)]; - tensor var_1811 = const()[name = tensor("op_1811"), val = tensor(0)]; - tensor var_1812 = const()[name = tensor("op_1812"), val = tensor(true)]; - tensor var_1834 = const()[name = tensor("op_1834"), val = tensor([1, 1])]; - tensor var_1836 = const()[name = tensor("op_1836"), val = tensor([1, 1])]; - tensor var_1838_pad_type_0 = const()[name = tensor("op_1838_pad_type_0"), val = tensor("custom")]; - tensor var_1838_pad_0 = const()[name = tensor("op_1838_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1838 = conv(bias = layers_7_attention_q_proj_bias, dilations = var_1836, groups = var_1810, pad = var_1838_pad_0, pad_type = var_1838_pad_type_0, strides = var_1834, weight = layers_7_attention_q_proj_weight, x = var_1804_cast_fp16)[name = tensor("op_1838")]; - tensor var_1841 = const()[name = tensor("op_1841"), val = tensor([1, 1])]; - tensor var_1843 = const()[name = tensor("op_1843"), val = tensor([1, 1])]; + tensor var_1752 = const()[name = tensor("op_1752"), val = tensor([1, 1])]; tensor ks_15_pad_type_0 = const()[name = tensor("ks_15_pad_type_0"), val = tensor("custom")]; tensor ks_15_pad_0 = const()[name = tensor("ks_15_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_15 = conv(bias = layers_7_attention_k_proj_bias, dilations = var_1843, groups = var_1810, pad = ks_15_pad_0, pad_type = ks_15_pad_type_0, strides = var_1841, weight = layers_7_attention_k_proj_weight, x = var_1804_cast_fp16)[name = tensor("ks_15")]; - tensor var_1848 = const()[name = tensor("op_1848"), val = tensor([1, 1])]; - tensor var_1850 = const()[name = tensor("op_1850"), val = tensor([1, 1])]; - tensor var_1852_pad_type_0 = const()[name = tensor("op_1852_pad_type_0"), val = tensor("custom")]; - tensor var_1852_pad_0 = const()[name = tensor("op_1852_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1852 = conv(bias = layers_7_attention_v_proj_bias, dilations = var_1850, groups = var_1810, pad = var_1852_pad_0, pad_type = var_1852_pad_type_0, strides = var_1848, weight = layers_7_attention_v_proj_weight, x = var_1804_cast_fp16)[name = tensor("op_1852")]; + tensor ks_15 = conv(bias = layers_7_attention_k_proj_bias, dilations = var_1752, groups = var_1719, pad = ks_15_pad_0, pad_type = ks_15_pad_type_0, strides = var_1750, weight = layers_7_attention_k_proj_weight, x = transpose_25)[name = tensor("ks_15")]; + tensor var_1757 = const()[name = tensor("op_1757"), val = tensor([1, 1])]; + tensor var_1759 = const()[name = tensor("op_1759"), val = tensor([1, 1])]; + tensor var_1761_pad_type_0 = const()[name = tensor("op_1761_pad_type_0"), val = tensor("custom")]; + tensor var_1761_pad_0 = const()[name = tensor("op_1761_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1761 = conv(bias = layers_7_attention_v_proj_bias, dilations = var_1759, groups = var_1719, pad = var_1761_pad_0, pad_type = var_1761_pad_type_0, strides = var_1757, weight = layers_7_attention_v_proj_weight, x = transpose_25)[name = tensor("op_1761")]; tensor tile_37 = const()[name = tensor("tile_37"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1853_axis_0 = const()[name = tensor("op_1853_axis_0"), val = tensor(1)]; - tensor var_1853_0, tensor var_1853_1, tensor var_1853_2, tensor var_1853_3, tensor var_1853_4, tensor var_1853_5, tensor var_1853_6, tensor var_1853_7, tensor var_1853_8, tensor var_1853_9, tensor var_1853_10, tensor var_1853_11 = split(axis = var_1853_axis_0, split_sizes = tile_37, x = var_1838)[name = tensor("op_1853")]; - tensor var_1866_perm_0 = const()[name = tensor("op_1866_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1762_axis_0 = const()[name = tensor("op_1762_axis_0"), val = tensor(1)]; + tensor var_1762_0, tensor var_1762_1, tensor var_1762_2, tensor var_1762_3, tensor var_1762_4, tensor var_1762_5, tensor var_1762_6, tensor var_1762_7, tensor var_1762_8, tensor var_1762_9, tensor var_1762_10, tensor var_1762_11 = split(axis = var_1762_axis_0, split_sizes = tile_37, x = var_1747)[name = tensor("op_1762")]; + tensor var_1775_perm_0 = const()[name = tensor("op_1775_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_38 = const()[name = tensor("tile_38"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1867_axis_0 = const()[name = tensor("op_1867_axis_0"), val = tensor(3)]; - tensor transpose_4 = transpose(perm = var_1866_perm_0, x = ks_15)[name = tensor("transpose_4")]; - tensor var_1867_0, tensor var_1867_1, tensor var_1867_2, tensor var_1867_3, tensor var_1867_4, tensor var_1867_5, tensor var_1867_6, tensor var_1867_7, tensor var_1867_8, tensor var_1867_9, tensor var_1867_10, tensor var_1867_11 = split(axis = var_1867_axis_0, split_sizes = tile_38, x = transpose_4)[name = tensor("op_1867")]; + tensor var_1776_axis_0 = const()[name = tensor("op_1776_axis_0"), val = tensor(3)]; + tensor transpose_24 = transpose(perm = var_1775_perm_0, x = ks_15)[name = tensor("transpose_24")]; + tensor var_1776_0, tensor var_1776_1, tensor var_1776_2, tensor var_1776_3, tensor var_1776_4, tensor var_1776_5, tensor var_1776_6, tensor var_1776_7, tensor var_1776_8, tensor var_1776_9, tensor var_1776_10, tensor var_1776_11 = split(axis = var_1776_axis_0, split_sizes = tile_38, x = transpose_24)[name = tensor("op_1776")]; tensor tile_39 = const()[name = tensor("tile_39"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_1880_axis_0 = const()[name = tensor("op_1880_axis_0"), val = tensor(1)]; - tensor var_1880_0, tensor var_1880_1, tensor var_1880_2, tensor var_1880_3, tensor var_1880_4, tensor var_1880_5, tensor var_1880_6, tensor var_1880_7, tensor var_1880_8, tensor var_1880_9, tensor var_1880_10, tensor var_1880_11 = split(axis = var_1880_axis_0, split_sizes = tile_39, x = var_1852)[name = tensor("op_1880")]; - tensor var_1894_equation_0 = const()[name = tensor("op_1894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1894 = einsum(equation = var_1894_equation_0, values = (var_1867_0, var_1853_0))[name = tensor("op_1894")]; - tensor var_1895_to_fp16 = const()[name = tensor("op_1895_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_169_cast_fp16 = mul(x = var_1894, y = var_1895_to_fp16)[name = tensor("w_169_cast_fp16")]; - tensor var_1898_equation_0 = const()[name = tensor("op_1898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1898 = einsum(equation = var_1898_equation_0, values = (var_1867_1, var_1853_1))[name = tensor("op_1898")]; - tensor var_1899_to_fp16 = const()[name = tensor("op_1899_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_171_cast_fp16 = mul(x = var_1898, y = var_1899_to_fp16)[name = tensor("w_171_cast_fp16")]; - tensor var_1902_equation_0 = const()[name = tensor("op_1902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1902 = einsum(equation = var_1902_equation_0, values = (var_1867_2, var_1853_2))[name = tensor("op_1902")]; - tensor var_1903_to_fp16 = const()[name = tensor("op_1903_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_173_cast_fp16 = mul(x = var_1902, y = var_1903_to_fp16)[name = tensor("w_173_cast_fp16")]; - tensor var_1906_equation_0 = const()[name = tensor("op_1906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1906 = einsum(equation = var_1906_equation_0, values = (var_1867_3, var_1853_3))[name = tensor("op_1906")]; - tensor var_1907_to_fp16 = const()[name = tensor("op_1907_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_175_cast_fp16 = mul(x = var_1906, y = var_1907_to_fp16)[name = tensor("w_175_cast_fp16")]; - tensor var_1910_equation_0 = const()[name = tensor("op_1910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1910 = einsum(equation = var_1910_equation_0, values = (var_1867_4, var_1853_4))[name = tensor("op_1910")]; - tensor var_1911_to_fp16 = const()[name = tensor("op_1911_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_177_cast_fp16 = mul(x = var_1910, y = var_1911_to_fp16)[name = tensor("w_177_cast_fp16")]; - tensor var_1914_equation_0 = const()[name = tensor("op_1914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1914 = einsum(equation = var_1914_equation_0, values = (var_1867_5, var_1853_5))[name = tensor("op_1914")]; - tensor var_1915_to_fp16 = const()[name = tensor("op_1915_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_179_cast_fp16 = mul(x = var_1914, y = var_1915_to_fp16)[name = tensor("w_179_cast_fp16")]; - tensor var_1918_equation_0 = const()[name = tensor("op_1918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1918 = einsum(equation = var_1918_equation_0, values = (var_1867_6, var_1853_6))[name = tensor("op_1918")]; - tensor var_1919_to_fp16 = const()[name = tensor("op_1919_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_181_cast_fp16 = mul(x = var_1918, y = var_1919_to_fp16)[name = tensor("w_181_cast_fp16")]; - tensor var_1922_equation_0 = const()[name = tensor("op_1922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1922 = einsum(equation = var_1922_equation_0, values = (var_1867_7, var_1853_7))[name = tensor("op_1922")]; - tensor var_1923_to_fp16 = const()[name = tensor("op_1923_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_183_cast_fp16 = mul(x = var_1922, y = var_1923_to_fp16)[name = tensor("w_183_cast_fp16")]; - tensor var_1926_equation_0 = const()[name = tensor("op_1926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1926 = einsum(equation = var_1926_equation_0, values = (var_1867_8, var_1853_8))[name = tensor("op_1926")]; - tensor var_1927_to_fp16 = const()[name = tensor("op_1927_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_185_cast_fp16 = mul(x = var_1926, y = var_1927_to_fp16)[name = tensor("w_185_cast_fp16")]; - tensor var_1930_equation_0 = const()[name = tensor("op_1930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1930 = einsum(equation = var_1930_equation_0, values = (var_1867_9, var_1853_9))[name = tensor("op_1930")]; - tensor var_1931_to_fp16 = const()[name = tensor("op_1931_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_187_cast_fp16 = mul(x = var_1930, y = var_1931_to_fp16)[name = tensor("w_187_cast_fp16")]; - tensor var_1934_equation_0 = const()[name = tensor("op_1934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1934 = einsum(equation = var_1934_equation_0, values = (var_1867_10, var_1853_10))[name = tensor("op_1934")]; - tensor var_1935_to_fp16 = const()[name = tensor("op_1935_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_189_cast_fp16 = mul(x = var_1934, y = var_1935_to_fp16)[name = tensor("w_189_cast_fp16")]; - tensor var_1938_equation_0 = const()[name = tensor("op_1938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_1938 = einsum(equation = var_1938_equation_0, values = (var_1867_11, var_1853_11))[name = tensor("op_1938")]; - tensor var_1939_to_fp16 = const()[name = tensor("op_1939_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_191_cast_fp16 = mul(x = var_1938, y = var_1939_to_fp16)[name = tensor("w_191_cast_fp16")]; - tensor input_227_cast_fp16 = add(x = w_169_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_227_cast_fp16")]; - tensor var_1942_cast_fp16 = softmax(axis = var_1810, x = input_227_cast_fp16)[name = tensor("op_1942_cast_fp16")]; - tensor input_229_cast_fp16 = add(x = w_171_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_229_cast_fp16")]; - tensor var_1944_cast_fp16 = softmax(axis = var_1810, x = input_229_cast_fp16)[name = tensor("op_1944_cast_fp16")]; - tensor input_231_cast_fp16 = add(x = w_173_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_231_cast_fp16")]; - tensor var_1946_cast_fp16 = softmax(axis = var_1810, x = input_231_cast_fp16)[name = tensor("op_1946_cast_fp16")]; - tensor input_233_cast_fp16 = add(x = w_175_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_233_cast_fp16")]; - tensor var_1948_cast_fp16 = softmax(axis = var_1810, x = input_233_cast_fp16)[name = tensor("op_1948_cast_fp16")]; - tensor input_235_cast_fp16 = add(x = w_177_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_235_cast_fp16")]; - tensor var_1950_cast_fp16 = softmax(axis = var_1810, x = input_235_cast_fp16)[name = tensor("op_1950_cast_fp16")]; - tensor input_237_cast_fp16 = add(x = w_179_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_237_cast_fp16")]; - tensor var_1952_cast_fp16 = softmax(axis = var_1810, x = input_237_cast_fp16)[name = tensor("op_1952_cast_fp16")]; - tensor input_239_cast_fp16 = add(x = w_181_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_239_cast_fp16")]; - tensor var_1954_cast_fp16 = softmax(axis = var_1810, x = input_239_cast_fp16)[name = tensor("op_1954_cast_fp16")]; - tensor input_241_cast_fp16 = add(x = w_183_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_241_cast_fp16")]; - tensor var_1956_cast_fp16 = softmax(axis = var_1810, x = input_241_cast_fp16)[name = tensor("op_1956_cast_fp16")]; - tensor input_243_cast_fp16 = add(x = w_185_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_243_cast_fp16")]; - tensor var_1958_cast_fp16 = softmax(axis = var_1810, x = input_243_cast_fp16)[name = tensor("op_1958_cast_fp16")]; - tensor input_245_cast_fp16 = add(x = w_187_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_245_cast_fp16")]; - tensor var_1960_cast_fp16 = softmax(axis = var_1810, x = input_245_cast_fp16)[name = tensor("op_1960_cast_fp16")]; - tensor input_247_cast_fp16 = add(x = w_189_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_247_cast_fp16")]; - tensor var_1962_cast_fp16 = softmax(axis = var_1810, x = input_247_cast_fp16)[name = tensor("op_1962_cast_fp16")]; - tensor input_249_cast_fp16 = add(x = w_191_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_249_cast_fp16")]; - tensor var_1964_cast_fp16 = softmax(axis = var_1810, x = input_249_cast_fp16)[name = tensor("op_1964_cast_fp16")]; - tensor var_1966_equation_0 = const()[name = tensor("op_1966_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1966_cast_fp16 = einsum(equation = var_1966_equation_0, values = (var_1880_0, var_1942_cast_fp16))[name = tensor("op_1966_cast_fp16")]; - tensor var_1968_equation_0 = const()[name = tensor("op_1968_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1968_cast_fp16 = einsum(equation = var_1968_equation_0, values = (var_1880_1, var_1944_cast_fp16))[name = tensor("op_1968_cast_fp16")]; - tensor var_1970_equation_0 = const()[name = tensor("op_1970_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1970_cast_fp16 = einsum(equation = var_1970_equation_0, values = (var_1880_2, var_1946_cast_fp16))[name = tensor("op_1970_cast_fp16")]; - tensor var_1972_equation_0 = const()[name = tensor("op_1972_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1972_cast_fp16 = einsum(equation = var_1972_equation_0, values = (var_1880_3, var_1948_cast_fp16))[name = tensor("op_1972_cast_fp16")]; - tensor var_1974_equation_0 = const()[name = tensor("op_1974_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1974_cast_fp16 = einsum(equation = var_1974_equation_0, values = (var_1880_4, var_1950_cast_fp16))[name = tensor("op_1974_cast_fp16")]; - tensor var_1976_equation_0 = const()[name = tensor("op_1976_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1976_cast_fp16 = einsum(equation = var_1976_equation_0, values = (var_1880_5, var_1952_cast_fp16))[name = tensor("op_1976_cast_fp16")]; - tensor var_1978_equation_0 = const()[name = tensor("op_1978_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1978_cast_fp16 = einsum(equation = var_1978_equation_0, values = (var_1880_6, var_1954_cast_fp16))[name = tensor("op_1978_cast_fp16")]; - tensor var_1980_equation_0 = const()[name = tensor("op_1980_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1980_cast_fp16 = einsum(equation = var_1980_equation_0, values = (var_1880_7, var_1956_cast_fp16))[name = tensor("op_1980_cast_fp16")]; - tensor var_1982_equation_0 = const()[name = tensor("op_1982_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1982_cast_fp16 = einsum(equation = var_1982_equation_0, values = (var_1880_8, var_1958_cast_fp16))[name = tensor("op_1982_cast_fp16")]; - tensor var_1984_equation_0 = const()[name = tensor("op_1984_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1984_cast_fp16 = einsum(equation = var_1984_equation_0, values = (var_1880_9, var_1960_cast_fp16))[name = tensor("op_1984_cast_fp16")]; - tensor var_1986_equation_0 = const()[name = tensor("op_1986_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1986_cast_fp16 = einsum(equation = var_1986_equation_0, values = (var_1880_10, var_1962_cast_fp16))[name = tensor("op_1986_cast_fp16")]; - tensor var_1988_equation_0 = const()[name = tensor("op_1988_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_1988_cast_fp16 = einsum(equation = var_1988_equation_0, values = (var_1880_11, var_1964_cast_fp16))[name = tensor("op_1988_cast_fp16")]; - tensor var_1990_interleave_0 = const()[name = tensor("op_1990_interleave_0"), val = tensor(false)]; - tensor var_1990_cast_fp16 = concat(axis = var_1810, interleave = var_1990_interleave_0, values = (var_1966_cast_fp16, var_1968_cast_fp16, var_1970_cast_fp16, var_1972_cast_fp16, var_1974_cast_fp16, var_1976_cast_fp16, var_1978_cast_fp16, var_1980_cast_fp16, var_1982_cast_fp16, var_1984_cast_fp16, var_1986_cast_fp16, var_1988_cast_fp16))[name = tensor("op_1990_cast_fp16")]; - tensor var_1994 = const()[name = tensor("op_1994"), val = tensor([1, 1])]; - tensor var_1996 = const()[name = tensor("op_1996"), val = tensor([1, 1])]; - tensor var_1998_pad_type_0 = const()[name = tensor("op_1998_pad_type_0"), val = tensor("custom")]; - tensor var_1998_pad_0 = const()[name = tensor("op_1998_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_1998 = conv(bias = layers_7_attention_o_proj_bias, dilations = var_1996, groups = var_1810, pad = var_1998_pad_0, pad_type = var_1998_pad_type_0, strides = var_1994, weight = layers_7_attention_o_proj_weight, x = var_1990_cast_fp16)[name = tensor("op_1998")]; - tensor var_2000_interleave_0 = const()[name = tensor("op_2000_interleave_0"), val = tensor(false)]; - tensor var_2000 = concat(axis = var_1811, interleave = var_2000_interleave_0, values = var_1998)[name = tensor("op_2000")]; - tensor x_61 = add(x = var_1804_cast_fp16, y = var_2000)[name = tensor("x_61")]; - tensor var_1807_promoted = const()[name = tensor("op_1807_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_1808_promoted = const()[name = tensor("op_1808_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_63 = clip(alpha = var_1807_promoted, beta = var_1808_promoted, x = x_61)[name = tensor("x_63")]; - tensor var_2005 = const()[name = tensor("op_2005"), val = tensor([1])]; - tensor mean_31 = reduce_mean(axes = var_2005, keep_dims = var_1812, x = x_63)[name = tensor("mean_31")]; - tensor zero_mean_31 = sub(x = x_63, y = mean_31)[name = tensor("zero_mean_31")]; - tensor var_1809_promoted = const()[name = tensor("op_1809_promoted"), val = tensor(0x1p+1)]; - tensor var_2008 = pow(x = zero_mean_31, y = var_1809_promoted)[name = tensor("op_2008")]; - tensor var_2009 = const()[name = tensor("op_2009"), val = tensor([1])]; - tensor var_2010 = reduce_mean(axes = var_2009, keep_dims = var_1812, x = var_2008)[name = tensor("op_2010")]; - tensor var_2011_to_fp16 = const()[name = tensor("op_2011_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2012_cast_fp16 = add(x = var_2010, y = var_2011_to_fp16)[name = tensor("op_2012_cast_fp16")]; - tensor denom_31_epsilon_0 = const()[name = tensor("denom_31_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0, x = var_2012_cast_fp16)[name = tensor("denom_31_cast_fp16")]; - tensor var_2014_cast_fp16 = mul(x = zero_mean_31, y = denom_31_cast_fp16)[name = tensor("op_2014_cast_fp16")]; - tensor var_2016_gamma_0_to_fp16 = const()[name = tensor("op_2016_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66813696)))]; - tensor var_2016_beta_0_to_fp16 = const()[name = tensor("op_2016_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66814528)))]; - tensor var_2016_epsilon_0_to_fp16 = const()[name = tensor("op_2016_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2016_cast_fp16 = batch_norm(beta = var_2016_beta_0_to_fp16, epsilon = var_2016_epsilon_0_to_fp16, gamma = var_2016_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2014_cast_fp16)[name = tensor("op_2016_cast_fp16")]; - tensor var_2022 = const()[name = tensor("op_2022"), val = tensor([1, 1])]; - tensor var_2024 = const()[name = tensor("op_2024"), val = tensor([1, 1])]; - tensor var_2026_pad_type_0 = const()[name = tensor("op_2026_pad_type_0"), val = tensor("custom")]; - tensor var_2026_pad_0 = const()[name = tensor("op_2026_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2026 = conv(bias = layers_7_mlp_fc1_bias, dilations = var_2024, groups = var_1810, pad = var_2026_pad_0, pad_type = var_2026_pad_type_0, strides = var_2022, weight = layers_7_mlp_fc1_weight, x = var_2016_cast_fp16)[name = tensor("op_2026")]; - tensor input_255_mode_0 = const()[name = tensor("input_255_mode_0"), val = tensor("EXACT")]; - tensor input_255 = gelu(mode = input_255_mode_0, x = var_2026)[name = tensor("input_255")]; - tensor var_2030 = const()[name = tensor("op_2030"), val = tensor([1, 1])]; - tensor var_2032 = const()[name = tensor("op_2032"), val = tensor([1, 1])]; - tensor var_2034_pad_type_0 = const()[name = tensor("op_2034_pad_type_0"), val = tensor("custom")]; - tensor var_2034_pad_0 = const()[name = tensor("op_2034_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2034 = conv(bias = layers_7_mlp_fc2_bias, dilations = var_2032, groups = var_1810, pad = var_2034_pad_0, pad_type = var_2034_pad_type_0, strides = var_2030, weight = layers_7_mlp_fc2_weight, x = input_255)[name = tensor("op_2034")]; - tensor x_65 = add(x = var_2016_cast_fp16, y = var_2034)[name = tensor("x_65")]; - tensor var_1807_promoted_1 = const()[name = tensor("op_1807_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_1808_promoted_1 = const()[name = tensor("op_1808_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_67 = clip(alpha = var_1807_promoted_1, beta = var_1808_promoted_1, x = x_65)[name = tensor("x_67")]; - tensor var_2039 = const()[name = tensor("op_2039"), val = tensor([1])]; - tensor mean_33 = reduce_mean(axes = var_2039, keep_dims = var_1812, x = x_67)[name = tensor("mean_33")]; - tensor zero_mean_33 = sub(x = x_67, y = mean_33)[name = tensor("zero_mean_33")]; - tensor var_1809_promoted_1 = const()[name = tensor("op_1809_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2042 = pow(x = zero_mean_33, y = var_1809_promoted_1)[name = tensor("op_2042")]; - tensor var_2043 = const()[name = tensor("op_2043"), val = tensor([1])]; - tensor var_2044 = reduce_mean(axes = var_2043, keep_dims = var_1812, x = var_2042)[name = tensor("op_2044")]; - tensor var_2045_to_fp16 = const()[name = tensor("op_2045_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2046_cast_fp16 = add(x = var_2044, y = var_2045_to_fp16)[name = tensor("op_2046_cast_fp16")]; - tensor denom_33_epsilon_0 = const()[name = tensor("denom_33_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0, x = var_2046_cast_fp16)[name = tensor("denom_33_cast_fp16")]; - tensor var_2048_cast_fp16 = mul(x = zero_mean_33, y = denom_33_cast_fp16)[name = tensor("op_2048_cast_fp16")]; - tensor var_2050_gamma_0_to_fp16 = const()[name = tensor("op_2050_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66815360)))]; - tensor var_2050_beta_0_to_fp16 = const()[name = tensor("op_2050_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66816192)))]; - tensor var_2050_epsilon_0_to_fp16 = const()[name = tensor("op_2050_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2050_cast_fp16 = batch_norm(beta = var_2050_beta_0_to_fp16, epsilon = var_2050_epsilon_0_to_fp16, gamma = var_2050_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2048_cast_fp16)[name = tensor("op_2050_cast_fp16")]; - tensor var_2056 = const()[name = tensor("op_2056"), val = tensor(1)]; - tensor var_2057 = const()[name = tensor("op_2057"), val = tensor(0)]; - tensor var_2058 = const()[name = tensor("op_2058"), val = tensor(true)]; - tensor var_2080 = const()[name = tensor("op_2080"), val = tensor([1, 1])]; - tensor var_2082 = const()[name = tensor("op_2082"), val = tensor([1, 1])]; - tensor var_2084_pad_type_0 = const()[name = tensor("op_2084_pad_type_0"), val = tensor("custom")]; - tensor var_2084_pad_0 = const()[name = tensor("op_2084_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2084 = conv(bias = layers_8_attention_q_proj_bias, dilations = var_2082, groups = var_2056, pad = var_2084_pad_0, pad_type = var_2084_pad_type_0, strides = var_2080, weight = layers_8_attention_q_proj_weight, x = var_2050_cast_fp16)[name = tensor("op_2084")]; - tensor var_2087 = const()[name = tensor("op_2087"), val = tensor([1, 1])]; - tensor var_2089 = const()[name = tensor("op_2089"), val = tensor([1, 1])]; + tensor var_1789_axis_0 = const()[name = tensor("op_1789_axis_0"), val = tensor(1)]; + tensor var_1789_0, tensor var_1789_1, tensor var_1789_2, tensor var_1789_3, tensor var_1789_4, tensor var_1789_5, tensor var_1789_6, tensor var_1789_7, tensor var_1789_8, tensor var_1789_9, tensor var_1789_10, tensor var_1789_11 = split(axis = var_1789_axis_0, split_sizes = tile_39, x = var_1761)[name = tensor("op_1789")]; + tensor var_1803_equation_0 = const()[name = tensor("op_1803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1803 = einsum(equation = var_1803_equation_0, values = (var_1776_0, var_1762_0))[name = tensor("op_1803")]; + tensor var_1804_to_fp16 = const()[name = tensor("op_1804_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_169_cast_fp16 = mul(x = var_1803, y = var_1804_to_fp16)[name = tensor("w_169_cast_fp16")]; + tensor var_1807_equation_0 = const()[name = tensor("op_1807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1807 = einsum(equation = var_1807_equation_0, values = (var_1776_1, var_1762_1))[name = tensor("op_1807")]; + tensor var_1808_to_fp16 = const()[name = tensor("op_1808_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_171_cast_fp16 = mul(x = var_1807, y = var_1808_to_fp16)[name = tensor("w_171_cast_fp16")]; + tensor var_1811_equation_0 = const()[name = tensor("op_1811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1811 = einsum(equation = var_1811_equation_0, values = (var_1776_2, var_1762_2))[name = tensor("op_1811")]; + tensor var_1812_to_fp16 = const()[name = tensor("op_1812_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_173_cast_fp16 = mul(x = var_1811, y = var_1812_to_fp16)[name = tensor("w_173_cast_fp16")]; + tensor var_1815_equation_0 = const()[name = tensor("op_1815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1815 = einsum(equation = var_1815_equation_0, values = (var_1776_3, var_1762_3))[name = tensor("op_1815")]; + tensor var_1816_to_fp16 = const()[name = tensor("op_1816_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_175_cast_fp16 = mul(x = var_1815, y = var_1816_to_fp16)[name = tensor("w_175_cast_fp16")]; + tensor var_1819_equation_0 = const()[name = tensor("op_1819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1819 = einsum(equation = var_1819_equation_0, values = (var_1776_4, var_1762_4))[name = tensor("op_1819")]; + tensor var_1820_to_fp16 = const()[name = tensor("op_1820_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_177_cast_fp16 = mul(x = var_1819, y = var_1820_to_fp16)[name = tensor("w_177_cast_fp16")]; + tensor var_1823_equation_0 = const()[name = tensor("op_1823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1823 = einsum(equation = var_1823_equation_0, values = (var_1776_5, var_1762_5))[name = tensor("op_1823")]; + tensor var_1824_to_fp16 = const()[name = tensor("op_1824_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_179_cast_fp16 = mul(x = var_1823, y = var_1824_to_fp16)[name = tensor("w_179_cast_fp16")]; + tensor var_1827_equation_0 = const()[name = tensor("op_1827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1827 = einsum(equation = var_1827_equation_0, values = (var_1776_6, var_1762_6))[name = tensor("op_1827")]; + tensor var_1828_to_fp16 = const()[name = tensor("op_1828_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_181_cast_fp16 = mul(x = var_1827, y = var_1828_to_fp16)[name = tensor("w_181_cast_fp16")]; + tensor var_1831_equation_0 = const()[name = tensor("op_1831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1831 = einsum(equation = var_1831_equation_0, values = (var_1776_7, var_1762_7))[name = tensor("op_1831")]; + tensor var_1832_to_fp16 = const()[name = tensor("op_1832_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_183_cast_fp16 = mul(x = var_1831, y = var_1832_to_fp16)[name = tensor("w_183_cast_fp16")]; + tensor var_1835_equation_0 = const()[name = tensor("op_1835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1835 = einsum(equation = var_1835_equation_0, values = (var_1776_8, var_1762_8))[name = tensor("op_1835")]; + tensor var_1836_to_fp16 = const()[name = tensor("op_1836_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_185_cast_fp16 = mul(x = var_1835, y = var_1836_to_fp16)[name = tensor("w_185_cast_fp16")]; + tensor var_1839_equation_0 = const()[name = tensor("op_1839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1839 = einsum(equation = var_1839_equation_0, values = (var_1776_9, var_1762_9))[name = tensor("op_1839")]; + tensor var_1840_to_fp16 = const()[name = tensor("op_1840_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_187_cast_fp16 = mul(x = var_1839, y = var_1840_to_fp16)[name = tensor("w_187_cast_fp16")]; + tensor var_1843_equation_0 = const()[name = tensor("op_1843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1843 = einsum(equation = var_1843_equation_0, values = (var_1776_10, var_1762_10))[name = tensor("op_1843")]; + tensor var_1844_to_fp16 = const()[name = tensor("op_1844_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_189_cast_fp16 = mul(x = var_1843, y = var_1844_to_fp16)[name = tensor("w_189_cast_fp16")]; + tensor var_1847_equation_0 = const()[name = tensor("op_1847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1847 = einsum(equation = var_1847_equation_0, values = (var_1776_11, var_1762_11))[name = tensor("op_1847")]; + tensor var_1848_to_fp16 = const()[name = tensor("op_1848_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_191_cast_fp16 = mul(x = var_1847, y = var_1848_to_fp16)[name = tensor("w_191_cast_fp16")]; + tensor input_257_cast_fp16 = add(x = w_169_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_257_cast_fp16")]; + tensor var_1851_cast_fp16 = softmax(axis = var_1719, x = input_257_cast_fp16)[name = tensor("op_1851_cast_fp16")]; + tensor input_259_cast_fp16 = add(x = w_171_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_259_cast_fp16")]; + tensor var_1853_cast_fp16 = softmax(axis = var_1719, x = input_259_cast_fp16)[name = tensor("op_1853_cast_fp16")]; + tensor input_261_cast_fp16 = add(x = w_173_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_261_cast_fp16")]; + tensor var_1855_cast_fp16 = softmax(axis = var_1719, x = input_261_cast_fp16)[name = tensor("op_1855_cast_fp16")]; + tensor input_263_cast_fp16 = add(x = w_175_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_263_cast_fp16")]; + tensor var_1857_cast_fp16 = softmax(axis = var_1719, x = input_263_cast_fp16)[name = tensor("op_1857_cast_fp16")]; + tensor input_265_cast_fp16 = add(x = w_177_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_265_cast_fp16")]; + tensor var_1859_cast_fp16 = softmax(axis = var_1719, x = input_265_cast_fp16)[name = tensor("op_1859_cast_fp16")]; + tensor input_267_cast_fp16 = add(x = w_179_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_267_cast_fp16")]; + tensor var_1861_cast_fp16 = softmax(axis = var_1719, x = input_267_cast_fp16)[name = tensor("op_1861_cast_fp16")]; + tensor input_269_cast_fp16 = add(x = w_181_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_269_cast_fp16")]; + tensor var_1863_cast_fp16 = softmax(axis = var_1719, x = input_269_cast_fp16)[name = tensor("op_1863_cast_fp16")]; + tensor input_271_cast_fp16 = add(x = w_183_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_271_cast_fp16")]; + tensor var_1865_cast_fp16 = softmax(axis = var_1719, x = input_271_cast_fp16)[name = tensor("op_1865_cast_fp16")]; + tensor input_273_cast_fp16 = add(x = w_185_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_273_cast_fp16")]; + tensor var_1867_cast_fp16 = softmax(axis = var_1719, x = input_273_cast_fp16)[name = tensor("op_1867_cast_fp16")]; + tensor input_275_cast_fp16 = add(x = w_187_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_275_cast_fp16")]; + tensor var_1869_cast_fp16 = softmax(axis = var_1719, x = input_275_cast_fp16)[name = tensor("op_1869_cast_fp16")]; + tensor input_277_cast_fp16 = add(x = w_189_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_277_cast_fp16")]; + tensor var_1871_cast_fp16 = softmax(axis = var_1719, x = input_277_cast_fp16)[name = tensor("op_1871_cast_fp16")]; + tensor input_279_cast_fp16 = add(x = w_191_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_279_cast_fp16")]; + tensor var_1873_cast_fp16 = softmax(axis = var_1719, x = input_279_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor var_1875_equation_0 = const()[name = tensor("op_1875_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1875_cast_fp16 = einsum(equation = var_1875_equation_0, values = (var_1789_0, var_1851_cast_fp16))[name = tensor("op_1875_cast_fp16")]; + tensor var_1877_equation_0 = const()[name = tensor("op_1877_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1877_cast_fp16 = einsum(equation = var_1877_equation_0, values = (var_1789_1, var_1853_cast_fp16))[name = tensor("op_1877_cast_fp16")]; + tensor var_1879_equation_0 = const()[name = tensor("op_1879_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1879_cast_fp16 = einsum(equation = var_1879_equation_0, values = (var_1789_2, var_1855_cast_fp16))[name = tensor("op_1879_cast_fp16")]; + tensor var_1881_equation_0 = const()[name = tensor("op_1881_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1881_cast_fp16 = einsum(equation = var_1881_equation_0, values = (var_1789_3, var_1857_cast_fp16))[name = tensor("op_1881_cast_fp16")]; + tensor var_1883_equation_0 = const()[name = tensor("op_1883_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1883_cast_fp16 = einsum(equation = var_1883_equation_0, values = (var_1789_4, var_1859_cast_fp16))[name = tensor("op_1883_cast_fp16")]; + tensor var_1885_equation_0 = const()[name = tensor("op_1885_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1885_cast_fp16 = einsum(equation = var_1885_equation_0, values = (var_1789_5, var_1861_cast_fp16))[name = tensor("op_1885_cast_fp16")]; + tensor var_1887_equation_0 = const()[name = tensor("op_1887_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1887_cast_fp16 = einsum(equation = var_1887_equation_0, values = (var_1789_6, var_1863_cast_fp16))[name = tensor("op_1887_cast_fp16")]; + tensor var_1889_equation_0 = const()[name = tensor("op_1889_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1889_cast_fp16 = einsum(equation = var_1889_equation_0, values = (var_1789_7, var_1865_cast_fp16))[name = tensor("op_1889_cast_fp16")]; + tensor var_1891_equation_0 = const()[name = tensor("op_1891_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1891_cast_fp16 = einsum(equation = var_1891_equation_0, values = (var_1789_8, var_1867_cast_fp16))[name = tensor("op_1891_cast_fp16")]; + tensor var_1893_equation_0 = const()[name = tensor("op_1893_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1893_cast_fp16 = einsum(equation = var_1893_equation_0, values = (var_1789_9, var_1869_cast_fp16))[name = tensor("op_1893_cast_fp16")]; + tensor var_1895_equation_0 = const()[name = tensor("op_1895_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1895_cast_fp16 = einsum(equation = var_1895_equation_0, values = (var_1789_10, var_1871_cast_fp16))[name = tensor("op_1895_cast_fp16")]; + tensor var_1897_equation_0 = const()[name = tensor("op_1897_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1897_cast_fp16 = einsum(equation = var_1897_equation_0, values = (var_1789_11, var_1873_cast_fp16))[name = tensor("op_1897_cast_fp16")]; + tensor var_1899_interleave_0 = const()[name = tensor("op_1899_interleave_0"), val = tensor(false)]; + tensor var_1899_cast_fp16 = concat(axis = var_1719, interleave = var_1899_interleave_0, values = (var_1875_cast_fp16, var_1877_cast_fp16, var_1879_cast_fp16, var_1881_cast_fp16, var_1883_cast_fp16, var_1885_cast_fp16, var_1887_cast_fp16, var_1889_cast_fp16, var_1891_cast_fp16, var_1893_cast_fp16, var_1895_cast_fp16, var_1897_cast_fp16))[name = tensor("op_1899_cast_fp16")]; + tensor var_1903 = const()[name = tensor("op_1903"), val = tensor([1, 1])]; + tensor var_1905 = const()[name = tensor("op_1905"), val = tensor([1, 1])]; + tensor var_1907_pad_type_0 = const()[name = tensor("op_1907_pad_type_0"), val = tensor("custom")]; + tensor var_1907_pad_0 = const()[name = tensor("op_1907_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1907 = conv(bias = layers_7_attention_o_proj_bias, dilations = var_1905, groups = var_1719, pad = var_1907_pad_0, pad_type = var_1907_pad_type_0, strides = var_1903, weight = layers_7_attention_o_proj_weight, x = var_1899_cast_fp16)[name = tensor("op_1907")]; + tensor var_1909_interleave_0 = const()[name = tensor("op_1909_interleave_0"), val = tensor(false)]; + tensor var_1909 = concat(axis = var_1720, interleave = var_1909_interleave_0, values = var_1907)[name = tensor("op_1909")]; + tensor x_31 = add(x = transpose_25, y = var_1909)[name = tensor("x_31")]; + tensor input_283_perm_0 = const()[name = tensor("input_283_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_125 = const()[name = tensor("weight_125"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66812032)))]; + tensor bias_123 = const()[name = tensor("bias_123"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66812864)))]; + tensor o_31_axes_0 = const()[name = tensor("o_31_axes_0"), val = tensor([-1])]; + tensor var_1718_to_fp16 = const()[name = tensor("op_1718_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_23 = transpose(perm = input_283_perm_0, x = x_31)[name = tensor("transpose_23")]; + tensor o_31_cast_fp16 = layer_norm(axes = o_31_axes_0, beta = bias_123, epsilon = var_1718_to_fp16, gamma = weight_125, x = transpose_23)[name = tensor("o_31_cast_fp16")]; + tensor input_285_perm_0 = const()[name = tensor("input_285_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1925 = const()[name = tensor("op_1925"), val = tensor([1, 1])]; + tensor var_1927 = const()[name = tensor("op_1927"), val = tensor([1, 1])]; + tensor var_1929_pad_type_0 = const()[name = tensor("op_1929_pad_type_0"), val = tensor("custom")]; + tensor var_1929_pad_0 = const()[name = tensor("op_1929_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_22 = transpose(perm = input_285_perm_0, x = o_31_cast_fp16)[name = tensor("transpose_22")]; + tensor var_1929 = conv(bias = layers_7_mlp_fc1_bias, dilations = var_1927, groups = var_1719, pad = var_1929_pad_0, pad_type = var_1929_pad_type_0, strides = var_1925, weight = layers_7_mlp_fc1_weight, x = transpose_22)[name = tensor("op_1929")]; + tensor input_287_mode_0 = const()[name = tensor("input_287_mode_0"), val = tensor("EXACT")]; + tensor input_287 = gelu(mode = input_287_mode_0, x = var_1929)[name = tensor("input_287")]; + tensor var_1933 = const()[name = tensor("op_1933"), val = tensor([1, 1])]; + tensor var_1935 = const()[name = tensor("op_1935"), val = tensor([1, 1])]; + tensor var_1937_pad_type_0 = const()[name = tensor("op_1937_pad_type_0"), val = tensor("custom")]; + tensor var_1937_pad_0 = const()[name = tensor("op_1937_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1937 = conv(bias = layers_7_mlp_fc2_bias, dilations = var_1935, groups = var_1719, pad = var_1937_pad_0, pad_type = var_1937_pad_type_0, strides = var_1933, weight = layers_7_mlp_fc2_weight, x = input_287)[name = tensor("op_1937")]; + tensor x_33 = add(x = transpose_22, y = var_1937)[name = tensor("x_33")]; + tensor input_289_perm_0 = const()[name = tensor("input_289_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_131 = const()[name = tensor("weight_131"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66813696)))]; + tensor bias_129 = const()[name = tensor("bias_129"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66814528)))]; + tensor o_33_axes_0 = const()[name = tensor("o_33_axes_0"), val = tensor([-1])]; + tensor transpose_21 = transpose(perm = input_289_perm_0, x = x_33)[name = tensor("transpose_21")]; + tensor o_33_cast_fp16 = layer_norm(axes = o_33_axes_0, beta = bias_129, epsilon = var_1718_to_fp16, gamma = weight_131, x = transpose_21)[name = tensor("o_33_cast_fp16")]; + tensor hidden_states_17_perm_0 = const()[name = tensor("hidden_states_17_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1953 = const()[name = tensor("op_1953"), val = tensor(1)]; + tensor var_1954 = const()[name = tensor("op_1954"), val = tensor(0)]; + tensor var_1977 = const()[name = tensor("op_1977"), val = tensor([1, 1])]; + tensor var_1979 = const()[name = tensor("op_1979"), val = tensor([1, 1])]; + tensor var_1981_pad_type_0 = const()[name = tensor("op_1981_pad_type_0"), val = tensor("custom")]; + tensor var_1981_pad_0 = const()[name = tensor("op_1981_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_20 = transpose(perm = hidden_states_17_perm_0, x = o_33_cast_fp16)[name = tensor("transpose_20")]; + tensor var_1981 = conv(bias = layers_8_attention_q_proj_bias, dilations = var_1979, groups = var_1953, pad = var_1981_pad_0, pad_type = var_1981_pad_type_0, strides = var_1977, weight = layers_8_attention_q_proj_weight, x = transpose_20)[name = tensor("op_1981")]; + tensor var_1984 = const()[name = tensor("op_1984"), val = tensor([1, 1])]; + tensor var_1986 = const()[name = tensor("op_1986"), val = tensor([1, 1])]; tensor ks_17_pad_type_0 = const()[name = tensor("ks_17_pad_type_0"), val = tensor("custom")]; tensor ks_17_pad_0 = const()[name = tensor("ks_17_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_17 = conv(bias = layers_8_attention_k_proj_bias, dilations = var_2089, groups = var_2056, pad = ks_17_pad_0, pad_type = ks_17_pad_type_0, strides = var_2087, weight = layers_8_attention_k_proj_weight, x = var_2050_cast_fp16)[name = tensor("ks_17")]; - tensor var_2094 = const()[name = tensor("op_2094"), val = tensor([1, 1])]; - tensor var_2096 = const()[name = tensor("op_2096"), val = tensor([1, 1])]; - tensor var_2098_pad_type_0 = const()[name = tensor("op_2098_pad_type_0"), val = tensor("custom")]; - tensor var_2098_pad_0 = const()[name = tensor("op_2098_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2098 = conv(bias = layers_8_attention_v_proj_bias, dilations = var_2096, groups = var_2056, pad = var_2098_pad_0, pad_type = var_2098_pad_type_0, strides = var_2094, weight = layers_8_attention_v_proj_weight, x = var_2050_cast_fp16)[name = tensor("op_2098")]; + tensor ks_17 = conv(bias = layers_8_attention_k_proj_bias, dilations = var_1986, groups = var_1953, pad = ks_17_pad_0, pad_type = ks_17_pad_type_0, strides = var_1984, weight = layers_8_attention_k_proj_weight, x = transpose_20)[name = tensor("ks_17")]; + tensor var_1991 = const()[name = tensor("op_1991"), val = tensor([1, 1])]; + tensor var_1993 = const()[name = tensor("op_1993"), val = tensor([1, 1])]; + tensor var_1995_pad_type_0 = const()[name = tensor("op_1995_pad_type_0"), val = tensor("custom")]; + tensor var_1995_pad_0 = const()[name = tensor("op_1995_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1995 = conv(bias = layers_8_attention_v_proj_bias, dilations = var_1993, groups = var_1953, pad = var_1995_pad_0, pad_type = var_1995_pad_type_0, strides = var_1991, weight = layers_8_attention_v_proj_weight, x = transpose_20)[name = tensor("op_1995")]; tensor tile_42 = const()[name = tensor("tile_42"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2099_axis_0 = const()[name = tensor("op_2099_axis_0"), val = tensor(1)]; - tensor var_2099_0, tensor var_2099_1, tensor var_2099_2, tensor var_2099_3, tensor var_2099_4, tensor var_2099_5, tensor var_2099_6, tensor var_2099_7, tensor var_2099_8, tensor var_2099_9, tensor var_2099_10, tensor var_2099_11 = split(axis = var_2099_axis_0, split_sizes = tile_42, x = var_2084)[name = tensor("op_2099")]; - tensor var_2112_perm_0 = const()[name = tensor("op_2112_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_1996_axis_0 = const()[name = tensor("op_1996_axis_0"), val = tensor(1)]; + tensor var_1996_0, tensor var_1996_1, tensor var_1996_2, tensor var_1996_3, tensor var_1996_4, tensor var_1996_5, tensor var_1996_6, tensor var_1996_7, tensor var_1996_8, tensor var_1996_9, tensor var_1996_10, tensor var_1996_11 = split(axis = var_1996_axis_0, split_sizes = tile_42, x = var_1981)[name = tensor("op_1996")]; + tensor var_2009_perm_0 = const()[name = tensor("op_2009_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_43 = const()[name = tensor("tile_43"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2113_axis_0 = const()[name = tensor("op_2113_axis_0"), val = tensor(3)]; - tensor transpose_3 = transpose(perm = var_2112_perm_0, x = ks_17)[name = tensor("transpose_3")]; - tensor var_2113_0, tensor var_2113_1, tensor var_2113_2, tensor var_2113_3, tensor var_2113_4, tensor var_2113_5, tensor var_2113_6, tensor var_2113_7, tensor var_2113_8, tensor var_2113_9, tensor var_2113_10, tensor var_2113_11 = split(axis = var_2113_axis_0, split_sizes = tile_43, x = transpose_3)[name = tensor("op_2113")]; + tensor var_2010_axis_0 = const()[name = tensor("op_2010_axis_0"), val = tensor(3)]; + tensor transpose_19 = transpose(perm = var_2009_perm_0, x = ks_17)[name = tensor("transpose_19")]; + tensor var_2010_0, tensor var_2010_1, tensor var_2010_2, tensor var_2010_3, tensor var_2010_4, tensor var_2010_5, tensor var_2010_6, tensor var_2010_7, tensor var_2010_8, tensor var_2010_9, tensor var_2010_10, tensor var_2010_11 = split(axis = var_2010_axis_0, split_sizes = tile_43, x = transpose_19)[name = tensor("op_2010")]; tensor tile_44 = const()[name = tensor("tile_44"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2126_axis_0 = const()[name = tensor("op_2126_axis_0"), val = tensor(1)]; - tensor var_2126_0, tensor var_2126_1, tensor var_2126_2, tensor var_2126_3, tensor var_2126_4, tensor var_2126_5, tensor var_2126_6, tensor var_2126_7, tensor var_2126_8, tensor var_2126_9, tensor var_2126_10, tensor var_2126_11 = split(axis = var_2126_axis_0, split_sizes = tile_44, x = var_2098)[name = tensor("op_2126")]; - tensor var_2140_equation_0 = const()[name = tensor("op_2140_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2140 = einsum(equation = var_2140_equation_0, values = (var_2113_0, var_2099_0))[name = tensor("op_2140")]; - tensor var_2141_to_fp16 = const()[name = tensor("op_2141_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_193_cast_fp16 = mul(x = var_2140, y = var_2141_to_fp16)[name = tensor("w_193_cast_fp16")]; - tensor var_2144_equation_0 = const()[name = tensor("op_2144_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2144 = einsum(equation = var_2144_equation_0, values = (var_2113_1, var_2099_1))[name = tensor("op_2144")]; - tensor var_2145_to_fp16 = const()[name = tensor("op_2145_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_195_cast_fp16 = mul(x = var_2144, y = var_2145_to_fp16)[name = tensor("w_195_cast_fp16")]; - tensor var_2148_equation_0 = const()[name = tensor("op_2148_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2148 = einsum(equation = var_2148_equation_0, values = (var_2113_2, var_2099_2))[name = tensor("op_2148")]; - tensor var_2149_to_fp16 = const()[name = tensor("op_2149_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_197_cast_fp16 = mul(x = var_2148, y = var_2149_to_fp16)[name = tensor("w_197_cast_fp16")]; - tensor var_2152_equation_0 = const()[name = tensor("op_2152_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2152 = einsum(equation = var_2152_equation_0, values = (var_2113_3, var_2099_3))[name = tensor("op_2152")]; - tensor var_2153_to_fp16 = const()[name = tensor("op_2153_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_199_cast_fp16 = mul(x = var_2152, y = var_2153_to_fp16)[name = tensor("w_199_cast_fp16")]; - tensor var_2156_equation_0 = const()[name = tensor("op_2156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2156 = einsum(equation = var_2156_equation_0, values = (var_2113_4, var_2099_4))[name = tensor("op_2156")]; - tensor var_2157_to_fp16 = const()[name = tensor("op_2157_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_201_cast_fp16 = mul(x = var_2156, y = var_2157_to_fp16)[name = tensor("w_201_cast_fp16")]; - tensor var_2160_equation_0 = const()[name = tensor("op_2160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2160 = einsum(equation = var_2160_equation_0, values = (var_2113_5, var_2099_5))[name = tensor("op_2160")]; - tensor var_2161_to_fp16 = const()[name = tensor("op_2161_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_203_cast_fp16 = mul(x = var_2160, y = var_2161_to_fp16)[name = tensor("w_203_cast_fp16")]; - tensor var_2164_equation_0 = const()[name = tensor("op_2164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2164 = einsum(equation = var_2164_equation_0, values = (var_2113_6, var_2099_6))[name = tensor("op_2164")]; - tensor var_2165_to_fp16 = const()[name = tensor("op_2165_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_205_cast_fp16 = mul(x = var_2164, y = var_2165_to_fp16)[name = tensor("w_205_cast_fp16")]; - tensor var_2168_equation_0 = const()[name = tensor("op_2168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2168 = einsum(equation = var_2168_equation_0, values = (var_2113_7, var_2099_7))[name = tensor("op_2168")]; - tensor var_2169_to_fp16 = const()[name = tensor("op_2169_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_207_cast_fp16 = mul(x = var_2168, y = var_2169_to_fp16)[name = tensor("w_207_cast_fp16")]; - tensor var_2172_equation_0 = const()[name = tensor("op_2172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2172 = einsum(equation = var_2172_equation_0, values = (var_2113_8, var_2099_8))[name = tensor("op_2172")]; - tensor var_2173_to_fp16 = const()[name = tensor("op_2173_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_209_cast_fp16 = mul(x = var_2172, y = var_2173_to_fp16)[name = tensor("w_209_cast_fp16")]; - tensor var_2176_equation_0 = const()[name = tensor("op_2176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2176 = einsum(equation = var_2176_equation_0, values = (var_2113_9, var_2099_9))[name = tensor("op_2176")]; - tensor var_2177_to_fp16 = const()[name = tensor("op_2177_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_211_cast_fp16 = mul(x = var_2176, y = var_2177_to_fp16)[name = tensor("w_211_cast_fp16")]; - tensor var_2180_equation_0 = const()[name = tensor("op_2180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2180 = einsum(equation = var_2180_equation_0, values = (var_2113_10, var_2099_10))[name = tensor("op_2180")]; - tensor var_2181_to_fp16 = const()[name = tensor("op_2181_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_213_cast_fp16 = mul(x = var_2180, y = var_2181_to_fp16)[name = tensor("w_213_cast_fp16")]; - tensor var_2184_equation_0 = const()[name = tensor("op_2184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2184 = einsum(equation = var_2184_equation_0, values = (var_2113_11, var_2099_11))[name = tensor("op_2184")]; - tensor var_2185_to_fp16 = const()[name = tensor("op_2185_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_215_cast_fp16 = mul(x = var_2184, y = var_2185_to_fp16)[name = tensor("w_215_cast_fp16")]; - tensor input_259_cast_fp16 = add(x = w_193_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_259_cast_fp16")]; - tensor var_2188_cast_fp16 = softmax(axis = var_2056, x = input_259_cast_fp16)[name = tensor("op_2188_cast_fp16")]; - tensor input_261_cast_fp16 = add(x = w_195_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_261_cast_fp16")]; - tensor var_2190_cast_fp16 = softmax(axis = var_2056, x = input_261_cast_fp16)[name = tensor("op_2190_cast_fp16")]; - tensor input_263_cast_fp16 = add(x = w_197_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_263_cast_fp16")]; - tensor var_2192_cast_fp16 = softmax(axis = var_2056, x = input_263_cast_fp16)[name = tensor("op_2192_cast_fp16")]; - tensor input_265_cast_fp16 = add(x = w_199_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_265_cast_fp16")]; - tensor var_2194_cast_fp16 = softmax(axis = var_2056, x = input_265_cast_fp16)[name = tensor("op_2194_cast_fp16")]; - tensor input_267_cast_fp16 = add(x = w_201_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_267_cast_fp16")]; - tensor var_2196_cast_fp16 = softmax(axis = var_2056, x = input_267_cast_fp16)[name = tensor("op_2196_cast_fp16")]; - tensor input_269_cast_fp16 = add(x = w_203_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_269_cast_fp16")]; - tensor var_2198_cast_fp16 = softmax(axis = var_2056, x = input_269_cast_fp16)[name = tensor("op_2198_cast_fp16")]; - tensor input_271_cast_fp16 = add(x = w_205_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_271_cast_fp16")]; - tensor var_2200_cast_fp16 = softmax(axis = var_2056, x = input_271_cast_fp16)[name = tensor("op_2200_cast_fp16")]; - tensor input_273_cast_fp16 = add(x = w_207_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_273_cast_fp16")]; - tensor var_2202_cast_fp16 = softmax(axis = var_2056, x = input_273_cast_fp16)[name = tensor("op_2202_cast_fp16")]; - tensor input_275_cast_fp16 = add(x = w_209_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_275_cast_fp16")]; - tensor var_2204_cast_fp16 = softmax(axis = var_2056, x = input_275_cast_fp16)[name = tensor("op_2204_cast_fp16")]; - tensor input_277_cast_fp16 = add(x = w_211_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_277_cast_fp16")]; - tensor var_2206_cast_fp16 = softmax(axis = var_2056, x = input_277_cast_fp16)[name = tensor("op_2206_cast_fp16")]; - tensor input_279_cast_fp16 = add(x = w_213_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_279_cast_fp16")]; - tensor var_2208_cast_fp16 = softmax(axis = var_2056, x = input_279_cast_fp16)[name = tensor("op_2208_cast_fp16")]; - tensor input_281_cast_fp16 = add(x = w_215_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_281_cast_fp16")]; - tensor var_2210_cast_fp16 = softmax(axis = var_2056, x = input_281_cast_fp16)[name = tensor("op_2210_cast_fp16")]; - tensor var_2212_equation_0 = const()[name = tensor("op_2212_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2212_cast_fp16 = einsum(equation = var_2212_equation_0, values = (var_2126_0, var_2188_cast_fp16))[name = tensor("op_2212_cast_fp16")]; - tensor var_2214_equation_0 = const()[name = tensor("op_2214_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2214_cast_fp16 = einsum(equation = var_2214_equation_0, values = (var_2126_1, var_2190_cast_fp16))[name = tensor("op_2214_cast_fp16")]; - tensor var_2216_equation_0 = const()[name = tensor("op_2216_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2216_cast_fp16 = einsum(equation = var_2216_equation_0, values = (var_2126_2, var_2192_cast_fp16))[name = tensor("op_2216_cast_fp16")]; - tensor var_2218_equation_0 = const()[name = tensor("op_2218_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2218_cast_fp16 = einsum(equation = var_2218_equation_0, values = (var_2126_3, var_2194_cast_fp16))[name = tensor("op_2218_cast_fp16")]; - tensor var_2220_equation_0 = const()[name = tensor("op_2220_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2220_cast_fp16 = einsum(equation = var_2220_equation_0, values = (var_2126_4, var_2196_cast_fp16))[name = tensor("op_2220_cast_fp16")]; - tensor var_2222_equation_0 = const()[name = tensor("op_2222_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2222_cast_fp16 = einsum(equation = var_2222_equation_0, values = (var_2126_5, var_2198_cast_fp16))[name = tensor("op_2222_cast_fp16")]; - tensor var_2224_equation_0 = const()[name = tensor("op_2224_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2224_cast_fp16 = einsum(equation = var_2224_equation_0, values = (var_2126_6, var_2200_cast_fp16))[name = tensor("op_2224_cast_fp16")]; - tensor var_2226_equation_0 = const()[name = tensor("op_2226_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2226_cast_fp16 = einsum(equation = var_2226_equation_0, values = (var_2126_7, var_2202_cast_fp16))[name = tensor("op_2226_cast_fp16")]; - tensor var_2228_equation_0 = const()[name = tensor("op_2228_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2228_cast_fp16 = einsum(equation = var_2228_equation_0, values = (var_2126_8, var_2204_cast_fp16))[name = tensor("op_2228_cast_fp16")]; - tensor var_2230_equation_0 = const()[name = tensor("op_2230_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2230_cast_fp16 = einsum(equation = var_2230_equation_0, values = (var_2126_9, var_2206_cast_fp16))[name = tensor("op_2230_cast_fp16")]; - tensor var_2232_equation_0 = const()[name = tensor("op_2232_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2232_cast_fp16 = einsum(equation = var_2232_equation_0, values = (var_2126_10, var_2208_cast_fp16))[name = tensor("op_2232_cast_fp16")]; - tensor var_2234_equation_0 = const()[name = tensor("op_2234_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2234_cast_fp16 = einsum(equation = var_2234_equation_0, values = (var_2126_11, var_2210_cast_fp16))[name = tensor("op_2234_cast_fp16")]; - tensor var_2236_interleave_0 = const()[name = tensor("op_2236_interleave_0"), val = tensor(false)]; - tensor var_2236_cast_fp16 = concat(axis = var_2056, interleave = var_2236_interleave_0, values = (var_2212_cast_fp16, var_2214_cast_fp16, var_2216_cast_fp16, var_2218_cast_fp16, var_2220_cast_fp16, var_2222_cast_fp16, var_2224_cast_fp16, var_2226_cast_fp16, var_2228_cast_fp16, var_2230_cast_fp16, var_2232_cast_fp16, var_2234_cast_fp16))[name = tensor("op_2236_cast_fp16")]; - tensor var_2240 = const()[name = tensor("op_2240"), val = tensor([1, 1])]; - tensor var_2242 = const()[name = tensor("op_2242"), val = tensor([1, 1])]; - tensor var_2244_pad_type_0 = const()[name = tensor("op_2244_pad_type_0"), val = tensor("custom")]; - tensor var_2244_pad_0 = const()[name = tensor("op_2244_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2244 = conv(bias = layers_8_attention_o_proj_bias, dilations = var_2242, groups = var_2056, pad = var_2244_pad_0, pad_type = var_2244_pad_type_0, strides = var_2240, weight = layers_8_attention_o_proj_weight, x = var_2236_cast_fp16)[name = tensor("op_2244")]; - tensor var_2246_interleave_0 = const()[name = tensor("op_2246_interleave_0"), val = tensor(false)]; - tensor var_2246 = concat(axis = var_2057, interleave = var_2246_interleave_0, values = var_2244)[name = tensor("op_2246")]; - tensor x_69 = add(x = var_2050_cast_fp16, y = var_2246)[name = tensor("x_69")]; - tensor var_2053_promoted = const()[name = tensor("op_2053_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2054_promoted = const()[name = tensor("op_2054_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_71 = clip(alpha = var_2053_promoted, beta = var_2054_promoted, x = x_69)[name = tensor("x_71")]; - tensor var_2251 = const()[name = tensor("op_2251"), val = tensor([1])]; - tensor mean_35 = reduce_mean(axes = var_2251, keep_dims = var_2058, x = x_71)[name = tensor("mean_35")]; - tensor zero_mean_35 = sub(x = x_71, y = mean_35)[name = tensor("zero_mean_35")]; - tensor var_2055_promoted = const()[name = tensor("op_2055_promoted"), val = tensor(0x1p+1)]; - tensor var_2254 = pow(x = zero_mean_35, y = var_2055_promoted)[name = tensor("op_2254")]; - tensor var_2255 = const()[name = tensor("op_2255"), val = tensor([1])]; - tensor var_2256 = reduce_mean(axes = var_2255, keep_dims = var_2058, x = var_2254)[name = tensor("op_2256")]; - tensor var_2257_to_fp16 = const()[name = tensor("op_2257_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2258_cast_fp16 = add(x = var_2256, y = var_2257_to_fp16)[name = tensor("op_2258_cast_fp16")]; - tensor denom_35_epsilon_0 = const()[name = tensor("denom_35_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0, x = var_2258_cast_fp16)[name = tensor("denom_35_cast_fp16")]; - tensor var_2260_cast_fp16 = mul(x = zero_mean_35, y = denom_35_cast_fp16)[name = tensor("op_2260_cast_fp16")]; - tensor var_2262_gamma_0_to_fp16 = const()[name = tensor("op_2262_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66817024)))]; - tensor var_2262_beta_0_to_fp16 = const()[name = tensor("op_2262_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66817856)))]; - tensor var_2262_epsilon_0_to_fp16 = const()[name = tensor("op_2262_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2262_cast_fp16 = batch_norm(beta = var_2262_beta_0_to_fp16, epsilon = var_2262_epsilon_0_to_fp16, gamma = var_2262_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2260_cast_fp16)[name = tensor("op_2262_cast_fp16")]; - tensor var_2268 = const()[name = tensor("op_2268"), val = tensor([1, 1])]; - tensor var_2270 = const()[name = tensor("op_2270"), val = tensor([1, 1])]; - tensor var_2272_pad_type_0 = const()[name = tensor("op_2272_pad_type_0"), val = tensor("custom")]; - tensor var_2272_pad_0 = const()[name = tensor("op_2272_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2272 = conv(bias = layers_8_mlp_fc1_bias, dilations = var_2270, groups = var_2056, pad = var_2272_pad_0, pad_type = var_2272_pad_type_0, strides = var_2268, weight = layers_8_mlp_fc1_weight, x = var_2262_cast_fp16)[name = tensor("op_2272")]; - tensor input_287_mode_0 = const()[name = tensor("input_287_mode_0"), val = tensor("EXACT")]; - tensor input_287 = gelu(mode = input_287_mode_0, x = var_2272)[name = tensor("input_287")]; - tensor var_2276 = const()[name = tensor("op_2276"), val = tensor([1, 1])]; - tensor var_2278 = const()[name = tensor("op_2278"), val = tensor([1, 1])]; - tensor var_2280_pad_type_0 = const()[name = tensor("op_2280_pad_type_0"), val = tensor("custom")]; - tensor var_2280_pad_0 = const()[name = tensor("op_2280_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2280 = conv(bias = layers_8_mlp_fc2_bias, dilations = var_2278, groups = var_2056, pad = var_2280_pad_0, pad_type = var_2280_pad_type_0, strides = var_2276, weight = layers_8_mlp_fc2_weight, x = input_287)[name = tensor("op_2280")]; - tensor x_73 = add(x = var_2262_cast_fp16, y = var_2280)[name = tensor("x_73")]; - tensor var_2053_promoted_1 = const()[name = tensor("op_2053_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2054_promoted_1 = const()[name = tensor("op_2054_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_75 = clip(alpha = var_2053_promoted_1, beta = var_2054_promoted_1, x = x_73)[name = tensor("x_75")]; - tensor var_2285 = const()[name = tensor("op_2285"), val = tensor([1])]; - tensor mean_37 = reduce_mean(axes = var_2285, keep_dims = var_2058, x = x_75)[name = tensor("mean_37")]; - tensor zero_mean_37 = sub(x = x_75, y = mean_37)[name = tensor("zero_mean_37")]; - tensor var_2055_promoted_1 = const()[name = tensor("op_2055_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2288 = pow(x = zero_mean_37, y = var_2055_promoted_1)[name = tensor("op_2288")]; - tensor var_2289 = const()[name = tensor("op_2289"), val = tensor([1])]; - tensor var_2290 = reduce_mean(axes = var_2289, keep_dims = var_2058, x = var_2288)[name = tensor("op_2290")]; - tensor var_2291_to_fp16 = const()[name = tensor("op_2291_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2292_cast_fp16 = add(x = var_2290, y = var_2291_to_fp16)[name = tensor("op_2292_cast_fp16")]; - tensor denom_37_epsilon_0 = const()[name = tensor("denom_37_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0, x = var_2292_cast_fp16)[name = tensor("denom_37_cast_fp16")]; - tensor var_2294_cast_fp16 = mul(x = zero_mean_37, y = denom_37_cast_fp16)[name = tensor("op_2294_cast_fp16")]; - tensor var_2296_gamma_0_to_fp16 = const()[name = tensor("op_2296_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66818688)))]; - tensor var_2296_beta_0_to_fp16 = const()[name = tensor("op_2296_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66819520)))]; - tensor var_2296_epsilon_0_to_fp16 = const()[name = tensor("op_2296_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2296_cast_fp16 = batch_norm(beta = var_2296_beta_0_to_fp16, epsilon = var_2296_epsilon_0_to_fp16, gamma = var_2296_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2294_cast_fp16)[name = tensor("op_2296_cast_fp16")]; - tensor var_2302 = const()[name = tensor("op_2302"), val = tensor(1)]; - tensor var_2303 = const()[name = tensor("op_2303"), val = tensor(0)]; - tensor var_2304 = const()[name = tensor("op_2304"), val = tensor(true)]; - tensor var_2326 = const()[name = tensor("op_2326"), val = tensor([1, 1])]; - tensor var_2328 = const()[name = tensor("op_2328"), val = tensor([1, 1])]; - tensor var_2330_pad_type_0 = const()[name = tensor("op_2330_pad_type_0"), val = tensor("custom")]; - tensor var_2330_pad_0 = const()[name = tensor("op_2330_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2330 = conv(bias = layers_9_attention_q_proj_bias, dilations = var_2328, groups = var_2302, pad = var_2330_pad_0, pad_type = var_2330_pad_type_0, strides = var_2326, weight = layers_9_attention_q_proj_weight, x = var_2296_cast_fp16)[name = tensor("op_2330")]; - tensor var_2333 = const()[name = tensor("op_2333"), val = tensor([1, 1])]; - tensor var_2335 = const()[name = tensor("op_2335"), val = tensor([1, 1])]; + tensor var_2023_axis_0 = const()[name = tensor("op_2023_axis_0"), val = tensor(1)]; + tensor var_2023_0, tensor var_2023_1, tensor var_2023_2, tensor var_2023_3, tensor var_2023_4, tensor var_2023_5, tensor var_2023_6, tensor var_2023_7, tensor var_2023_8, tensor var_2023_9, tensor var_2023_10, tensor var_2023_11 = split(axis = var_2023_axis_0, split_sizes = tile_44, x = var_1995)[name = tensor("op_2023")]; + tensor var_2037_equation_0 = const()[name = tensor("op_2037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2037 = einsum(equation = var_2037_equation_0, values = (var_2010_0, var_1996_0))[name = tensor("op_2037")]; + tensor var_2038_to_fp16 = const()[name = tensor("op_2038_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_193_cast_fp16 = mul(x = var_2037, y = var_2038_to_fp16)[name = tensor("w_193_cast_fp16")]; + tensor var_2041_equation_0 = const()[name = tensor("op_2041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2041 = einsum(equation = var_2041_equation_0, values = (var_2010_1, var_1996_1))[name = tensor("op_2041")]; + tensor var_2042_to_fp16 = const()[name = tensor("op_2042_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_195_cast_fp16 = mul(x = var_2041, y = var_2042_to_fp16)[name = tensor("w_195_cast_fp16")]; + tensor var_2045_equation_0 = const()[name = tensor("op_2045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2045 = einsum(equation = var_2045_equation_0, values = (var_2010_2, var_1996_2))[name = tensor("op_2045")]; + tensor var_2046_to_fp16 = const()[name = tensor("op_2046_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_197_cast_fp16 = mul(x = var_2045, y = var_2046_to_fp16)[name = tensor("w_197_cast_fp16")]; + tensor var_2049_equation_0 = const()[name = tensor("op_2049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2049 = einsum(equation = var_2049_equation_0, values = (var_2010_3, var_1996_3))[name = tensor("op_2049")]; + tensor var_2050_to_fp16 = const()[name = tensor("op_2050_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_199_cast_fp16 = mul(x = var_2049, y = var_2050_to_fp16)[name = tensor("w_199_cast_fp16")]; + tensor var_2053_equation_0 = const()[name = tensor("op_2053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2053 = einsum(equation = var_2053_equation_0, values = (var_2010_4, var_1996_4))[name = tensor("op_2053")]; + tensor var_2054_to_fp16 = const()[name = tensor("op_2054_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_201_cast_fp16 = mul(x = var_2053, y = var_2054_to_fp16)[name = tensor("w_201_cast_fp16")]; + tensor var_2057_equation_0 = const()[name = tensor("op_2057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2057 = einsum(equation = var_2057_equation_0, values = (var_2010_5, var_1996_5))[name = tensor("op_2057")]; + tensor var_2058_to_fp16 = const()[name = tensor("op_2058_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_203_cast_fp16 = mul(x = var_2057, y = var_2058_to_fp16)[name = tensor("w_203_cast_fp16")]; + tensor var_2061_equation_0 = const()[name = tensor("op_2061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2061 = einsum(equation = var_2061_equation_0, values = (var_2010_6, var_1996_6))[name = tensor("op_2061")]; + tensor var_2062_to_fp16 = const()[name = tensor("op_2062_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_205_cast_fp16 = mul(x = var_2061, y = var_2062_to_fp16)[name = tensor("w_205_cast_fp16")]; + tensor var_2065_equation_0 = const()[name = tensor("op_2065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2065 = einsum(equation = var_2065_equation_0, values = (var_2010_7, var_1996_7))[name = tensor("op_2065")]; + tensor var_2066_to_fp16 = const()[name = tensor("op_2066_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_207_cast_fp16 = mul(x = var_2065, y = var_2066_to_fp16)[name = tensor("w_207_cast_fp16")]; + tensor var_2069_equation_0 = const()[name = tensor("op_2069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2069 = einsum(equation = var_2069_equation_0, values = (var_2010_8, var_1996_8))[name = tensor("op_2069")]; + tensor var_2070_to_fp16 = const()[name = tensor("op_2070_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_209_cast_fp16 = mul(x = var_2069, y = var_2070_to_fp16)[name = tensor("w_209_cast_fp16")]; + tensor var_2073_equation_0 = const()[name = tensor("op_2073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2073 = einsum(equation = var_2073_equation_0, values = (var_2010_9, var_1996_9))[name = tensor("op_2073")]; + tensor var_2074_to_fp16 = const()[name = tensor("op_2074_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_211_cast_fp16 = mul(x = var_2073, y = var_2074_to_fp16)[name = tensor("w_211_cast_fp16")]; + tensor var_2077_equation_0 = const()[name = tensor("op_2077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2077 = einsum(equation = var_2077_equation_0, values = (var_2010_10, var_1996_10))[name = tensor("op_2077")]; + tensor var_2078_to_fp16 = const()[name = tensor("op_2078_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_213_cast_fp16 = mul(x = var_2077, y = var_2078_to_fp16)[name = tensor("w_213_cast_fp16")]; + tensor var_2081_equation_0 = const()[name = tensor("op_2081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2081 = einsum(equation = var_2081_equation_0, values = (var_2010_11, var_1996_11))[name = tensor("op_2081")]; + tensor var_2082_to_fp16 = const()[name = tensor("op_2082_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_215_cast_fp16 = mul(x = var_2081, y = var_2082_to_fp16)[name = tensor("w_215_cast_fp16")]; + tensor input_293_cast_fp16 = add(x = w_193_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_293_cast_fp16")]; + tensor var_2085_cast_fp16 = softmax(axis = var_1953, x = input_293_cast_fp16)[name = tensor("op_2085_cast_fp16")]; + tensor input_295_cast_fp16 = add(x = w_195_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_295_cast_fp16")]; + tensor var_2087_cast_fp16 = softmax(axis = var_1953, x = input_295_cast_fp16)[name = tensor("op_2087_cast_fp16")]; + tensor input_297_cast_fp16 = add(x = w_197_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_297_cast_fp16")]; + tensor var_2089_cast_fp16 = softmax(axis = var_1953, x = input_297_cast_fp16)[name = tensor("op_2089_cast_fp16")]; + tensor input_299_cast_fp16 = add(x = w_199_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_299_cast_fp16")]; + tensor var_2091_cast_fp16 = softmax(axis = var_1953, x = input_299_cast_fp16)[name = tensor("op_2091_cast_fp16")]; + tensor input_301_cast_fp16 = add(x = w_201_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_301_cast_fp16")]; + tensor var_2093_cast_fp16 = softmax(axis = var_1953, x = input_301_cast_fp16)[name = tensor("op_2093_cast_fp16")]; + tensor input_303_cast_fp16 = add(x = w_203_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_303_cast_fp16")]; + tensor var_2095_cast_fp16 = softmax(axis = var_1953, x = input_303_cast_fp16)[name = tensor("op_2095_cast_fp16")]; + tensor input_305_cast_fp16 = add(x = w_205_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_305_cast_fp16")]; + tensor var_2097_cast_fp16 = softmax(axis = var_1953, x = input_305_cast_fp16)[name = tensor("op_2097_cast_fp16")]; + tensor input_307_cast_fp16 = add(x = w_207_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_307_cast_fp16")]; + tensor var_2099_cast_fp16 = softmax(axis = var_1953, x = input_307_cast_fp16)[name = tensor("op_2099_cast_fp16")]; + tensor input_309_cast_fp16 = add(x = w_209_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_309_cast_fp16")]; + tensor var_2101_cast_fp16 = softmax(axis = var_1953, x = input_309_cast_fp16)[name = tensor("op_2101_cast_fp16")]; + tensor input_311_cast_fp16 = add(x = w_211_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_311_cast_fp16")]; + tensor var_2103_cast_fp16 = softmax(axis = var_1953, x = input_311_cast_fp16)[name = tensor("op_2103_cast_fp16")]; + tensor input_313_cast_fp16 = add(x = w_213_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_313_cast_fp16")]; + tensor var_2105_cast_fp16 = softmax(axis = var_1953, x = input_313_cast_fp16)[name = tensor("op_2105_cast_fp16")]; + tensor input_315_cast_fp16 = add(x = w_215_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_315_cast_fp16")]; + tensor var_2107_cast_fp16 = softmax(axis = var_1953, x = input_315_cast_fp16)[name = tensor("op_2107_cast_fp16")]; + tensor var_2109_equation_0 = const()[name = tensor("op_2109_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2109_cast_fp16 = einsum(equation = var_2109_equation_0, values = (var_2023_0, var_2085_cast_fp16))[name = tensor("op_2109_cast_fp16")]; + tensor var_2111_equation_0 = const()[name = tensor("op_2111_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2111_cast_fp16 = einsum(equation = var_2111_equation_0, values = (var_2023_1, var_2087_cast_fp16))[name = tensor("op_2111_cast_fp16")]; + tensor var_2113_equation_0 = const()[name = tensor("op_2113_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2113_cast_fp16 = einsum(equation = var_2113_equation_0, values = (var_2023_2, var_2089_cast_fp16))[name = tensor("op_2113_cast_fp16")]; + tensor var_2115_equation_0 = const()[name = tensor("op_2115_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2115_cast_fp16 = einsum(equation = var_2115_equation_0, values = (var_2023_3, var_2091_cast_fp16))[name = tensor("op_2115_cast_fp16")]; + tensor var_2117_equation_0 = const()[name = tensor("op_2117_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2117_cast_fp16 = einsum(equation = var_2117_equation_0, values = (var_2023_4, var_2093_cast_fp16))[name = tensor("op_2117_cast_fp16")]; + tensor var_2119_equation_0 = const()[name = tensor("op_2119_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2119_cast_fp16 = einsum(equation = var_2119_equation_0, values = (var_2023_5, var_2095_cast_fp16))[name = tensor("op_2119_cast_fp16")]; + tensor var_2121_equation_0 = const()[name = tensor("op_2121_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2121_cast_fp16 = einsum(equation = var_2121_equation_0, values = (var_2023_6, var_2097_cast_fp16))[name = tensor("op_2121_cast_fp16")]; + tensor var_2123_equation_0 = const()[name = tensor("op_2123_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2123_cast_fp16 = einsum(equation = var_2123_equation_0, values = (var_2023_7, var_2099_cast_fp16))[name = tensor("op_2123_cast_fp16")]; + tensor var_2125_equation_0 = const()[name = tensor("op_2125_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2125_cast_fp16 = einsum(equation = var_2125_equation_0, values = (var_2023_8, var_2101_cast_fp16))[name = tensor("op_2125_cast_fp16")]; + tensor var_2127_equation_0 = const()[name = tensor("op_2127_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2127_cast_fp16 = einsum(equation = var_2127_equation_0, values = (var_2023_9, var_2103_cast_fp16))[name = tensor("op_2127_cast_fp16")]; + tensor var_2129_equation_0 = const()[name = tensor("op_2129_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2129_cast_fp16 = einsum(equation = var_2129_equation_0, values = (var_2023_10, var_2105_cast_fp16))[name = tensor("op_2129_cast_fp16")]; + tensor var_2131_equation_0 = const()[name = tensor("op_2131_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2131_cast_fp16 = einsum(equation = var_2131_equation_0, values = (var_2023_11, var_2107_cast_fp16))[name = tensor("op_2131_cast_fp16")]; + tensor var_2133_interleave_0 = const()[name = tensor("op_2133_interleave_0"), val = tensor(false)]; + tensor var_2133_cast_fp16 = concat(axis = var_1953, interleave = var_2133_interleave_0, values = (var_2109_cast_fp16, var_2111_cast_fp16, var_2113_cast_fp16, var_2115_cast_fp16, var_2117_cast_fp16, var_2119_cast_fp16, var_2121_cast_fp16, var_2123_cast_fp16, var_2125_cast_fp16, var_2127_cast_fp16, var_2129_cast_fp16, var_2131_cast_fp16))[name = tensor("op_2133_cast_fp16")]; + tensor var_2137 = const()[name = tensor("op_2137"), val = tensor([1, 1])]; + tensor var_2139 = const()[name = tensor("op_2139"), val = tensor([1, 1])]; + tensor var_2141_pad_type_0 = const()[name = tensor("op_2141_pad_type_0"), val = tensor("custom")]; + tensor var_2141_pad_0 = const()[name = tensor("op_2141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2141 = conv(bias = layers_8_attention_o_proj_bias, dilations = var_2139, groups = var_1953, pad = var_2141_pad_0, pad_type = var_2141_pad_type_0, strides = var_2137, weight = layers_8_attention_o_proj_weight, x = var_2133_cast_fp16)[name = tensor("op_2141")]; + tensor var_2143_interleave_0 = const()[name = tensor("op_2143_interleave_0"), val = tensor(false)]; + tensor var_2143 = concat(axis = var_1954, interleave = var_2143_interleave_0, values = var_2141)[name = tensor("op_2143")]; + tensor x_35 = add(x = transpose_20, y = var_2143)[name = tensor("x_35")]; + tensor input_319_perm_0 = const()[name = tensor("input_319_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_141 = const()[name = tensor("weight_141"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66815360)))]; + tensor bias_139 = const()[name = tensor("bias_139"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66816192)))]; + tensor o_35_axes_0 = const()[name = tensor("o_35_axes_0"), val = tensor([-1])]; + tensor var_1952_to_fp16 = const()[name = tensor("op_1952_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_18 = transpose(perm = input_319_perm_0, x = x_35)[name = tensor("transpose_18")]; + tensor o_35_cast_fp16 = layer_norm(axes = o_35_axes_0, beta = bias_139, epsilon = var_1952_to_fp16, gamma = weight_141, x = transpose_18)[name = tensor("o_35_cast_fp16")]; + tensor input_321_perm_0 = const()[name = tensor("input_321_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2159 = const()[name = tensor("op_2159"), val = tensor([1, 1])]; + tensor var_2161 = const()[name = tensor("op_2161"), val = tensor([1, 1])]; + tensor var_2163_pad_type_0 = const()[name = tensor("op_2163_pad_type_0"), val = tensor("custom")]; + tensor var_2163_pad_0 = const()[name = tensor("op_2163_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_17 = transpose(perm = input_321_perm_0, x = o_35_cast_fp16)[name = tensor("transpose_17")]; + tensor var_2163 = conv(bias = layers_8_mlp_fc1_bias, dilations = var_2161, groups = var_1953, pad = var_2163_pad_0, pad_type = var_2163_pad_type_0, strides = var_2159, weight = layers_8_mlp_fc1_weight, x = transpose_17)[name = tensor("op_2163")]; + tensor input_323_mode_0 = const()[name = tensor("input_323_mode_0"), val = tensor("EXACT")]; + tensor input_323 = gelu(mode = input_323_mode_0, x = var_2163)[name = tensor("input_323")]; + tensor var_2167 = const()[name = tensor("op_2167"), val = tensor([1, 1])]; + tensor var_2169 = const()[name = tensor("op_2169"), val = tensor([1, 1])]; + tensor var_2171_pad_type_0 = const()[name = tensor("op_2171_pad_type_0"), val = tensor("custom")]; + tensor var_2171_pad_0 = const()[name = tensor("op_2171_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2171 = conv(bias = layers_8_mlp_fc2_bias, dilations = var_2169, groups = var_1953, pad = var_2171_pad_0, pad_type = var_2171_pad_type_0, strides = var_2167, weight = layers_8_mlp_fc2_weight, x = input_323)[name = tensor("op_2171")]; + tensor x_37 = add(x = transpose_17, y = var_2171)[name = tensor("x_37")]; + tensor input_325_perm_0 = const()[name = tensor("input_325_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_147 = const()[name = tensor("weight_147"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66817024)))]; + tensor bias_145 = const()[name = tensor("bias_145"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66817856)))]; + tensor o_37_axes_0 = const()[name = tensor("o_37_axes_0"), val = tensor([-1])]; + tensor transpose_16 = transpose(perm = input_325_perm_0, x = x_37)[name = tensor("transpose_16")]; + tensor o_37_cast_fp16 = layer_norm(axes = o_37_axes_0, beta = bias_145, epsilon = var_1952_to_fp16, gamma = weight_147, x = transpose_16)[name = tensor("o_37_cast_fp16")]; + tensor hidden_states_19_perm_0 = const()[name = tensor("hidden_states_19_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2187 = const()[name = tensor("op_2187"), val = tensor(1)]; + tensor var_2188 = const()[name = tensor("op_2188"), val = tensor(0)]; + tensor var_2211 = const()[name = tensor("op_2211"), val = tensor([1, 1])]; + tensor var_2213 = const()[name = tensor("op_2213"), val = tensor([1, 1])]; + tensor var_2215_pad_type_0 = const()[name = tensor("op_2215_pad_type_0"), val = tensor("custom")]; + tensor var_2215_pad_0 = const()[name = tensor("op_2215_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_15 = transpose(perm = hidden_states_19_perm_0, x = o_37_cast_fp16)[name = tensor("transpose_15")]; + tensor var_2215 = conv(bias = layers_9_attention_q_proj_bias, dilations = var_2213, groups = var_2187, pad = var_2215_pad_0, pad_type = var_2215_pad_type_0, strides = var_2211, weight = layers_9_attention_q_proj_weight, x = transpose_15)[name = tensor("op_2215")]; + tensor var_2218 = const()[name = tensor("op_2218"), val = tensor([1, 1])]; + tensor var_2220 = const()[name = tensor("op_2220"), val = tensor([1, 1])]; tensor ks_19_pad_type_0 = const()[name = tensor("ks_19_pad_type_0"), val = tensor("custom")]; tensor ks_19_pad_0 = const()[name = tensor("ks_19_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_19 = conv(bias = layers_9_attention_k_proj_bias, dilations = var_2335, groups = var_2302, pad = ks_19_pad_0, pad_type = ks_19_pad_type_0, strides = var_2333, weight = layers_9_attention_k_proj_weight, x = var_2296_cast_fp16)[name = tensor("ks_19")]; - tensor var_2340 = const()[name = tensor("op_2340"), val = tensor([1, 1])]; - tensor var_2342 = const()[name = tensor("op_2342"), val = tensor([1, 1])]; - tensor var_2344_pad_type_0 = const()[name = tensor("op_2344_pad_type_0"), val = tensor("custom")]; - tensor var_2344_pad_0 = const()[name = tensor("op_2344_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2344 = conv(bias = layers_9_attention_v_proj_bias, dilations = var_2342, groups = var_2302, pad = var_2344_pad_0, pad_type = var_2344_pad_type_0, strides = var_2340, weight = layers_9_attention_v_proj_weight, x = var_2296_cast_fp16)[name = tensor("op_2344")]; + tensor ks_19 = conv(bias = layers_9_attention_k_proj_bias, dilations = var_2220, groups = var_2187, pad = ks_19_pad_0, pad_type = ks_19_pad_type_0, strides = var_2218, weight = layers_9_attention_k_proj_weight, x = transpose_15)[name = tensor("ks_19")]; + tensor var_2225 = const()[name = tensor("op_2225"), val = tensor([1, 1])]; + tensor var_2227 = const()[name = tensor("op_2227"), val = tensor([1, 1])]; + tensor var_2229_pad_type_0 = const()[name = tensor("op_2229_pad_type_0"), val = tensor("custom")]; + tensor var_2229_pad_0 = const()[name = tensor("op_2229_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2229 = conv(bias = layers_9_attention_v_proj_bias, dilations = var_2227, groups = var_2187, pad = var_2229_pad_0, pad_type = var_2229_pad_type_0, strides = var_2225, weight = layers_9_attention_v_proj_weight, x = transpose_15)[name = tensor("op_2229")]; tensor tile_47 = const()[name = tensor("tile_47"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2345_axis_0 = const()[name = tensor("op_2345_axis_0"), val = tensor(1)]; - tensor var_2345_0, tensor var_2345_1, tensor var_2345_2, tensor var_2345_3, tensor var_2345_4, tensor var_2345_5, tensor var_2345_6, tensor var_2345_7, tensor var_2345_8, tensor var_2345_9, tensor var_2345_10, tensor var_2345_11 = split(axis = var_2345_axis_0, split_sizes = tile_47, x = var_2330)[name = tensor("op_2345")]; - tensor var_2358_perm_0 = const()[name = tensor("op_2358_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2230_axis_0 = const()[name = tensor("op_2230_axis_0"), val = tensor(1)]; + tensor var_2230_0, tensor var_2230_1, tensor var_2230_2, tensor var_2230_3, tensor var_2230_4, tensor var_2230_5, tensor var_2230_6, tensor var_2230_7, tensor var_2230_8, tensor var_2230_9, tensor var_2230_10, tensor var_2230_11 = split(axis = var_2230_axis_0, split_sizes = tile_47, x = var_2215)[name = tensor("op_2230")]; + tensor var_2243_perm_0 = const()[name = tensor("op_2243_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_48 = const()[name = tensor("tile_48"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2359_axis_0 = const()[name = tensor("op_2359_axis_0"), val = tensor(3)]; - tensor transpose_2 = transpose(perm = var_2358_perm_0, x = ks_19)[name = tensor("transpose_2")]; - tensor var_2359_0, tensor var_2359_1, tensor var_2359_2, tensor var_2359_3, tensor var_2359_4, tensor var_2359_5, tensor var_2359_6, tensor var_2359_7, tensor var_2359_8, tensor var_2359_9, tensor var_2359_10, tensor var_2359_11 = split(axis = var_2359_axis_0, split_sizes = tile_48, x = transpose_2)[name = tensor("op_2359")]; + tensor var_2244_axis_0 = const()[name = tensor("op_2244_axis_0"), val = tensor(3)]; + tensor transpose_14 = transpose(perm = var_2243_perm_0, x = ks_19)[name = tensor("transpose_14")]; + tensor var_2244_0, tensor var_2244_1, tensor var_2244_2, tensor var_2244_3, tensor var_2244_4, tensor var_2244_5, tensor var_2244_6, tensor var_2244_7, tensor var_2244_8, tensor var_2244_9, tensor var_2244_10, tensor var_2244_11 = split(axis = var_2244_axis_0, split_sizes = tile_48, x = transpose_14)[name = tensor("op_2244")]; tensor tile_49 = const()[name = tensor("tile_49"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2372_axis_0 = const()[name = tensor("op_2372_axis_0"), val = tensor(1)]; - tensor var_2372_0, tensor var_2372_1, tensor var_2372_2, tensor var_2372_3, tensor var_2372_4, tensor var_2372_5, tensor var_2372_6, tensor var_2372_7, tensor var_2372_8, tensor var_2372_9, tensor var_2372_10, tensor var_2372_11 = split(axis = var_2372_axis_0, split_sizes = tile_49, x = var_2344)[name = tensor("op_2372")]; - tensor var_2386_equation_0 = const()[name = tensor("op_2386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2386 = einsum(equation = var_2386_equation_0, values = (var_2359_0, var_2345_0))[name = tensor("op_2386")]; - tensor var_2387_to_fp16 = const()[name = tensor("op_2387_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_217_cast_fp16 = mul(x = var_2386, y = var_2387_to_fp16)[name = tensor("w_217_cast_fp16")]; - tensor var_2390_equation_0 = const()[name = tensor("op_2390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2390 = einsum(equation = var_2390_equation_0, values = (var_2359_1, var_2345_1))[name = tensor("op_2390")]; - tensor var_2391_to_fp16 = const()[name = tensor("op_2391_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_219_cast_fp16 = mul(x = var_2390, y = var_2391_to_fp16)[name = tensor("w_219_cast_fp16")]; - tensor var_2394_equation_0 = const()[name = tensor("op_2394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2394 = einsum(equation = var_2394_equation_0, values = (var_2359_2, var_2345_2))[name = tensor("op_2394")]; - tensor var_2395_to_fp16 = const()[name = tensor("op_2395_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_221_cast_fp16 = mul(x = var_2394, y = var_2395_to_fp16)[name = tensor("w_221_cast_fp16")]; - tensor var_2398_equation_0 = const()[name = tensor("op_2398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2398 = einsum(equation = var_2398_equation_0, values = (var_2359_3, var_2345_3))[name = tensor("op_2398")]; - tensor var_2399_to_fp16 = const()[name = tensor("op_2399_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_223_cast_fp16 = mul(x = var_2398, y = var_2399_to_fp16)[name = tensor("w_223_cast_fp16")]; - tensor var_2402_equation_0 = const()[name = tensor("op_2402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2402 = einsum(equation = var_2402_equation_0, values = (var_2359_4, var_2345_4))[name = tensor("op_2402")]; - tensor var_2403_to_fp16 = const()[name = tensor("op_2403_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_225_cast_fp16 = mul(x = var_2402, y = var_2403_to_fp16)[name = tensor("w_225_cast_fp16")]; - tensor var_2406_equation_0 = const()[name = tensor("op_2406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2406 = einsum(equation = var_2406_equation_0, values = (var_2359_5, var_2345_5))[name = tensor("op_2406")]; - tensor var_2407_to_fp16 = const()[name = tensor("op_2407_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_227_cast_fp16 = mul(x = var_2406, y = var_2407_to_fp16)[name = tensor("w_227_cast_fp16")]; - tensor var_2410_equation_0 = const()[name = tensor("op_2410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2410 = einsum(equation = var_2410_equation_0, values = (var_2359_6, var_2345_6))[name = tensor("op_2410")]; - tensor var_2411_to_fp16 = const()[name = tensor("op_2411_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_229_cast_fp16 = mul(x = var_2410, y = var_2411_to_fp16)[name = tensor("w_229_cast_fp16")]; - tensor var_2414_equation_0 = const()[name = tensor("op_2414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2414 = einsum(equation = var_2414_equation_0, values = (var_2359_7, var_2345_7))[name = tensor("op_2414")]; - tensor var_2415_to_fp16 = const()[name = tensor("op_2415_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_231_cast_fp16 = mul(x = var_2414, y = var_2415_to_fp16)[name = tensor("w_231_cast_fp16")]; - tensor var_2418_equation_0 = const()[name = tensor("op_2418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2418 = einsum(equation = var_2418_equation_0, values = (var_2359_8, var_2345_8))[name = tensor("op_2418")]; - tensor var_2419_to_fp16 = const()[name = tensor("op_2419_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_233_cast_fp16 = mul(x = var_2418, y = var_2419_to_fp16)[name = tensor("w_233_cast_fp16")]; - tensor var_2422_equation_0 = const()[name = tensor("op_2422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2422 = einsum(equation = var_2422_equation_0, values = (var_2359_9, var_2345_9))[name = tensor("op_2422")]; - tensor var_2423_to_fp16 = const()[name = tensor("op_2423_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_235_cast_fp16 = mul(x = var_2422, y = var_2423_to_fp16)[name = tensor("w_235_cast_fp16")]; - tensor var_2426_equation_0 = const()[name = tensor("op_2426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2426 = einsum(equation = var_2426_equation_0, values = (var_2359_10, var_2345_10))[name = tensor("op_2426")]; - tensor var_2427_to_fp16 = const()[name = tensor("op_2427_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_237_cast_fp16 = mul(x = var_2426, y = var_2427_to_fp16)[name = tensor("w_237_cast_fp16")]; - tensor var_2430_equation_0 = const()[name = tensor("op_2430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2430 = einsum(equation = var_2430_equation_0, values = (var_2359_11, var_2345_11))[name = tensor("op_2430")]; - tensor var_2431_to_fp16 = const()[name = tensor("op_2431_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_239_cast_fp16 = mul(x = var_2430, y = var_2431_to_fp16)[name = tensor("w_239_cast_fp16")]; - tensor input_291_cast_fp16 = add(x = w_217_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_291_cast_fp16")]; - tensor var_2434_cast_fp16 = softmax(axis = var_2302, x = input_291_cast_fp16)[name = tensor("op_2434_cast_fp16")]; - tensor input_293_cast_fp16 = add(x = w_219_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_293_cast_fp16")]; - tensor var_2436_cast_fp16 = softmax(axis = var_2302, x = input_293_cast_fp16)[name = tensor("op_2436_cast_fp16")]; - tensor input_295_cast_fp16 = add(x = w_221_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_295_cast_fp16")]; - tensor var_2438_cast_fp16 = softmax(axis = var_2302, x = input_295_cast_fp16)[name = tensor("op_2438_cast_fp16")]; - tensor input_297_cast_fp16 = add(x = w_223_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_297_cast_fp16")]; - tensor var_2440_cast_fp16 = softmax(axis = var_2302, x = input_297_cast_fp16)[name = tensor("op_2440_cast_fp16")]; - tensor input_299_cast_fp16 = add(x = w_225_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_299_cast_fp16")]; - tensor var_2442_cast_fp16 = softmax(axis = var_2302, x = input_299_cast_fp16)[name = tensor("op_2442_cast_fp16")]; - tensor input_301_cast_fp16 = add(x = w_227_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_301_cast_fp16")]; - tensor var_2444_cast_fp16 = softmax(axis = var_2302, x = input_301_cast_fp16)[name = tensor("op_2444_cast_fp16")]; - tensor input_303_cast_fp16 = add(x = w_229_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_303_cast_fp16")]; - tensor var_2446_cast_fp16 = softmax(axis = var_2302, x = input_303_cast_fp16)[name = tensor("op_2446_cast_fp16")]; - tensor input_305_cast_fp16 = add(x = w_231_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_305_cast_fp16")]; - tensor var_2448_cast_fp16 = softmax(axis = var_2302, x = input_305_cast_fp16)[name = tensor("op_2448_cast_fp16")]; - tensor input_307_cast_fp16 = add(x = w_233_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_307_cast_fp16")]; - tensor var_2450_cast_fp16 = softmax(axis = var_2302, x = input_307_cast_fp16)[name = tensor("op_2450_cast_fp16")]; - tensor input_309_cast_fp16 = add(x = w_235_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_309_cast_fp16")]; - tensor var_2452_cast_fp16 = softmax(axis = var_2302, x = input_309_cast_fp16)[name = tensor("op_2452_cast_fp16")]; - tensor input_311_cast_fp16 = add(x = w_237_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_311_cast_fp16")]; - tensor var_2454_cast_fp16 = softmax(axis = var_2302, x = input_311_cast_fp16)[name = tensor("op_2454_cast_fp16")]; - tensor input_313_cast_fp16 = add(x = w_239_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_313_cast_fp16")]; - tensor var_2456_cast_fp16 = softmax(axis = var_2302, x = input_313_cast_fp16)[name = tensor("op_2456_cast_fp16")]; - tensor var_2458_equation_0 = const()[name = tensor("op_2458_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2458_cast_fp16 = einsum(equation = var_2458_equation_0, values = (var_2372_0, var_2434_cast_fp16))[name = tensor("op_2458_cast_fp16")]; - tensor var_2460_equation_0 = const()[name = tensor("op_2460_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2460_cast_fp16 = einsum(equation = var_2460_equation_0, values = (var_2372_1, var_2436_cast_fp16))[name = tensor("op_2460_cast_fp16")]; - tensor var_2462_equation_0 = const()[name = tensor("op_2462_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2462_cast_fp16 = einsum(equation = var_2462_equation_0, values = (var_2372_2, var_2438_cast_fp16))[name = tensor("op_2462_cast_fp16")]; - tensor var_2464_equation_0 = const()[name = tensor("op_2464_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2464_cast_fp16 = einsum(equation = var_2464_equation_0, values = (var_2372_3, var_2440_cast_fp16))[name = tensor("op_2464_cast_fp16")]; - tensor var_2466_equation_0 = const()[name = tensor("op_2466_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2466_cast_fp16 = einsum(equation = var_2466_equation_0, values = (var_2372_4, var_2442_cast_fp16))[name = tensor("op_2466_cast_fp16")]; - tensor var_2468_equation_0 = const()[name = tensor("op_2468_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2468_cast_fp16 = einsum(equation = var_2468_equation_0, values = (var_2372_5, var_2444_cast_fp16))[name = tensor("op_2468_cast_fp16")]; - tensor var_2470_equation_0 = const()[name = tensor("op_2470_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2470_cast_fp16 = einsum(equation = var_2470_equation_0, values = (var_2372_6, var_2446_cast_fp16))[name = tensor("op_2470_cast_fp16")]; - tensor var_2472_equation_0 = const()[name = tensor("op_2472_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2472_cast_fp16 = einsum(equation = var_2472_equation_0, values = (var_2372_7, var_2448_cast_fp16))[name = tensor("op_2472_cast_fp16")]; - tensor var_2474_equation_0 = const()[name = tensor("op_2474_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2474_cast_fp16 = einsum(equation = var_2474_equation_0, values = (var_2372_8, var_2450_cast_fp16))[name = tensor("op_2474_cast_fp16")]; - tensor var_2476_equation_0 = const()[name = tensor("op_2476_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2476_cast_fp16 = einsum(equation = var_2476_equation_0, values = (var_2372_9, var_2452_cast_fp16))[name = tensor("op_2476_cast_fp16")]; - tensor var_2478_equation_0 = const()[name = tensor("op_2478_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2478_cast_fp16 = einsum(equation = var_2478_equation_0, values = (var_2372_10, var_2454_cast_fp16))[name = tensor("op_2478_cast_fp16")]; - tensor var_2480_equation_0 = const()[name = tensor("op_2480_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2480_cast_fp16 = einsum(equation = var_2480_equation_0, values = (var_2372_11, var_2456_cast_fp16))[name = tensor("op_2480_cast_fp16")]; - tensor var_2482_interleave_0 = const()[name = tensor("op_2482_interleave_0"), val = tensor(false)]; - tensor var_2482_cast_fp16 = concat(axis = var_2302, interleave = var_2482_interleave_0, values = (var_2458_cast_fp16, var_2460_cast_fp16, var_2462_cast_fp16, var_2464_cast_fp16, var_2466_cast_fp16, var_2468_cast_fp16, var_2470_cast_fp16, var_2472_cast_fp16, var_2474_cast_fp16, var_2476_cast_fp16, var_2478_cast_fp16, var_2480_cast_fp16))[name = tensor("op_2482_cast_fp16")]; - tensor var_2486 = const()[name = tensor("op_2486"), val = tensor([1, 1])]; - tensor var_2488 = const()[name = tensor("op_2488"), val = tensor([1, 1])]; - tensor var_2490_pad_type_0 = const()[name = tensor("op_2490_pad_type_0"), val = tensor("custom")]; - tensor var_2490_pad_0 = const()[name = tensor("op_2490_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2490 = conv(bias = layers_9_attention_o_proj_bias, dilations = var_2488, groups = var_2302, pad = var_2490_pad_0, pad_type = var_2490_pad_type_0, strides = var_2486, weight = layers_9_attention_o_proj_weight, x = var_2482_cast_fp16)[name = tensor("op_2490")]; - tensor var_2492_interleave_0 = const()[name = tensor("op_2492_interleave_0"), val = tensor(false)]; - tensor var_2492 = concat(axis = var_2303, interleave = var_2492_interleave_0, values = var_2490)[name = tensor("op_2492")]; - tensor x_77 = add(x = var_2296_cast_fp16, y = var_2492)[name = tensor("x_77")]; - tensor var_2299_promoted = const()[name = tensor("op_2299_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2300_promoted = const()[name = tensor("op_2300_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_79 = clip(alpha = var_2299_promoted, beta = var_2300_promoted, x = x_77)[name = tensor("x_79")]; - tensor var_2497 = const()[name = tensor("op_2497"), val = tensor([1])]; - tensor mean_39 = reduce_mean(axes = var_2497, keep_dims = var_2304, x = x_79)[name = tensor("mean_39")]; - tensor zero_mean_39 = sub(x = x_79, y = mean_39)[name = tensor("zero_mean_39")]; - tensor var_2301_promoted = const()[name = tensor("op_2301_promoted"), val = tensor(0x1p+1)]; - tensor var_2500 = pow(x = zero_mean_39, y = var_2301_promoted)[name = tensor("op_2500")]; - tensor var_2501 = const()[name = tensor("op_2501"), val = tensor([1])]; - tensor var_2502 = reduce_mean(axes = var_2501, keep_dims = var_2304, x = var_2500)[name = tensor("op_2502")]; - tensor var_2503_to_fp16 = const()[name = tensor("op_2503_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2504_cast_fp16 = add(x = var_2502, y = var_2503_to_fp16)[name = tensor("op_2504_cast_fp16")]; - tensor denom_39_epsilon_0 = const()[name = tensor("denom_39_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0, x = var_2504_cast_fp16)[name = tensor("denom_39_cast_fp16")]; - tensor var_2506_cast_fp16 = mul(x = zero_mean_39, y = denom_39_cast_fp16)[name = tensor("op_2506_cast_fp16")]; - tensor var_2508_gamma_0_to_fp16 = const()[name = tensor("op_2508_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66820352)))]; - tensor var_2508_beta_0_to_fp16 = const()[name = tensor("op_2508_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66821184)))]; - tensor var_2508_epsilon_0_to_fp16 = const()[name = tensor("op_2508_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2508_cast_fp16 = batch_norm(beta = var_2508_beta_0_to_fp16, epsilon = var_2508_epsilon_0_to_fp16, gamma = var_2508_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2506_cast_fp16)[name = tensor("op_2508_cast_fp16")]; - tensor var_2514 = const()[name = tensor("op_2514"), val = tensor([1, 1])]; - tensor var_2516 = const()[name = tensor("op_2516"), val = tensor([1, 1])]; - tensor var_2518_pad_type_0 = const()[name = tensor("op_2518_pad_type_0"), val = tensor("custom")]; - tensor var_2518_pad_0 = const()[name = tensor("op_2518_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2518 = conv(bias = layers_9_mlp_fc1_bias, dilations = var_2516, groups = var_2302, pad = var_2518_pad_0, pad_type = var_2518_pad_type_0, strides = var_2514, weight = layers_9_mlp_fc1_weight, x = var_2508_cast_fp16)[name = tensor("op_2518")]; - tensor input_319_mode_0 = const()[name = tensor("input_319_mode_0"), val = tensor("EXACT")]; - tensor input_319 = gelu(mode = input_319_mode_0, x = var_2518)[name = tensor("input_319")]; - tensor var_2522 = const()[name = tensor("op_2522"), val = tensor([1, 1])]; - tensor var_2524 = const()[name = tensor("op_2524"), val = tensor([1, 1])]; - tensor var_2526_pad_type_0 = const()[name = tensor("op_2526_pad_type_0"), val = tensor("custom")]; - tensor var_2526_pad_0 = const()[name = tensor("op_2526_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2526 = conv(bias = layers_9_mlp_fc2_bias, dilations = var_2524, groups = var_2302, pad = var_2526_pad_0, pad_type = var_2526_pad_type_0, strides = var_2522, weight = layers_9_mlp_fc2_weight, x = input_319)[name = tensor("op_2526")]; - tensor x_81 = add(x = var_2508_cast_fp16, y = var_2526)[name = tensor("x_81")]; - tensor var_2299_promoted_1 = const()[name = tensor("op_2299_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2300_promoted_1 = const()[name = tensor("op_2300_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_83 = clip(alpha = var_2299_promoted_1, beta = var_2300_promoted_1, x = x_81)[name = tensor("x_83")]; - tensor var_2531 = const()[name = tensor("op_2531"), val = tensor([1])]; - tensor mean_41 = reduce_mean(axes = var_2531, keep_dims = var_2304, x = x_83)[name = tensor("mean_41")]; - tensor zero_mean_41 = sub(x = x_83, y = mean_41)[name = tensor("zero_mean_41")]; - tensor var_2301_promoted_1 = const()[name = tensor("op_2301_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2534 = pow(x = zero_mean_41, y = var_2301_promoted_1)[name = tensor("op_2534")]; - tensor var_2535 = const()[name = tensor("op_2535"), val = tensor([1])]; - tensor var_2536 = reduce_mean(axes = var_2535, keep_dims = var_2304, x = var_2534)[name = tensor("op_2536")]; - tensor var_2537_to_fp16 = const()[name = tensor("op_2537_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2538_cast_fp16 = add(x = var_2536, y = var_2537_to_fp16)[name = tensor("op_2538_cast_fp16")]; - tensor denom_41_epsilon_0 = const()[name = tensor("denom_41_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0, x = var_2538_cast_fp16)[name = tensor("denom_41_cast_fp16")]; - tensor var_2540_cast_fp16 = mul(x = zero_mean_41, y = denom_41_cast_fp16)[name = tensor("op_2540_cast_fp16")]; - tensor var_2542_gamma_0_to_fp16 = const()[name = tensor("op_2542_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66822016)))]; - tensor var_2542_beta_0_to_fp16 = const()[name = tensor("op_2542_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66822848)))]; - tensor var_2542_epsilon_0_to_fp16 = const()[name = tensor("op_2542_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2542_cast_fp16 = batch_norm(beta = var_2542_beta_0_to_fp16, epsilon = var_2542_epsilon_0_to_fp16, gamma = var_2542_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2540_cast_fp16)[name = tensor("op_2542_cast_fp16")]; - tensor var_2548 = const()[name = tensor("op_2548"), val = tensor(1)]; - tensor var_2549 = const()[name = tensor("op_2549"), val = tensor(0)]; - tensor var_2550 = const()[name = tensor("op_2550"), val = tensor(true)]; - tensor var_2572 = const()[name = tensor("op_2572"), val = tensor([1, 1])]; - tensor var_2574 = const()[name = tensor("op_2574"), val = tensor([1, 1])]; - tensor var_2576_pad_type_0 = const()[name = tensor("op_2576_pad_type_0"), val = tensor("custom")]; - tensor var_2576_pad_0 = const()[name = tensor("op_2576_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2576 = conv(bias = layers_10_attention_q_proj_bias, dilations = var_2574, groups = var_2548, pad = var_2576_pad_0, pad_type = var_2576_pad_type_0, strides = var_2572, weight = layers_10_attention_q_proj_weight, x = var_2542_cast_fp16)[name = tensor("op_2576")]; - tensor var_2579 = const()[name = tensor("op_2579"), val = tensor([1, 1])]; - tensor var_2581 = const()[name = tensor("op_2581"), val = tensor([1, 1])]; + tensor var_2257_axis_0 = const()[name = tensor("op_2257_axis_0"), val = tensor(1)]; + tensor var_2257_0, tensor var_2257_1, tensor var_2257_2, tensor var_2257_3, tensor var_2257_4, tensor var_2257_5, tensor var_2257_6, tensor var_2257_7, tensor var_2257_8, tensor var_2257_9, tensor var_2257_10, tensor var_2257_11 = split(axis = var_2257_axis_0, split_sizes = tile_49, x = var_2229)[name = tensor("op_2257")]; + tensor var_2271_equation_0 = const()[name = tensor("op_2271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2271 = einsum(equation = var_2271_equation_0, values = (var_2244_0, var_2230_0))[name = tensor("op_2271")]; + tensor var_2272_to_fp16 = const()[name = tensor("op_2272_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_217_cast_fp16 = mul(x = var_2271, y = var_2272_to_fp16)[name = tensor("w_217_cast_fp16")]; + tensor var_2275_equation_0 = const()[name = tensor("op_2275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2275 = einsum(equation = var_2275_equation_0, values = (var_2244_1, var_2230_1))[name = tensor("op_2275")]; + tensor var_2276_to_fp16 = const()[name = tensor("op_2276_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_219_cast_fp16 = mul(x = var_2275, y = var_2276_to_fp16)[name = tensor("w_219_cast_fp16")]; + tensor var_2279_equation_0 = const()[name = tensor("op_2279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2279 = einsum(equation = var_2279_equation_0, values = (var_2244_2, var_2230_2))[name = tensor("op_2279")]; + tensor var_2280_to_fp16 = const()[name = tensor("op_2280_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_221_cast_fp16 = mul(x = var_2279, y = var_2280_to_fp16)[name = tensor("w_221_cast_fp16")]; + tensor var_2283_equation_0 = const()[name = tensor("op_2283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2283 = einsum(equation = var_2283_equation_0, values = (var_2244_3, var_2230_3))[name = tensor("op_2283")]; + tensor var_2284_to_fp16 = const()[name = tensor("op_2284_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_223_cast_fp16 = mul(x = var_2283, y = var_2284_to_fp16)[name = tensor("w_223_cast_fp16")]; + tensor var_2287_equation_0 = const()[name = tensor("op_2287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2287 = einsum(equation = var_2287_equation_0, values = (var_2244_4, var_2230_4))[name = tensor("op_2287")]; + tensor var_2288_to_fp16 = const()[name = tensor("op_2288_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_225_cast_fp16 = mul(x = var_2287, y = var_2288_to_fp16)[name = tensor("w_225_cast_fp16")]; + tensor var_2291_equation_0 = const()[name = tensor("op_2291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2291 = einsum(equation = var_2291_equation_0, values = (var_2244_5, var_2230_5))[name = tensor("op_2291")]; + tensor var_2292_to_fp16 = const()[name = tensor("op_2292_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_227_cast_fp16 = mul(x = var_2291, y = var_2292_to_fp16)[name = tensor("w_227_cast_fp16")]; + tensor var_2295_equation_0 = const()[name = tensor("op_2295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2295 = einsum(equation = var_2295_equation_0, values = (var_2244_6, var_2230_6))[name = tensor("op_2295")]; + tensor var_2296_to_fp16 = const()[name = tensor("op_2296_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_229_cast_fp16 = mul(x = var_2295, y = var_2296_to_fp16)[name = tensor("w_229_cast_fp16")]; + tensor var_2299_equation_0 = const()[name = tensor("op_2299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2299 = einsum(equation = var_2299_equation_0, values = (var_2244_7, var_2230_7))[name = tensor("op_2299")]; + tensor var_2300_to_fp16 = const()[name = tensor("op_2300_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_231_cast_fp16 = mul(x = var_2299, y = var_2300_to_fp16)[name = tensor("w_231_cast_fp16")]; + tensor var_2303_equation_0 = const()[name = tensor("op_2303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2303 = einsum(equation = var_2303_equation_0, values = (var_2244_8, var_2230_8))[name = tensor("op_2303")]; + tensor var_2304_to_fp16 = const()[name = tensor("op_2304_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_233_cast_fp16 = mul(x = var_2303, y = var_2304_to_fp16)[name = tensor("w_233_cast_fp16")]; + tensor var_2307_equation_0 = const()[name = tensor("op_2307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2307 = einsum(equation = var_2307_equation_0, values = (var_2244_9, var_2230_9))[name = tensor("op_2307")]; + tensor var_2308_to_fp16 = const()[name = tensor("op_2308_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_235_cast_fp16 = mul(x = var_2307, y = var_2308_to_fp16)[name = tensor("w_235_cast_fp16")]; + tensor var_2311_equation_0 = const()[name = tensor("op_2311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2311 = einsum(equation = var_2311_equation_0, values = (var_2244_10, var_2230_10))[name = tensor("op_2311")]; + tensor var_2312_to_fp16 = const()[name = tensor("op_2312_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_237_cast_fp16 = mul(x = var_2311, y = var_2312_to_fp16)[name = tensor("w_237_cast_fp16")]; + tensor var_2315_equation_0 = const()[name = tensor("op_2315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2315 = einsum(equation = var_2315_equation_0, values = (var_2244_11, var_2230_11))[name = tensor("op_2315")]; + tensor var_2316_to_fp16 = const()[name = tensor("op_2316_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_239_cast_fp16 = mul(x = var_2315, y = var_2316_to_fp16)[name = tensor("w_239_cast_fp16")]; + tensor input_329_cast_fp16 = add(x = w_217_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_329_cast_fp16")]; + tensor var_2319_cast_fp16 = softmax(axis = var_2187, x = input_329_cast_fp16)[name = tensor("op_2319_cast_fp16")]; + tensor input_331_cast_fp16 = add(x = w_219_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_331_cast_fp16")]; + tensor var_2321_cast_fp16 = softmax(axis = var_2187, x = input_331_cast_fp16)[name = tensor("op_2321_cast_fp16")]; + tensor input_333_cast_fp16 = add(x = w_221_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_333_cast_fp16")]; + tensor var_2323_cast_fp16 = softmax(axis = var_2187, x = input_333_cast_fp16)[name = tensor("op_2323_cast_fp16")]; + tensor input_335_cast_fp16 = add(x = w_223_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_335_cast_fp16")]; + tensor var_2325_cast_fp16 = softmax(axis = var_2187, x = input_335_cast_fp16)[name = tensor("op_2325_cast_fp16")]; + tensor input_337_cast_fp16 = add(x = w_225_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_337_cast_fp16")]; + tensor var_2327_cast_fp16 = softmax(axis = var_2187, x = input_337_cast_fp16)[name = tensor("op_2327_cast_fp16")]; + tensor input_339_cast_fp16 = add(x = w_227_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_339_cast_fp16")]; + tensor var_2329_cast_fp16 = softmax(axis = var_2187, x = input_339_cast_fp16)[name = tensor("op_2329_cast_fp16")]; + tensor input_341_cast_fp16 = add(x = w_229_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_341_cast_fp16")]; + tensor var_2331_cast_fp16 = softmax(axis = var_2187, x = input_341_cast_fp16)[name = tensor("op_2331_cast_fp16")]; + tensor input_343_cast_fp16 = add(x = w_231_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_343_cast_fp16")]; + tensor var_2333_cast_fp16 = softmax(axis = var_2187, x = input_343_cast_fp16)[name = tensor("op_2333_cast_fp16")]; + tensor input_345_cast_fp16 = add(x = w_233_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_345_cast_fp16")]; + tensor var_2335_cast_fp16 = softmax(axis = var_2187, x = input_345_cast_fp16)[name = tensor("op_2335_cast_fp16")]; + tensor input_347_cast_fp16 = add(x = w_235_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_347_cast_fp16")]; + tensor var_2337_cast_fp16 = softmax(axis = var_2187, x = input_347_cast_fp16)[name = tensor("op_2337_cast_fp16")]; + tensor input_349_cast_fp16 = add(x = w_237_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_349_cast_fp16")]; + tensor var_2339_cast_fp16 = softmax(axis = var_2187, x = input_349_cast_fp16)[name = tensor("op_2339_cast_fp16")]; + tensor input_351_cast_fp16 = add(x = w_239_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_351_cast_fp16")]; + tensor var_2341_cast_fp16 = softmax(axis = var_2187, x = input_351_cast_fp16)[name = tensor("op_2341_cast_fp16")]; + tensor var_2343_equation_0 = const()[name = tensor("op_2343_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2343_cast_fp16 = einsum(equation = var_2343_equation_0, values = (var_2257_0, var_2319_cast_fp16))[name = tensor("op_2343_cast_fp16")]; + tensor var_2345_equation_0 = const()[name = tensor("op_2345_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2345_cast_fp16 = einsum(equation = var_2345_equation_0, values = (var_2257_1, var_2321_cast_fp16))[name = tensor("op_2345_cast_fp16")]; + tensor var_2347_equation_0 = const()[name = tensor("op_2347_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2347_cast_fp16 = einsum(equation = var_2347_equation_0, values = (var_2257_2, var_2323_cast_fp16))[name = tensor("op_2347_cast_fp16")]; + tensor var_2349_equation_0 = const()[name = tensor("op_2349_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2349_cast_fp16 = einsum(equation = var_2349_equation_0, values = (var_2257_3, var_2325_cast_fp16))[name = tensor("op_2349_cast_fp16")]; + tensor var_2351_equation_0 = const()[name = tensor("op_2351_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2351_cast_fp16 = einsum(equation = var_2351_equation_0, values = (var_2257_4, var_2327_cast_fp16))[name = tensor("op_2351_cast_fp16")]; + tensor var_2353_equation_0 = const()[name = tensor("op_2353_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2353_cast_fp16 = einsum(equation = var_2353_equation_0, values = (var_2257_5, var_2329_cast_fp16))[name = tensor("op_2353_cast_fp16")]; + tensor var_2355_equation_0 = const()[name = tensor("op_2355_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2355_cast_fp16 = einsum(equation = var_2355_equation_0, values = (var_2257_6, var_2331_cast_fp16))[name = tensor("op_2355_cast_fp16")]; + tensor var_2357_equation_0 = const()[name = tensor("op_2357_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2357_cast_fp16 = einsum(equation = var_2357_equation_0, values = (var_2257_7, var_2333_cast_fp16))[name = tensor("op_2357_cast_fp16")]; + tensor var_2359_equation_0 = const()[name = tensor("op_2359_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2359_cast_fp16 = einsum(equation = var_2359_equation_0, values = (var_2257_8, var_2335_cast_fp16))[name = tensor("op_2359_cast_fp16")]; + tensor var_2361_equation_0 = const()[name = tensor("op_2361_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2361_cast_fp16 = einsum(equation = var_2361_equation_0, values = (var_2257_9, var_2337_cast_fp16))[name = tensor("op_2361_cast_fp16")]; + tensor var_2363_equation_0 = const()[name = tensor("op_2363_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2363_cast_fp16 = einsum(equation = var_2363_equation_0, values = (var_2257_10, var_2339_cast_fp16))[name = tensor("op_2363_cast_fp16")]; + tensor var_2365_equation_0 = const()[name = tensor("op_2365_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2365_cast_fp16 = einsum(equation = var_2365_equation_0, values = (var_2257_11, var_2341_cast_fp16))[name = tensor("op_2365_cast_fp16")]; + tensor var_2367_interleave_0 = const()[name = tensor("op_2367_interleave_0"), val = tensor(false)]; + tensor var_2367_cast_fp16 = concat(axis = var_2187, interleave = var_2367_interleave_0, values = (var_2343_cast_fp16, var_2345_cast_fp16, var_2347_cast_fp16, var_2349_cast_fp16, var_2351_cast_fp16, var_2353_cast_fp16, var_2355_cast_fp16, var_2357_cast_fp16, var_2359_cast_fp16, var_2361_cast_fp16, var_2363_cast_fp16, var_2365_cast_fp16))[name = tensor("op_2367_cast_fp16")]; + tensor var_2371 = const()[name = tensor("op_2371"), val = tensor([1, 1])]; + tensor var_2373 = const()[name = tensor("op_2373"), val = tensor([1, 1])]; + tensor var_2375_pad_type_0 = const()[name = tensor("op_2375_pad_type_0"), val = tensor("custom")]; + tensor var_2375_pad_0 = const()[name = tensor("op_2375_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2375 = conv(bias = layers_9_attention_o_proj_bias, dilations = var_2373, groups = var_2187, pad = var_2375_pad_0, pad_type = var_2375_pad_type_0, strides = var_2371, weight = layers_9_attention_o_proj_weight, x = var_2367_cast_fp16)[name = tensor("op_2375")]; + tensor var_2377_interleave_0 = const()[name = tensor("op_2377_interleave_0"), val = tensor(false)]; + tensor var_2377 = concat(axis = var_2188, interleave = var_2377_interleave_0, values = var_2375)[name = tensor("op_2377")]; + tensor x_39 = add(x = transpose_15, y = var_2377)[name = tensor("x_39")]; + tensor input_355_perm_0 = const()[name = tensor("input_355_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_157 = const()[name = tensor("weight_157"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66818688)))]; + tensor bias_155 = const()[name = tensor("bias_155"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66819520)))]; + tensor o_39_axes_0 = const()[name = tensor("o_39_axes_0"), val = tensor([-1])]; + tensor var_2186_to_fp16 = const()[name = tensor("op_2186_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_13 = transpose(perm = input_355_perm_0, x = x_39)[name = tensor("transpose_13")]; + tensor o_39_cast_fp16 = layer_norm(axes = o_39_axes_0, beta = bias_155, epsilon = var_2186_to_fp16, gamma = weight_157, x = transpose_13)[name = tensor("o_39_cast_fp16")]; + tensor input_357_perm_0 = const()[name = tensor("input_357_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2393 = const()[name = tensor("op_2393"), val = tensor([1, 1])]; + tensor var_2395 = const()[name = tensor("op_2395"), val = tensor([1, 1])]; + tensor var_2397_pad_type_0 = const()[name = tensor("op_2397_pad_type_0"), val = tensor("custom")]; + tensor var_2397_pad_0 = const()[name = tensor("op_2397_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_12 = transpose(perm = input_357_perm_0, x = o_39_cast_fp16)[name = tensor("transpose_12")]; + tensor var_2397 = conv(bias = layers_9_mlp_fc1_bias, dilations = var_2395, groups = var_2187, pad = var_2397_pad_0, pad_type = var_2397_pad_type_0, strides = var_2393, weight = layers_9_mlp_fc1_weight, x = transpose_12)[name = tensor("op_2397")]; + tensor input_359_mode_0 = const()[name = tensor("input_359_mode_0"), val = tensor("EXACT")]; + tensor input_359 = gelu(mode = input_359_mode_0, x = var_2397)[name = tensor("input_359")]; + tensor var_2401 = const()[name = tensor("op_2401"), val = tensor([1, 1])]; + tensor var_2403 = const()[name = tensor("op_2403"), val = tensor([1, 1])]; + tensor var_2405_pad_type_0 = const()[name = tensor("op_2405_pad_type_0"), val = tensor("custom")]; + tensor var_2405_pad_0 = const()[name = tensor("op_2405_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2405 = conv(bias = layers_9_mlp_fc2_bias, dilations = var_2403, groups = var_2187, pad = var_2405_pad_0, pad_type = var_2405_pad_type_0, strides = var_2401, weight = layers_9_mlp_fc2_weight, x = input_359)[name = tensor("op_2405")]; + tensor x_41 = add(x = transpose_12, y = var_2405)[name = tensor("x_41")]; + tensor input_361_perm_0 = const()[name = tensor("input_361_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_163 = const()[name = tensor("weight_163"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66820352)))]; + tensor bias_161 = const()[name = tensor("bias_161"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66821184)))]; + tensor o_41_axes_0 = const()[name = tensor("o_41_axes_0"), val = tensor([-1])]; + tensor transpose_11 = transpose(perm = input_361_perm_0, x = x_41)[name = tensor("transpose_11")]; + tensor o_41_cast_fp16 = layer_norm(axes = o_41_axes_0, beta = bias_161, epsilon = var_2186_to_fp16, gamma = weight_163, x = transpose_11)[name = tensor("o_41_cast_fp16")]; + tensor hidden_states_21_perm_0 = const()[name = tensor("hidden_states_21_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2421 = const()[name = tensor("op_2421"), val = tensor(1)]; + tensor var_2422 = const()[name = tensor("op_2422"), val = tensor(0)]; + tensor var_2445 = const()[name = tensor("op_2445"), val = tensor([1, 1])]; + tensor var_2447 = const()[name = tensor("op_2447"), val = tensor([1, 1])]; + tensor var_2449_pad_type_0 = const()[name = tensor("op_2449_pad_type_0"), val = tensor("custom")]; + tensor var_2449_pad_0 = const()[name = tensor("op_2449_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_10 = transpose(perm = hidden_states_21_perm_0, x = o_41_cast_fp16)[name = tensor("transpose_10")]; + tensor var_2449 = conv(bias = layers_10_attention_q_proj_bias, dilations = var_2447, groups = var_2421, pad = var_2449_pad_0, pad_type = var_2449_pad_type_0, strides = var_2445, weight = layers_10_attention_q_proj_weight, x = transpose_10)[name = tensor("op_2449")]; + tensor var_2452 = const()[name = tensor("op_2452"), val = tensor([1, 1])]; + tensor var_2454 = const()[name = tensor("op_2454"), val = tensor([1, 1])]; tensor ks_21_pad_type_0 = const()[name = tensor("ks_21_pad_type_0"), val = tensor("custom")]; tensor ks_21_pad_0 = const()[name = tensor("ks_21_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks_21 = conv(bias = layers_10_attention_k_proj_bias, dilations = var_2581, groups = var_2548, pad = ks_21_pad_0, pad_type = ks_21_pad_type_0, strides = var_2579, weight = layers_10_attention_k_proj_weight, x = var_2542_cast_fp16)[name = tensor("ks_21")]; - tensor var_2586 = const()[name = tensor("op_2586"), val = tensor([1, 1])]; - tensor var_2588 = const()[name = tensor("op_2588"), val = tensor([1, 1])]; - tensor var_2590_pad_type_0 = const()[name = tensor("op_2590_pad_type_0"), val = tensor("custom")]; - tensor var_2590_pad_0 = const()[name = tensor("op_2590_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2590 = conv(bias = layers_10_attention_v_proj_bias, dilations = var_2588, groups = var_2548, pad = var_2590_pad_0, pad_type = var_2590_pad_type_0, strides = var_2586, weight = layers_10_attention_v_proj_weight, x = var_2542_cast_fp16)[name = tensor("op_2590")]; + tensor ks_21 = conv(bias = layers_10_attention_k_proj_bias, dilations = var_2454, groups = var_2421, pad = ks_21_pad_0, pad_type = ks_21_pad_type_0, strides = var_2452, weight = layers_10_attention_k_proj_weight, x = transpose_10)[name = tensor("ks_21")]; + tensor var_2459 = const()[name = tensor("op_2459"), val = tensor([1, 1])]; + tensor var_2461 = const()[name = tensor("op_2461"), val = tensor([1, 1])]; + tensor var_2463_pad_type_0 = const()[name = tensor("op_2463_pad_type_0"), val = tensor("custom")]; + tensor var_2463_pad_0 = const()[name = tensor("op_2463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2463 = conv(bias = layers_10_attention_v_proj_bias, dilations = var_2461, groups = var_2421, pad = var_2463_pad_0, pad_type = var_2463_pad_type_0, strides = var_2459, weight = layers_10_attention_v_proj_weight, x = transpose_10)[name = tensor("op_2463")]; tensor tile_52 = const()[name = tensor("tile_52"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2591_axis_0 = const()[name = tensor("op_2591_axis_0"), val = tensor(1)]; - tensor var_2591_0, tensor var_2591_1, tensor var_2591_2, tensor var_2591_3, tensor var_2591_4, tensor var_2591_5, tensor var_2591_6, tensor var_2591_7, tensor var_2591_8, tensor var_2591_9, tensor var_2591_10, tensor var_2591_11 = split(axis = var_2591_axis_0, split_sizes = tile_52, x = var_2576)[name = tensor("op_2591")]; - tensor var_2604_perm_0 = const()[name = tensor("op_2604_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2464_axis_0 = const()[name = tensor("op_2464_axis_0"), val = tensor(1)]; + tensor var_2464_0, tensor var_2464_1, tensor var_2464_2, tensor var_2464_3, tensor var_2464_4, tensor var_2464_5, tensor var_2464_6, tensor var_2464_7, tensor var_2464_8, tensor var_2464_9, tensor var_2464_10, tensor var_2464_11 = split(axis = var_2464_axis_0, split_sizes = tile_52, x = var_2449)[name = tensor("op_2464")]; + tensor var_2477_perm_0 = const()[name = tensor("op_2477_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_53 = const()[name = tensor("tile_53"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2605_axis_0 = const()[name = tensor("op_2605_axis_0"), val = tensor(3)]; - tensor transpose_1 = transpose(perm = var_2604_perm_0, x = ks_21)[name = tensor("transpose_1")]; - tensor var_2605_0, tensor var_2605_1, tensor var_2605_2, tensor var_2605_3, tensor var_2605_4, tensor var_2605_5, tensor var_2605_6, tensor var_2605_7, tensor var_2605_8, tensor var_2605_9, tensor var_2605_10, tensor var_2605_11 = split(axis = var_2605_axis_0, split_sizes = tile_53, x = transpose_1)[name = tensor("op_2605")]; + tensor var_2478_axis_0 = const()[name = tensor("op_2478_axis_0"), val = tensor(3)]; + tensor transpose_9 = transpose(perm = var_2477_perm_0, x = ks_21)[name = tensor("transpose_9")]; + tensor var_2478_0, tensor var_2478_1, tensor var_2478_2, tensor var_2478_3, tensor var_2478_4, tensor var_2478_5, tensor var_2478_6, tensor var_2478_7, tensor var_2478_8, tensor var_2478_9, tensor var_2478_10, tensor var_2478_11 = split(axis = var_2478_axis_0, split_sizes = tile_53, x = transpose_9)[name = tensor("op_2478")]; tensor tile_54 = const()[name = tensor("tile_54"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2618_axis_0 = const()[name = tensor("op_2618_axis_0"), val = tensor(1)]; - tensor var_2618_0, tensor var_2618_1, tensor var_2618_2, tensor var_2618_3, tensor var_2618_4, tensor var_2618_5, tensor var_2618_6, tensor var_2618_7, tensor var_2618_8, tensor var_2618_9, tensor var_2618_10, tensor var_2618_11 = split(axis = var_2618_axis_0, split_sizes = tile_54, x = var_2590)[name = tensor("op_2618")]; - tensor var_2632_equation_0 = const()[name = tensor("op_2632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2632 = einsum(equation = var_2632_equation_0, values = (var_2605_0, var_2591_0))[name = tensor("op_2632")]; - tensor var_2633_to_fp16 = const()[name = tensor("op_2633_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_241_cast_fp16 = mul(x = var_2632, y = var_2633_to_fp16)[name = tensor("w_241_cast_fp16")]; - tensor var_2636_equation_0 = const()[name = tensor("op_2636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2636 = einsum(equation = var_2636_equation_0, values = (var_2605_1, var_2591_1))[name = tensor("op_2636")]; - tensor var_2637_to_fp16 = const()[name = tensor("op_2637_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_243_cast_fp16 = mul(x = var_2636, y = var_2637_to_fp16)[name = tensor("w_243_cast_fp16")]; - tensor var_2640_equation_0 = const()[name = tensor("op_2640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2640 = einsum(equation = var_2640_equation_0, values = (var_2605_2, var_2591_2))[name = tensor("op_2640")]; - tensor var_2641_to_fp16 = const()[name = tensor("op_2641_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_245_cast_fp16 = mul(x = var_2640, y = var_2641_to_fp16)[name = tensor("w_245_cast_fp16")]; - tensor var_2644_equation_0 = const()[name = tensor("op_2644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2644 = einsum(equation = var_2644_equation_0, values = (var_2605_3, var_2591_3))[name = tensor("op_2644")]; - tensor var_2645_to_fp16 = const()[name = tensor("op_2645_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_247_cast_fp16 = mul(x = var_2644, y = var_2645_to_fp16)[name = tensor("w_247_cast_fp16")]; - tensor var_2648_equation_0 = const()[name = tensor("op_2648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2648 = einsum(equation = var_2648_equation_0, values = (var_2605_4, var_2591_4))[name = tensor("op_2648")]; - tensor var_2649_to_fp16 = const()[name = tensor("op_2649_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_249_cast_fp16 = mul(x = var_2648, y = var_2649_to_fp16)[name = tensor("w_249_cast_fp16")]; - tensor var_2652_equation_0 = const()[name = tensor("op_2652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2652 = einsum(equation = var_2652_equation_0, values = (var_2605_5, var_2591_5))[name = tensor("op_2652")]; - tensor var_2653_to_fp16 = const()[name = tensor("op_2653_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_251_cast_fp16 = mul(x = var_2652, y = var_2653_to_fp16)[name = tensor("w_251_cast_fp16")]; - tensor var_2656_equation_0 = const()[name = tensor("op_2656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2656 = einsum(equation = var_2656_equation_0, values = (var_2605_6, var_2591_6))[name = tensor("op_2656")]; - tensor var_2657_to_fp16 = const()[name = tensor("op_2657_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_253_cast_fp16 = mul(x = var_2656, y = var_2657_to_fp16)[name = tensor("w_253_cast_fp16")]; - tensor var_2660_equation_0 = const()[name = tensor("op_2660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2660 = einsum(equation = var_2660_equation_0, values = (var_2605_7, var_2591_7))[name = tensor("op_2660")]; - tensor var_2661_to_fp16 = const()[name = tensor("op_2661_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_255_cast_fp16 = mul(x = var_2660, y = var_2661_to_fp16)[name = tensor("w_255_cast_fp16")]; - tensor var_2664_equation_0 = const()[name = tensor("op_2664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2664 = einsum(equation = var_2664_equation_0, values = (var_2605_8, var_2591_8))[name = tensor("op_2664")]; - tensor var_2665_to_fp16 = const()[name = tensor("op_2665_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_257_cast_fp16 = mul(x = var_2664, y = var_2665_to_fp16)[name = tensor("w_257_cast_fp16")]; - tensor var_2668_equation_0 = const()[name = tensor("op_2668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2668 = einsum(equation = var_2668_equation_0, values = (var_2605_9, var_2591_9))[name = tensor("op_2668")]; - tensor var_2669_to_fp16 = const()[name = tensor("op_2669_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_259_cast_fp16 = mul(x = var_2668, y = var_2669_to_fp16)[name = tensor("w_259_cast_fp16")]; - tensor var_2672_equation_0 = const()[name = tensor("op_2672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2672 = einsum(equation = var_2672_equation_0, values = (var_2605_10, var_2591_10))[name = tensor("op_2672")]; - tensor var_2673_to_fp16 = const()[name = tensor("op_2673_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_261_cast_fp16 = mul(x = var_2672, y = var_2673_to_fp16)[name = tensor("w_261_cast_fp16")]; - tensor var_2676_equation_0 = const()[name = tensor("op_2676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2676 = einsum(equation = var_2676_equation_0, values = (var_2605_11, var_2591_11))[name = tensor("op_2676")]; - tensor var_2677_to_fp16 = const()[name = tensor("op_2677_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_263_cast_fp16 = mul(x = var_2676, y = var_2677_to_fp16)[name = tensor("w_263_cast_fp16")]; - tensor input_323_cast_fp16 = add(x = w_241_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_323_cast_fp16")]; - tensor var_2680_cast_fp16 = softmax(axis = var_2548, x = input_323_cast_fp16)[name = tensor("op_2680_cast_fp16")]; - tensor input_325_cast_fp16 = add(x = w_243_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_325_cast_fp16")]; - tensor var_2682_cast_fp16 = softmax(axis = var_2548, x = input_325_cast_fp16)[name = tensor("op_2682_cast_fp16")]; - tensor input_327_cast_fp16 = add(x = w_245_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_327_cast_fp16")]; - tensor var_2684_cast_fp16 = softmax(axis = var_2548, x = input_327_cast_fp16)[name = tensor("op_2684_cast_fp16")]; - tensor input_329_cast_fp16 = add(x = w_247_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_329_cast_fp16")]; - tensor var_2686_cast_fp16 = softmax(axis = var_2548, x = input_329_cast_fp16)[name = tensor("op_2686_cast_fp16")]; - tensor input_331_cast_fp16 = add(x = w_249_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_331_cast_fp16")]; - tensor var_2688_cast_fp16 = softmax(axis = var_2548, x = input_331_cast_fp16)[name = tensor("op_2688_cast_fp16")]; - tensor input_333_cast_fp16 = add(x = w_251_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_333_cast_fp16")]; - tensor var_2690_cast_fp16 = softmax(axis = var_2548, x = input_333_cast_fp16)[name = tensor("op_2690_cast_fp16")]; - tensor input_335_cast_fp16 = add(x = w_253_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_335_cast_fp16")]; - tensor var_2692_cast_fp16 = softmax(axis = var_2548, x = input_335_cast_fp16)[name = tensor("op_2692_cast_fp16")]; - tensor input_337_cast_fp16 = add(x = w_255_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_337_cast_fp16")]; - tensor var_2694_cast_fp16 = softmax(axis = var_2548, x = input_337_cast_fp16)[name = tensor("op_2694_cast_fp16")]; - tensor input_339_cast_fp16 = add(x = w_257_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_339_cast_fp16")]; - tensor var_2696_cast_fp16 = softmax(axis = var_2548, x = input_339_cast_fp16)[name = tensor("op_2696_cast_fp16")]; - tensor input_341_cast_fp16 = add(x = w_259_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_341_cast_fp16")]; - tensor var_2698_cast_fp16 = softmax(axis = var_2548, x = input_341_cast_fp16)[name = tensor("op_2698_cast_fp16")]; - tensor input_343_cast_fp16 = add(x = w_261_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_343_cast_fp16")]; - tensor var_2700_cast_fp16 = softmax(axis = var_2548, x = input_343_cast_fp16)[name = tensor("op_2700_cast_fp16")]; - tensor input_345_cast_fp16 = add(x = w_263_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_345_cast_fp16")]; - tensor var_2702_cast_fp16 = softmax(axis = var_2548, x = input_345_cast_fp16)[name = tensor("op_2702_cast_fp16")]; - tensor var_2704_equation_0 = const()[name = tensor("op_2704_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2704_cast_fp16 = einsum(equation = var_2704_equation_0, values = (var_2618_0, var_2680_cast_fp16))[name = tensor("op_2704_cast_fp16")]; - tensor var_2706_equation_0 = const()[name = tensor("op_2706_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2706_cast_fp16 = einsum(equation = var_2706_equation_0, values = (var_2618_1, var_2682_cast_fp16))[name = tensor("op_2706_cast_fp16")]; - tensor var_2708_equation_0 = const()[name = tensor("op_2708_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2708_cast_fp16 = einsum(equation = var_2708_equation_0, values = (var_2618_2, var_2684_cast_fp16))[name = tensor("op_2708_cast_fp16")]; - tensor var_2710_equation_0 = const()[name = tensor("op_2710_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2710_cast_fp16 = einsum(equation = var_2710_equation_0, values = (var_2618_3, var_2686_cast_fp16))[name = tensor("op_2710_cast_fp16")]; - tensor var_2712_equation_0 = const()[name = tensor("op_2712_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2712_cast_fp16 = einsum(equation = var_2712_equation_0, values = (var_2618_4, var_2688_cast_fp16))[name = tensor("op_2712_cast_fp16")]; - tensor var_2714_equation_0 = const()[name = tensor("op_2714_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2714_cast_fp16 = einsum(equation = var_2714_equation_0, values = (var_2618_5, var_2690_cast_fp16))[name = tensor("op_2714_cast_fp16")]; - tensor var_2716_equation_0 = const()[name = tensor("op_2716_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2716_cast_fp16 = einsum(equation = var_2716_equation_0, values = (var_2618_6, var_2692_cast_fp16))[name = tensor("op_2716_cast_fp16")]; - tensor var_2718_equation_0 = const()[name = tensor("op_2718_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2718_cast_fp16 = einsum(equation = var_2718_equation_0, values = (var_2618_7, var_2694_cast_fp16))[name = tensor("op_2718_cast_fp16")]; - tensor var_2720_equation_0 = const()[name = tensor("op_2720_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2720_cast_fp16 = einsum(equation = var_2720_equation_0, values = (var_2618_8, var_2696_cast_fp16))[name = tensor("op_2720_cast_fp16")]; - tensor var_2722_equation_0 = const()[name = tensor("op_2722_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2618_9, var_2698_cast_fp16))[name = tensor("op_2722_cast_fp16")]; - tensor var_2724_equation_0 = const()[name = tensor("op_2724_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2724_cast_fp16 = einsum(equation = var_2724_equation_0, values = (var_2618_10, var_2700_cast_fp16))[name = tensor("op_2724_cast_fp16")]; - tensor var_2726_equation_0 = const()[name = tensor("op_2726_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2618_11, var_2702_cast_fp16))[name = tensor("op_2726_cast_fp16")]; - tensor var_2728_interleave_0 = const()[name = tensor("op_2728_interleave_0"), val = tensor(false)]; - tensor var_2728_cast_fp16 = concat(axis = var_2548, interleave = var_2728_interleave_0, values = (var_2704_cast_fp16, var_2706_cast_fp16, var_2708_cast_fp16, var_2710_cast_fp16, var_2712_cast_fp16, var_2714_cast_fp16, var_2716_cast_fp16, var_2718_cast_fp16, var_2720_cast_fp16, var_2722_cast_fp16, var_2724_cast_fp16, var_2726_cast_fp16))[name = tensor("op_2728_cast_fp16")]; - tensor var_2732 = const()[name = tensor("op_2732"), val = tensor([1, 1])]; - tensor var_2734 = const()[name = tensor("op_2734"), val = tensor([1, 1])]; - tensor var_2736_pad_type_0 = const()[name = tensor("op_2736_pad_type_0"), val = tensor("custom")]; - tensor var_2736_pad_0 = const()[name = tensor("op_2736_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2736 = conv(bias = layers_10_attention_o_proj_bias, dilations = var_2734, groups = var_2548, pad = var_2736_pad_0, pad_type = var_2736_pad_type_0, strides = var_2732, weight = layers_10_attention_o_proj_weight, x = var_2728_cast_fp16)[name = tensor("op_2736")]; - tensor var_2738_interleave_0 = const()[name = tensor("op_2738_interleave_0"), val = tensor(false)]; - tensor var_2738 = concat(axis = var_2549, interleave = var_2738_interleave_0, values = var_2736)[name = tensor("op_2738")]; - tensor x_85 = add(x = var_2542_cast_fp16, y = var_2738)[name = tensor("x_85")]; - tensor var_2545_promoted = const()[name = tensor("op_2545_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2546_promoted = const()[name = tensor("op_2546_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_87 = clip(alpha = var_2545_promoted, beta = var_2546_promoted, x = x_85)[name = tensor("x_87")]; - tensor var_2743 = const()[name = tensor("op_2743"), val = tensor([1])]; - tensor mean_43 = reduce_mean(axes = var_2743, keep_dims = var_2550, x = x_87)[name = tensor("mean_43")]; - tensor zero_mean_43 = sub(x = x_87, y = mean_43)[name = tensor("zero_mean_43")]; - tensor var_2547_promoted = const()[name = tensor("op_2547_promoted"), val = tensor(0x1p+1)]; - tensor var_2746 = pow(x = zero_mean_43, y = var_2547_promoted)[name = tensor("op_2746")]; - tensor var_2747 = const()[name = tensor("op_2747"), val = tensor([1])]; - tensor var_2748 = reduce_mean(axes = var_2747, keep_dims = var_2550, x = var_2746)[name = tensor("op_2748")]; - tensor var_2749_to_fp16 = const()[name = tensor("op_2749_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2750_cast_fp16 = add(x = var_2748, y = var_2749_to_fp16)[name = tensor("op_2750_cast_fp16")]; - tensor denom_43_epsilon_0 = const()[name = tensor("denom_43_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0, x = var_2750_cast_fp16)[name = tensor("denom_43_cast_fp16")]; - tensor var_2752_cast_fp16 = mul(x = zero_mean_43, y = denom_43_cast_fp16)[name = tensor("op_2752_cast_fp16")]; - tensor var_2754_gamma_0_to_fp16 = const()[name = tensor("op_2754_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66823680)))]; - tensor var_2754_beta_0_to_fp16 = const()[name = tensor("op_2754_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66824512)))]; - tensor var_2754_epsilon_0_to_fp16 = const()[name = tensor("op_2754_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2754_cast_fp16 = batch_norm(beta = var_2754_beta_0_to_fp16, epsilon = var_2754_epsilon_0_to_fp16, gamma = var_2754_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2752_cast_fp16)[name = tensor("op_2754_cast_fp16")]; - tensor var_2760 = const()[name = tensor("op_2760"), val = tensor([1, 1])]; - tensor var_2762 = const()[name = tensor("op_2762"), val = tensor([1, 1])]; - tensor var_2764_pad_type_0 = const()[name = tensor("op_2764_pad_type_0"), val = tensor("custom")]; - tensor var_2764_pad_0 = const()[name = tensor("op_2764_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2764 = conv(bias = layers_10_mlp_fc1_bias, dilations = var_2762, groups = var_2548, pad = var_2764_pad_0, pad_type = var_2764_pad_type_0, strides = var_2760, weight = layers_10_mlp_fc1_weight, x = var_2754_cast_fp16)[name = tensor("op_2764")]; - tensor input_351_mode_0 = const()[name = tensor("input_351_mode_0"), val = tensor("EXACT")]; - tensor input_351 = gelu(mode = input_351_mode_0, x = var_2764)[name = tensor("input_351")]; - tensor var_2768 = const()[name = tensor("op_2768"), val = tensor([1, 1])]; - tensor var_2770 = const()[name = tensor("op_2770"), val = tensor([1, 1])]; - tensor var_2772_pad_type_0 = const()[name = tensor("op_2772_pad_type_0"), val = tensor("custom")]; - tensor var_2772_pad_0 = const()[name = tensor("op_2772_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2772 = conv(bias = layers_10_mlp_fc2_bias, dilations = var_2770, groups = var_2548, pad = var_2772_pad_0, pad_type = var_2772_pad_type_0, strides = var_2768, weight = layers_10_mlp_fc2_weight, x = input_351)[name = tensor("op_2772")]; - tensor x_89 = add(x = var_2754_cast_fp16, y = var_2772)[name = tensor("x_89")]; - tensor var_2545_promoted_1 = const()[name = tensor("op_2545_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2546_promoted_1 = const()[name = tensor("op_2546_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x_91 = clip(alpha = var_2545_promoted_1, beta = var_2546_promoted_1, x = x_89)[name = tensor("x_91")]; - tensor var_2777 = const()[name = tensor("op_2777"), val = tensor([1])]; - tensor mean_45 = reduce_mean(axes = var_2777, keep_dims = var_2550, x = x_91)[name = tensor("mean_45")]; - tensor zero_mean_45 = sub(x = x_91, y = mean_45)[name = tensor("zero_mean_45")]; - tensor var_2547_promoted_1 = const()[name = tensor("op_2547_promoted_1"), val = tensor(0x1p+1)]; - tensor var_2780 = pow(x = zero_mean_45, y = var_2547_promoted_1)[name = tensor("op_2780")]; - tensor var_2781 = const()[name = tensor("op_2781"), val = tensor([1])]; - tensor var_2782 = reduce_mean(axes = var_2781, keep_dims = var_2550, x = var_2780)[name = tensor("op_2782")]; - tensor var_2783_to_fp16 = const()[name = tensor("op_2783_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2784_cast_fp16 = add(x = var_2782, y = var_2783_to_fp16)[name = tensor("op_2784_cast_fp16")]; - tensor denom_45_epsilon_0 = const()[name = tensor("denom_45_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0, x = var_2784_cast_fp16)[name = tensor("denom_45_cast_fp16")]; - tensor var_2786_cast_fp16 = mul(x = zero_mean_45, y = denom_45_cast_fp16)[name = tensor("op_2786_cast_fp16")]; - tensor var_2788_gamma_0_to_fp16 = const()[name = tensor("op_2788_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66825344)))]; - tensor var_2788_beta_0_to_fp16 = const()[name = tensor("op_2788_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66826176)))]; - tensor var_2788_epsilon_0_to_fp16 = const()[name = tensor("op_2788_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_2788_cast_fp16 = batch_norm(beta = var_2788_beta_0_to_fp16, epsilon = var_2788_epsilon_0_to_fp16, gamma = var_2788_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2786_cast_fp16)[name = tensor("op_2788_cast_fp16")]; - tensor var_2794 = const()[name = tensor("op_2794"), val = tensor(1)]; - tensor var_2795 = const()[name = tensor("op_2795"), val = tensor(0)]; - tensor var_2796 = const()[name = tensor("op_2796"), val = tensor(true)]; - tensor var_2818 = const()[name = tensor("op_2818"), val = tensor([1, 1])]; - tensor var_2820 = const()[name = tensor("op_2820"), val = tensor([1, 1])]; - tensor var_2822_pad_type_0 = const()[name = tensor("op_2822_pad_type_0"), val = tensor("custom")]; - tensor var_2822_pad_0 = const()[name = tensor("op_2822_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2822 = conv(bias = layers_11_attention_q_proj_bias, dilations = var_2820, groups = var_2794, pad = var_2822_pad_0, pad_type = var_2822_pad_type_0, strides = var_2818, weight = layers_11_attention_q_proj_weight, x = var_2788_cast_fp16)[name = tensor("op_2822")]; - tensor var_2825 = const()[name = tensor("op_2825"), val = tensor([1, 1])]; - tensor var_2827 = const()[name = tensor("op_2827"), val = tensor([1, 1])]; + tensor var_2491_axis_0 = const()[name = tensor("op_2491_axis_0"), val = tensor(1)]; + tensor var_2491_0, tensor var_2491_1, tensor var_2491_2, tensor var_2491_3, tensor var_2491_4, tensor var_2491_5, tensor var_2491_6, tensor var_2491_7, tensor var_2491_8, tensor var_2491_9, tensor var_2491_10, tensor var_2491_11 = split(axis = var_2491_axis_0, split_sizes = tile_54, x = var_2463)[name = tensor("op_2491")]; + tensor var_2505_equation_0 = const()[name = tensor("op_2505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2505 = einsum(equation = var_2505_equation_0, values = (var_2478_0, var_2464_0))[name = tensor("op_2505")]; + tensor var_2506_to_fp16 = const()[name = tensor("op_2506_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_241_cast_fp16 = mul(x = var_2505, y = var_2506_to_fp16)[name = tensor("w_241_cast_fp16")]; + tensor var_2509_equation_0 = const()[name = tensor("op_2509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2509 = einsum(equation = var_2509_equation_0, values = (var_2478_1, var_2464_1))[name = tensor("op_2509")]; + tensor var_2510_to_fp16 = const()[name = tensor("op_2510_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_243_cast_fp16 = mul(x = var_2509, y = var_2510_to_fp16)[name = tensor("w_243_cast_fp16")]; + tensor var_2513_equation_0 = const()[name = tensor("op_2513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2513 = einsum(equation = var_2513_equation_0, values = (var_2478_2, var_2464_2))[name = tensor("op_2513")]; + tensor var_2514_to_fp16 = const()[name = tensor("op_2514_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_245_cast_fp16 = mul(x = var_2513, y = var_2514_to_fp16)[name = tensor("w_245_cast_fp16")]; + tensor var_2517_equation_0 = const()[name = tensor("op_2517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2517 = einsum(equation = var_2517_equation_0, values = (var_2478_3, var_2464_3))[name = tensor("op_2517")]; + tensor var_2518_to_fp16 = const()[name = tensor("op_2518_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_247_cast_fp16 = mul(x = var_2517, y = var_2518_to_fp16)[name = tensor("w_247_cast_fp16")]; + tensor var_2521_equation_0 = const()[name = tensor("op_2521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2521 = einsum(equation = var_2521_equation_0, values = (var_2478_4, var_2464_4))[name = tensor("op_2521")]; + tensor var_2522_to_fp16 = const()[name = tensor("op_2522_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_249_cast_fp16 = mul(x = var_2521, y = var_2522_to_fp16)[name = tensor("w_249_cast_fp16")]; + tensor var_2525_equation_0 = const()[name = tensor("op_2525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2525 = einsum(equation = var_2525_equation_0, values = (var_2478_5, var_2464_5))[name = tensor("op_2525")]; + tensor var_2526_to_fp16 = const()[name = tensor("op_2526_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_251_cast_fp16 = mul(x = var_2525, y = var_2526_to_fp16)[name = tensor("w_251_cast_fp16")]; + tensor var_2529_equation_0 = const()[name = tensor("op_2529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2529 = einsum(equation = var_2529_equation_0, values = (var_2478_6, var_2464_6))[name = tensor("op_2529")]; + tensor var_2530_to_fp16 = const()[name = tensor("op_2530_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_253_cast_fp16 = mul(x = var_2529, y = var_2530_to_fp16)[name = tensor("w_253_cast_fp16")]; + tensor var_2533_equation_0 = const()[name = tensor("op_2533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2533 = einsum(equation = var_2533_equation_0, values = (var_2478_7, var_2464_7))[name = tensor("op_2533")]; + tensor var_2534_to_fp16 = const()[name = tensor("op_2534_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_255_cast_fp16 = mul(x = var_2533, y = var_2534_to_fp16)[name = tensor("w_255_cast_fp16")]; + tensor var_2537_equation_0 = const()[name = tensor("op_2537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2537 = einsum(equation = var_2537_equation_0, values = (var_2478_8, var_2464_8))[name = tensor("op_2537")]; + tensor var_2538_to_fp16 = const()[name = tensor("op_2538_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_257_cast_fp16 = mul(x = var_2537, y = var_2538_to_fp16)[name = tensor("w_257_cast_fp16")]; + tensor var_2541_equation_0 = const()[name = tensor("op_2541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2541 = einsum(equation = var_2541_equation_0, values = (var_2478_9, var_2464_9))[name = tensor("op_2541")]; + tensor var_2542_to_fp16 = const()[name = tensor("op_2542_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_259_cast_fp16 = mul(x = var_2541, y = var_2542_to_fp16)[name = tensor("w_259_cast_fp16")]; + tensor var_2545_equation_0 = const()[name = tensor("op_2545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2545 = einsum(equation = var_2545_equation_0, values = (var_2478_10, var_2464_10))[name = tensor("op_2545")]; + tensor var_2546_to_fp16 = const()[name = tensor("op_2546_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_261_cast_fp16 = mul(x = var_2545, y = var_2546_to_fp16)[name = tensor("w_261_cast_fp16")]; + tensor var_2549_equation_0 = const()[name = tensor("op_2549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2549 = einsum(equation = var_2549_equation_0, values = (var_2478_11, var_2464_11))[name = tensor("op_2549")]; + tensor var_2550_to_fp16 = const()[name = tensor("op_2550_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_263_cast_fp16 = mul(x = var_2549, y = var_2550_to_fp16)[name = tensor("w_263_cast_fp16")]; + tensor input_365_cast_fp16 = add(x = w_241_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_365_cast_fp16")]; + tensor var_2553_cast_fp16 = softmax(axis = var_2421, x = input_365_cast_fp16)[name = tensor("op_2553_cast_fp16")]; + tensor input_367_cast_fp16 = add(x = w_243_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_367_cast_fp16")]; + tensor var_2555_cast_fp16 = softmax(axis = var_2421, x = input_367_cast_fp16)[name = tensor("op_2555_cast_fp16")]; + tensor input_369_cast_fp16 = add(x = w_245_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_369_cast_fp16")]; + tensor var_2557_cast_fp16 = softmax(axis = var_2421, x = input_369_cast_fp16)[name = tensor("op_2557_cast_fp16")]; + tensor input_371_cast_fp16 = add(x = w_247_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_371_cast_fp16")]; + tensor var_2559_cast_fp16 = softmax(axis = var_2421, x = input_371_cast_fp16)[name = tensor("op_2559_cast_fp16")]; + tensor input_373_cast_fp16 = add(x = w_249_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_373_cast_fp16")]; + tensor var_2561_cast_fp16 = softmax(axis = var_2421, x = input_373_cast_fp16)[name = tensor("op_2561_cast_fp16")]; + tensor input_375_cast_fp16 = add(x = w_251_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_375_cast_fp16")]; + tensor var_2563_cast_fp16 = softmax(axis = var_2421, x = input_375_cast_fp16)[name = tensor("op_2563_cast_fp16")]; + tensor input_377_cast_fp16 = add(x = w_253_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_377_cast_fp16")]; + tensor var_2565_cast_fp16 = softmax(axis = var_2421, x = input_377_cast_fp16)[name = tensor("op_2565_cast_fp16")]; + tensor input_379_cast_fp16 = add(x = w_255_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_379_cast_fp16")]; + tensor var_2567_cast_fp16 = softmax(axis = var_2421, x = input_379_cast_fp16)[name = tensor("op_2567_cast_fp16")]; + tensor input_381_cast_fp16 = add(x = w_257_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_381_cast_fp16")]; + tensor var_2569_cast_fp16 = softmax(axis = var_2421, x = input_381_cast_fp16)[name = tensor("op_2569_cast_fp16")]; + tensor input_383_cast_fp16 = add(x = w_259_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_383_cast_fp16")]; + tensor var_2571_cast_fp16 = softmax(axis = var_2421, x = input_383_cast_fp16)[name = tensor("op_2571_cast_fp16")]; + tensor input_385_cast_fp16 = add(x = w_261_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_385_cast_fp16")]; + tensor var_2573_cast_fp16 = softmax(axis = var_2421, x = input_385_cast_fp16)[name = tensor("op_2573_cast_fp16")]; + tensor input_387_cast_fp16 = add(x = w_263_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_387_cast_fp16")]; + tensor var_2575_cast_fp16 = softmax(axis = var_2421, x = input_387_cast_fp16)[name = tensor("op_2575_cast_fp16")]; + tensor var_2577_equation_0 = const()[name = tensor("op_2577_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2577_cast_fp16 = einsum(equation = var_2577_equation_0, values = (var_2491_0, var_2553_cast_fp16))[name = tensor("op_2577_cast_fp16")]; + tensor var_2579_equation_0 = const()[name = tensor("op_2579_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2579_cast_fp16 = einsum(equation = var_2579_equation_0, values = (var_2491_1, var_2555_cast_fp16))[name = tensor("op_2579_cast_fp16")]; + tensor var_2581_equation_0 = const()[name = tensor("op_2581_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2581_cast_fp16 = einsum(equation = var_2581_equation_0, values = (var_2491_2, var_2557_cast_fp16))[name = tensor("op_2581_cast_fp16")]; + tensor var_2583_equation_0 = const()[name = tensor("op_2583_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2583_cast_fp16 = einsum(equation = var_2583_equation_0, values = (var_2491_3, var_2559_cast_fp16))[name = tensor("op_2583_cast_fp16")]; + tensor var_2585_equation_0 = const()[name = tensor("op_2585_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2585_cast_fp16 = einsum(equation = var_2585_equation_0, values = (var_2491_4, var_2561_cast_fp16))[name = tensor("op_2585_cast_fp16")]; + tensor var_2587_equation_0 = const()[name = tensor("op_2587_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2587_cast_fp16 = einsum(equation = var_2587_equation_0, values = (var_2491_5, var_2563_cast_fp16))[name = tensor("op_2587_cast_fp16")]; + tensor var_2589_equation_0 = const()[name = tensor("op_2589_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2589_cast_fp16 = einsum(equation = var_2589_equation_0, values = (var_2491_6, var_2565_cast_fp16))[name = tensor("op_2589_cast_fp16")]; + tensor var_2591_equation_0 = const()[name = tensor("op_2591_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2591_cast_fp16 = einsum(equation = var_2591_equation_0, values = (var_2491_7, var_2567_cast_fp16))[name = tensor("op_2591_cast_fp16")]; + tensor var_2593_equation_0 = const()[name = tensor("op_2593_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2593_cast_fp16 = einsum(equation = var_2593_equation_0, values = (var_2491_8, var_2569_cast_fp16))[name = tensor("op_2593_cast_fp16")]; + tensor var_2595_equation_0 = const()[name = tensor("op_2595_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2595_cast_fp16 = einsum(equation = var_2595_equation_0, values = (var_2491_9, var_2571_cast_fp16))[name = tensor("op_2595_cast_fp16")]; + tensor var_2597_equation_0 = const()[name = tensor("op_2597_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2597_cast_fp16 = einsum(equation = var_2597_equation_0, values = (var_2491_10, var_2573_cast_fp16))[name = tensor("op_2597_cast_fp16")]; + tensor var_2599_equation_0 = const()[name = tensor("op_2599_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2599_cast_fp16 = einsum(equation = var_2599_equation_0, values = (var_2491_11, var_2575_cast_fp16))[name = tensor("op_2599_cast_fp16")]; + tensor var_2601_interleave_0 = const()[name = tensor("op_2601_interleave_0"), val = tensor(false)]; + tensor var_2601_cast_fp16 = concat(axis = var_2421, interleave = var_2601_interleave_0, values = (var_2577_cast_fp16, var_2579_cast_fp16, var_2581_cast_fp16, var_2583_cast_fp16, var_2585_cast_fp16, var_2587_cast_fp16, var_2589_cast_fp16, var_2591_cast_fp16, var_2593_cast_fp16, var_2595_cast_fp16, var_2597_cast_fp16, var_2599_cast_fp16))[name = tensor("op_2601_cast_fp16")]; + tensor var_2605 = const()[name = tensor("op_2605"), val = tensor([1, 1])]; + tensor var_2607 = const()[name = tensor("op_2607"), val = tensor([1, 1])]; + tensor var_2609_pad_type_0 = const()[name = tensor("op_2609_pad_type_0"), val = tensor("custom")]; + tensor var_2609_pad_0 = const()[name = tensor("op_2609_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2609 = conv(bias = layers_10_attention_o_proj_bias, dilations = var_2607, groups = var_2421, pad = var_2609_pad_0, pad_type = var_2609_pad_type_0, strides = var_2605, weight = layers_10_attention_o_proj_weight, x = var_2601_cast_fp16)[name = tensor("op_2609")]; + tensor var_2611_interleave_0 = const()[name = tensor("op_2611_interleave_0"), val = tensor(false)]; + tensor var_2611 = concat(axis = var_2422, interleave = var_2611_interleave_0, values = var_2609)[name = tensor("op_2611")]; + tensor x_43 = add(x = transpose_10, y = var_2611)[name = tensor("x_43")]; + tensor input_391_perm_0 = const()[name = tensor("input_391_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_173 = const()[name = tensor("weight_173"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66822016)))]; + tensor bias_171 = const()[name = tensor("bias_171"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66822848)))]; + tensor o_43_axes_0 = const()[name = tensor("o_43_axes_0"), val = tensor([-1])]; + tensor var_2420_to_fp16 = const()[name = tensor("op_2420_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_8 = transpose(perm = input_391_perm_0, x = x_43)[name = tensor("transpose_8")]; + tensor o_43_cast_fp16 = layer_norm(axes = o_43_axes_0, beta = bias_171, epsilon = var_2420_to_fp16, gamma = weight_173, x = transpose_8)[name = tensor("o_43_cast_fp16")]; + tensor input_393_perm_0 = const()[name = tensor("input_393_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2627 = const()[name = tensor("op_2627"), val = tensor([1, 1])]; + tensor var_2629 = const()[name = tensor("op_2629"), val = tensor([1, 1])]; + tensor var_2631_pad_type_0 = const()[name = tensor("op_2631_pad_type_0"), val = tensor("custom")]; + tensor var_2631_pad_0 = const()[name = tensor("op_2631_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_7 = transpose(perm = input_393_perm_0, x = o_43_cast_fp16)[name = tensor("transpose_7")]; + tensor var_2631 = conv(bias = layers_10_mlp_fc1_bias, dilations = var_2629, groups = var_2421, pad = var_2631_pad_0, pad_type = var_2631_pad_type_0, strides = var_2627, weight = layers_10_mlp_fc1_weight, x = transpose_7)[name = tensor("op_2631")]; + tensor input_395_mode_0 = const()[name = tensor("input_395_mode_0"), val = tensor("EXACT")]; + tensor input_395 = gelu(mode = input_395_mode_0, x = var_2631)[name = tensor("input_395")]; + tensor var_2635 = const()[name = tensor("op_2635"), val = tensor([1, 1])]; + tensor var_2637 = const()[name = tensor("op_2637"), val = tensor([1, 1])]; + tensor var_2639_pad_type_0 = const()[name = tensor("op_2639_pad_type_0"), val = tensor("custom")]; + tensor var_2639_pad_0 = const()[name = tensor("op_2639_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2639 = conv(bias = layers_10_mlp_fc2_bias, dilations = var_2637, groups = var_2421, pad = var_2639_pad_0, pad_type = var_2639_pad_type_0, strides = var_2635, weight = layers_10_mlp_fc2_weight, x = input_395)[name = tensor("op_2639")]; + tensor x_45 = add(x = transpose_7, y = var_2639)[name = tensor("x_45")]; + tensor input_397_perm_0 = const()[name = tensor("input_397_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_179 = const()[name = tensor("weight_179"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66823680)))]; + tensor bias_177 = const()[name = tensor("bias_177"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66824512)))]; + tensor o_45_axes_0 = const()[name = tensor("o_45_axes_0"), val = tensor([-1])]; + tensor transpose_6 = transpose(perm = input_397_perm_0, x = x_45)[name = tensor("transpose_6")]; + tensor o_45_cast_fp16 = layer_norm(axes = o_45_axes_0, beta = bias_177, epsilon = var_2420_to_fp16, gamma = weight_179, x = transpose_6)[name = tensor("o_45_cast_fp16")]; + tensor hidden_states_23_perm_0 = const()[name = tensor("hidden_states_23_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2655 = const()[name = tensor("op_2655"), val = tensor(1)]; + tensor var_2656 = const()[name = tensor("op_2656"), val = tensor(0)]; + tensor var_2679 = const()[name = tensor("op_2679"), val = tensor([1, 1])]; + tensor var_2681 = const()[name = tensor("op_2681"), val = tensor([1, 1])]; + tensor var_2683_pad_type_0 = const()[name = tensor("op_2683_pad_type_0"), val = tensor("custom")]; + tensor var_2683_pad_0 = const()[name = tensor("op_2683_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_5 = transpose(perm = hidden_states_23_perm_0, x = o_45_cast_fp16)[name = tensor("transpose_5")]; + tensor var_2683 = conv(bias = layers_11_attention_q_proj_bias, dilations = var_2681, groups = var_2655, pad = var_2683_pad_0, pad_type = var_2683_pad_type_0, strides = var_2679, weight = layers_11_attention_q_proj_weight, x = transpose_5)[name = tensor("op_2683")]; + tensor var_2686 = const()[name = tensor("op_2686"), val = tensor([1, 1])]; + tensor var_2688 = const()[name = tensor("op_2688"), val = tensor([1, 1])]; tensor ks_pad_type_0 = const()[name = tensor("ks_pad_type_0"), val = tensor("custom")]; tensor ks_pad_0 = const()[name = tensor("ks_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor ks = conv(bias = layers_11_attention_k_proj_bias, dilations = var_2827, groups = var_2794, pad = ks_pad_0, pad_type = ks_pad_type_0, strides = var_2825, weight = layers_11_attention_k_proj_weight, x = var_2788_cast_fp16)[name = tensor("ks")]; - tensor var_2832 = const()[name = tensor("op_2832"), val = tensor([1, 1])]; - tensor var_2834 = const()[name = tensor("op_2834"), val = tensor([1, 1])]; - tensor var_2836_pad_type_0 = const()[name = tensor("op_2836_pad_type_0"), val = tensor("custom")]; - tensor var_2836_pad_0 = const()[name = tensor("op_2836_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2836 = conv(bias = layers_11_attention_v_proj_bias, dilations = var_2834, groups = var_2794, pad = var_2836_pad_0, pad_type = var_2836_pad_type_0, strides = var_2832, weight = layers_11_attention_v_proj_weight, x = var_2788_cast_fp16)[name = tensor("op_2836")]; + tensor ks = conv(bias = layers_11_attention_k_proj_bias, dilations = var_2688, groups = var_2655, pad = ks_pad_0, pad_type = ks_pad_type_0, strides = var_2686, weight = layers_11_attention_k_proj_weight, x = transpose_5)[name = tensor("ks")]; + tensor var_2693 = const()[name = tensor("op_2693"), val = tensor([1, 1])]; + tensor var_2695 = const()[name = tensor("op_2695"), val = tensor([1, 1])]; + tensor var_2697_pad_type_0 = const()[name = tensor("op_2697_pad_type_0"), val = tensor("custom")]; + tensor var_2697_pad_0 = const()[name = tensor("op_2697_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2697 = conv(bias = layers_11_attention_v_proj_bias, dilations = var_2695, groups = var_2655, pad = var_2697_pad_0, pad_type = var_2697_pad_type_0, strides = var_2693, weight = layers_11_attention_v_proj_weight, x = transpose_5)[name = tensor("op_2697")]; tensor tile_57 = const()[name = tensor("tile_57"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2837_axis_0 = const()[name = tensor("op_2837_axis_0"), val = tensor(1)]; - tensor var_2837_0, tensor var_2837_1, tensor var_2837_2, tensor var_2837_3, tensor var_2837_4, tensor var_2837_5, tensor var_2837_6, tensor var_2837_7, tensor var_2837_8, tensor var_2837_9, tensor var_2837_10, tensor var_2837_11 = split(axis = var_2837_axis_0, split_sizes = tile_57, x = var_2822)[name = tensor("op_2837")]; - tensor var_2850_perm_0 = const()[name = tensor("op_2850_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2698_axis_0 = const()[name = tensor("op_2698_axis_0"), val = tensor(1)]; + tensor var_2698_0, tensor var_2698_1, tensor var_2698_2, tensor var_2698_3, tensor var_2698_4, tensor var_2698_5, tensor var_2698_6, tensor var_2698_7, tensor var_2698_8, tensor var_2698_9, tensor var_2698_10, tensor var_2698_11 = split(axis = var_2698_axis_0, split_sizes = tile_57, x = var_2683)[name = tensor("op_2698")]; + tensor var_2711_perm_0 = const()[name = tensor("op_2711_perm_0"), val = tensor([0, 3, 2, 1])]; tensor tile_58 = const()[name = tensor("tile_58"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2851_axis_0 = const()[name = tensor("op_2851_axis_0"), val = tensor(3)]; - tensor transpose_0 = transpose(perm = var_2850_perm_0, x = ks)[name = tensor("transpose_0")]; - tensor var_2851_0, tensor var_2851_1, tensor var_2851_2, tensor var_2851_3, tensor var_2851_4, tensor var_2851_5, tensor var_2851_6, tensor var_2851_7, tensor var_2851_8, tensor var_2851_9, tensor var_2851_10, tensor var_2851_11 = split(axis = var_2851_axis_0, split_sizes = tile_58, x = transpose_0)[name = tensor("op_2851")]; + tensor var_2712_axis_0 = const()[name = tensor("op_2712_axis_0"), val = tensor(3)]; + tensor transpose_4 = transpose(perm = var_2711_perm_0, x = ks)[name = tensor("transpose_4")]; + tensor var_2712_0, tensor var_2712_1, tensor var_2712_2, tensor var_2712_3, tensor var_2712_4, tensor var_2712_5, tensor var_2712_6, tensor var_2712_7, tensor var_2712_8, tensor var_2712_9, tensor var_2712_10, tensor var_2712_11 = split(axis = var_2712_axis_0, split_sizes = tile_58, x = transpose_4)[name = tensor("op_2712")]; tensor tile_59 = const()[name = tensor("tile_59"), val = tensor([32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32])]; - tensor var_2864_axis_0 = const()[name = tensor("op_2864_axis_0"), val = tensor(1)]; - tensor var_2864_0, tensor var_2864_1, tensor var_2864_2, tensor var_2864_3, tensor var_2864_4, tensor var_2864_5, tensor var_2864_6, tensor var_2864_7, tensor var_2864_8, tensor var_2864_9, tensor var_2864_10, tensor var_2864_11 = split(axis = var_2864_axis_0, split_sizes = tile_59, x = var_2836)[name = tensor("op_2864")]; - tensor var_2878_equation_0 = const()[name = tensor("op_2878_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2878 = einsum(equation = var_2878_equation_0, values = (var_2851_0, var_2837_0))[name = tensor("op_2878")]; - tensor var_2879_to_fp16 = const()[name = tensor("op_2879_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_265_cast_fp16 = mul(x = var_2878, y = var_2879_to_fp16)[name = tensor("w_265_cast_fp16")]; - tensor var_2882_equation_0 = const()[name = tensor("op_2882_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2882 = einsum(equation = var_2882_equation_0, values = (var_2851_1, var_2837_1))[name = tensor("op_2882")]; - tensor var_2883_to_fp16 = const()[name = tensor("op_2883_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_267_cast_fp16 = mul(x = var_2882, y = var_2883_to_fp16)[name = tensor("w_267_cast_fp16")]; - tensor var_2886_equation_0 = const()[name = tensor("op_2886_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2886 = einsum(equation = var_2886_equation_0, values = (var_2851_2, var_2837_2))[name = tensor("op_2886")]; - tensor var_2887_to_fp16 = const()[name = tensor("op_2887_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_269_cast_fp16 = mul(x = var_2886, y = var_2887_to_fp16)[name = tensor("w_269_cast_fp16")]; - tensor var_2890_equation_0 = const()[name = tensor("op_2890_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2890 = einsum(equation = var_2890_equation_0, values = (var_2851_3, var_2837_3))[name = tensor("op_2890")]; - tensor var_2891_to_fp16 = const()[name = tensor("op_2891_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_271_cast_fp16 = mul(x = var_2890, y = var_2891_to_fp16)[name = tensor("w_271_cast_fp16")]; - tensor var_2894_equation_0 = const()[name = tensor("op_2894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2894 = einsum(equation = var_2894_equation_0, values = (var_2851_4, var_2837_4))[name = tensor("op_2894")]; - tensor var_2895_to_fp16 = const()[name = tensor("op_2895_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_273_cast_fp16 = mul(x = var_2894, y = var_2895_to_fp16)[name = tensor("w_273_cast_fp16")]; - tensor var_2898_equation_0 = const()[name = tensor("op_2898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2898 = einsum(equation = var_2898_equation_0, values = (var_2851_5, var_2837_5))[name = tensor("op_2898")]; - tensor var_2899_to_fp16 = const()[name = tensor("op_2899_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_275_cast_fp16 = mul(x = var_2898, y = var_2899_to_fp16)[name = tensor("w_275_cast_fp16")]; - tensor var_2902_equation_0 = const()[name = tensor("op_2902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2902 = einsum(equation = var_2902_equation_0, values = (var_2851_6, var_2837_6))[name = tensor("op_2902")]; - tensor var_2903_to_fp16 = const()[name = tensor("op_2903_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_277_cast_fp16 = mul(x = var_2902, y = var_2903_to_fp16)[name = tensor("w_277_cast_fp16")]; - tensor var_2906_equation_0 = const()[name = tensor("op_2906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2906 = einsum(equation = var_2906_equation_0, values = (var_2851_7, var_2837_7))[name = tensor("op_2906")]; - tensor var_2907_to_fp16 = const()[name = tensor("op_2907_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_279_cast_fp16 = mul(x = var_2906, y = var_2907_to_fp16)[name = tensor("w_279_cast_fp16")]; - tensor var_2910_equation_0 = const()[name = tensor("op_2910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2910 = einsum(equation = var_2910_equation_0, values = (var_2851_8, var_2837_8))[name = tensor("op_2910")]; - tensor var_2911_to_fp16 = const()[name = tensor("op_2911_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_281_cast_fp16 = mul(x = var_2910, y = var_2911_to_fp16)[name = tensor("w_281_cast_fp16")]; - tensor var_2914_equation_0 = const()[name = tensor("op_2914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2914 = einsum(equation = var_2914_equation_0, values = (var_2851_9, var_2837_9))[name = tensor("op_2914")]; - tensor var_2915_to_fp16 = const()[name = tensor("op_2915_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_283_cast_fp16 = mul(x = var_2914, y = var_2915_to_fp16)[name = tensor("w_283_cast_fp16")]; - tensor var_2918_equation_0 = const()[name = tensor("op_2918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2918 = einsum(equation = var_2918_equation_0, values = (var_2851_10, var_2837_10))[name = tensor("op_2918")]; - tensor var_2919_to_fp16 = const()[name = tensor("op_2919_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_285_cast_fp16 = mul(x = var_2918, y = var_2919_to_fp16)[name = tensor("w_285_cast_fp16")]; - tensor var_2922_equation_0 = const()[name = tensor("op_2922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; - tensor var_2922 = einsum(equation = var_2922_equation_0, values = (var_2851_11, var_2837_11))[name = tensor("op_2922")]; - tensor var_2923_to_fp16 = const()[name = tensor("op_2923_to_fp16"), val = tensor(0x1.6ap-3)]; - tensor w_cast_fp16 = mul(x = var_2922, y = var_2923_to_fp16)[name = tensor("w_cast_fp16")]; - tensor input_355_cast_fp16 = add(x = w_265_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_355_cast_fp16")]; - tensor var_2926_cast_fp16 = softmax(axis = var_2794, x = input_355_cast_fp16)[name = tensor("op_2926_cast_fp16")]; - tensor input_357_cast_fp16 = add(x = w_267_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_357_cast_fp16")]; - tensor var_2928_cast_fp16 = softmax(axis = var_2794, x = input_357_cast_fp16)[name = tensor("op_2928_cast_fp16")]; - tensor input_359_cast_fp16 = add(x = w_269_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_359_cast_fp16")]; - tensor var_2930_cast_fp16 = softmax(axis = var_2794, x = input_359_cast_fp16)[name = tensor("op_2930_cast_fp16")]; - tensor input_361_cast_fp16 = add(x = w_271_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_361_cast_fp16")]; - tensor var_2932_cast_fp16 = softmax(axis = var_2794, x = input_361_cast_fp16)[name = tensor("op_2932_cast_fp16")]; - tensor input_363_cast_fp16 = add(x = w_273_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_363_cast_fp16")]; - tensor var_2934_cast_fp16 = softmax(axis = var_2794, x = input_363_cast_fp16)[name = tensor("op_2934_cast_fp16")]; - tensor input_365_cast_fp16 = add(x = w_275_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_365_cast_fp16")]; - tensor var_2936_cast_fp16 = softmax(axis = var_2794, x = input_365_cast_fp16)[name = tensor("op_2936_cast_fp16")]; - tensor input_367_cast_fp16 = add(x = w_277_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_367_cast_fp16")]; - tensor var_2938_cast_fp16 = softmax(axis = var_2794, x = input_367_cast_fp16)[name = tensor("op_2938_cast_fp16")]; - tensor input_369_cast_fp16 = add(x = w_279_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_369_cast_fp16")]; - tensor var_2940_cast_fp16 = softmax(axis = var_2794, x = input_369_cast_fp16)[name = tensor("op_2940_cast_fp16")]; - tensor input_371_cast_fp16 = add(x = w_281_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_371_cast_fp16")]; - tensor var_2942_cast_fp16 = softmax(axis = var_2794, x = input_371_cast_fp16)[name = tensor("op_2942_cast_fp16")]; - tensor input_373_cast_fp16 = add(x = w_283_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_373_cast_fp16")]; - tensor var_2944_cast_fp16 = softmax(axis = var_2794, x = input_373_cast_fp16)[name = tensor("op_2944_cast_fp16")]; - tensor input_375_cast_fp16 = add(x = w_285_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_375_cast_fp16")]; - tensor var_2946_cast_fp16 = softmax(axis = var_2794, x = input_375_cast_fp16)[name = tensor("op_2946_cast_fp16")]; - tensor input_377_cast_fp16 = add(x = w_cast_fp16, y = var_83_cast_fp16)[name = tensor("input_377_cast_fp16")]; - tensor var_2948_cast_fp16 = softmax(axis = var_2794, x = input_377_cast_fp16)[name = tensor("op_2948_cast_fp16")]; - tensor var_2950_equation_0 = const()[name = tensor("op_2950_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2950_cast_fp16 = einsum(equation = var_2950_equation_0, values = (var_2864_0, var_2926_cast_fp16))[name = tensor("op_2950_cast_fp16")]; - tensor var_2952_equation_0 = const()[name = tensor("op_2952_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2952_cast_fp16 = einsum(equation = var_2952_equation_0, values = (var_2864_1, var_2928_cast_fp16))[name = tensor("op_2952_cast_fp16")]; - tensor var_2954_equation_0 = const()[name = tensor("op_2954_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2954_cast_fp16 = einsum(equation = var_2954_equation_0, values = (var_2864_2, var_2930_cast_fp16))[name = tensor("op_2954_cast_fp16")]; - tensor var_2956_equation_0 = const()[name = tensor("op_2956_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2956_cast_fp16 = einsum(equation = var_2956_equation_0, values = (var_2864_3, var_2932_cast_fp16))[name = tensor("op_2956_cast_fp16")]; - tensor var_2958_equation_0 = const()[name = tensor("op_2958_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2958_cast_fp16 = einsum(equation = var_2958_equation_0, values = (var_2864_4, var_2934_cast_fp16))[name = tensor("op_2958_cast_fp16")]; - tensor var_2960_equation_0 = const()[name = tensor("op_2960_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2960_cast_fp16 = einsum(equation = var_2960_equation_0, values = (var_2864_5, var_2936_cast_fp16))[name = tensor("op_2960_cast_fp16")]; - tensor var_2962_equation_0 = const()[name = tensor("op_2962_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2962_cast_fp16 = einsum(equation = var_2962_equation_0, values = (var_2864_6, var_2938_cast_fp16))[name = tensor("op_2962_cast_fp16")]; - tensor var_2964_equation_0 = const()[name = tensor("op_2964_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2964_cast_fp16 = einsum(equation = var_2964_equation_0, values = (var_2864_7, var_2940_cast_fp16))[name = tensor("op_2964_cast_fp16")]; - tensor var_2966_equation_0 = const()[name = tensor("op_2966_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2966_cast_fp16 = einsum(equation = var_2966_equation_0, values = (var_2864_8, var_2942_cast_fp16))[name = tensor("op_2966_cast_fp16")]; - tensor var_2968_equation_0 = const()[name = tensor("op_2968_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2968_cast_fp16 = einsum(equation = var_2968_equation_0, values = (var_2864_9, var_2944_cast_fp16))[name = tensor("op_2968_cast_fp16")]; - tensor var_2970_equation_0 = const()[name = tensor("op_2970_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2970_cast_fp16 = einsum(equation = var_2970_equation_0, values = (var_2864_10, var_2946_cast_fp16))[name = tensor("op_2970_cast_fp16")]; - tensor var_2972_equation_0 = const()[name = tensor("op_2972_equation_0"), val = tensor("bchk,bkhq->bchq")]; - tensor var_2972_cast_fp16 = einsum(equation = var_2972_equation_0, values = (var_2864_11, var_2948_cast_fp16))[name = tensor("op_2972_cast_fp16")]; - tensor var_2974_interleave_0 = const()[name = tensor("op_2974_interleave_0"), val = tensor(false)]; - tensor var_2974_cast_fp16 = concat(axis = var_2794, interleave = var_2974_interleave_0, values = (var_2950_cast_fp16, var_2952_cast_fp16, var_2954_cast_fp16, var_2956_cast_fp16, var_2958_cast_fp16, var_2960_cast_fp16, var_2962_cast_fp16, var_2964_cast_fp16, var_2966_cast_fp16, var_2968_cast_fp16, var_2970_cast_fp16, var_2972_cast_fp16))[name = tensor("op_2974_cast_fp16")]; - tensor var_2978 = const()[name = tensor("op_2978"), val = tensor([1, 1])]; - tensor var_2980 = const()[name = tensor("op_2980"), val = tensor([1, 1])]; - tensor var_2982_pad_type_0 = const()[name = tensor("op_2982_pad_type_0"), val = tensor("custom")]; - tensor var_2982_pad_0 = const()[name = tensor("op_2982_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_2982 = conv(bias = layers_11_attention_o_proj_bias, dilations = var_2980, groups = var_2794, pad = var_2982_pad_0, pad_type = var_2982_pad_type_0, strides = var_2978, weight = layers_11_attention_o_proj_weight, x = var_2974_cast_fp16)[name = tensor("op_2982")]; - tensor var_2984_interleave_0 = const()[name = tensor("op_2984_interleave_0"), val = tensor(false)]; - tensor var_2984 = concat(axis = var_2795, interleave = var_2984_interleave_0, values = var_2982)[name = tensor("op_2984")]; - tensor x_93 = add(x = var_2788_cast_fp16, y = var_2984)[name = tensor("x_93")]; - tensor var_2791_promoted = const()[name = tensor("op_2791_promoted"), val = tensor(-0x1.f4p+7)]; - tensor var_2792_promoted = const()[name = tensor("op_2792_promoted"), val = tensor(0x1.f4p+7)]; - tensor x_95 = clip(alpha = var_2791_promoted, beta = var_2792_promoted, x = x_93)[name = tensor("x_95")]; - tensor var_2989 = const()[name = tensor("op_2989"), val = tensor([1])]; - tensor mean_47 = reduce_mean(axes = var_2989, keep_dims = var_2796, x = x_95)[name = tensor("mean_47")]; - tensor zero_mean_47 = sub(x = x_95, y = mean_47)[name = tensor("zero_mean_47")]; - tensor var_2793_promoted = const()[name = tensor("op_2793_promoted"), val = tensor(0x1p+1)]; - tensor var_2992 = pow(x = zero_mean_47, y = var_2793_promoted)[name = tensor("op_2992")]; - tensor var_2993 = const()[name = tensor("op_2993"), val = tensor([1])]; - tensor var_2994 = reduce_mean(axes = var_2993, keep_dims = var_2796, x = var_2992)[name = tensor("op_2994")]; - tensor var_2995_to_fp16 = const()[name = tensor("op_2995_to_fp16"), val = tensor(0x1p-24)]; - tensor var_2996_cast_fp16 = add(x = var_2994, y = var_2995_to_fp16)[name = tensor("op_2996_cast_fp16")]; - tensor denom_47_epsilon_0 = const()[name = tensor("denom_47_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0, x = var_2996_cast_fp16)[name = tensor("denom_47_cast_fp16")]; - tensor var_2998_cast_fp16 = mul(x = zero_mean_47, y = denom_47_cast_fp16)[name = tensor("op_2998_cast_fp16")]; - tensor var_3000_gamma_0_to_fp16 = const()[name = tensor("op_3000_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66827008)))]; - tensor var_3000_beta_0_to_fp16 = const()[name = tensor("op_3000_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66827840)))]; - tensor var_3000_epsilon_0_to_fp16 = const()[name = tensor("op_3000_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_3000_cast_fp16 = batch_norm(beta = var_3000_beta_0_to_fp16, epsilon = var_3000_epsilon_0_to_fp16, gamma = var_3000_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_2998_cast_fp16)[name = tensor("op_3000_cast_fp16")]; - tensor var_3006 = const()[name = tensor("op_3006"), val = tensor([1, 1])]; - tensor var_3008 = const()[name = tensor("op_3008"), val = tensor([1, 1])]; - tensor var_3010_pad_type_0 = const()[name = tensor("op_3010_pad_type_0"), val = tensor("custom")]; - tensor var_3010_pad_0 = const()[name = tensor("op_3010_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3010 = conv(bias = layers_11_mlp_fc1_bias, dilations = var_3008, groups = var_2794, pad = var_3010_pad_0, pad_type = var_3010_pad_type_0, strides = var_3006, weight = layers_11_mlp_fc1_weight, x = var_3000_cast_fp16)[name = tensor("op_3010")]; - tensor input_383_mode_0 = const()[name = tensor("input_383_mode_0"), val = tensor("EXACT")]; - tensor input_383 = gelu(mode = input_383_mode_0, x = var_3010)[name = tensor("input_383")]; - tensor var_3014 = const()[name = tensor("op_3014"), val = tensor([1, 1])]; - tensor var_3016 = const()[name = tensor("op_3016"), val = tensor([1, 1])]; - tensor var_3018_pad_type_0 = const()[name = tensor("op_3018_pad_type_0"), val = tensor("custom")]; - tensor var_3018_pad_0 = const()[name = tensor("op_3018_pad_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3018 = conv(bias = layers_11_mlp_fc2_bias, dilations = var_3016, groups = var_2794, pad = var_3018_pad_0, pad_type = var_3018_pad_type_0, strides = var_3014, weight = layers_11_mlp_fc2_weight, x = input_383)[name = tensor("op_3018")]; - tensor x_97 = add(x = var_3000_cast_fp16, y = var_3018)[name = tensor("x_97")]; - tensor var_2791_promoted_1 = const()[name = tensor("op_2791_promoted_1"), val = tensor(-0x1.f4p+7)]; - tensor var_2792_promoted_1 = const()[name = tensor("op_2792_promoted_1"), val = tensor(0x1.f4p+7)]; - tensor x = clip(alpha = var_2791_promoted_1, beta = var_2792_promoted_1, x = x_97)[name = tensor("x")]; - tensor var_3023 = const()[name = tensor("op_3023"), val = tensor([1])]; - tensor mean = reduce_mean(axes = var_3023, keep_dims = var_2796, x = x)[name = tensor("mean")]; - tensor zero_mean = sub(x = x, y = mean)[name = tensor("zero_mean")]; - tensor var_2793_promoted_1 = const()[name = tensor("op_2793_promoted_1"), val = tensor(0x1p+1)]; - tensor var_3026 = pow(x = zero_mean, y = var_2793_promoted_1)[name = tensor("op_3026")]; - tensor var_3027 = const()[name = tensor("op_3027"), val = tensor([1])]; - tensor var_3028 = reduce_mean(axes = var_3027, keep_dims = var_2796, x = var_3026)[name = tensor("op_3028")]; - tensor var_3029_to_fp16 = const()[name = tensor("op_3029_to_fp16"), val = tensor(0x1p-24)]; - tensor var_3030_cast_fp16 = add(x = var_3028, y = var_3029_to_fp16)[name = tensor("op_3030_cast_fp16")]; - tensor denom_49_epsilon_0 = const()[name = tensor("denom_49_epsilon_0"), val = tensor(0x1.197998p-40)]; - tensor denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0, x = var_3030_cast_fp16)[name = tensor("denom_49_cast_fp16")]; - tensor var_3032_cast_fp16 = mul(x = zero_mean, y = denom_49_cast_fp16)[name = tensor("op_3032_cast_fp16")]; - tensor var_3034_gamma_0_to_fp16 = const()[name = tensor("op_3034_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66828672)))]; - tensor var_3034_beta_0_to_fp16 = const()[name = tensor("op_3034_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66829504)))]; - tensor var_3034_epsilon_0_to_fp16 = const()[name = tensor("op_3034_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_3034_cast_fp16 = batch_norm(beta = var_3034_beta_0_to_fp16, epsilon = var_3034_epsilon_0_to_fp16, gamma = var_3034_gamma_0_to_fp16, mean = var_63_mean_0_to_fp16, variance = var_63_variance_0_to_fp16, x = var_3032_cast_fp16)[name = tensor("op_3034_cast_fp16")]; - tensor var_3048_begin_0 = const()[name = tensor("op_3048_begin_0"), val = tensor([0, 0, 0, 0])]; - tensor var_3048_end_0 = const()[name = tensor("op_3048_end_0"), val = tensor([1, 384, 1, 512])]; - tensor var_3048_end_mask_0 = const()[name = tensor("op_3048_end_mask_0"), val = tensor([true, true, false, true])]; - tensor var_3048_squeeze_mask_0 = const()[name = tensor("op_3048_squeeze_mask_0"), val = tensor([false, false, true, false])]; - tensor var_3048 = slice_by_index(begin = var_3048_begin_0, end = var_3048_end_0, end_mask = var_3048_end_mask_0, squeeze_mask = var_3048_squeeze_mask_0, x = var_3034_cast_fp16)[name = tensor("op_3048")]; - tensor var_3051_begin_0 = const()[name = tensor("op_3051_begin_0"), val = tensor([0, 0, 0])]; - tensor var_3051_end_0 = const()[name = tensor("op_3051_end_0"), val = tensor([1, 384, 1])]; - tensor var_3051_end_mask_0 = const()[name = tensor("op_3051_end_mask_0"), val = tensor([true, true, false])]; - tensor var_3051_squeeze_mask_0 = const()[name = tensor("op_3051_squeeze_mask_0"), val = tensor([false, false, true])]; - tensor var_3051 = slice_by_index(begin = var_3051_begin_0, end = var_3051_end_0, end_mask = var_3051_end_mask_0, squeeze_mask = var_3051_squeeze_mask_0, x = var_3048)[name = tensor("op_3051")]; - tensor var_3059 = const()[name = tensor("op_3059"), val = tensor([1])]; - tensor var_3060 = const()[name = tensor("op_3060"), val = tensor(true)]; - tensor abs_0_cast_fp16 = abs(x = var_3051)[name = tensor("abs_0_cast_fp16")]; + tensor var_2725_axis_0 = const()[name = tensor("op_2725_axis_0"), val = tensor(1)]; + tensor var_2725_0, tensor var_2725_1, tensor var_2725_2, tensor var_2725_3, tensor var_2725_4, tensor var_2725_5, tensor var_2725_6, tensor var_2725_7, tensor var_2725_8, tensor var_2725_9, tensor var_2725_10, tensor var_2725_11 = split(axis = var_2725_axis_0, split_sizes = tile_59, x = var_2697)[name = tensor("op_2725")]; + tensor var_2739_equation_0 = const()[name = tensor("op_2739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2739 = einsum(equation = var_2739_equation_0, values = (var_2712_0, var_2698_0))[name = tensor("op_2739")]; + tensor var_2740_to_fp16 = const()[name = tensor("op_2740_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_265_cast_fp16 = mul(x = var_2739, y = var_2740_to_fp16)[name = tensor("w_265_cast_fp16")]; + tensor var_2743_equation_0 = const()[name = tensor("op_2743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2743 = einsum(equation = var_2743_equation_0, values = (var_2712_1, var_2698_1))[name = tensor("op_2743")]; + tensor var_2744_to_fp16 = const()[name = tensor("op_2744_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_267_cast_fp16 = mul(x = var_2743, y = var_2744_to_fp16)[name = tensor("w_267_cast_fp16")]; + tensor var_2747_equation_0 = const()[name = tensor("op_2747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2747 = einsum(equation = var_2747_equation_0, values = (var_2712_2, var_2698_2))[name = tensor("op_2747")]; + tensor var_2748_to_fp16 = const()[name = tensor("op_2748_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_269_cast_fp16 = mul(x = var_2747, y = var_2748_to_fp16)[name = tensor("w_269_cast_fp16")]; + tensor var_2751_equation_0 = const()[name = tensor("op_2751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2751 = einsum(equation = var_2751_equation_0, values = (var_2712_3, var_2698_3))[name = tensor("op_2751")]; + tensor var_2752_to_fp16 = const()[name = tensor("op_2752_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_271_cast_fp16 = mul(x = var_2751, y = var_2752_to_fp16)[name = tensor("w_271_cast_fp16")]; + tensor var_2755_equation_0 = const()[name = tensor("op_2755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2755 = einsum(equation = var_2755_equation_0, values = (var_2712_4, var_2698_4))[name = tensor("op_2755")]; + tensor var_2756_to_fp16 = const()[name = tensor("op_2756_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_273_cast_fp16 = mul(x = var_2755, y = var_2756_to_fp16)[name = tensor("w_273_cast_fp16")]; + tensor var_2759_equation_0 = const()[name = tensor("op_2759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2759 = einsum(equation = var_2759_equation_0, values = (var_2712_5, var_2698_5))[name = tensor("op_2759")]; + tensor var_2760_to_fp16 = const()[name = tensor("op_2760_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_275_cast_fp16 = mul(x = var_2759, y = var_2760_to_fp16)[name = tensor("w_275_cast_fp16")]; + tensor var_2763_equation_0 = const()[name = tensor("op_2763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2763 = einsum(equation = var_2763_equation_0, values = (var_2712_6, var_2698_6))[name = tensor("op_2763")]; + tensor var_2764_to_fp16 = const()[name = tensor("op_2764_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_277_cast_fp16 = mul(x = var_2763, y = var_2764_to_fp16)[name = tensor("w_277_cast_fp16")]; + tensor var_2767_equation_0 = const()[name = tensor("op_2767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2767 = einsum(equation = var_2767_equation_0, values = (var_2712_7, var_2698_7))[name = tensor("op_2767")]; + tensor var_2768_to_fp16 = const()[name = tensor("op_2768_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_279_cast_fp16 = mul(x = var_2767, y = var_2768_to_fp16)[name = tensor("w_279_cast_fp16")]; + tensor var_2771_equation_0 = const()[name = tensor("op_2771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2771 = einsum(equation = var_2771_equation_0, values = (var_2712_8, var_2698_8))[name = tensor("op_2771")]; + tensor var_2772_to_fp16 = const()[name = tensor("op_2772_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_281_cast_fp16 = mul(x = var_2771, y = var_2772_to_fp16)[name = tensor("w_281_cast_fp16")]; + tensor var_2775_equation_0 = const()[name = tensor("op_2775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2775 = einsum(equation = var_2775_equation_0, values = (var_2712_9, var_2698_9))[name = tensor("op_2775")]; + tensor var_2776_to_fp16 = const()[name = tensor("op_2776_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_283_cast_fp16 = mul(x = var_2775, y = var_2776_to_fp16)[name = tensor("w_283_cast_fp16")]; + tensor var_2779_equation_0 = const()[name = tensor("op_2779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2779 = einsum(equation = var_2779_equation_0, values = (var_2712_10, var_2698_10))[name = tensor("op_2779")]; + tensor var_2780_to_fp16 = const()[name = tensor("op_2780_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_285_cast_fp16 = mul(x = var_2779, y = var_2780_to_fp16)[name = tensor("w_285_cast_fp16")]; + tensor var_2783_equation_0 = const()[name = tensor("op_2783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2783 = einsum(equation = var_2783_equation_0, values = (var_2712_11, var_2698_11))[name = tensor("op_2783")]; + tensor var_2784_to_fp16 = const()[name = tensor("op_2784_to_fp16"), val = tensor(0x1.6ap-3)]; + tensor w_cast_fp16 = mul(x = var_2783, y = var_2784_to_fp16)[name = tensor("w_cast_fp16")]; + tensor input_401_cast_fp16 = add(x = w_265_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_401_cast_fp16")]; + tensor var_2787_cast_fp16 = softmax(axis = var_2655, x = input_401_cast_fp16)[name = tensor("op_2787_cast_fp16")]; + tensor input_403_cast_fp16 = add(x = w_267_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_403_cast_fp16")]; + tensor var_2789_cast_fp16 = softmax(axis = var_2655, x = input_403_cast_fp16)[name = tensor("op_2789_cast_fp16")]; + tensor input_405_cast_fp16 = add(x = w_269_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_405_cast_fp16")]; + tensor var_2791_cast_fp16 = softmax(axis = var_2655, x = input_405_cast_fp16)[name = tensor("op_2791_cast_fp16")]; + tensor input_407_cast_fp16 = add(x = w_271_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_407_cast_fp16")]; + tensor var_2793_cast_fp16 = softmax(axis = var_2655, x = input_407_cast_fp16)[name = tensor("op_2793_cast_fp16")]; + tensor input_409_cast_fp16 = add(x = w_273_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_409_cast_fp16")]; + tensor var_2795_cast_fp16 = softmax(axis = var_2655, x = input_409_cast_fp16)[name = tensor("op_2795_cast_fp16")]; + tensor input_411_cast_fp16 = add(x = w_275_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_411_cast_fp16")]; + tensor var_2797_cast_fp16 = softmax(axis = var_2655, x = input_411_cast_fp16)[name = tensor("op_2797_cast_fp16")]; + tensor input_413_cast_fp16 = add(x = w_277_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_413_cast_fp16")]; + tensor var_2799_cast_fp16 = softmax(axis = var_2655, x = input_413_cast_fp16)[name = tensor("op_2799_cast_fp16")]; + tensor input_415_cast_fp16 = add(x = w_279_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_415_cast_fp16")]; + tensor var_2801_cast_fp16 = softmax(axis = var_2655, x = input_415_cast_fp16)[name = tensor("op_2801_cast_fp16")]; + tensor input_417_cast_fp16 = add(x = w_281_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_417_cast_fp16")]; + tensor var_2803_cast_fp16 = softmax(axis = var_2655, x = input_417_cast_fp16)[name = tensor("op_2803_cast_fp16")]; + tensor input_419_cast_fp16 = add(x = w_283_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_419_cast_fp16")]; + tensor var_2805_cast_fp16 = softmax(axis = var_2655, x = input_419_cast_fp16)[name = tensor("op_2805_cast_fp16")]; + tensor input_421_cast_fp16 = add(x = w_285_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_421_cast_fp16")]; + tensor var_2807_cast_fp16 = softmax(axis = var_2655, x = input_421_cast_fp16)[name = tensor("op_2807_cast_fp16")]; + tensor input_423_cast_fp16 = add(x = w_cast_fp16, y = var_76_cast_fp16)[name = tensor("input_423_cast_fp16")]; + tensor var_2809_cast_fp16 = softmax(axis = var_2655, x = input_423_cast_fp16)[name = tensor("op_2809_cast_fp16")]; + tensor var_2811_equation_0 = const()[name = tensor("op_2811_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2811_cast_fp16 = einsum(equation = var_2811_equation_0, values = (var_2725_0, var_2787_cast_fp16))[name = tensor("op_2811_cast_fp16")]; + tensor var_2813_equation_0 = const()[name = tensor("op_2813_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2813_cast_fp16 = einsum(equation = var_2813_equation_0, values = (var_2725_1, var_2789_cast_fp16))[name = tensor("op_2813_cast_fp16")]; + tensor var_2815_equation_0 = const()[name = tensor("op_2815_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2815_cast_fp16 = einsum(equation = var_2815_equation_0, values = (var_2725_2, var_2791_cast_fp16))[name = tensor("op_2815_cast_fp16")]; + tensor var_2817_equation_0 = const()[name = tensor("op_2817_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2817_cast_fp16 = einsum(equation = var_2817_equation_0, values = (var_2725_3, var_2793_cast_fp16))[name = tensor("op_2817_cast_fp16")]; + tensor var_2819_equation_0 = const()[name = tensor("op_2819_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2819_cast_fp16 = einsum(equation = var_2819_equation_0, values = (var_2725_4, var_2795_cast_fp16))[name = tensor("op_2819_cast_fp16")]; + tensor var_2821_equation_0 = const()[name = tensor("op_2821_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2821_cast_fp16 = einsum(equation = var_2821_equation_0, values = (var_2725_5, var_2797_cast_fp16))[name = tensor("op_2821_cast_fp16")]; + tensor var_2823_equation_0 = const()[name = tensor("op_2823_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2823_cast_fp16 = einsum(equation = var_2823_equation_0, values = (var_2725_6, var_2799_cast_fp16))[name = tensor("op_2823_cast_fp16")]; + tensor var_2825_equation_0 = const()[name = tensor("op_2825_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2825_cast_fp16 = einsum(equation = var_2825_equation_0, values = (var_2725_7, var_2801_cast_fp16))[name = tensor("op_2825_cast_fp16")]; + tensor var_2827_equation_0 = const()[name = tensor("op_2827_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2827_cast_fp16 = einsum(equation = var_2827_equation_0, values = (var_2725_8, var_2803_cast_fp16))[name = tensor("op_2827_cast_fp16")]; + tensor var_2829_equation_0 = const()[name = tensor("op_2829_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2829_cast_fp16 = einsum(equation = var_2829_equation_0, values = (var_2725_9, var_2805_cast_fp16))[name = tensor("op_2829_cast_fp16")]; + tensor var_2831_equation_0 = const()[name = tensor("op_2831_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2831_cast_fp16 = einsum(equation = var_2831_equation_0, values = (var_2725_10, var_2807_cast_fp16))[name = tensor("op_2831_cast_fp16")]; + tensor var_2833_equation_0 = const()[name = tensor("op_2833_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2833_cast_fp16 = einsum(equation = var_2833_equation_0, values = (var_2725_11, var_2809_cast_fp16))[name = tensor("op_2833_cast_fp16")]; + tensor var_2835_interleave_0 = const()[name = tensor("op_2835_interleave_0"), val = tensor(false)]; + tensor var_2835_cast_fp16 = concat(axis = var_2655, interleave = var_2835_interleave_0, values = (var_2811_cast_fp16, var_2813_cast_fp16, var_2815_cast_fp16, var_2817_cast_fp16, var_2819_cast_fp16, var_2821_cast_fp16, var_2823_cast_fp16, var_2825_cast_fp16, var_2827_cast_fp16, var_2829_cast_fp16, var_2831_cast_fp16, var_2833_cast_fp16))[name = tensor("op_2835_cast_fp16")]; + tensor var_2839 = const()[name = tensor("op_2839"), val = tensor([1, 1])]; + tensor var_2841 = const()[name = tensor("op_2841"), val = tensor([1, 1])]; + tensor var_2843_pad_type_0 = const()[name = tensor("op_2843_pad_type_0"), val = tensor("custom")]; + tensor var_2843_pad_0 = const()[name = tensor("op_2843_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2843 = conv(bias = layers_11_attention_o_proj_bias, dilations = var_2841, groups = var_2655, pad = var_2843_pad_0, pad_type = var_2843_pad_type_0, strides = var_2839, weight = layers_11_attention_o_proj_weight, x = var_2835_cast_fp16)[name = tensor("op_2843")]; + tensor var_2845_interleave_0 = const()[name = tensor("op_2845_interleave_0"), val = tensor(false)]; + tensor var_2845 = concat(axis = var_2656, interleave = var_2845_interleave_0, values = var_2843)[name = tensor("op_2845")]; + tensor x_47 = add(x = transpose_5, y = var_2845)[name = tensor("x_47")]; + tensor input_427_perm_0 = const()[name = tensor("input_427_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_189 = const()[name = tensor("weight_189"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66825344)))]; + tensor bias_187 = const()[name = tensor("bias_187"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66826176)))]; + tensor o_47_axes_0 = const()[name = tensor("o_47_axes_0"), val = tensor([-1])]; + tensor var_2654_to_fp16 = const()[name = tensor("op_2654_to_fp16"), val = tensor(0x1.5p-17)]; + tensor transpose_3 = transpose(perm = input_427_perm_0, x = x_47)[name = tensor("transpose_3")]; + tensor o_47_cast_fp16 = layer_norm(axes = o_47_axes_0, beta = bias_187, epsilon = var_2654_to_fp16, gamma = weight_189, x = transpose_3)[name = tensor("o_47_cast_fp16")]; + tensor input_429_perm_0 = const()[name = tensor("input_429_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2861 = const()[name = tensor("op_2861"), val = tensor([1, 1])]; + tensor var_2863 = const()[name = tensor("op_2863"), val = tensor([1, 1])]; + tensor var_2865_pad_type_0 = const()[name = tensor("op_2865_pad_type_0"), val = tensor("custom")]; + tensor var_2865_pad_0 = const()[name = tensor("op_2865_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor transpose_2 = transpose(perm = input_429_perm_0, x = o_47_cast_fp16)[name = tensor("transpose_2")]; + tensor var_2865 = conv(bias = layers_11_mlp_fc1_bias, dilations = var_2863, groups = var_2655, pad = var_2865_pad_0, pad_type = var_2865_pad_type_0, strides = var_2861, weight = layers_11_mlp_fc1_weight, x = transpose_2)[name = tensor("op_2865")]; + tensor input_431_mode_0 = const()[name = tensor("input_431_mode_0"), val = tensor("EXACT")]; + tensor input_431 = gelu(mode = input_431_mode_0, x = var_2865)[name = tensor("input_431")]; + tensor var_2869 = const()[name = tensor("op_2869"), val = tensor([1, 1])]; + tensor var_2871 = const()[name = tensor("op_2871"), val = tensor([1, 1])]; + tensor var_2873_pad_type_0 = const()[name = tensor("op_2873_pad_type_0"), val = tensor("custom")]; + tensor var_2873_pad_0 = const()[name = tensor("op_2873_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2873 = conv(bias = layers_11_mlp_fc2_bias, dilations = var_2871, groups = var_2655, pad = var_2873_pad_0, pad_type = var_2873_pad_type_0, strides = var_2869, weight = layers_11_mlp_fc2_weight, x = input_431)[name = tensor("op_2873")]; + tensor x = add(x = transpose_2, y = var_2873)[name = tensor("x")]; + tensor input_433_perm_0 = const()[name = tensor("input_433_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor weight_195 = const()[name = tensor("weight_195"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66827008)))]; + tensor bias_193 = const()[name = tensor("bias_193"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66827840)))]; + tensor o_axes_0 = const()[name = tensor("o_axes_0"), val = tensor([-1])]; + tensor transpose_1 = transpose(perm = input_433_perm_0, x = x)[name = tensor("transpose_1")]; + tensor o_cast_fp16 = layer_norm(axes = o_axes_0, beta = bias_193, epsilon = var_2654_to_fp16, gamma = weight_195, x = transpose_1)[name = tensor("o_cast_fp16")]; + tensor hidden_states_perm_0 = const()[name = tensor("hidden_states_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2897_begin_0 = const()[name = tensor("op_2897_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2897_end_0 = const()[name = tensor("op_2897_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_2897_end_mask_0 = const()[name = tensor("op_2897_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_2897_squeeze_mask_0 = const()[name = tensor("op_2897_squeeze_mask_0"), val = tensor([false, false, true, false])]; + tensor transpose_0 = transpose(perm = hidden_states_perm_0, x = o_cast_fp16)[name = tensor("transpose_0")]; + tensor var_2897 = slice_by_index(begin = var_2897_begin_0, end = var_2897_end_0, end_mask = var_2897_end_mask_0, squeeze_mask = var_2897_squeeze_mask_0, x = transpose_0)[name = tensor("op_2897")]; + tensor var_2900_begin_0 = const()[name = tensor("op_2900_begin_0"), val = tensor([0, 0, 0])]; + tensor var_2900_end_0 = const()[name = tensor("op_2900_end_0"), val = tensor([1, 384, 1])]; + tensor var_2900_end_mask_0 = const()[name = tensor("op_2900_end_mask_0"), val = tensor([true, true, false])]; + tensor var_2900_squeeze_mask_0 = const()[name = tensor("op_2900_squeeze_mask_0"), val = tensor([false, false, true])]; + tensor var_2900 = slice_by_index(begin = var_2900_begin_0, end = var_2900_end_0, end_mask = var_2900_end_mask_0, squeeze_mask = var_2900_squeeze_mask_0, x = var_2897)[name = tensor("op_2900")]; + tensor var_2908 = const()[name = tensor("op_2908"), val = tensor([1])]; + tensor var_2909 = const()[name = tensor("op_2909"), val = tensor(true)]; + tensor abs_0_cast_fp16 = abs(x = var_2900)[name = tensor("abs_0_cast_fp16")]; tensor const_120_promoted_to_fp16 = const()[name = tensor("const_120_promoted_to_fp16"), val = tensor(0x1p+1)]; tensor pow_0_cast_fp16 = pow(x = abs_0_cast_fp16, y = const_120_promoted_to_fp16)[name = tensor("pow_0_cast_fp16")]; - tensor reduce_sum_0_cast_fp16 = reduce_sum(axes = var_3059, keep_dims = var_3060, x = pow_0_cast_fp16)[name = tensor("reduce_sum_0_cast_fp16")]; - tensor var_3061_y_0_to_fp16 = const()[name = tensor("op_3061_y_0_to_fp16"), val = tensor(0x1p-1)]; - tensor var_3061_cast_fp16 = pow(x = reduce_sum_0_cast_fp16, y = var_3061_y_0_to_fp16)[name = tensor("op_3061_cast_fp16")]; - tensor var_3062_to_fp16 = const()[name = tensor("op_3062_to_fp16"), val = tensor(0x1p-24)]; - tensor var_3063_cast_fp16 = maximum(x = var_3061_cast_fp16, y = var_3062_to_fp16)[name = tensor("op_3063_cast_fp16")]; + tensor reduce_sum_0_cast_fp16 = reduce_sum(axes = var_2908, keep_dims = var_2909, x = pow_0_cast_fp16)[name = tensor("reduce_sum_0_cast_fp16")]; + tensor var_2910_y_0_to_fp16 = const()[name = tensor("op_2910_y_0_to_fp16"), val = tensor(0x1p-1)]; + tensor var_2910_cast_fp16 = pow(x = reduce_sum_0_cast_fp16, y = var_2910_y_0_to_fp16)[name = tensor("op_2910_cast_fp16")]; + tensor var_2911_to_fp16 = const()[name = tensor("op_2911_to_fp16"), val = tensor(0x1p-24)]; + tensor var_2912_cast_fp16 = maximum(x = var_2910_cast_fp16, y = var_2911_to_fp16)[name = tensor("op_2912_cast_fp16")]; tensor denom_reps_0 = const()[name = tensor("denom_reps_0"), val = tensor([1, 384])]; - tensor denom_cast_fp16 = tile(reps = denom_reps_0, x = var_3063_cast_fp16)[name = tensor("denom_cast_fp16")]; - tensor outputs = real_div(x = var_3051, y = denom_cast_fp16)[name = tensor("op_3065_cast_fp16")]; + tensor denom_cast_fp16 = tile(reps = denom_reps_0, x = var_2912_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor outputs = real_div(x = var_2900, y = denom_cast_fp16)[name = tensor("op_2914_cast_fp16")]; } -> (outputs); } \ No newline at end of file