gstaff committed on
Commit
b68774a
1 Parent(s): 28ea93b

Upload improved model weights.

Browse files
onnx/decoder_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:203088c58b43ed3d529666bd1e8b7abf84c8c4bc33a929b181cce280776b5bd2
3
  size 499275976
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04797f6f61f2ac95d7e53467d7b880005af0e7d6e23ac7af61c8966d387479a8
3
  size 499275976
onnx/decoder_model_merged.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33238cfce1785c347e0ac7134f2500fadfa7f5af6e537a7bb2a769d9e4217260
3
  size 500799139
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b064a9a5e6c51608025266be8ba9d6ccf0ba588b6476a2498f29164ddadc41b1
3
  size 500799139
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da6257be48d422fe7b0a71e2be74b1cec2f1d7637d9ec552c8f9e5ea4102de32
3
  size 128730727
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28ec91a265697bed4cdd4d560c83d6eafb7911ed51e7c3e3f6dc77395309884e
3
  size 128730727
onnx/decoder_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fc84ac6c0b3de482208b865e23c05f718d116587abeca70cee52bc607c78db8
3
  size 126971841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00d7b62491d00c106617e73bdb3568d8e864700a1b8d6668d6dbdf2b70ab5016
3
  size 126971841
onnx/decoder_with_past_model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bccf364af191d522910eac476249a8f386489fe79287e9b30de307420129275b
3
  size 499282782
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c44ea42f606fdcb678b4a8637c6c23e9e826c692f54f8b17d60d53b25459b2bc
3
  size 499282782
onnx/decoder_with_past_model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4a108612c823b5ad4c00257dc6937fe3beca7ef4494944761360e080f563ef9
3
  size 126980889
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7cffc5fce24929f862e04df1188478fa565dc03049ec61a2350b9d04f080398
3
  size 126980889
quantize_config.json CHANGED
@@ -4,92 +4,92 @@
4
  "per_model_config": {
5
  "decoder_model": {
6
  "op_types": [
7
- "Shape",
 
 
8
  "Pow",
9
  "Concat",
 
 
 
 
 
10
  "Div",
11
- "Add",
12
- "Cast",
13
- "Slice",
14
  "Sub",
15
- "Gather",
16
  "ConstantOfShape",
17
- "MatMul",
18
  "Reshape",
 
19
  "Where",
20
- "Unsqueeze",
21
- "Tanh",
22
- "Transpose",
23
- "Squeeze",
24
- "Softmax",
25
- "ReduceMean",
26
- "Constant",
27
- "Mul",
28
- "Split",
29
  "Sqrt",
30
- "Range",
31
- "Gemm"
 
 
 
32
  ],
33
  "weight_type": "QInt8"
34
  },
35
  "decoder_model_merged": {
36
  "op_types": [
37
- "Shape",
 
 
38
  "Pow",
39
- "If",
40
  "Concat",
 
 
 
 
 
 
41
  "Div",
42
- "Add",
43
- "Cast",
44
- "Slice",
45
  "Sub",
46
- "Gather",
47
  "ConstantOfShape",
48
- "MatMul",
49
  "Reshape",
 
50
  "Where",
51
- "Unsqueeze",
52
- "Tanh",
53
- "Transpose",
54
- "Squeeze",
55
- "Softmax",
56
- "ReduceMean",
57
- "Constant",
58
- "Mul",
59
- "Split",
60
  "Sqrt",
61
- "Range",
62
- "Gemm"
 
 
 
63
  ],
64
  "weight_type": "QInt8"
65
  },
66
  "decoder_with_past_model": {
67
  "op_types": [
68
- "Shape",
 
 
69
  "Pow",
70
  "Concat",
 
 
 
 
 
71
  "Div",
72
- "Add",
73
- "Cast",
74
- "Slice",
75
  "Sub",
76
- "Gather",
77
  "ConstantOfShape",
78
- "MatMul",
79
  "Reshape",
 
80
  "Where",
81
- "Unsqueeze",
82
- "Tanh",
83
- "Transpose",
84
- "Squeeze",
85
- "Softmax",
86
- "ReduceMean",
87
- "Constant",
88
- "Mul",
89
- "Split",
90
  "Sqrt",
91
- "Range",
92
- "Gemm"
 
 
 
93
  ],
94
  "weight_type": "QInt8"
95
  }
 
4
  "per_model_config": {
5
  "decoder_model": {
6
  "op_types": [
7
+ "Slice",
8
+ "Gemm",
9
+ "Split",
10
  "Pow",
11
  "Concat",
12
+ "Transpose",
13
+ "Gather",
14
+ "Tanh",
15
+ "Softmax",
16
+ "Mul",
17
  "Div",
18
+ "ReduceMean",
19
+ "Range",
 
20
  "Sub",
 
21
  "ConstantOfShape",
 
22
  "Reshape",
23
+ "MatMul",
24
  "Where",
25
+ "Cast",
 
 
 
 
 
 
 
 
26
  "Sqrt",
27
+ "Shape",
28
+ "Squeeze",
29
+ "Add",
30
+ "Unsqueeze",
31
+ "Constant"
32
  ],
33
  "weight_type": "QInt8"
34
  },
35
  "decoder_model_merged": {
36
  "op_types": [
37
+ "Slice",
38
+ "Gemm",
39
+ "Split",
40
  "Pow",
 
41
  "Concat",
42
+ "Transpose",
43
+ "Gather",
44
+ "Tanh",
45
+ "Softmax",
46
+ "If",
47
+ "Mul",
48
  "Div",
49
+ "ReduceMean",
50
+ "Range",
 
51
  "Sub",
 
52
  "ConstantOfShape",
 
53
  "Reshape",
54
+ "MatMul",
55
  "Where",
56
+ "Cast",
 
 
 
 
 
 
 
 
57
  "Sqrt",
58
+ "Shape",
59
+ "Squeeze",
60
+ "Add",
61
+ "Unsqueeze",
62
+ "Constant"
63
  ],
64
  "weight_type": "QInt8"
65
  },
66
  "decoder_with_past_model": {
67
  "op_types": [
68
+ "Slice",
69
+ "Gemm",
70
+ "Split",
71
  "Pow",
72
  "Concat",
73
+ "Transpose",
74
+ "Gather",
75
+ "Tanh",
76
+ "Softmax",
77
+ "Mul",
78
  "Div",
79
+ "ReduceMean",
80
+ "Range",
 
81
  "Sub",
 
82
  "ConstantOfShape",
 
83
  "Reshape",
84
+ "MatMul",
85
  "Where",
86
+ "Cast",
 
 
 
 
 
 
 
 
87
  "Sqrt",
88
+ "Shape",
89
+ "Squeeze",
90
+ "Add",
91
+ "Unsqueeze",
92
+ "Constant"
93
  ],
94
  "weight_type": "QInt8"
95
  }