Rotary Scaling Factor of 4 for 8k context (Do not merge)
Browse files
This is a revision that updates "rotary_scaling_factor" to 4.0, which corresponds to a sequence length of 8192 tokens.
This PR should not be merged; it is intended only for use with TEI (Text Embeddings Inference) by specifying the revision argument.
- config.json +1 -1
config.json
CHANGED
@@ -38,7 +38,7 @@
|
|
38 |
"rotary_emb_fraction": 1.0,
|
39 |
"rotary_emb_interleaved": false,
|
40 |
"rotary_emb_scale_base": null,
|
41 |
-
"rotary_scaling_factor":
|
42 |
"scale_attn_by_inverse_layer_idx": false,
|
43 |
"scale_attn_weights": true,
|
44 |
"summary_activation": null,
|
|
|
38 |
"rotary_emb_fraction": 1.0,
|
39 |
"rotary_emb_interleaved": false,
|
40 |
"rotary_emb_scale_base": null,
|
41 |
+
"rotary_scaling_factor": 4.0,
|
42 |
"scale_attn_by_inverse_layer_idx": false,
|
43 |
"scale_attn_weights": true,
|
44 |
"summary_activation": null,
|