olivernormand committed
Commit • fc42b35
Parent(s): 33ba877
Upload folder using huggingface_hub
- 1_Pooling/config.json +10 -0
- README.md +62 -0
- checkpoint-10119/1_Pooling/config.json +10 -0
- checkpoint-10119/README.md +1076 -0
- checkpoint-10119/config.json +58 -0
- checkpoint-10119/config_sentence_transformers.json +10 -0
- checkpoint-10119/model.safetensors +3 -0
- checkpoint-10119/modules.json +14 -0
- checkpoint-10119/optimizer.pt +3 -0
- checkpoint-10119/rng_state.pth +3 -0
- checkpoint-10119/scheduler.pt +3 -0
- checkpoint-10119/sentence_bert_config.json +4 -0
- checkpoint-10119/special_tokens_map.json +37 -0
- checkpoint-10119/tokenizer.json +0 -0
- checkpoint-10119/tokenizer_config.json +55 -0
- checkpoint-10119/trainer_state.json +2894 -0
- checkpoint-10119/training_args.bin +3 -0
- checkpoint-10119/vocab.txt +0 -0
- config.json +58 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- runs/Sep03_17-43-39_r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc/events.out.tfevents.1725385422.r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc.91.0 +2 -2
- runs/Sep03_17-43-39_r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc/events.out.tfevents.1725388175.r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc.91.1 +3 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +55 -0
- training_args.bin +3 -0
- training_params.json +33 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 768,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
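For orientation: this config enables mean pooling only (`pooling_mode_mean_tokens: true`), so a sentence embedding is the attention-mask-weighted average of the 768-dimensional token embeddings. A minimal PyTorch sketch of that operation (the `mean_pool` helper name is ours, not part of the repo):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    # token_embeddings: (batch, seq_len, 768); attention_mask: (batch, seq_len).
    # Zero out padding positions, sum over the sequence, then divide by the
    # number of real tokens so padding never dilutes the average.
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
    summed = (token_embeddings * mask).sum(dim=1)
    counts = mask.sum(dim=1).clamp(min=1e-9)  # avoid division by zero
    return summed / counts
```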
README.md
ADDED
@@ -0,0 +1,62 @@
---
library_name: sentence-transformers
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- autotrain
base_model: nomic-ai/nomic-embed-text-v1.5
widget:
- source_sentence: 'search_query: i love autotrain'
  sentences:
  - 'search_query: huggingface auto train'
  - 'search_query: hugging face auto train'
  - 'search_query: i love autotrain'
pipeline_tag: sentence-similarity
---

# Model Trained Using AutoTrain

- Problem type: Sentence Transformers

## Validation Metrics
loss: 0.0011504614958539605

runtime: 12.6072

samples_per_second: 112.396

steps_per_second: 7.059

epoch: 3.0

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the Hugging Face Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'search_query: autotrain',
    'search_query: auto train',
    'search_query: i love autotrain',
]
embeddings = model.encode(sentences)
print(embeddings.shape)

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
```
checkpoint-10119/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
{
    "word_embedding_dimension": 768,
    "pooling_mode_cls_token": false,
    "pooling_mode_mean_tokens": true,
    "pooling_mode_max_tokens": false,
    "pooling_mode_mean_sqrt_len_tokens": false,
    "pooling_mode_weightedmean_tokens": false,
    "pooling_mode_lasttoken": false,
    "include_prompt": true
}
checkpoint-10119/README.md
ADDED
@@ -0,0 +1,1076 @@
---
base_model: nomic-ai/nomic-embed-text-v1.5
datasets: []
language: []
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- sentence-similarity
- feature-extraction
- generated_from_trainer
- dataset_size:26984
- loss:MultipleNegativesRankingLoss
widget:
- source_sentence: 'search_query: Regulations for taking hair samples in Northern
    Ireland'
  sentences:
  - "search_document: 39) In Article 63 of the Police and Criminal Evidence (Northern\
    \ Ireland) Order 1989 (regulation of taking of non-intimate samples), at the end,\
    \ there shall be inserted the following paragraph—\n\tWhere a sample of hair other\
    \ than pubic hair is to be taken the sample may be taken either by cutting hairs\
    \ or by plucking hairs with their roots so long as no more are plucked than the\
    \ person taking the sample reasonably considers to be necessary (in point of quantity\
    \ or quality) for the purpose of enabling information to be produced by means\
    \ of analysis used or to be used in relation to the sample."
  - "search_document: ### Section 253\n### Declaration of general improvement area.\n\
    \t1) \n\t\tWhere a report with respect to a predominantly residential area within\
    \ their district is submitted to the local housing authority by a person appearing\
    \ to the authority to be suitably qualified (who may be an officer of the authority),\
    \ and it appears to the authority, upon consideration of the report and of any\
    \ other information in their possession—\n\t\tthe authority may cause the area\
    \ to be defined on a map and by resolution declare it to be a general improvement\
    \ area.\n\t\t\ta) that living conditions in the area can most appropriately be\
    \ improved by the improvement of the amenities of the area or of dwellings in\
    \ the area, or both, and\n\t\t\tb) that such an improvement may be effected or\
    \ assisted by the exercise of their powers under the provisions of this Part relating\
    \ to general improvement areas,\n\t2) A general improvement area may not be defined\
    \ so as to include, but may be defined so as to surround, land which is comprised\
    \ in a housing action area.\n\t3) \n\t\tA general improvement area may not (unless\
    \ the land has been cleared of buildings) be so defined as to include, but may\
    \ be so defined as to surround—\n\t\tand where the Secretary of State on confirming\
    \ a compulsory purchase order under Schedule 22 (acquisition of land for clearance)\
    \ modifies the order by excluding from a clearance area land adjoining a general\
    \ improvement area, the land shall, unless the Secretary of State otherwise directs,\
    \ be taken to be included in the general improvement area.\n\t\t\ta) land comprised\
    \ in a clearance area,\n\t\t\tb) land purchased by the local housing authority\
    \ under section 290(2) (land surrounded by or adjoining clearance area), or\n\t\
    \t\tc) land included in a clearance area under section 293(1) (local housing authority’s\
    \ own property);"
  - "search_document: 2) Regulations made by virtue of section 88(2)(e) of this Act\
    \ may make provision—\n\ta) for all matters relevant to the maintenance of a register\
    \ of submersible apparatus,\n\tb) without prejudice to sub-paragraph (a) above,\
    \ for the period for which any registration or exemption is to remain effective\
    \ without renewal, the alteration or cancellation in any prescribed circumstances\
    \ of registration or exemption or of any conditions attached thereto, the person\
    \ by whom and manner in which applications in connection with any registration\
    \ or exemption are to be made, and information and evidence to be furnished in\
    \ connection with any such application,\n\tc) for the marking or other means of\
    \ identification of any submersible apparatus,\n\td) for the issue of certificates\
    \ of registration or exemption, and the custody, surrender, production or display\
    \ of the certificates or copies of them,\n\te) for matters arising out of the\
    \ termination of any registration or exemption, or any conditions attached thereto."
- source_sentence: 'search_query: Regulations regarding the designation of special
    parking areas in London.'
  sentences:
  - 'search_document: 4) The enforcement authority may, if they consider that the
    penalty charge notice ought not to have been given, give the recipient a notice
    withdrawing the penalty charge notice.'
  - "search_document: ### Section 268\n### Reckoning of time spent pending appeal.\n\
    \t1) Subject to subsection (2) below, where None is admitted to bail under section\
    \ 238 of this Act the period beginning with the date of his admission to bail\
    \ and ending on the date of his readmission to prison in consequence of the determination\
    \ or abandonment of his appeal None None None shall not be reckoned as part of\
    \ any term of imprisonment under None sentence.\n\t2) The time (including any\
    \ period consequent on the recall of bail) during which a convicted person is\
    \ in custody pending the determination of his appeal, or as the case may be of\
    \ any None\n\t3) Subject to any direction which the High Court may give to the\
    \ contrary, imprisonment of an appellant None —\n\t\ta) who is in custody in consequence\
    \ of the conviction or sentence appealed against shall be deemed to run as from\
    \ the date on which the sentence was passed;\n\t\tb) who is in custody other than\
    \ in consequence of such conviction or sentence shall be deemed to run or to be\
    \ resumed as from the date on which his appeal was determined or abandoned;\n\t\
    \tc) who is not in custody shall be deemed to run or to be resumed as from the\
    \ date on which he is received into prison under the sentence.\n\t4) young offenders\
    \ institution or, as respects a child sentenced to be detained under section 206\
    \ of this Act, the place directed by the Secretary of State or, as respects such\
    \ a child, place directed by the Secretary of State"
  - "search_document: ### Section 76\n### Special parking areas.\n\t1) Where a London\
    \ authority apply to the Secretary of State for an order to be made under this\
    \ section, the Secretary of State may make an order designating the whole, or\
    \ any part, of that authority’s area as a special parking area.\n\tAn application\
    \ for an order under subsection (1) above may only be made—\n\t\tby Transport\
    \ for London, to the extent that the special parking area is to consist of GLA\
    \ roads or trunk roads; or\n\t\tby a London local authority, to the extent that\
    \ the special parking area is to consist of roads other than GLA roads and trunk\
    \ roads.\n\t2) Before making an order under this section, the Secretary of State\
    \ shall consult the relevant Commissioner or, if appropriate, both Commissioners.\n\
    \t3) While an order under this section is in force, the following provisions shall\
    \ cease to apply in relation to the special parking area designated by the order—\n\
    \t\ta) section 8 of the Road Traffic Regulation Act 1984 (contravention of, or\
    \ failure to comply with, an order under section 6 of that Act to be an offence),\
    \ so far as it relates to the contravention of, or failure to comply with, any\
    \ provision of such an order—\n\t\t\ti) prohibiting or restricting the waiting\
    \ of vehicles on any road; or\n\t\t\tii) relating to any of the matters mentioned\
    \ in paragraph 7 or 8 of Schedule 1 to that Act (conditions for loading or unloading,\
    \ or delivery or collecting);\n\t\tb) section 11 of the Act of 1984 (contravention\
    \ of, or failure to comply with, an experimental traffic order under section 9\
    \ of that Act to be an offence), so far as it relates to any contravention of,\
    \ or failure to comply with, any provision of such an experimental traffic order—\n\
    \t\t\ti) prohibiting or restricting the waiting of vehicles on any road; or\n\t\
    \t\tii) relating to any of the matters mentioned in paragraph 7 or 8 of Schedule\
    \ 1 to that Act (conditions for loading or unloading, or delivery or collecting);\n\
    \t\tc) section 16(1) of the Act of 1984 so far as it relates to the contravention\
    \ of any provision of an order or notice under section 14 of that Act—\n\t\t\t\
    i) prohibiting or restricting the waiting of vehicles on any road; or\n\t\t\t\
    ii) relating to any of the matters mentioned in paragraph 7 or 8 of Schedule 1\
    \ to that Act;\n\t\tsection 35A(1) (contravention of parking place orders) of\
    \ the Act of 1984 so far as it applies in relation to stationary vehicles;\n\t\
    \tsection 61(5) (prohibition of vehicles in loading areas) of the Act of 1984\
    \ so far as it applies in relation to stationary vehicles;.\n\t\td) section 15\
    \ of the Greater London Council (General Powers) Act 1974 (parking of vehicles\
    \ on verges, central reservations and footpaths etc. to be an offence);\n\t\t\
    e) section 19 of the Road Traffic Act 1988 (parking of heavy vehicles on verges,\
    \ central reservations and footpaths etc. to be an offence);\n\t\tf) section 21\
    \ of the Act of 1988 (prohibition of driving or parking on cycle tracks), so far\
    \ as it makes it an offence to park a motor vehicle wholly or partly on a cycle\
    \ track;\n\t\tsection 36(1) of the Act of 1988 (failure to comply with traffic\
    \ signs), so far as it makes it an offence to fail to comply with an indication\
    \ given by a traffic sign of a prohibition on causing a vehicle to stop on part\
    \ of a road in London demarcated by that sign as a stopping area for a bus.\n\t\
    4) The Secretary of State may by order amend subsection (3) above by adding further\
    \ provisions (but only in so far as they apply in relation to stationary vehicles).\n\
    \t5) Before making an order under subsection (4) above, the Secretary of State\
    \ shall consult—\n\t\ta) the two Commissioners; and\n\t\tb) such associations\
    \ of London authorities (if any) as he thinks appropriate."
- source_sentence: 'search_query: Financial adjustments between local authorities
    for adult care and support needs'
  sentences:
  - "search_document: ### Section 84\n### Determination of questions arising out of\
    \ section 83.\n\t1) \n\t\tA question as to whether—\n\t\tshall be decided—\n\t\
    \t\ti) if the parties so agree, by a single arbiter appointed by them; or\n\t\t\
    \tii) in default of such agreement by the sheriff.\n\t2) In determining any such\
    \ question as is mentioned in paragraph (a) of subsection (1) above, the arbiter\
    \ or sheriff shall have power to order that the requirement or restriction shall\
    \ have effect subject to such modifications, if any, as he may direct."
  - "search_document: ### Section 41\n### Financial adjustments between local authorities\n\
    \t1) This section applies where—\n\t\ta) a local authority has been meeting an\
    \ adult's needs for care and support, but\n\t\tb) it transpires (whether following\
    \ the determination of a dispute under section 40 or otherwise) that the adult\
    \ was, for some or all of the time that the authority has been meeting the adult's\
    \ needs, ordinarily resident in the area of another local authority.\n\t2) This\
    \ section also applies where—\n\t\ta) a local authority has been meeting a carer's\
    \ needs for support, but\n\t\tb) it transpires (whether following the determination\
    \ of a dispute under section 40 or otherwise) that the adult needing care was,\
    \ for some or all of the time that the authority has been meeting the carer's\
    \ needs, ordinarily resident in the area of another local authority.\n\t3) The\
    \ local authority concerned may recover from the other local authority the amount\
    \ of any payments it made towards meeting the needs in question at a time when\
    \ the other local authority was instead liable to meet them under section 18 or\
    \ 20(1) (as the case may be).\n\t4) Subsection (3) does not apply to payments\
    \ which are the subject of a deferred payment agreement entered into by the local\
    \ authority in question, unless it agrees with the other local authority to assign\
    \ its rights and obligations under the deferred payment agreement to that other\
    \ authority.\n\t5) Any period during which a local authority was meeting the needs\
    \ in question under section 19 or 20(6) is to be disregarded for the purposes\
    \ of this section."
  - "search_document: ### Section 64\n### Decision on applications for operators’\
    \ licences.\n\t1) Subject to section 69E of this ActOn an application for an operator’s\
    \ licence, the licensing authority shall in every case consider whether the requirements\
    \ mentioned in paragraphs (a) to (d) of subsection (2) of this section, and, if\
    \ the licensing authority in any case thinks fit, paragraph (e) of that subsection,\
    \ are satisfied, and in doing so shall have regard to any objection duly made\
    \ under section 63 of this Act.\n\t2) The said requirements are as follows—\n\t\
    \ta) that the applicant is a fit person to hold an operator’s licence, having\
    \ regard to the matters of which particulars may be required to be given under\
    \ section 62(4)(d) and (e) of this Act and to any conviction required to be notified\
    \ in accordance with section 62(4A) thereof;\n\t\tb) . . . . . . . . . . . . .\
    \ . . . . . . . . . . . . . . . . . . . \n\t\tc) that there will be satisfactory\
    \ arrangements for securing that Part VI of this Act (or, so long as those sections\
    \ remain in force, sections 73 and 186 of the Act of 1960) will be complied with\
    \ in the case of the authorised vehicles, and for securing that those vehicles\
    \ are not overloaded;\n\t\td) that there will be satisfactory facilities and arrangements\
    \ for maintaining the authorised vehicles in a fit and serviceable condition and\
    \ that the place which is to be the operating centre for those vehicles is suitable\
    \ for that purpose;\n\t\te) that the provision of such facilities and arrangements\
    \ as are mentioned in paragraph (d) of this subsection and of a suitable operating\
    \ centre will not be prejudiced by reason of the applicant’s having insufficient\
    \ financial resources for that purpose.\n\t3) If the licensing authority determines\
    \ that any requirement which he has taken into consideration in accordance with\
    \ subsection (1) of this section is not satisfied, he shall refuse the application\
    \ but, in any other case, he shall, subject to subsection (4) of this section\
    \ and section 69B of this Act, grant the application.\n\t4) In any case in which\
    \ the licensing authority grants an application for an operator’s licence, the\
    \ licensing authority may issue that licence in the terms applied for or, if the\
    \ authority thinks fit, subject to either or both of the following modifications\
    \ or limitations, that is to say—\n\t\ta) so that the licence is in respect of\
    \ motor vehicles other than those of which particulars were contained in the application,\
    \ or in respect of motor vehicles or trailers greater or less in number than,\
    \ or differing in type from, those for the use of which authorisation was applied\
    \ for;\n\t\tb) so that the licence does not permit the addition of authorised\
    \ vehicles under section 61(1)(c) of this Act.\n\t5) In exercising his functions\
    \ under this section in relation to the requirement mentioned in subsection (2)(e)\
    \ thereof, a licensing authority may be assisted by an assessor drawn from a panel\
    \ of persons appointed by the Minister for that purpose; and there shall be paid\
    \ by the licensing authority to any such assessor in respect of his services remuneration\
    \ on a scale prescribed by the Minister with the approval of the Treasury."
- source_sentence: 'search_query: Legal powers to establish a constitution for Zimbabwe'
  sentences:
  - "search_document: ### Section 72\n### Section 71: supplementary provisions.\n\t\
    1) \n\t\tSubject to subsection (2) below, where the members of the existing governing\
    \ body of a school to which section 71 of this Act applies include a person—\n\
    \t\tthe governing body may by notice in writing to that person terminate his term\
    \ of office on a date specified in the notice.\n\t\t\ta) who holds office as a\
    \ governor of an elected category, and\n\t\t\tb) whose term of office is due to\
    \ come to an end before the date of implementation of the proposals or at any\
    \ time within the period of six months beginning with that date,\n\t2) The governing\
    \ body may only terminate a person’s term of office under subsection (1) above\
    \ if—\n\t\ta) his term of office is due to come to an end after the proposed date\
    \ of publication of the proposals, or\n\t\tb) it would not in their view be reasonably\
    \ practicable, in the time available between the date on which his term of office\
    \ is due to come to an end and the proposed date of publication of the proposals,\
    \ to fill the vacancy by the procedure applicable under the Education (No. 2)\
    \ Act 1986.\n\t3) Without prejudice to section 8(2) of that Act (instrument of\
    \ government for county, controlled or maintained special school to provide for\
    \ four year term of office for governors other than ex officio governors), the\
    \ term of office of a person elected or appointed in accordance with the requirements\
    \ of that Act and any requirements of the instrument of government of the school\
    \ to fill a vacancy arising by virtue of subsection (1) above shall be four years.\n\
    \t4) Where any such election or appointment as is referred to in section 71(2)\
    \ of this Act is held or made on or after the date of publication of the proposals,\
    \ the existing governing body shall publish at such time and in such manner as\
    \ may be prescribed notice of the election or appointment."
  - "search_document: ### Section 90\n### Compensation for statements in listing particulars\
    \ or prospectus\n\t1) Any person responsible for listing particulars is liable\
    \ to pay compensation to a person who has—\n\t\ta) acquired securities to which\
    \ the particulars apply; and\n\t\tb) suffered loss in respect of them as a result\
    \ of—\n\t\t\ti) any untrue or misleading statement in the particulars; or\n\t\t\
    \tii) the omission from the particulars of any matter required to be included\
    \ by section 80 or 81.\n\t2) Subsection (1) is subject to exemptions provided\
    \ by Schedule 10.\n\t3) If listing particulars are required to include information\
    \ about the absence of a particular matter, the omission from the particulars\
    \ of that information is to be treated as a statement in the listing particulars\
    \ that there is no such matter.\n\t4) Any person who fails to comply with section\
    \ 81 is liable to pay compensation to any person who has—\n\t\ta) acquired securities\
    \ of the kind in question; and\n\t\tb) suffered loss in respect of them as a result\
    \ of the failure.\n\t5) Subsection (4) is subject to exemptions provided by Schedule\
    \ 10.\n\t6) This section does not affect any liability which may be incurred apart\
    \ from this section.\n\t7) References in this section to the acquisition by a\
    \ person of securities include references to his contracting to acquire them or\
    \ any interest in them.\n\t8) No person shall, by reason of being a promoter of\
    \ a company or otherwise, incur any liability for failing to disclose information\
    \ which he would not be required to disclose in listing particulars in respect\
    \ of a company’s securities—\n\t\ta) if he were responsible for those particulars;\
    \ or\n\t\tb) if he is responsible for them, which he is entitled to omit by virtue\
    \ of section 82.\n\t9) The reference in subsection (8) to a person incurring liability\
    \ includes a reference to any other person being entitled as against that person\
    \ to be granted any civil remedy or to rescind or repudiate an agreement.\n\t\
    10) “Listing particulars”, in subsection (1) and Schedule 10, includes supplementary\
    \ listing particulars.\n\t11) This section applies in relation to a prospectus\
    \ as it applies to listing particulars, with the following modifications—\n\t\t\
    a) references in this section or in Schedule 10 to listing particulars, supplementary\
    \ listing particulars or sections 80, 81 or 82 are to be read, respectively, as\
    \ references to a prospectus, supplementary prospectus and None ;\n\t\tb) references\
    \ in Schedule 10 to admission to the official list are to be read as references\
    \ to admission to trading on a regulated market;\n\t\tc) in relation to a prospectus,\
    \ “ ” means “transferable securities”.\n\tNone\n\t12) \n\t\tNone\n\t\tNone\n\
    \t\t\ta) None\n\t\t\tb) None None None"
  - "search_document: ### Section 1\n### Power to provide constitution for Zimbabwe.\n\
    \t1) Her Majesty may by Order in Council provide a constitution for Zimbabwe to\
    \ come into effect on the day (in this Act referred to as “the appointed day”)\
    \ on which, in accordance with such provision in that behalf as may after the\
    \ passing of this Act be made by Act of Parliament, Southern Rhodesia becomes\
    \ independent as a Republic under the name of Zimbabwe.\n\t2) Her Majesty may\
    \ by Order in Council revoke the Constitution of Southern Rhodesia 1961, and may\
    \ make such transitional provision as appears to Her Majesty to be necessary or\
    \ expedient in connection with the coming into effect of the new constitution\
    \ or the revocation of the said Constitution of 1961.\n\t3) Any Order in Council\
    \ under this section shall be laid before Parliament after being made.\n\t4) Subsection\
    \ (1) is without prejudice to any power conferred on Her Majesty by section 2."
- source_sentence: 'search_query: How is a poinding enforced on premises with existing
    poinding for the same debt?'
  sentences:
  - "search_document: ### Section 15\n### Orders and directions.\n\t1) Any order under\
    \ this Act shall be made by statutory instrument, and may be varied or revoked\
    \ by a subsequent order so made.\n\t2) A statutory instrument containing an order\
    \ made under section 1(1) or 6(3) above shall be subject to annulment in pursuance\
    \ of a resolution of either House of Parliament.\n\t3) It is hereby declared that\
    \ any direction given under this Act may be varied or revoked by a subsequent\
    \ direction so given."
  - "search_document: ### Section 70\n### Exemptions from section 69.\n\t1) Section\
    \ 69(1) of this Act shall not apply in relation to a vehicle if—\n\t\ta) a current\
    \ disabled person’s badge is displayed on the vehicle;\n\t\ta current recognised\
    \ badge (within the meaning given by section 21A of the Chronically Sick and Disabled\
    \ Persons Act 1970) is displayed on the vehicle;\n\t\tb) not more than 15 minutes\
    \ have elapsed since the end of any period for which the appropriate charge was\
    \ duly paid at the time of parking; or\n\t\tc) not more than 15 minutes have elapsed\
    \ since the end of any unexpired time (in respect of another vehicle) which is\
    \ available at the relevant parking meter at the time of parking.\n\t2) \n\t\t\
    In any case in which section 69(1) of this Act would apply to a vehicle but for\
    \ subsection (1)(a) above and the vehicle was not, at the time at which it was\
    \ parked, being used—\n\t\t\ta) in accordance with regulations under section 21\
    \ of the Chronically Sick and Disabled Persons Act 1970; and\n\t\t\tb) in circumstances\
    \ falling within section 117(1)(b) of theRoad Traffic Regulation Act 1984 (use\
    \ where a disabled person’s concession would be available),\n\t\tthe person in\
    \ charge of the vehicle at that time shall be guilty of an offence and liable\
    \ on summary conviction to a fine not exceeding level 3 on the standard scale.\n\
    \t\n\t\tIn any case in which section 69(1) of this Act would apply to a vehicle\
    \ but for subsection (1)(aa) above and the vehicle was not, at the time at which\
    \ it was parked, being used—\n\t\t\tin accordance with regulations under section\
    \ 21A of the Chronically Sick and Disabled Persons Act 1970, and\n\t\t\tin circumstances\
    \ falling within section 117(1A)(b) of the Road Traffic Regulation Act 1984 (use\
    \ where a disabled person’s concession would be available by virtue of displaying\
    \ a non-GB badge),\n\t\tthe person in charge of the vehicle at that time shall\
    \ be guilty of an offence and liable on summary conviction to a fine not exceeding\
    \ level 3 on the standard scale.\n\t3) In this section “disabled person’s badge”\
    \ has the same meaning as in section 142(1) of the Road Traffic Regulation Act\
    \ 1984, and “parking meter” has the same meaning as in section 46(2)(a) of that\
    \ Act."
  - 'search_document: 9) Subject to paragraph 7(2) above and paragraphs 12(2) and
    (6), 13(2), 21(4) and 22(5) below, where articles are poinded in any premises
    (whether or not the poinding is valid), another poinding in those premises to
    enforce the same debt shall not be competent except in relation to articles which
    have been brought on to the premises since the execution of the first poinding.'
---

# SentenceTransformer based on nomic-ai/nomic-embed-text-v1.5

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [nomic-ai/nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) <!-- at revision 679199c2575b5bfe93b06161d06cd7c16ebe4124 -->
- **Maximum Sequence Length:** 8192 tokens
- **Output Dimensionality:** 768 dimensions
- **Similarity Function:** Cosine Similarity
<!-- - **Training Dataset:** Unknown -->
<!-- - **Language:** Unknown -->
<!-- - **License:** Unknown -->

### Model Sources

- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)

### Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NomicBertModel
  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```
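The same two-module stack can be assembled by hand from the `models` API. A hedged sketch, assuming `trust_remote_code` is needed for NomicBERT's custom modeling code (as it is for the base model on the Hub):

```python
from sentence_transformers import SentenceTransformer, models

# Module 0: the NomicBERT encoder with the 8192-token window shown above.
transformer = models.Transformer(
    "nomic-ai/nomic-embed-text-v1.5",
    max_seq_length=8192,
    config_args={"trust_remote_code": True},
    model_args={"trust_remote_code": True},
)
# Module 1: mean pooling, mirroring 1_Pooling/config.json.
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),
    pooling_mode="mean",
)
model = SentenceTransformer(modules=[transformer, pooling])
```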

## Usage

### Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

```bash
pip install -U sentence-transformers
```

Then you can load this model and run inference.
```python
from sentence_transformers import SentenceTransformer

# Download from the 🤗 Hub
model = SentenceTransformer("sentence_transformers_model_id")
# Run inference
sentences = [
    'search_query: How is a poinding enforced on premises with existing poinding for the same debt?',
    'search_document: 9) Subject to paragraph 7(2) above and paragraphs 12(2) and (6), 13(2), 21(4) and 22(5) below, where articles are poinded in any premises (whether or not the poinding is valid), another poinding in those premises to enforce the same debt shall not be competent except in relation to articles which have been brought on to the premises since the execution of the first poinding.',
    'search_document: ### Section 15\n### Orders and directions.\n\t1) Any order under this Act shall be made by statutory instrument, and may be varied or revoked by a subsequent order so made.\n\t2) A statutory instrument containing an order made under section 1(1) or 6(3) above shall be subject to annulment in pursuance of a resolution of either House of Parliament.\n\t3) It is hereby declared that any direction given under this Act may be varied or revoked by a subsequent direction so given.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]

# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
```

<!--
### Direct Usage (Transformers)

<details><summary>Click to see the direct usage in Transformers</summary>

</details>
-->

<!--
### Downstream Usage (Sentence Transformers)

You can finetune this model on your own dataset.

<details><summary>Click to expand</summary>

</details>
-->

<!--
### Out-of-Scope Use

*List how the model may foreseeably be misused and address what users ought not to do with the model.*
-->

<!--
## Bias, Risks and Limitations

*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
-->

<!--
### Recommendations

*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
-->

## Training Details

### Training Dataset

#### Unnamed Dataset

* Size: 26,984 training samples
* Columns: <code>anchor</code> and <code>positive</code>
* Approximate statistics based on the first 1000 samples:
  |         | anchor                                                                             | positive                                                                              |
  |:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
  | type    | string                                                                             | string                                                                                |
  | details | <ul><li>min: 11 tokens</li><li>mean: 19.2 tokens</li><li>max: 40 tokens</li></ul> | <ul><li>min: 26 tokens</li><li>mean: 308.96 tokens</li><li>max: 994 tokens</li></ul> |
* Samples:
  | anchor | positive |
  |:-------|:---------|
  | <code>search_query: What powers do universities and colleges have regarding their land ownership?</code> | <code>search_document: ### Section 24<br>### Amendments of the Universities and College Estates Act 1925<br> 1) The Universities and College Estates Act 1925 is amended in accordance with subsections to (2)(6).<br> 2) After section 1 insert—<br> General power over land<br> 1A) General power over land<br> 1) A university or college has in relation to land belonging to the university or college all the powers of an absolute owner.<br> 2) The power conferred by subsection is subject to(1)—<br> a) any restriction, condition or limitation imposed by, or arising under, any enactment,<br> b) any rule of law or equity, or<br> c) the statutes regulating the university or college.<br> 3) Omit sections 2 to 38 and Schedule 1 (provisions relating to land and the application of capital money).<br> 4) In section 40 (power to transfer to university or college), omit “with the consent of the Minister”.<br> 5) In section 42 (saving of existing powers), omit from “: Provided that” to the end.<br> 6) In section 43 (definitions)—<br> a) in the opening words, omit from “unless” to “say”;<br> b) omit sub-paragraphs (i), (ii), (viii) and (x).<br> 7) Schedule 1 (which contains consequential amendments) has effect.</code> |
  | <code>search_query: What particulars must be registered for corporate directors and firms in a company's register of directors?</code> | <code>search_document: ### Section 164<br>### Particulars of directors to be registered: corporate directors and firms<br>A company's register of directors must contain the following particulars in the case of a body corporate, or a firm that is a legal person under the law by which it is governed—<br> a) corporate or firm name;<br> b) registered or principal office;<br> in the case of a limited company that is a UK-registered company, the registered number;<br> d) in any other case, particulars of—<br> i) the legal form of the company or firm and the law by which it is governed, and<br> ii) if applicable, the register in which it is entered (including details of the state) and its registration number in that register.</code> |
  | <code>search_query: Details on the continuation and effect of a criminal care order.</code> | <code>search_document: 36) <br> 1) This paragraph applies where, immediately before the commencement of section 90(2) there was in force an order (“a criminal care order") made—<br> a) under section 7(7)(a) of the Children and Young Persons Act 1969 (alteration in treatment of young offenders etc.); or<br> b) under section 15(1) of that Act, on discharging a supervision order made under section 7(7)(b) of that Act.<br> 2) The criminal care order shall continue to have effect until the end of the period of six months beginning with the day on which section 90(2) comes into force unless it is brought to an end earlier in accordance with—<br> a) the provisions of the Act of 1969 preserved by sub-paragraph (3)(a); or<br> b) this paragraph.<br> 3) <br> While the criminal care order remains in force, any relevant provisions—<br> a) of the Act of 1969; and<br> b) of the Child Care Act 1980,<br> shall continue to have effect with respect to it.<br> 4) <br> While the criminal care order remains in force, a court may, on the application of the appropriate person, make—<br> a) a residence order;<br> b) a care order or a supervision order under section 31;<br> c) an education supervision order under section 36 (regardless of subsection (6) of that section); or<br> d) an order falling within sub-paragraph (5),<br> and shall, on making any of those orders, discharge the criminal care order.<br> 5) The order mentioned in sub-paragraph (4)(d) is an order having effect as if it were a supervision order of a kind mentioned in section 12AA of the Act of 1969 (as inserted by paragraph 23 of Schedule 12), that is to say, a supervision order—<br> a) imposing a requirement that the child shall live for a specified period in local authority accommodation; but<br> b) in relation to which the conditions mentioned in subsection (6) of section 12AA are not required to be satisfied.<br> 6) The maximum period which may be specified in an order made under sub-paragraph (4)(d) is six months and such an order may stipulate that the child shall not live with a named person.<br> 7) . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .<br> 8) In sub-paragraph (4) “appropriate person” means—<br> a) in the case of an application for a residence order, any person (other than a local authority) who has the leave of the court;<br> b) in the case of an application for an education supervision order, a local education authority; and<br> c) in any other case, the local authority to whose care the child was committed by the order.</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```
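A sketch of how this loss is typically instantiated in sentence-transformers with the parameters above (the model id here is the base model, standing in for a local checkpoint path):

```python
from sentence_transformers import SentenceTransformer, losses, util

model = SentenceTransformer("nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True)
# In-batch negatives: for each (anchor, positive) pair, the other positives in
# the batch serve as negatives; scale=20.0 multiplies the cosine similarities
# before the softmax cross-entropy, matching the JSON above.
train_loss = losses.MultipleNegativesRankingLoss(
    model=model, scale=20.0, similarity_fct=util.cos_sim
)
```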

### Evaluation Dataset

#### Unnamed Dataset

* Size: 1,417 evaluation samples
* Columns: <code>anchor</code> and <code>positive</code>
* Approximate statistics based on the first 1000 samples:
  |         | anchor                                                                              | positive                                                                              |
  |:--------|:-------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
  | type    | string                                                                              | string                                                                                |
  | details | <ul><li>min: 12 tokens</li><li>mean: 19.41 tokens</li><li>max: 36 tokens</li></ul> | <ul><li>min: 23 tokens</li><li>mean: 295.19 tokens</li><li>max: 993 tokens</li></ul> |
* Samples:
  | anchor | positive |
  |:-------|:---------|
  | <code>search_query: Can an order made under this legislation be questioned in any legal proceedings?</code> | <code>search_document: 7) Subject to the provisions of paragraph 6 above, an order under section 1 of this Act shall not, either before or after it has been made, be questioned in any legal proceedings whatsoever, and shall become operative on the date on which notice is first published as mentioned in that paragraph.</code> |
  | <code>search_query: When did the Badgers Act 1991 come into force?</code> | <code>search_document: ### Section 6<br>### Citation and commencement.<br> 1) This Act may be cited as the Badgers Act 1991.<br> 2) This Act shall come into force at the end of the period of three months beginning with the day on which it is passed.</code> |
  | <code>search_query: Under what circumstances do intangible fixed assets not apply to legislation regarding non-commercial purposes?</code> | <code>search_document: ### Section 803<br>### Non-commercial purposes etc<br>This Part does not apply to an intangible fixed asset so far as it is held—<br> a) for a purpose that is not a business or other commercial purpose of the company, or<br> b) , otherwise than as a result of Chapter 3A of Part 2</code> |
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim"
  }
  ```

### Training Hyperparameters
#### Non-Default Hyperparameters

- `eval_strategy`: epoch
- `per_device_eval_batch_size`: 16
- `learning_rate`: 3e-05
- `warmup_ratio`: 0.1
- `fp16`: True
- `load_best_model_at_end`: True
- `ddp_find_unused_parameters`: False

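These non-default values map onto the trainer's arguments roughly as follows — a sketch only, assuming the sentence-transformers v3 training API, with `output_dir` as a placeholder and the train batch size taken from the full list below:

```python
from sentence_transformers import SentenceTransformerTrainingArguments

args = SentenceTransformerTrainingArguments(
    output_dir="autotrain-output",   # placeholder path
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=16,
    learning_rate=3e-5,
    warmup_ratio=0.1,                # 10% of steps spent warming up the LR
    fp16=True,
    eval_strategy="epoch",
    load_best_model_at_end=True,
    ddp_find_unused_parameters=False,
)
```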
490 |
+
#### All Hyperparameters

<details><summary>Click to expand</summary>

- `overwrite_output_dir`: False
- `do_predict`: False
- `eval_strategy`: epoch
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 8
- `per_device_eval_batch_size`: 16
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 3e-05
- `weight_decay`: 0.0
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999
- `adam_epsilon`: 1e-08
- `max_grad_norm`: 1.0
- `num_train_epochs`: 3
- `max_steps`: -1
- `lr_scheduler_type`: linear
- `lr_scheduler_kwargs`: {}
- `warmup_ratio`: 0.1
- `warmup_steps`: 0
- `log_level`: passive
- `log_level_replica`: warning
- `log_on_each_node`: True
- `logging_nan_inf_filter`: True
- `save_safetensors`: True
- `save_on_each_node`: False
- `save_only_model`: False
- `restore_callback_states_from_checkpoint`: False
- `no_cuda`: False
- `use_cpu`: False
- `use_mps_device`: False
- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `use_ipex`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1
- `half_precision_backend`: auto
- `bf16_full_eval`: False
- `fp16_full_eval`: False
- `tf32`: None
- `local_rank`: 0
- `ddp_backend`: None
- `tpu_num_cores`: None
- `tpu_metrics_debug`: False
- `debug`: []
- `dataloader_drop_last`: False
- `dataloader_num_workers`: 0
- `dataloader_prefetch_factor`: None
- `past_index`: -1
- `disable_tqdm`: False
- `remove_unused_columns`: True
- `label_names`: None
- `load_best_model_at_end`: True
- `ignore_data_skip`: False
- `fsdp`: []
- `fsdp_min_num_params`: 0
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch
- `optim_args`: None
- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: False
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False
- `dataloader_pin_memory`: True
- `dataloader_persistent_workers`: False
- `skip_memory_metrics`: True
- `use_legacy_prediction_loop`: False
- `push_to_hub`: False
- `resume_from_checkpoint`: None
- `hub_model_id`: None
- `hub_strategy`: every_save
- `hub_private_repo`: False
- `hub_always_push`: False
- `gradient_checkpointing`: False
- `gradient_checkpointing_kwargs`: None
- `include_inputs_for_metrics`: False
- `eval_do_concat_batches`: True
- `fp16_backend`: auto
- `push_to_hub_model_id`: None
- `push_to_hub_organization`: None
- `mp_parameters`: 
- `auto_find_batch_size`: False
- `full_determinism`: False
- `torchdynamo`: None
- `ray_scope`: last
- `ddp_timeout`: 1800
- `torch_compile`: False
- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `dispatch_batches`: None
- `split_batches`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: False
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False
- `eval_on_start`: False
- `eval_use_gather_object`: False
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional

</details>
### Training Logs

<details><summary>Click to expand</summary>

| Epoch | Step | Training Loss | Validation Loss |
|:------:|:-----:|:-------------:|:------:|
| 0.0074 | 25 | 0.07 | - |
| 0.0148 | 50 | 0.0304 | - |
| 0.0222 | 75 | 0.0184 | - |
| 0.0296 | 100 | 0.0019 | - |
| 0.0371 | 125 | 0.0103 | - |
| 0.0445 | 150 | 0.002 | - |
| 0.0519 | 175 | 0.0017 | - |
| 0.0593 | 200 | 0.0136 | - |
| 0.0667 | 225 | 0.0008 | - |
| 0.0741 | 250 | 0.0038 | - |
| 0.0815 | 275 | 0.0005 | - |
| 0.0889 | 300 | 0.0002 | - |
| 0.0964 | 325 | 0.0014 | - |
| 0.1038 | 350 | 0.0004 | - |
| 0.1112 | 375 | 0.0002 | - |
| 0.1186 | 400 | 0.0004 | - |
| 0.1260 | 425 | 0.0012 | - |
| 0.1334 | 450 | 0.0017 | - |
| 0.1408 | 475 | 0.0002 | - |
| 0.1482 | 500 | 0.0006 | - |
| 0.1556 | 525 | 0.0003 | - |
| 0.1631 | 550 | 0.0075 | - |
| 0.1705 | 575 | 0.0006 | - |
| 0.1779 | 600 | 0.0002 | - |
| 0.1853 | 625 | 0.0019 | - |
| 0.1927 | 650 | 0.0088 | - |
| 0.2001 | 675 | 0.0023 | - |
| 0.2075 | 700 | 0.0005 | - |
| 0.2149 | 725 | 0.0005 | - |
| 0.2224 | 750 | 0.0002 | - |
| 0.2298 | 775 | 0.003 | - |
| 0.2372 | 800 | 0.0006 | - |
| 0.2446 | 825 | 0.001 | - |
| 0.2520 | 850 | 0.0012 | - |
| 0.2594 | 875 | 0.001 | - |
| 0.2668 | 900 | 0.0004 | - |
| 0.2742 | 925 | 0.0016 | - |
| 0.2816 | 950 | 0.0002 | - |
| 0.2891 | 975 | 0.0008 | - |
| 0.2965 | 1000 | 0.0008 | - |
| 0.3039 | 1025 | 0.0016 | - |
| 0.3113 | 1050 | 0.0007 | - |
| 0.3187 | 1075 | 0.0206 | - |
| 0.3261 | 1100 | 0.0009 | - |
| 0.3335 | 1125 | 0.0007 | - |
| 0.3409 | 1150 | 0.0057 | - |
| 0.3484 | 1175 | 0.0004 | - |
| 0.3558 | 1200 | 0.0003 | - |
| 0.3632 | 1225 | 0.0022 | - |
| 0.3706 | 1250 | 0.0022 | - |
| 0.3780 | 1275 | 0.0014 | - |
| 0.3854 | 1300 | 0.0017 | - |
| 0.3928 | 1325 | 0.0048 | - |
| 0.4002 | 1350 | 0.0068 | - |
| 0.4076 | 1375 | 0.0012 | - |
| 0.4151 | 1400 | 0.0095 | - |
| 0.4225 | 1425 | 0.0014 | - |
| 0.4299 | 1450 | 0.0008 | - |
| 0.4373 | 1475 | 0.0091 | - |
| 0.4447 | 1500 | 0.002 | - |
| 0.4521 | 1525 | 0.0033 | - |
| 0.4595 | 1550 | 0.0027 | - |
| 0.4669 | 1575 | 0.0016 | - |
| 0.4744 | 1600 | 0.0003 | - |
| 0.4818 | 1625 | 0.0004 | - |
| 0.4892 | 1650 | 0.0045 | - |
| 0.4966 | 1675 | 0.0008 | - |
| 0.5040 | 1700 | 0.0009 | - |
| 0.5114 | 1725 | 0.0015 | - |
| 0.5188 | 1750 | 0.0149 | - |
| 0.5262 | 1775 | 0.0154 | - |
| 0.5336 | 1800 | 0.0036 | - |
| 0.5411 | 1825 | 0.0028 | - |
| 0.5485 | 1850 | 0.0033 | - |
| 0.5559 | 1875 | 0.0086 | - |
| 0.5633 | 1900 | 0.0124 | - |
| 0.5707 | 1925 | 0.0005 | - |
| 0.5781 | 1950 | 0.0037 | - |
| 0.5855 | 1975 | 0.0052 | - |
| 0.5929 | 2000 | 0.004 | - |
| 0.6004 | 2025 | 0.0041 | - |
| 0.6078 | 2050 | 0.0006 | - |
| 0.6152 | 2075 | 0.0035 | - |
| 0.6226 | 2100 | 0.0079 | - |
| 0.6300 | 2125 | 0.0012 | - |
| 0.6374 | 2150 | 0.0012 | - |
| 0.6448 | 2175 | 0.0035 | - |
| 0.6522 | 2200 | 0.0027 | - |
| 0.6597 | 2225 | 0.0015 | - |
| 0.6671 | 2250 | 0.0004 | - |
| 0.6745 | 2275 | 0.001 | - |
| 0.6819 | 2300 | 0.0002 | - |
| 0.6893 | 2325 | 0.0009 | - |
| 0.6967 | 2350 | 0.0019 | - |
| 0.7041 | 2375 | 0.0005 | - |
| 0.7115 | 2400 | 0.0008 | - |
| 0.7189 | 2425 | 0.0012 | - |
| 0.7264 | 2450 | 0.0036 | - |
| 0.7338 | 2475 | 0.0007 | - |
| 0.7412 | 2500 | 0.0036 | - |
| 0.7486 | 2525 | 0.0007 | - |
| 0.7560 | 2550 | 0.0008 | - |
| 0.7634 | 2575 | 0.0008 | - |
| 0.7708 | 2600 | 0.0016 | - |
| 0.7782 | 2625 | 0.008 | - |
| 0.7857 | 2650 | 0.0058 | - |
| 0.7931 | 2675 | 0.0003 | - |
| 0.8005 | 2700 | 0.0005 | - |
| 0.8079 | 2725 | 0.0101 | - |
| 0.8153 | 2750 | 0.0025 | - |
| 0.8227 | 2775 | 0.0041 | - |
| 0.8301 | 2800 | 0.0113 | - |
| 0.8375 | 2825 | 0.0219 | - |
| 0.8449 | 2850 | 0.0004 | - |
| 0.8524 | 2875 | 0.0062 | - |
| 0.8598 | 2900 | 0.0097 | - |
| 0.8672 | 2925 | 0.002 | - |
| 0.8746 | 2950 | 0.0093 | - |
| 0.8820 | 2975 | 0.0046 | - |
| 0.8894 | 3000 | 0.0009 | - |
| 0.8968 | 3025 | 0.0014 | - |
| 0.9042 | 3050 | 0.0021 | - |
| 0.9117 | 3075 | 0.0009 | - |
| 0.9191 | 3100 | 0.0082 | - |
| 0.9265 | 3125 | 0.0016 | - |
| 0.9339 | 3150 | 0.0023 | - |
| 0.9413 | 3175 | 0.0019 | - |
| 0.9487 | 3200 | 0.002 | - |
| 0.9561 | 3225 | 0.0009 | - |
| 0.9635 | 3250 | 0.0111 | - |
| 0.9709 | 3275 | 0.0137 | - |
| 0.9784 | 3300 | 0.0038 | - |
| 0.9858 | 3325 | 0.0061 | - |
| 0.9932 | 3350 | 0.0045 | - |
| 1.0 | 3373 | - | 0.0038 |
| 1.0006 | 3375 | 0.0004 | - |
| 1.0080 | 3400 | 0.0104 | - |
| 1.0154 | 3425 | 0.0033 | - |
| 1.0228 | 3450 | 0.0004 | - |
| 1.0302 | 3475 | 0.0065 | - |
| 1.0377 | 3500 | 0.004 | - |
| 1.0451 | 3525 | 0.0003 | - |
| 1.0525 | 3550 | 0.0004 | - |
| 1.0599 | 3575 | 0.0006 | - |
| 1.0673 | 3600 | 0.0018 | - |
| 1.0747 | 3625 | 0.0246 | - |
| 1.0821 | 3650 | 0.007 | - |
| 1.0895 | 3675 | 0.0002 | - |
| 1.0969 | 3700 | 0.0005 | - |
| 1.1044 | 3725 | 0.0097 | - |
| 1.1118 | 3750 | 0.0011 | - |
| 1.1192 | 3775 | 0.0005 | - |
| 1.1266 | 3800 | 0.0015 | - |
| 1.1340 | 3825 | 0.0009 | - |
| 1.1414 | 3850 | 0.0002 | - |
| 1.1488 | 3875 | 0.0002 | - |
| 1.1562 | 3900 | 0.0021 | - |
| 1.1637 | 3925 | 0.0003 | - |
| 1.1711 | 3950 | 0.0006 | - |
| 1.1785 | 3975 | 0.0019 | - |
| 1.1859 | 4000 | 0.0013 | - |
| 1.1933 | 4025 | 0.002 | - |
| 1.2007 | 4050 | 0.0005 | - |
| 1.2081 | 4075 | 0.0037 | - |
| 1.2155 | 4100 | 0.0013 | - |
| 1.2229 | 4125 | 0.0006 | - |
| 1.2304 | 4150 | 0.0037 | - |
| 1.2378 | 4175 | 0.0018 | - |
| 1.2452 | 4200 | 0.0005 | - |
| 1.2526 | 4225 | 0.0003 | - |
| 1.2600 | 4250 | 0.0003 | - |
| 1.2674 | 4275 | 0.0003 | - |
| 1.2748 | 4300 | 0.0004 | - |
| 1.2822 | 4325 | 0.0029 | - |
| 1.2897 | 4350 | 0.0021 | - |
| 1.2971 | 4375 | 0.0004 | - |
| 1.3045 | 4400 | 0.0008 | - |
| 1.3119 | 4425 | 0.0002 | - |
| 1.3193 | 4450 | 0.0031 | - |
| 1.3267 | 4475 | 0.0005 | - |
| 1.3341 | 4500 | 0.0006 | - |
| 1.3415 | 4525 | 0.0005 | - |
| 1.3489 | 4550 | 0.0012 | - |
| 1.3564 | 4575 | 0.0009 | - |
| 1.3638 | 4600 | 0.0006 | - |
| 1.3712 | 4625 | 0.0237 | - |
| 1.3786 | 4650 | 0.0004 | - |
| 1.3860 | 4675 | 0.0001 | - |
| 1.3934 | 4700 | 0.0005 | - |
| 1.4008 | 4725 | 0.0003 | - |
| 1.4082 | 4750 | 0.0059 | - |
| 1.4157 | 4775 | 0.0005 | - |
| 1.4231 | 4800 | 0.0014 | - |
| 1.4305 | 4825 | 0.0001 | - |
| 1.4379 | 4850 | 0.0003 | - |
| 1.4453 | 4875 | 0.0003 | - |
| 1.4527 | 4900 | 0.0062 | - |
| 1.4601 | 4925 | 0.0002 | - |
| 1.4675 | 4950 | 0.0004 | - |
| 1.4749 | 4975 | 0.0001 | - |
| 1.4824 | 5000 | 0.0003 | - |
| 1.4898 | 5025 | 0.0003 | - |
| 1.4972 | 5050 | 0.0004 | - |
| 1.5046 | 5075 | 0.0003 | - |
| 1.5120 | 5100 | 0.0001 | - |
| 1.5194 | 5125 | 0.0031 | - |
| 1.5268 | 5150 | 0.0002 | - |
| 1.5342 | 5175 | 0.0006 | - |
| 1.5417 | 5200 | 0.0001 | - |
| 1.5491 | 5225 | 0.0017 | - |
| 1.5565 | 5250 | 0.0163 | - |
| 1.5639 | 5275 | 0.0049 | - |
| 1.5713 | 5300 | 0.0029 | - |
| 1.5787 | 5325 | 0.0035 | - |
| 1.5861 | 5350 | 0.007 | - |
| 1.5935 | 5375 | 0.0006 | - |
| 1.6009 | 5400 | 0.0003 | - |
| 1.6084 | 5425 | 0.0004 | - |
| 1.6158 | 5450 | 0.0067 | - |
| 1.6232 | 5475 | 0.0112 | - |
| 1.6306 | 5500 | 0.0002 | - |
| 1.6380 | 5525 | 0.0005 | - |
| 1.6454 | 5550 | 0.0029 | - |
| 1.6528 | 5575 | 0.0006 | - |
| 1.6602 | 5600 | 0.0012 | - |
| 1.6677 | 5625 | 0.0002 | - |
| 1.6751 | 5650 | 0.0005 | - |
| 1.6825 | 5675 | 0.0002 | - |
| 1.6899 | 5700 | 0.0002 | - |
| 1.6973 | 5725 | 0.0003 | - |
| 1.7047 | 5750 | 0.0018 | - |
| 1.7121 | 5775 | 0.0027 | - |
| 1.7195 | 5800 | 0.0002 | - |
| 1.7269 | 5825 | 0.0001 | - |
| 1.7344 | 5850 | 0.0007 | - |
| 1.7418 | 5875 | 0.0047 | - |
| 1.7492 | 5900 | 0.0001 | - |
| 1.7566 | 5925 | 0.0002 | - |
| 1.7640 | 5950 | 0.0007 | - |
| 1.7714 | 5975 | 0.0039 | - |
| 1.7788 | 6000 | 0.0001 | - |
| 1.7862 | 6025 | 0.0001 | - |
| 1.7937 | 6050 | 0.0002 | - |
| 1.8011 | 6075 | 0.0001 | - |
| 1.8085 | 6100 | 0.0016 | - |
| 1.8159 | 6125 | 0.0004 | - |
| 1.8233 | 6150 | 0.0003 | - |
| 1.8307 | 6175 | 0.0005 | - |
| 1.8381 | 6200 | 0.0005 | - |
| 1.8455 | 6225 | 0.0004 | - |
| 1.8529 | 6250 | 0.0036 | - |
| 1.8604 | 6275 | 0.0002 | - |
| 1.8678 | 6300 | 0.0006 | - |
| 1.8752 | 6325 | 0.0001 | - |
| 1.8826 | 6350 | 0.0022 | - |
| 1.8900 | 6375 | 0.0001 | - |
| 1.8974 | 6400 | 0.0003 | - |
| 1.9048 | 6425 | 0.0001 | - |
| 1.9122 | 6450 | 0.001 | - |
| 1.9197 | 6475 | 0.0005 | - |
| 1.9271 | 6500 | 0.0002 | - |
| 1.9345 | 6525 | 0.0049 | - |
| 1.9419 | 6550 | 0.0002 | - |
| 1.9493 | 6575 | 0.0009 | - |
| 1.9567 | 6600 | 0.0145 | - |
| 1.9641 | 6625 | 0.0004 | - |
| 1.9715 | 6650 | 0.0009 | - |
| 1.9790 | 6675 | 0.0003 | - |
| 1.9864 | 6700 | 0.0053 | - |
| 1.9938 | 6725 | 0.0002 | - |
| 2.0 | 6746 | - | 0.0012 |
| 2.0012 | 6750 | 0.0002 | - |
| 2.0086 | 6775 | 0.0018 | - |
| 2.0160 | 6800 | 0.0001 | - |
| 2.0234 | 6825 | 0.0084 | - |
| 2.0308 | 6850 | 0.0004 | - |
| 2.0382 | 6875 | 0.0001 | - |
| 2.0457 | 6900 | 0.0001 | - |
| 2.0531 | 6925 | 0.0023 | - |
| 2.0605 | 6950 | 0.0006 | - |
| 2.0679 | 6975 | 0.0002 | - |
| 2.0753 | 7000 | 0.0025 | - |
| 2.0827 | 7025 | 0.0001 | - |
| 2.0901 | 7050 | 0.0003 | - |
| 2.0975 | 7075 | 0.0003 | - |
| 2.1050 | 7100 | 0.0004 | - |
| 2.1124 | 7125 | 0.0002 | - |
| 2.1198 | 7150 | 0.0137 | - |
| 2.1272 | 7175 | 0.0002 | - |
| 2.1346 | 7200 | 0.0001 | - |
| 2.1420 | 7225 | 0.0002 | - |
| 2.1494 | 7250 | 0.0002 | - |
| 2.1568 | 7275 | 0.0002 | - |
| 2.1642 | 7300 | 0.0002 | - |
| 2.1717 | 7325 | 0.0001 | - |
| 2.1791 | 7350 | 0.0003 | - |
| 2.1865 | 7375 | 0.0002 | - |
| 2.1939 | 7400 | 0.0002 | - |
| 2.2013 | 7425 | 0.0003 | - |
| 2.2087 | 7450 | 0.0109 | - |
| 2.2161 | 7475 | 0.0003 | - |
| 2.2235 | 7500 | 0.0006 | - |
| 2.2310 | 7525 | 0.0009 | - |
| 2.2384 | 7550 | 0.0004 | - |
| 2.2458 | 7575 | 0.0004 | - |
| 2.2532 | 7600 | 0.0002 | - |
| 2.2606 | 7625 | 0.0003 | - |
| 2.2680 | 7650 | 0.0001 | - |
| 2.2754 | 7675 | 0.0002 | - |
| 2.2828 | 7700 | 0.0 | - |
| 2.2902 | 7725 | 0.0006 | - |
| 2.2977 | 7750 | 0.0001 | - |
| 2.3051 | 7775 | 0.0001 | - |
| 2.3125 | 7800 | 0.0105 | - |
| 2.3199 | 7825 | 0.0001 | - |
| 2.3273 | 7850 | 0.0001 | - |
| 2.3347 | 7875 | 0.0001 | - |
| 2.3421 | 7900 | 0.0005 | - |
| 2.3495 | 7925 | 0.0023 | - |
| 2.3570 | 7950 | 0.0 | - |
| 2.3644 | 7975 | 0.0002 | - |
| 2.3718 | 8000 | 0.0003 | - |
| 2.3792 | 8025 | 0.0001 | - |
| 2.3866 | 8050 | 0.0001 | - |
| 2.3940 | 8075 | 0.0001 | - |
| 2.4014 | 8100 | 0.0001 | - |
| 2.4088 | 8125 | 0.0 | - |
| 2.4162 | 8150 | 0.0002 | - |
| 2.4237 | 8175 | 0.0001 | - |
| 2.4311 | 8200 | 0.0001 | - |
| 2.4385 | 8225 | 0.0001 | - |
| 2.4459 | 8250 | 0.0001 | - |
| 2.4533 | 8275 | 0.0001 | - |
| 2.4607 | 8300 | 0.001 | - |
| 2.4681 | 8325 | 0.0003 | - |
| 2.4755 | 8350 | 0.0003 | - |
| 2.4830 | 8375 | 0.0001 | - |
| 2.4904 | 8400 | 0.0009 | - |
| 2.4978 | 8425 | 0.0011 | - |
| 2.5052 | 8450 | 0.0005 | - |
| 2.5126 | 8475 | 0.0024 | - |
| 2.5200 | 8500 | 0.0002 | - |
| 2.5274 | 8525 | 0.0006 | - |
| 2.5348 | 8550 | 0.0001 | - |
| 2.5422 | 8575 | 0.0001 | - |
| 2.5497 | 8600 | 0.0003 | - |
| 2.5571 | 8625 | 0.0007 | - |
| 2.5645 | 8650 | 0.0009 | - |
| 2.5719 | 8675 | 0.0002 | - |
| 2.5793 | 8700 | 0.0001 | - |
| 2.5867 | 8725 | 0.0006 | - |
| 2.5941 | 8750 | 0.0 | - |
| 2.6015 | 8775 | 0.0002 | - |
| 2.6090 | 8800 | 0.0 | - |
| 2.6164 | 8825 | 0.0004 | - |
| 2.6238 | 8850 | 0.0001 | - |
| 2.6312 | 8875 | 0.0005 | - |
| 2.6386 | 8900 | 0.0002 | - |
| 2.6460 | 8925 | 0.0001 | - |
| 2.6534 | 8950 | 0.0001 | - |
| 2.6608 | 8975 | 0.0001 | - |
| 2.6682 | 9000 | 0.0 | - |
| 2.6757 | 9025 | 0.0004 | - |
| 2.6831 | 9050 | 0.0002 | - |
| 2.6905 | 9075 | 0.0001 | - |
| 2.6979 | 9100 | 0.0001 | - |
| 2.7053 | 9125 | 0.0 | - |
| 2.7127 | 9150 | 0.0002 | - |
| 2.7201 | 9175 | 0.0014 | - |
| 2.7275 | 9200 | 0.0003 | - |
| 2.7350 | 9225 | 0.0006 | - |
| 2.7424 | 9250 | 0.0002 | - |
| 2.7498 | 9275 | 0.001 | - |
| 2.7572 | 9300 | 0.0002 | - |
| 2.7646 | 9325 | 0.0002 | - |
| 2.7720 | 9350 | 0.0021 | - |
| 2.7794 | 9375 | 0.0001 | - |
| 2.7868 | 9400 | 0.0033 | - |
| 2.7942 | 9425 | 0.0 | - |
| 2.8017 | 9450 | 0.0005 | - |
| 2.8091 | 9475 | 0.0002 | - |
| 2.8165 | 9500 | 0.0001 | - |
| 2.8239 | 9525 | 0.0002 | - |
| 2.8313 | 9550 | 0.0006 | - |
| 2.8387 | 9575 | 0.0002 | - |
| 2.8461 | 9600 | 0.0001 | - |
| 2.8535 | 9625 | 0.0 | - |
| 2.8610 | 9650 | 0.0001 | - |
| 2.8684 | 9675 | 0.0003 | - |
| 2.8758 | 9700 | 0.0011 | - |
| 2.8832 | 9725 | 0.0024 | - |
| 2.8906 | 9750 | 0.0002 | - |
| 2.8980 | 9775 | 0.0001 | - |
| 2.9054 | 9800 | 0.0001 | - |
| 2.9128 | 9825 | 0.0002 | - |
| 2.9202 | 9850 | 0.0001 | - |
| 2.9277 | 9875 | 0.0013 | - |
| 2.9351 | 9900 | 0.0 | - |
| 2.9425 | 9925 | 0.0002 | - |
| 2.9499 | 9950 | 0.0 | - |
| 2.9573 | 9975 | 0.0 | - |
| 2.9647 | 10000 | 0.0002 | - |
| 2.9721 | 10025 | 0.0001 | - |
| 2.9795 | 10050 | 0.0001 | - |
| 2.9870 | 10075 | 0.0 | - |
| 2.9944 | 10100 | 0.0002 | - |
| 3.0 | 10119 | - | 0.0012 |

</details>
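
The rows above are also stored programmatically in `trainer_state.json` inside each checkpoint, alongside gradient norms and learning rates. A minimal sketch for inspecting that file, assuming a locally downloaded checkpoint directory:

```python
import json

# Illustrative path; point it at a downloaded checkpoint directory.
with open("checkpoint-10119/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])  # 0.0011504614958539605
for entry in state["log_history"]:
    # Training entries carry "loss"; epoch-end evaluation entries
    # typically carry "eval_loss" instead.
    print(entry["step"], entry.get("loss", entry.get("eval_loss")))
```

The logged learning rates follow the linear schedule with a 10% warmup: with 10119 total steps, warmup lasts roughly 1012 steps, so the first logged value at step 25 is about 25/1012 × 3e-05 ≈ 7.41e-07, after which the rate decays linearly toward zero.
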
### Framework Versions

- Python: 3.10.14
- Sentence Transformers: 3.0.1
- Transformers: 4.44.1
- PyTorch: 2.3.0
- Accelerate: 0.33.0
- Datasets: 2.19.1
- Tokenizers: 0.19.1
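
To match this environment when loading or further training the model, the versions above can be pinned; a minimal sketch (versions taken directly from the list above):

```bash
pip install sentence-transformers==3.0.1 transformers==4.44.1 \
    torch==2.3.0 accelerate==0.33.0 datasets==2.19.1 tokenizers==0.19.1
```
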
## Citation

### BibTeX

#### Sentence Transformers
```bibtex
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
```

#### MultipleNegativesRankingLoss
```bibtex
@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}
```

<!--
## Glossary

*Clearly define terms in order to be accessible across audiences.*
-->

<!--
## Model Card Authors

*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
-->

<!--
## Model Card Contact

*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
-->

checkpoint-10119/config.json
ADDED
@@ -0,0 +1,58 @@
{
  "_name_or_path": "nomic-ai/nomic-embed-text-v1.5",
  "activation_function": "swiglu",
  "architectures": [
    "NomicBertModel"
  ],
  "attn_pdrop": 0.0,
  "auto_map": {
    "AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
    "AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
    "AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
  },
  "bos_token_id": null,
  "causal": false,
  "dense_seq_output": true,
  "embd_pdrop": 0.0,
  "eos_token_id": null,
  "fused_bias_fc": true,
  "fused_dropout_add_ln": true,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-12,
  "max_trained_positions": 2048,
  "mlp_fc1_bias": false,
  "mlp_fc2_bias": false,
  "model_type": "nomic_bert",
  "n_embd": 768,
  "n_head": 12,
  "n_inner": 3072,
  "n_layer": 12,
  "n_positions": 8192,
  "pad_vocab_size_multiple": 64,
  "parallel_block": false,
  "parallel_block_tied_norm": false,
  "prenorm": false,
  "qkv_proj_bias": false,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.0,
  "rotary_emb_base": 1000,
  "rotary_emb_fraction": 1.0,
  "rotary_emb_interleaved": false,
  "rotary_emb_scale_base": null,
  "rotary_scaling_factor": null,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.0,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "torch_dtype": "float32",
  "transformers_version": "4.44.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "use_flash_attn": true,
  "use_rms_norm": false,
  "use_xentropy": true,
  "vocab_size": 30528
}

checkpoint-10119/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
{
  "__version__": {
    "sentence_transformers": "3.0.1",
    "transformers": "4.44.1",
    "pytorch": "2.3.0"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": null
}

checkpoint-10119/model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:759cbec716931ff76cdbd4a7871383f735a315ed6d81bcd06b39a1b086f4ffaf
size 546938168

checkpoint-10119/modules.json
ADDED
@@ -0,0 +1,14 @@
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
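
These two entries define the inference pipeline: a Transformer module (the Nomic BERT encoder) followed by the mean pooling configured in `1_Pooling/config.json`. A minimal sketch of assembling the equivalent pipeline by hand, assuming the standard `sentence_transformers.models` API:

```python
from sentence_transformers import SentenceTransformer, models

# Transformer module; Nomic BERT needs trust_remote_code for its custom classes.
transformer = models.Transformer(
    "nomic-ai/nomic-embed-text-v1.5",
    model_args={"trust_remote_code": True},
    config_args={"trust_remote_code": True},
)
# Mean pooling over token embeddings, per 1_Pooling/config.json.
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),  # 768
    pooling_mode="mean",
)
model = SentenceTransformer(modules=[transformer, pooling])
```
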
checkpoint-10119/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:63589318d8cdc24ec044c6e074813cd7098f9fe4265eef0156ef85184886bbf4
size 1093947386

checkpoint-10119/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:efe90f99c39bdf981bce1b64e4354d2e88ce3ca786c2d8bbcc0d1dfd0ab8057f
size 14244

checkpoint-10119/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4f1acdf701a5c9298adf7b2791b4bb6332200dc7cd3808ee627e187b1eae0b9b
size 1064

checkpoint-10119/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
{
  "max_seq_length": 8192,
  "do_lower_case": false
}
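
`"max_seq_length": 8192` means inputs are only truncated at the model's full context window. A minimal sketch of reading and, if memory is a concern, lowering that limit at load time (the model id is the same placeholder used earlier; `trust_remote_code` is needed for the Nomic BERT implementation):

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id", trust_remote_code=True)
print(model.max_seq_length)  # 8192, from sentence_bert_config.json

# Optionally truncate earlier to save memory on long inputs.
model.max_seq_length = 2048
```
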
checkpoint-10119/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}

checkpoint-10119/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
checkpoint-10119/tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "model_max_length": 8192,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}

checkpoint-10119/trainer_state.json
ADDED
@@ -0,0 +1,2894 @@
{
  "best_metric": 0.0011504614958539605,
  "best_model_checkpoint": "autotrain-7flvh-khn72/checkpoint-10119",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 10119,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.007411799584939223,
      "grad_norm": 7.738826751708984,
      "learning_rate": 7.411067193675889e-07,
      "loss": 0.07,
      "step": 25
    },
    {
      "epoch": 0.014823599169878446,
      "grad_norm": 1.3767492771148682,
      "learning_rate": 1.4822134387351778e-06,
      "loss": 0.0304,
      "step": 50
    },
    {
      "epoch": 0.02223539875481767,
      "grad_norm": 0.9382590055465698,
      "learning_rate": 2.193675889328063e-06,
      "loss": 0.0184,
      "step": 75
    },
    {
      "epoch": 0.029647198339756892,
      "grad_norm": 0.22179915010929108,
      "learning_rate": 2.9347826086956523e-06,
      "loss": 0.0019,
      "step": 100
    },
    {
      "epoch": 0.037058997924696115,
      "grad_norm": 0.022122476249933243,
      "learning_rate": 3.6758893280632412e-06,
      "loss": 0.0103,
      "step": 125
    },
    {
      "epoch": 0.04447079750963534,
      "grad_norm": 0.049805257469415665,
      "learning_rate": 4.416996047430831e-06,
      "loss": 0.002,
      "step": 150
    },
    {
      "epoch": 0.051882597094574566,
      "grad_norm": 0.30325883626937866,
      "learning_rate": 5.158102766798419e-06,
      "loss": 0.0017,
      "step": 175
    },
    {
      "epoch": 0.059294396679513785,
      "grad_norm": 0.013520199805498123,
      "learning_rate": 5.899209486166008e-06,
      "loss": 0.0136,
      "step": 200
    },
    {
      "epoch": 0.06670619626445301,
      "grad_norm": 0.08144009858369827,
      "learning_rate": 6.640316205533597e-06,
      "loss": 0.0008,
      "step": 225
    },
    {
      "epoch": 0.07411799584939223,
      "grad_norm": 0.03251965716481209,
      "learning_rate": 7.381422924901186e-06,
      "loss": 0.0038,
      "step": 250
    },
    {
      "epoch": 0.08152979543433146,
      "grad_norm": 0.009104475378990173,
      "learning_rate": 8.122529644268774e-06,
      "loss": 0.0005,
      "step": 275
    },
    {
      "epoch": 0.08894159501927068,
      "grad_norm": 0.004219604656100273,
      "learning_rate": 8.863636363636365e-06,
      "loss": 0.0002,
      "step": 300
    },
    {
      "epoch": 0.0963533946042099,
      "grad_norm": 0.006504200864583254,
      "learning_rate": 9.604743083003952e-06,
      "loss": 0.0014,
      "step": 325
    },
    {
      "epoch": 0.10376519418914913,
      "grad_norm": 0.004516596905887127,
      "learning_rate": 1.0345849802371542e-05,
      "loss": 0.0004,
      "step": 350
    },
    {
      "epoch": 0.11117699377408835,
      "grad_norm": 0.0006104411440901458,
      "learning_rate": 1.108695652173913e-05,
      "loss": 0.0002,
      "step": 375
    },
    {
      "epoch": 0.11858879335902757,
      "grad_norm": 0.035294629633426666,
      "learning_rate": 1.182806324110672e-05,
      "loss": 0.0004,
      "step": 400
    },
    {
      "epoch": 0.1260005929439668,
      "grad_norm": 0.019285837188363075,
      "learning_rate": 1.256916996047431e-05,
      "loss": 0.0012,
      "step": 425
    },
    {
      "epoch": 0.13341239252890602,
      "grad_norm": 0.3558584451675415,
      "learning_rate": 1.3310276679841897e-05,
      "loss": 0.0017,
      "step": 450
    },
    {
      "epoch": 0.14082419211384525,
      "grad_norm": 0.059573035687208176,
      "learning_rate": 1.4051383399209487e-05,
      "loss": 0.0002,
      "step": 475
    },
    {
      "epoch": 0.14823599169878446,
      "grad_norm": 0.18277816474437714,
      "learning_rate": 1.4792490118577074e-05,
      "loss": 0.0006,
      "step": 500
    },
    {
      "epoch": 0.1556477912837237,
      "grad_norm": 0.059150487184524536,
      "learning_rate": 1.5533596837944665e-05,
      "loss": 0.0003,
      "step": 525
    },
    {
      "epoch": 0.16305959086866292,
      "grad_norm": 0.0057366532273590565,
      "learning_rate": 1.6274703557312253e-05,
      "loss": 0.0075,
      "step": 550
    },
    {
      "epoch": 0.17047139045360213,
      "grad_norm": 0.00453445827588439,
      "learning_rate": 1.701581027667984e-05,
      "loss": 0.0006,
      "step": 575
    },
    {
      "epoch": 0.17788319003854136,
      "grad_norm": 0.0051128678023815155,
      "learning_rate": 1.7756916996047432e-05,
      "loss": 0.0002,
      "step": 600
    },
    {
      "epoch": 0.1852949896234806,
      "grad_norm": 0.04630223289132118,
      "learning_rate": 1.849802371541502e-05,
      "loss": 0.0019,
      "step": 625
    },
    {
      "epoch": 0.1927067892084198,
      "grad_norm": 0.7785825729370117,
      "learning_rate": 1.9239130434782607e-05,
      "loss": 0.0088,
      "step": 650
    },
    {
      "epoch": 0.20011858879335903,
      "grad_norm": 0.48438361287117004,
      "learning_rate": 1.9980237154150198e-05,
      "loss": 0.0023,
      "step": 675
    },
    {
      "epoch": 0.20753038837829826,
      "grad_norm": 0.01803724654018879,
      "learning_rate": 2.0721343873517786e-05,
      "loss": 0.0005,
      "step": 700
    },
    {
      "epoch": 0.21494218796323747,
      "grad_norm": 0.009904771111905575,
      "learning_rate": 2.1462450592885377e-05,
      "loss": 0.0005,
      "step": 725
    },
    {
      "epoch": 0.2223539875481767,
      "grad_norm": 0.02594936452805996,
      "learning_rate": 2.2203557312252964e-05,
      "loss": 0.0002,
      "step": 750
    },
    {
      "epoch": 0.22976578713311593,
      "grad_norm": 0.0479484386742115,
      "learning_rate": 2.2944664031620555e-05,
      "loss": 0.003,
      "step": 775
    },
    {
      "epoch": 0.23717758671805514,
      "grad_norm": 0.07394908368587494,
      "learning_rate": 2.3685770750988143e-05,
      "loss": 0.0006,
      "step": 800
    },
    {
      "epoch": 0.24458938630299437,
      "grad_norm": 0.00584772601723671,
      "learning_rate": 2.442687747035573e-05,
      "loss": 0.001,
      "step": 825
    },
    {
      "epoch": 0.2520011858879336,
      "grad_norm": 0.5621177554130554,
      "learning_rate": 2.516798418972332e-05,
      "loss": 0.0012,
      "step": 850
    },
    {
      "epoch": 0.25941298547287284,
      "grad_norm": 0.03085828199982643,
      "learning_rate": 2.590909090909091e-05,
      "loss": 0.001,
      "step": 875
    },
    {
      "epoch": 0.26682478505781204,
      "grad_norm": 0.004033543635159731,
      "learning_rate": 2.6650197628458497e-05,
      "loss": 0.0004,
      "step": 900
    },
    {
      "epoch": 0.27423658464275125,
      "grad_norm": 0.015690704807639122,
      "learning_rate": 2.7391304347826085e-05,
      "loss": 0.0016,
      "step": 925
    },
    {
      "epoch": 0.2816483842276905,
      "grad_norm": 0.026539193466305733,
      "learning_rate": 2.813241106719368e-05,
      "loss": 0.0002,
      "step": 950
    },
    {
      "epoch": 0.2890601838126297,
      "grad_norm": 0.0341588519513607,
      "learning_rate": 2.8873517786561267e-05,
      "loss": 0.0008,
      "step": 975
    },
    {
      "epoch": 0.2964719833975689,
      "grad_norm": 0.0481799840927124,
      "learning_rate": 2.9614624505928854e-05,
      "loss": 0.0008,
      "step": 1000
    },
    {
      "epoch": 0.3038837829825082,
      "grad_norm": 0.028982171788811684,
      "learning_rate": 2.9960469968156362e-05,
      "loss": 0.0016,
      "step": 1025
    },
    {
      "epoch": 0.3112955825674474,
      "grad_norm": 0.007785373833030462,
      "learning_rate": 2.9878115735148788e-05,
      "loss": 0.0007,
      "step": 1050
    },
    {
      "epoch": 0.3187073821523866,
      "grad_norm": 0.16795194149017334,
      "learning_rate": 2.9799055671461514e-05,
      "loss": 0.0206,
      "step": 1075
    },
    {
      "epoch": 0.32611918173732585,
      "grad_norm": 0.006599959917366505,
      "learning_rate": 2.9716701438453936e-05,
      "loss": 0.0009,
      "step": 1100
    },
    {
      "epoch": 0.33353098132226505,
      "grad_norm": 0.005065989214926958,
      "learning_rate": 2.963434720544636e-05,
      "loss": 0.0007,
      "step": 1125
    },
    {
      "epoch": 0.34094278090720426,
      "grad_norm": 0.06960754096508026,
      "learning_rate": 2.9551992972438783e-05,
      "loss": 0.0057,
      "step": 1150
    },
    {
      "epoch": 0.3483545804921435,
      "grad_norm": 0.0384642519056797,
      "learning_rate": 2.9469638739431205e-05,
      "loss": 0.0004,
      "step": 1175
    },
    {
      "epoch": 0.3557663800770827,
      "grad_norm": 0.005713630933314562,
      "learning_rate": 2.938728450642363e-05,
      "loss": 0.0003,
      "step": 1200
    },
    {
      "epoch": 0.3631781796620219,
      "grad_norm": 0.011937534436583519,
      "learning_rate": 2.9304930273416055e-05,
      "loss": 0.0022,
      "step": 1225
    },
    {
      "epoch": 0.3705899792469612,
      "grad_norm": 0.007326376158744097,
      "learning_rate": 2.9222576040408477e-05,
      "loss": 0.0022,
      "step": 1250
    },
    {
      "epoch": 0.3780017788319004,
      "grad_norm": 0.016074607148766518,
      "learning_rate": 2.9140221807400903e-05,
      "loss": 0.0014,
      "step": 1275
    },
    {
      "epoch": 0.3854135784168396,
      "grad_norm": 0.010498239658772945,
      "learning_rate": 2.9057867574393324e-05,
      "loss": 0.0017,
      "step": 1300
    },
    {
      "epoch": 0.39282537800177886,
      "grad_norm": 0.14429619908332825,
      "learning_rate": 2.8975513341385746e-05,
      "loss": 0.0048,
      "step": 1325
    },
    {
      "epoch": 0.40023717758671806,
      "grad_norm": 0.0035154372453689575,
      "learning_rate": 2.889315910837817e-05,
      "loss": 0.0068,
      "step": 1350
    },
    {
      "epoch": 0.40764897717165727,
      "grad_norm": 0.05379508063197136,
      "learning_rate": 2.8810804875370593e-05,
      "loss": 0.0012,
      "step": 1375
    },
    {
      "epoch": 0.41506077675659653,
      "grad_norm": 0.002669960493221879,
      "learning_rate": 2.8728450642363015e-05,
      "loss": 0.0095,
      "step": 1400
    },
    {
      "epoch": 0.42247257634153573,
      "grad_norm": 0.021320341154932976,
      "learning_rate": 2.8646096409355444e-05,
      "loss": 0.0014,
      "step": 1425
    },
    {
      "epoch": 0.42988437592647494,
      "grad_norm": 0.7060187458992004,
      "learning_rate": 2.8563742176347866e-05,
      "loss": 0.0008,
      "step": 1450
    },
    {
      "epoch": 0.4372961755114142,
      "grad_norm": 0.9327678680419922,
      "learning_rate": 2.8481387943340288e-05,
      "loss": 0.0091,
      "step": 1475
    },
    {
      "epoch": 0.4447079750963534,
      "grad_norm": 0.027771558612585068,
      "learning_rate": 2.8399033710332713e-05,
      "loss": 0.002,
      "step": 1500
    },
    {
      "epoch": 0.4521197746812926,
      "grad_norm": 0.004578313324600458,
      "learning_rate": 2.8316679477325135e-05,
      "loss": 0.0033,
      "step": 1525
    },
    {
      "epoch": 0.45953157426623187,
      "grad_norm": 2.601940393447876,
      "learning_rate": 2.8234325244317557e-05,
      "loss": 0.0027,
      "step": 1550
    },
    {
      "epoch": 0.4669433738511711,
      "grad_norm": 0.020152784883975983,
      "learning_rate": 2.8151971011309982e-05,
      "loss": 0.0016,
      "step": 1575
    },
    {
      "epoch": 0.4743551734361103,
      "grad_norm": 0.006129037588834763,
      "learning_rate": 2.8069616778302404e-05,
      "loss": 0.0003,
      "step": 1600
    },
    {
      "epoch": 0.48176697302104954,
      "grad_norm": 0.0011216738494113088,
      "learning_rate": 2.798726254529483e-05,
      "loss": 0.0004,
      "step": 1625
    },
    {
      "epoch": 0.48917877260598874,
      "grad_norm": 0.18000362813472748,
      "learning_rate": 2.7904908312287254e-05,
      "loss": 0.0045,
      "step": 1650
    },
    {
      "epoch": 0.49659057219092795,
      "grad_norm": 0.2856181561946869,
      "learning_rate": 2.7822554079279676e-05,
      "loss": 0.0008,
      "step": 1675
    },
    {
      "epoch": 0.5040023717758672,
      "grad_norm": 0.20144785940647125,
      "learning_rate": 2.77401998462721e-05,
      "loss": 0.0009,
      "step": 1700
    },
    {
      "epoch": 0.5114141713608064,
      "grad_norm": 0.07046128064393997,
      "learning_rate": 2.7657845613264523e-05,
      "loss": 0.0015,
      "step": 1725
    },
    {
      "epoch": 0.5188259709457457,
      "grad_norm": 0.23730403184890747,
      "learning_rate": 2.7575491380256945e-05,
      "loss": 0.0149,
      "step": 1750
    },
    {
      "epoch": 0.5262377705306849,
      "grad_norm": 0.006619285326451063,
      "learning_rate": 2.749313714724937e-05,
      "loss": 0.0154,
      "step": 1775
    },
    {
      "epoch": 0.5336495701156241,
      "grad_norm": 3.927973747253418,
      "learning_rate": 2.7410782914241792e-05,
      "loss": 0.0036,
      "step": 1800
    },
    {
      "epoch": 0.5410613697005633,
      "grad_norm": 0.2023760825395584,
      "learning_rate": 2.7328428681234214e-05,
      "loss": 0.0028,
      "step": 1825
    },
    {
      "epoch": 0.5484731692855025,
      "grad_norm": 0.008169805631041527,
      "learning_rate": 2.7246074448226643e-05,
      "loss": 0.0033,
      "step": 1850
    },
    {
      "epoch": 0.5558849688704417,
      "grad_norm": 0.03589046001434326,
      "learning_rate": 2.7163720215219065e-05,
      "loss": 0.0086,
      "step": 1875
    },
    {
      "epoch": 0.563296768455381,
      "grad_norm": 0.0460566021502018,
      "learning_rate": 2.7081365982211486e-05,
      "loss": 0.0124,
      "step": 1900
    },
    {
      "epoch": 0.5707085680403202,
      "grad_norm": 0.038972873240709305,
      "learning_rate": 2.699901174920391e-05,
      "loss": 0.0005,
      "step": 1925
    },
    {
      "epoch": 0.5781203676252594,
      "grad_norm": 0.023740865290164948,
      "learning_rate": 2.6916657516196334e-05,
      "loss": 0.0037,
      "step": 1950
    },
    {
      "epoch": 0.5855321672101986,
      "grad_norm": 5.729846477508545,
      "learning_rate": 2.6834303283188755e-05,
      "loss": 0.0052,
      "step": 1975
    },
    {
      "epoch": 0.5929439667951378,
      "grad_norm": 0.0025414160918444395,
      "learning_rate": 2.675194905018118e-05,
      "loss": 0.004,
      "step": 2000
    },
    {
      "epoch": 0.600355766380077,
      "grad_norm": 0.006911627948284149,
      "learning_rate": 2.6669594817173603e-05,
      "loss": 0.0041,
      "step": 2025
    },
    {
      "epoch": 0.6077675659650164,
      "grad_norm": 0.046909186989068985,
      "learning_rate": 2.6587240584166024e-05,
      "loss": 0.0006,
      "step": 2050
    },
    {
      "epoch": 0.6151793655499556,
      "grad_norm": 1.3354122638702393,
      "learning_rate": 2.6504886351158453e-05,
      "loss": 0.0035,
      "step": 2075
    },
    {
      "epoch": 0.6225911651348948,
      "grad_norm": 0.12669125199317932,
      "learning_rate": 2.6422532118150875e-05,
      "loss": 0.0079,
      "step": 2100
    },
    {
      "epoch": 0.630002964719834,
      "grad_norm": 0.001392390113323927,
      "learning_rate": 2.6340177885143297e-05,
      "loss": 0.0012,
      "step": 2125
    },
    {
      "epoch": 0.6374147643047732,
      "grad_norm": 0.0009501671884208918,
      "learning_rate": 2.6257823652135722e-05,
      "loss": 0.0012,
      "step": 2150
    },
    {
      "epoch": 0.6448265638897124,
      "grad_norm": 9.893109321594238,
      "learning_rate": 2.6175469419128144e-05,
      "loss": 0.0035,
      "step": 2175
    },
    {
      "epoch": 0.6522383634746517,
      "grad_norm": 0.00362469838000834,
      "learning_rate": 2.6093115186120566e-05,
      "loss": 0.0027,
      "step": 2200
    },
    {
      "epoch": 0.6596501630595909,
      "grad_norm": 0.01450972817838192,
      "learning_rate": 2.601076095311299e-05,
      "loss": 0.0015,
      "step": 2225
    },
    {
      "epoch": 0.6670619626445301,
      "grad_norm": 0.010469946078956127,
      "learning_rate": 2.5928406720105413e-05,
      "loss": 0.0004,
      "step": 2250
    },
    {
      "epoch": 0.6744737622294693,
      "grad_norm": 0.2838996350765228,
      "learning_rate": 2.5846052487097838e-05,
      "loss": 0.001,
      "step": 2275
    },
    {
      "epoch": 0.6818855618144085,
      "grad_norm": 0.0013039965415373445,
      "learning_rate": 2.5763698254090263e-05,
      "loss": 0.0002,
      "step": 2300
    },
    {
      "epoch": 0.6892973613993477,
      "grad_norm": 0.0675489753484726,
      "learning_rate": 2.5681344021082685e-05,
      "loss": 0.0009,
      "step": 2325
    },
    {
      "epoch": 0.696709160984287,
      "grad_norm": 0.3720860481262207,
      "learning_rate": 2.5598989788075107e-05,
      "loss": 0.0019,
      "step": 2350
    },
    {
      "epoch": 0.7041209605692262,
      "grad_norm": 0.005105954594910145,
      "learning_rate": 2.5516635555067532e-05,
      "loss": 0.0005,
      "step": 2375
    },
    {
      "epoch": 0.7115327601541654,
      "grad_norm": 0.46072980761528015,
      "learning_rate": 2.5434281322059954e-05,
      "loss": 0.0008,
      "step": 2400
    },
    {
      "epoch": 0.7189445597391046,
      "grad_norm": 0.030280854552984238,
      "learning_rate": 2.5351927089052376e-05,
      "loss": 0.0012,
      "step": 2425
    },
    {
      "epoch": 0.7263563593240439,
      "grad_norm": 0.007249627728015184,
      "learning_rate": 2.52695728560448e-05,
      "loss": 0.0036,
      "step": 2450
    },
    {
      "epoch": 0.7337681589089831,
      "grad_norm": 0.007724974304437637,
      "learning_rate": 2.5187218623037223e-05,
      "loss": 0.0007,
      "step": 2475
    },
    {
      "epoch": 0.7411799584939224,
      "grad_norm": 0.014301043003797531,
      "learning_rate": 2.510486439002965e-05,
      "loss": 0.0036,
      "step": 2500
    },
    {
      "epoch": 0.7485917580788616,
      "grad_norm": 0.003261040663346648,
      "learning_rate": 2.5022510157022074e-05,
      "loss": 0.0007,
      "step": 2525
    },
    {
      "epoch": 0.7560035576638008,
      "grad_norm": 0.01370958425104618,
      "learning_rate": 2.4940155924014496e-05,
|
722 |
+
"loss": 0.0008,
|
723 |
+
"step": 2550
|
724 |
+
},
|
725 |
+
{
|
726 |
+
"epoch": 0.76341535724874,
|
727 |
+
"grad_norm": 0.000538276566658169,
|
728 |
+
"learning_rate": 2.4857801691006917e-05,
|
729 |
+
"loss": 0.0008,
|
730 |
+
"step": 2575
|
731 |
+
},
|
732 |
+
{
|
733 |
+
"epoch": 0.7708271568336792,
|
734 |
+
"grad_norm": 1.4628574848175049,
|
735 |
+
"learning_rate": 2.4775447457999343e-05,
|
736 |
+
"loss": 0.0016,
|
737 |
+
"step": 2600
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"epoch": 0.7782389564186184,
|
741 |
+
"grad_norm": 0.569579541683197,
|
742 |
+
"learning_rate": 2.4693093224991765e-05,
|
743 |
+
"loss": 0.008,
|
744 |
+
"step": 2625
|
745 |
+
},
|
746 |
+
{
|
747 |
+
"epoch": 0.7856507560035577,
|
748 |
+
"grad_norm": 0.16939352452754974,
|
749 |
+
"learning_rate": 2.4610738991984186e-05,
|
750 |
+
"loss": 0.0058,
|
751 |
+
"step": 2650
|
752 |
+
},
|
753 |
+
{
|
754 |
+
"epoch": 0.7930625555884969,
|
755 |
+
"grad_norm": 0.11091727018356323,
|
756 |
+
"learning_rate": 2.452838475897661e-05,
|
757 |
+
"loss": 0.0003,
|
758 |
+
"step": 2675
|
759 |
+
},
|
760 |
+
{
|
761 |
+
"epoch": 0.8004743551734361,
|
762 |
+
"grad_norm": 0.007662549149245024,
|
763 |
+
"learning_rate": 2.4446030525969037e-05,
|
764 |
+
"loss": 0.0005,
|
765 |
+
"step": 2700
|
766 |
+
},
|
767 |
+
{
|
768 |
+
"epoch": 0.8078861547583753,
|
769 |
+
"grad_norm": 0.003822131548076868,
|
770 |
+
"learning_rate": 2.436367629296146e-05,
|
771 |
+
"loss": 0.0101,
|
772 |
+
"step": 2725
|
773 |
+
},
|
774 |
+
{
|
775 |
+
"epoch": 0.8152979543433145,
|
776 |
+
"grad_norm": 0.004805045202374458,
|
777 |
+
"learning_rate": 2.4281322059953884e-05,
|
778 |
+
"loss": 0.0025,
|
779 |
+
"step": 2750
|
780 |
+
},
|
781 |
+
{
|
782 |
+
"epoch": 0.8227097539282537,
|
783 |
+
"grad_norm": 0.004129350651055574,
|
784 |
+
"learning_rate": 2.4198967826946306e-05,
|
785 |
+
"loss": 0.0041,
|
786 |
+
"step": 2775
|
787 |
+
},
|
788 |
+
{
|
789 |
+
"epoch": 0.8301215535131931,
|
790 |
+
"grad_norm": 0.0160355307161808,
|
791 |
+
"learning_rate": 2.4116613593938728e-05,
|
792 |
+
"loss": 0.0113,
|
793 |
+
"step": 2800
|
794 |
+
},
|
795 |
+
{
|
796 |
+
"epoch": 0.8375333530981323,
|
797 |
+
"grad_norm": 0.017653746530413628,
|
798 |
+
"learning_rate": 2.4034259360931153e-05,
|
799 |
+
"loss": 0.0219,
|
800 |
+
"step": 2825
|
801 |
+
},
|
802 |
+
{
|
803 |
+
"epoch": 0.8449451526830715,
|
804 |
+
"grad_norm": 0.004908108152449131,
|
805 |
+
"learning_rate": 2.3951905127923575e-05,
|
806 |
+
"loss": 0.0004,
|
807 |
+
"step": 2850
|
808 |
+
},
|
809 |
+
{
|
810 |
+
"epoch": 0.8523569522680107,
|
811 |
+
"grad_norm": 0.0010231022024527192,
|
812 |
+
"learning_rate": 2.3869550894915997e-05,
|
813 |
+
"loss": 0.0062,
|
814 |
+
"step": 2875
|
815 |
+
},
|
816 |
+
{
|
817 |
+
"epoch": 0.8597687518529499,
|
818 |
+
"grad_norm": 1.0080921649932861,
|
819 |
+
"learning_rate": 2.3787196661908422e-05,
|
820 |
+
"loss": 0.0097,
|
821 |
+
"step": 2900
|
822 |
+
},
|
823 |
+
{
|
824 |
+
"epoch": 0.8671805514378891,
|
825 |
+
"grad_norm": 0.014476609416306019,
|
826 |
+
"learning_rate": 2.3704842428900847e-05,
|
827 |
+
"loss": 0.002,
|
828 |
+
"step": 2925
|
829 |
+
},
|
830 |
+
{
|
831 |
+
"epoch": 0.8745923510228284,
|
832 |
+
"grad_norm": 12.20775032043457,
|
833 |
+
"learning_rate": 2.362248819589327e-05,
|
834 |
+
"loss": 0.0093,
|
835 |
+
"step": 2950
|
836 |
+
},
|
837 |
+
{
|
838 |
+
"epoch": 0.8820041506077676,
|
839 |
+
"grad_norm": 0.04047137871384621,
|
840 |
+
"learning_rate": 2.3540133962885694e-05,
|
841 |
+
"loss": 0.0046,
|
842 |
+
"step": 2975
|
843 |
+
},
|
844 |
+
{
|
845 |
+
"epoch": 0.8894159501927068,
|
846 |
+
"grad_norm": 1.3424041271209717,
|
847 |
+
"learning_rate": 2.3457779729878116e-05,
|
848 |
+
"loss": 0.0009,
|
849 |
+
"step": 3000
|
850 |
+
},
|
851 |
+
{
|
852 |
+
"epoch": 0.896827749777646,
|
853 |
+
"grad_norm": 1.0432305335998535,
|
854 |
+
"learning_rate": 2.3375425496870538e-05,
|
855 |
+
"loss": 0.0014,
|
856 |
+
"step": 3025
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"epoch": 0.9042395493625852,
|
860 |
+
"grad_norm": 0.013758724555373192,
|
861 |
+
"learning_rate": 2.3293071263862963e-05,
|
862 |
+
"loss": 0.0021,
|
863 |
+
"step": 3050
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"epoch": 0.9116513489475244,
|
867 |
+
"grad_norm": 0.022794967517256737,
|
868 |
+
"learning_rate": 2.3210717030855385e-05,
|
869 |
+
"loss": 0.0009,
|
870 |
+
"step": 3075
|
871 |
+
},
|
872 |
+
{
|
873 |
+
"epoch": 0.9190631485324637,
|
874 |
+
"grad_norm": 0.00779375247657299,
|
875 |
+
"learning_rate": 2.3128362797847807e-05,
|
876 |
+
"loss": 0.0082,
|
877 |
+
"step": 3100
|
878 |
+
},
|
879 |
+
{
|
880 |
+
"epoch": 0.9264749481174029,
|
881 |
+
"grad_norm": 0.0006471537053585052,
|
882 |
+
"learning_rate": 2.3046008564840236e-05,
|
883 |
+
"loss": 0.0016,
|
884 |
+
"step": 3125
|
885 |
+
},
|
886 |
+
{
|
887 |
+
"epoch": 0.9338867477023421,
|
888 |
+
"grad_norm": 0.010517094284296036,
|
889 |
+
"learning_rate": 2.2963654331832658e-05,
|
890 |
+
"loss": 0.0023,
|
891 |
+
"step": 3150
|
892 |
+
},
|
893 |
+
{
|
894 |
+
"epoch": 0.9412985472872814,
|
895 |
+
"grad_norm": 0.0007998093497008085,
|
896 |
+
"learning_rate": 2.288130009882508e-05,
|
897 |
+
"loss": 0.0019,
|
898 |
+
"step": 3175
|
899 |
+
},
|
900 |
+
{
|
901 |
+
"epoch": 0.9487103468722206,
|
902 |
+
"grad_norm": 0.03586767986416817,
|
903 |
+
"learning_rate": 2.2798945865817505e-05,
|
904 |
+
"loss": 0.002,
|
905 |
+
"step": 3200
|
906 |
+
},
|
907 |
+
{
|
908 |
+
"epoch": 0.9561221464571598,
|
909 |
+
"grad_norm": 0.11343257874250412,
|
910 |
+
"learning_rate": 2.2716591632809927e-05,
|
911 |
+
"loss": 0.0009,
|
912 |
+
"step": 3225
|
913 |
+
},
|
914 |
+
{
|
915 |
+
"epoch": 0.9635339460420991,
|
916 |
+
"grad_norm": 0.05241592228412628,
|
917 |
+
"learning_rate": 2.2637531569122653e-05,
|
918 |
+
"loss": 0.0111,
|
919 |
+
"step": 3250
|
920 |
+
},
|
921 |
+
{
|
922 |
+
"epoch": 0.9709457456270383,
|
923 |
+
"grad_norm": 0.0006894416292198002,
|
924 |
+
"learning_rate": 2.255517733611508e-05,
|
925 |
+
"loss": 0.0137,
|
926 |
+
"step": 3275
|
927 |
+
},
|
928 |
+
{
|
929 |
+
"epoch": 0.9783575452119775,
|
930 |
+
"grad_norm": 0.001717191538773477,
|
931 |
+
"learning_rate": 2.24728231031075e-05,
|
932 |
+
"loss": 0.0038,
|
933 |
+
"step": 3300
|
934 |
+
},
|
935 |
+
{
|
936 |
+
"epoch": 0.9857693447969167,
|
937 |
+
"grad_norm": 0.020298583433032036,
|
938 |
+
"learning_rate": 2.2390468870099925e-05,
|
939 |
+
"loss": 0.0061,
|
940 |
+
"step": 3325
|
941 |
+
},
|
942 |
+
{
|
943 |
+
"epoch": 0.9931811443818559,
|
944 |
+
"grad_norm": 0.02918991446495056,
|
945 |
+
"learning_rate": 2.2308114637092347e-05,
|
946 |
+
"loss": 0.0045,
|
947 |
+
"step": 3350
|
948 |
+
},
|
949 |
+
{
|
950 |
+
"epoch": 1.0,
|
951 |
+
"eval_loss": 0.0037797815166413784,
|
952 |
+
"eval_runtime": 12.579,
|
953 |
+
"eval_samples_per_second": 112.648,
|
954 |
+
"eval_steps_per_second": 7.075,
|
955 |
+
"step": 3373
|
956 |
+
},
|
957 |
+
{
|
958 |
+
"epoch": 1.000592943966795,
|
959 |
+
"grad_norm": 0.0031215930357575417,
|
960 |
+
"learning_rate": 2.222576040408477e-05,
|
961 |
+
"loss": 0.0004,
|
962 |
+
"step": 3375
|
963 |
+
},
|
964 |
+
{
|
965 |
+
"epoch": 1.0080047435517343,
|
966 |
+
"grad_norm": 0.15596337616443634,
|
967 |
+
"learning_rate": 2.2143406171077194e-05,
|
968 |
+
"loss": 0.0104,
|
969 |
+
"step": 3400
|
970 |
+
},
|
971 |
+
{
|
972 |
+
"epoch": 1.0154165431366735,
|
973 |
+
"grad_norm": 0.024601919576525688,
|
974 |
+
"learning_rate": 2.2061051938069616e-05,
|
975 |
+
"loss": 0.0033,
|
976 |
+
"step": 3425
|
977 |
+
},
|
978 |
+
{
|
979 |
+
"epoch": 1.0228283427216127,
|
980 |
+
"grad_norm": 0.05934903770685196,
|
981 |
+
"learning_rate": 2.1978697705062038e-05,
|
982 |
+
"loss": 0.0004,
|
983 |
+
"step": 3450
|
984 |
+
},
|
985 |
+
{
|
986 |
+
"epoch": 1.030240142306552,
|
987 |
+
"grad_norm": 0.0020625083707273006,
|
988 |
+
"learning_rate": 2.1896343472054467e-05,
|
989 |
+
"loss": 0.0065,
|
990 |
+
"step": 3475
|
991 |
+
},
|
992 |
+
{
|
993 |
+
"epoch": 1.0376519418914913,
|
994 |
+
"grad_norm": 0.0041097910143435,
|
995 |
+
"learning_rate": 2.181398923904689e-05,
|
996 |
+
"loss": 0.004,
|
997 |
+
"step": 3500
|
998 |
+
},
|
999 |
+
{
|
1000 |
+
"epoch": 1.0450637414764306,
|
1001 |
+
"grad_norm": 0.019944315776228905,
|
1002 |
+
"learning_rate": 2.173163500603931e-05,
|
1003 |
+
"loss": 0.0003,
|
1004 |
+
"step": 3525
|
1005 |
+
},
|
1006 |
+
{
|
1007 |
+
"epoch": 1.0524755410613698,
|
1008 |
+
"grad_norm": 0.021289722993969917,
|
1009 |
+
"learning_rate": 2.1649280773031736e-05,
|
1010 |
+
"loss": 0.0004,
|
1011 |
+
"step": 3550
|
1012 |
+
},
|
1013 |
+
{
|
1014 |
+
"epoch": 1.059887340646309,
|
1015 |
+
"grad_norm": 0.0058084093034267426,
|
1016 |
+
"learning_rate": 2.1566926540024158e-05,
|
1017 |
+
"loss": 0.0006,
|
1018 |
+
"step": 3575
|
1019 |
+
},
|
1020 |
+
{
|
1021 |
+
"epoch": 1.0672991402312482,
|
1022 |
+
"grad_norm": 0.0045490204356610775,
|
1023 |
+
"learning_rate": 2.148457230701658e-05,
|
1024 |
+
"loss": 0.0018,
|
1025 |
+
"step": 3600
|
1026 |
+
},
|
1027 |
+
{
|
1028 |
+
"epoch": 1.0747109398161874,
|
1029 |
+
"grad_norm": 0.23320598900318146,
|
1030 |
+
"learning_rate": 2.140551224332931e-05,
|
1031 |
+
"loss": 0.0246,
|
1032 |
+
"step": 3625
|
1033 |
+
},
|
1034 |
+
{
|
1035 |
+
"epoch": 1.0821227394011266,
|
1036 |
+
"grad_norm": 0.4190017879009247,
|
1037 |
+
"learning_rate": 2.132315801032173e-05,
|
1038 |
+
"loss": 0.007,
|
1039 |
+
"step": 3650
|
1040 |
+
},
|
1041 |
+
{
|
1042 |
+
"epoch": 1.0895345389860658,
|
1043 |
+
"grad_norm": 0.012420530430972576,
|
1044 |
+
"learning_rate": 2.1240803777314157e-05,
|
1045 |
+
"loss": 0.0002,
|
1046 |
+
"step": 3675
|
1047 |
+
},
|
1048 |
+
{
|
1049 |
+
"epoch": 1.096946338571005,
|
1050 |
+
"grad_norm": 0.0024064735043793917,
|
1051 |
+
"learning_rate": 2.115844954430658e-05,
|
1052 |
+
"loss": 0.0005,
|
1053 |
+
"step": 3700
|
1054 |
+
},
|
1055 |
+
{
|
1056 |
+
"epoch": 1.1043581381559442,
|
1057 |
+
"grad_norm": 0.025788817554712296,
|
1058 |
+
"learning_rate": 2.1076095311299e-05,
|
1059 |
+
"loss": 0.0097,
|
1060 |
+
"step": 3725
|
1061 |
+
},
|
1062 |
+
{
|
1063 |
+
"epoch": 1.1117699377408834,
|
1064 |
+
"grad_norm": 0.007708790246397257,
|
1065 |
+
"learning_rate": 2.0993741078291425e-05,
|
1066 |
+
"loss": 0.0011,
|
1067 |
+
"step": 3750
|
1068 |
+
},
|
1069 |
+
{
|
1070 |
+
"epoch": 1.1191817373258228,
|
1071 |
+
"grad_norm": 0.006970885209739208,
|
1072 |
+
"learning_rate": 2.0911386845283847e-05,
|
1073 |
+
"loss": 0.0005,
|
1074 |
+
"step": 3775
|
1075 |
+
},
|
1076 |
+
{
|
1077 |
+
"epoch": 1.126593536910762,
|
1078 |
+
"grad_norm": 0.27546170353889465,
|
1079 |
+
"learning_rate": 2.082903261227627e-05,
|
1080 |
+
"loss": 0.0015,
|
1081 |
+
"step": 3800
|
1082 |
+
},
|
1083 |
+
{
|
1084 |
+
"epoch": 1.1340053364957012,
|
1085 |
+
"grad_norm": 0.047784604132175446,
|
1086 |
+
"learning_rate": 2.0746678379268698e-05,
|
1087 |
+
"loss": 0.0009,
|
1088 |
+
"step": 3825
|
1089 |
+
},
|
1090 |
+
{
|
1091 |
+
"epoch": 1.1414171360806404,
|
1092 |
+
"grad_norm": 0.21240082383155823,
|
1093 |
+
"learning_rate": 2.066432414626112e-05,
|
1094 |
+
"loss": 0.0002,
|
1095 |
+
"step": 3850
|
1096 |
+
},
|
1097 |
+
{
|
1098 |
+
"epoch": 1.1488289356655796,
|
1099 |
+
"grad_norm": 0.010284190997481346,
|
1100 |
+
"learning_rate": 2.058196991325354e-05,
|
1101 |
+
"loss": 0.0002,
|
1102 |
+
"step": 3875
|
1103 |
+
},
|
1104 |
+
{
|
1105 |
+
"epoch": 1.1562407352505188,
|
1106 |
+
"grad_norm": 0.009727857075631618,
|
1107 |
+
"learning_rate": 2.0499615680245967e-05,
|
1108 |
+
"loss": 0.0021,
|
1109 |
+
"step": 3900
|
1110 |
+
},
|
1111 |
+
{
|
1112 |
+
"epoch": 1.163652534835458,
|
1113 |
+
"grad_norm": 0.003159493440762162,
|
1114 |
+
"learning_rate": 2.041726144723839e-05,
|
1115 |
+
"loss": 0.0003,
|
1116 |
+
"step": 3925
|
1117 |
+
},
|
1118 |
+
{
|
1119 |
+
"epoch": 1.1710643344203973,
|
1120 |
+
"grad_norm": 0.003643095726147294,
|
1121 |
+
"learning_rate": 2.033490721423081e-05,
|
1122 |
+
"loss": 0.0006,
|
1123 |
+
"step": 3950
|
1124 |
+
},
|
1125 |
+
{
|
1126 |
+
"epoch": 1.1784761340053365,
|
1127 |
+
"grad_norm": 0.007551380433142185,
|
1128 |
+
"learning_rate": 2.0252552981223236e-05,
|
1129 |
+
"loss": 0.0019,
|
1130 |
+
"step": 3975
|
1131 |
+
},
|
1132 |
+
{
|
1133 |
+
"epoch": 1.1858879335902757,
|
1134 |
+
"grad_norm": 2.013258218765259,
|
1135 |
+
"learning_rate": 2.0170198748215658e-05,
|
1136 |
+
"loss": 0.0013,
|
1137 |
+
"step": 4000
|
1138 |
+
},
|
1139 |
+
{
|
1140 |
+
"epoch": 1.1932997331752149,
|
1141 |
+
"grad_norm": 0.847743570804596,
|
1142 |
+
"learning_rate": 2.0087844515208083e-05,
|
1143 |
+
"loss": 0.002,
|
1144 |
+
"step": 4025
|
1145 |
+
},
|
1146 |
+
{
|
1147 |
+
"epoch": 1.200711532760154,
|
1148 |
+
"grad_norm": 0.015584784559905529,
|
1149 |
+
"learning_rate": 2.0005490282200508e-05,
|
1150 |
+
"loss": 0.0005,
|
1151 |
+
"step": 4050
|
1152 |
+
},
|
1153 |
+
{
|
1154 |
+
"epoch": 1.2081233323450933,
|
1155 |
+
"grad_norm": 0.002146722748875618,
|
1156 |
+
"learning_rate": 1.992313604919293e-05,
|
1157 |
+
"loss": 0.0037,
|
1158 |
+
"step": 4075
|
1159 |
+
},
|
1160 |
+
{
|
1161 |
+
"epoch": 1.2155351319300327,
|
1162 |
+
"grad_norm": 0.2943512797355652,
|
1163 |
+
"learning_rate": 1.9840781816185352e-05,
|
1164 |
+
"loss": 0.0013,
|
1165 |
+
"step": 4100
|
1166 |
+
},
|
1167 |
+
{
|
1168 |
+
"epoch": 1.222946931514972,
|
1169 |
+
"grad_norm": 0.01032156590372324,
|
1170 |
+
"learning_rate": 1.9758427583177777e-05,
|
1171 |
+
"loss": 0.0006,
|
1172 |
+
"step": 4125
|
1173 |
+
},
|
1174 |
+
{
|
1175 |
+
"epoch": 1.2303587310999111,
|
1176 |
+
"grad_norm": 0.0028528086841106415,
|
1177 |
+
"learning_rate": 1.96760733501702e-05,
|
1178 |
+
"loss": 0.0037,
|
1179 |
+
"step": 4150
|
1180 |
+
},
|
1181 |
+
{
|
1182 |
+
"epoch": 1.2377705306848503,
|
1183 |
+
"grad_norm": 0.01456816028803587,
|
1184 |
+
"learning_rate": 1.959371911716262e-05,
|
1185 |
+
"loss": 0.0018,
|
1186 |
+
"step": 4175
|
1187 |
+
},
|
1188 |
+
{
|
1189 |
+
"epoch": 1.2451823302697895,
|
1190 |
+
"grad_norm": 0.06787148863077164,
|
1191 |
+
"learning_rate": 1.9511364884155046e-05,
|
1192 |
+
"loss": 0.0005,
|
1193 |
+
"step": 4200
|
1194 |
+
},
|
1195 |
+
{
|
1196 |
+
"epoch": 1.2525941298547287,
|
1197 |
+
"grad_norm": 0.03550760820508003,
|
1198 |
+
"learning_rate": 1.9429010651147468e-05,
|
1199 |
+
"loss": 0.0003,
|
1200 |
+
"step": 4225
|
1201 |
+
},
|
1202 |
+
{
|
1203 |
+
"epoch": 1.260005929439668,
|
1204 |
+
"grad_norm": 0.04113534837961197,
|
1205 |
+
"learning_rate": 1.9346656418139893e-05,
|
1206 |
+
"loss": 0.0003,
|
1207 |
+
"step": 4250
|
1208 |
+
},
|
1209 |
+
{
|
1210 |
+
"epoch": 1.2674177290246071,
|
1211 |
+
"grad_norm": 0.030371995642781258,
|
1212 |
+
"learning_rate": 1.926430218513232e-05,
|
1213 |
+
"loss": 0.0003,
|
1214 |
+
"step": 4275
|
1215 |
+
},
|
1216 |
+
{
|
1217 |
+
"epoch": 1.2748295286095463,
|
1218 |
+
"grad_norm": 0.14188840985298157,
|
1219 |
+
"learning_rate": 1.918194795212474e-05,
|
1220 |
+
"loss": 0.0004,
|
1221 |
+
"step": 4300
|
1222 |
+
},
|
1223 |
+
{
|
1224 |
+
"epoch": 1.2822413281944856,
|
1225 |
+
"grad_norm": 0.02768622897565365,
|
1226 |
+
"learning_rate": 1.9099593719117162e-05,
|
1227 |
+
"loss": 0.0029,
|
1228 |
+
"step": 4325
|
1229 |
+
},
|
1230 |
+
{
|
1231 |
+
"epoch": 1.2896531277794248,
|
1232 |
+
"grad_norm": 0.0008035032660700381,
|
1233 |
+
"learning_rate": 1.9017239486109588e-05,
|
1234 |
+
"loss": 0.0021,
|
1235 |
+
"step": 4350
|
1236 |
+
},
|
1237 |
+
{
|
1238 |
+
"epoch": 1.2970649273643642,
|
1239 |
+
"grad_norm": 0.0008042781846597791,
|
1240 |
+
"learning_rate": 1.893488525310201e-05,
|
1241 |
+
"loss": 0.0004,
|
1242 |
+
"step": 4375
|
1243 |
+
},
|
1244 |
+
{
|
1245 |
+
"epoch": 1.3044767269493032,
|
1246 |
+
"grad_norm": 0.0014630877412855625,
|
1247 |
+
"learning_rate": 1.885253102009443e-05,
|
1248 |
+
"loss": 0.0008,
|
1249 |
+
"step": 4400
|
1250 |
+
},
|
1251 |
+
{
|
1252 |
+
"epoch": 1.3118885265342426,
|
1253 |
+
"grad_norm": 0.3650088608264923,
|
1254 |
+
"learning_rate": 1.8770176787086857e-05,
|
1255 |
+
"loss": 0.0002,
|
1256 |
+
"step": 4425
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 1.3193003261191818,
|
1260 |
+
"grad_norm": 0.0012581262271851301,
|
1261 |
+
"learning_rate": 1.8687822554079282e-05,
|
1262 |
+
"loss": 0.0031,
|
1263 |
+
"step": 4450
|
1264 |
+
},
|
1265 |
+
{
|
1266 |
+
"epoch": 1.326712125704121,
|
1267 |
+
"grad_norm": 0.009501802735030651,
|
1268 |
+
"learning_rate": 1.8605468321071704e-05,
|
1269 |
+
"loss": 0.0005,
|
1270 |
+
"step": 4475
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"epoch": 1.3341239252890602,
|
1274 |
+
"grad_norm": 0.023450246080756187,
|
1275 |
+
"learning_rate": 1.852311408806413e-05,
|
1276 |
+
"loss": 0.0006,
|
1277 |
+
"step": 4500
|
1278 |
+
},
|
1279 |
+
{
|
1280 |
+
"epoch": 1.3415357248739994,
|
1281 |
+
"grad_norm": 0.041543640196323395,
|
1282 |
+
"learning_rate": 1.844075985505655e-05,
|
1283 |
+
"loss": 0.0005,
|
1284 |
+
"step": 4525
|
1285 |
+
},
|
1286 |
+
{
|
1287 |
+
"epoch": 1.3489475244589386,
|
1288 |
+
"grad_norm": 0.030355799943208694,
|
1289 |
+
"learning_rate": 1.8358405622048973e-05,
|
1290 |
+
"loss": 0.0012,
|
1291 |
+
"step": 4550
|
1292 |
+
},
|
1293 |
+
{
|
1294 |
+
"epoch": 1.3563593240438778,
|
1295 |
+
"grad_norm": 0.002064308850094676,
|
1296 |
+
"learning_rate": 1.8276051389041398e-05,
|
1297 |
+
"loss": 0.0009,
|
1298 |
+
"step": 4575
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 1.363771123628817,
|
1302 |
+
"grad_norm": 0.04516091197729111,
|
1303 |
+
"learning_rate": 1.819369715603382e-05,
|
1304 |
+
"loss": 0.0006,
|
1305 |
+
"step": 4600
|
1306 |
+
},
|
1307 |
+
{
|
1308 |
+
"epoch": 1.3711829232137562,
|
1309 |
+
"grad_norm": 0.0014140811981633306,
|
1310 |
+
"learning_rate": 1.811134292302624e-05,
|
1311 |
+
"loss": 0.0237,
|
1312 |
+
"step": 4625
|
1313 |
+
},
|
1314 |
+
{
|
1315 |
+
"epoch": 1.3785947227986957,
|
1316 |
+
"grad_norm": 0.01035268697887659,
|
1317 |
+
"learning_rate": 1.8028988690018667e-05,
|
1318 |
+
"loss": 0.0004,
|
1319 |
+
"step": 4650
|
1320 |
+
},
|
1321 |
+
{
|
1322 |
+
"epoch": 1.3860065223836346,
|
1323 |
+
"grad_norm": 0.0017914492636919022,
|
1324 |
+
"learning_rate": 1.7946634457011092e-05,
|
1325 |
+
"loss": 0.0001,
|
1326 |
+
"step": 4675
|
1327 |
+
},
|
1328 |
+
{
|
1329 |
+
"epoch": 1.393418321968574,
|
1330 |
+
"grad_norm": 1.8334236145019531,
|
1331 |
+
"learning_rate": 1.7864280224003514e-05,
|
1332 |
+
"loss": 0.0005,
|
1333 |
+
"step": 4700
|
1334 |
+
},
|
1335 |
+
{
|
1336 |
+
"epoch": 1.4008301215535133,
|
1337 |
+
"grad_norm": 0.093447744846344,
|
1338 |
+
"learning_rate": 1.778192599099594e-05,
|
1339 |
+
"loss": 0.0003,
|
1340 |
+
"step": 4725
|
1341 |
+
},
|
1342 |
+
{
|
1343 |
+
"epoch": 1.4082419211384525,
|
1344 |
+
"grad_norm": 0.004699540790170431,
|
1345 |
+
"learning_rate": 1.769957175798836e-05,
|
1346 |
+
"loss": 0.0059,
|
1347 |
+
"step": 4750
|
1348 |
+
},
|
1349 |
+
{
|
1350 |
+
"epoch": 1.4156537207233917,
|
1351 |
+
"grad_norm": 0.01479920744895935,
|
1352 |
+
"learning_rate": 1.7617217524980783e-05,
|
1353 |
+
"loss": 0.0005,
|
1354 |
+
"step": 4775
|
1355 |
+
},
|
1356 |
+
{
|
1357 |
+
"epoch": 1.423065520308331,
|
1358 |
+
"grad_norm": 0.007109279744327068,
|
1359 |
+
"learning_rate": 1.7534863291973208e-05,
|
1360 |
+
"loss": 0.0014,
|
1361 |
+
"step": 4800
|
1362 |
+
},
|
1363 |
+
{
|
1364 |
+
"epoch": 1.43047731989327,
|
1365 |
+
"grad_norm": 0.007401443552225828,
|
1366 |
+
"learning_rate": 1.745250905896563e-05,
|
1367 |
+
"loss": 0.0001,
|
1368 |
+
"step": 4825
|
1369 |
+
},
|
1370 |
+
{
|
1371 |
+
"epoch": 1.4378891194782093,
|
1372 |
+
"grad_norm": 0.5947569012641907,
|
1373 |
+
"learning_rate": 1.7370154825958052e-05,
|
1374 |
+
"loss": 0.0003,
|
1375 |
+
"step": 4850
|
1376 |
+
},
|
1377 |
+
{
|
1378 |
+
"epoch": 1.4453009190631485,
|
1379 |
+
"grad_norm": 0.001491679809987545,
|
1380 |
+
"learning_rate": 1.728780059295048e-05,
|
1381 |
+
"loss": 0.0003,
|
1382 |
+
"step": 4875
|
1383 |
+
},
|
1384 |
+
{
|
1385 |
+
"epoch": 1.4527127186480877,
|
1386 |
+
"grad_norm": 0.009767352603375912,
|
1387 |
+
"learning_rate": 1.7205446359942902e-05,
|
1388 |
+
"loss": 0.0062,
|
1389 |
+
"step": 4900
|
1390 |
+
},
|
1391 |
+
{
|
1392 |
+
"epoch": 1.460124518233027,
|
1393 |
+
"grad_norm": 0.0028043785132467747,
|
1394 |
+
"learning_rate": 1.7123092126935324e-05,
|
1395 |
+
"loss": 0.0002,
|
1396 |
+
"step": 4925
|
1397 |
+
},
|
1398 |
+
{
|
1399 |
+
"epoch": 1.4675363178179661,
|
1400 |
+
"grad_norm": 0.037593334913253784,
|
1401 |
+
"learning_rate": 1.704073789392775e-05,
|
1402 |
+
"loss": 0.0004,
|
1403 |
+
"step": 4950
|
1404 |
+
},
|
1405 |
+
{
|
1406 |
+
"epoch": 1.4749481174029055,
|
1407 |
+
"grad_norm": 0.046085864305496216,
|
1408 |
+
"learning_rate": 1.695838366092017e-05,
|
1409 |
+
"loss": 0.0001,
|
1410 |
+
"step": 4975
|
1411 |
+
},
|
1412 |
+
{
|
1413 |
+
"epoch": 1.4823599169878445,
|
1414 |
+
"grad_norm": 0.0233808271586895,
|
1415 |
+
"learning_rate": 1.6876029427912593e-05,
|
1416 |
+
"loss": 0.0003,
|
1417 |
+
"step": 5000
|
1418 |
+
},
|
1419 |
+
{
|
1420 |
+
"epoch": 1.489771716572784,
|
1421 |
+
"grad_norm": 0.06796186417341232,
|
1422 |
+
"learning_rate": 1.679367519490502e-05,
|
1423 |
+
"loss": 0.0003,
|
1424 |
+
"step": 5025
|
1425 |
+
},
|
1426 |
+
{
|
1427 |
+
"epoch": 1.4971835161577232,
|
1428 |
+
"grad_norm": 0.017352351918816566,
|
1429 |
+
"learning_rate": 1.671132096189744e-05,
|
1430 |
+
"loss": 0.0004,
|
1431 |
+
"step": 5050
|
1432 |
+
},
|
1433 |
+
{
|
1434 |
+
"epoch": 1.5045953157426624,
|
1435 |
+
"grad_norm": 0.003020123578608036,
|
1436 |
+
"learning_rate": 1.6628966728889862e-05,
|
1437 |
+
"loss": 0.0003,
|
1438 |
+
"step": 5075
|
1439 |
+
},
|
1440 |
+
{
|
1441 |
+
"epoch": 1.5120071153276016,
|
1442 |
+
"grad_norm": 0.004012423101812601,
|
1443 |
+
"learning_rate": 1.654661249588229e-05,
|
1444 |
+
"loss": 0.0001,
|
1445 |
+
"step": 5100
|
1446 |
+
},
|
1447 |
+
{
|
1448 |
+
"epoch": 1.5194189149125408,
|
1449 |
+
"grad_norm": 0.001218698569573462,
|
1450 |
+
"learning_rate": 1.6464258262874713e-05,
|
1451 |
+
"loss": 0.0031,
|
1452 |
+
"step": 5125
|
1453 |
+
},
|
1454 |
+
{
|
1455 |
+
"epoch": 1.52683071449748,
|
1456 |
+
"grad_norm": 0.003939245827496052,
|
1457 |
+
"learning_rate": 1.6381904029867135e-05,
|
1458 |
+
"loss": 0.0002,
|
1459 |
+
"step": 5150
|
1460 |
+
},
|
1461 |
+
{
|
1462 |
+
"epoch": 1.5342425140824192,
|
1463 |
+
"grad_norm": 0.00894363597035408,
|
1464 |
+
"learning_rate": 1.629954979685956e-05,
|
1465 |
+
"loss": 0.0006,
|
1466 |
+
"step": 5175
|
1467 |
+
},
|
1468 |
+
{
|
1469 |
+
"epoch": 1.5416543136673584,
|
1470 |
+
"grad_norm": 0.0632522702217102,
|
1471 |
+
"learning_rate": 1.6217195563851982e-05,
|
1472 |
+
"loss": 0.0001,
|
1473 |
+
"step": 5200
|
1474 |
+
},
|
1475 |
+
{
|
1476 |
+
"epoch": 1.5490661132522976,
|
1477 |
+
"grad_norm": 0.02053726837038994,
|
1478 |
+
"learning_rate": 1.6134841330844404e-05,
|
1479 |
+
"loss": 0.0017,
|
1480 |
+
"step": 5225
|
1481 |
+
},
|
1482 |
+
{
|
1483 |
+
"epoch": 1.556477912837237,
|
1484 |
+
"grad_norm": 0.0023088334128260612,
|
1485 |
+
"learning_rate": 1.605248709783683e-05,
|
1486 |
+
"loss": 0.0163,
|
1487 |
+
"step": 5250
|
1488 |
+
},
|
1489 |
+
{
|
1490 |
+
"epoch": 1.563889712422176,
|
1491 |
+
"grad_norm": 0.007689852733165026,
|
1492 |
+
"learning_rate": 1.597013286482925e-05,
|
1493 |
+
"loss": 0.0049,
|
1494 |
+
"step": 5275
|
1495 |
+
},
|
1496 |
+
{
|
1497 |
+
"epoch": 1.5713015120071154,
|
1498 |
+
"grad_norm": 0.02146066538989544,
|
1499 |
+
"learning_rate": 1.5887778631821676e-05,
|
1500 |
+
"loss": 0.0029,
|
1501 |
+
"step": 5300
|
1502 |
+
},
|
1503 |
+
{
|
1504 |
+
"epoch": 1.5787133115920544,
|
1505 |
+
"grad_norm": 0.007000596262514591,
|
1506 |
+
"learning_rate": 1.58054243988141e-05,
|
1507 |
+
"loss": 0.0035,
|
1508 |
+
"step": 5325
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 1.5861251111769938,
|
1512 |
+
"grad_norm": 0.040092576295137405,
|
1513 |
+
"learning_rate": 1.5723070165806523e-05,
|
1514 |
+
"loss": 0.007,
|
1515 |
+
"step": 5350
|
1516 |
+
},
|
1517 |
+
{
|
1518 |
+
"epoch": 1.593536910761933,
|
1519 |
+
"grad_norm": 0.008139602839946747,
|
1520 |
+
"learning_rate": 1.5640715932798945e-05,
|
1521 |
+
"loss": 0.0006,
|
1522 |
+
"step": 5375
|
1523 |
+
},
|
1524 |
+
{
|
1525 |
+
"epoch": 1.6009487103468723,
|
1526 |
+
"grad_norm": 0.0004769627994392067,
|
1527 |
+
"learning_rate": 1.555836169979137e-05,
|
1528 |
+
"loss": 0.0003,
|
1529 |
+
"step": 5400
|
1530 |
+
},
|
1531 |
+
{
|
1532 |
+
"epoch": 1.6083605099318115,
|
1533 |
+
"grad_norm": 0.0034301020205020905,
|
1534 |
+
"learning_rate": 1.5476007466783792e-05,
|
1535 |
+
"loss": 0.0004,
|
1536 |
+
"step": 5425
|
1537 |
+
},
|
1538 |
+
{
|
1539 |
+
"epoch": 1.6157723095167507,
|
1540 |
+
"grad_norm": 0.002083411207422614,
|
1541 |
+
"learning_rate": 1.5393653233776217e-05,
|
1542 |
+
"loss": 0.0067,
|
1543 |
+
"step": 5450
|
1544 |
+
},
|
1545 |
+
{
|
1546 |
+
"epoch": 1.6231841091016899,
|
1547 |
+
"grad_norm": 9.624741554260254,
|
1548 |
+
"learning_rate": 1.531129900076864e-05,
|
1549 |
+
"loss": 0.0112,
|
1550 |
+
"step": 5475
|
1551 |
+
},
|
1552 |
+
{
|
1553 |
+
"epoch": 1.630595908686629,
|
1554 |
+
"grad_norm": 0.0528758242726326,
|
1555 |
+
"learning_rate": 1.5228944767761063e-05,
|
1556 |
+
"loss": 0.0002,
|
1557 |
+
"step": 5500
|
1558 |
+
},
|
1559 |
+
{
|
1560 |
+
"epoch": 1.6380077082715685,
|
1561 |
+
"grad_norm": 0.0011297637829557061,
|
1562 |
+
"learning_rate": 1.5146590534753488e-05,
|
1563 |
+
"loss": 0.0005,
|
1564 |
+
"step": 5525
|
1565 |
+
},
|
1566 |
+
{
|
1567 |
+
"epoch": 1.6454195078565075,
|
1568 |
+
"grad_norm": 0.03151678293943405,
|
1569 |
+
"learning_rate": 1.506423630174591e-05,
|
1570 |
+
"loss": 0.0029,
|
1571 |
+
"step": 5550
|
1572 |
+
},
|
1573 |
+
{
|
1574 |
+
"epoch": 1.652831307441447,
|
1575 |
+
"grad_norm": 0.01291624829173088,
|
1576 |
+
"learning_rate": 1.4981882068738333e-05,
|
1577 |
+
"loss": 0.0006,
|
1578 |
+
"step": 5575
|
1579 |
+
},
|
1580 |
+
{
|
1581 |
+
"epoch": 1.660243107026386,
|
1582 |
+
"grad_norm": 0.0060423207469284534,
|
1583 |
+
"learning_rate": 1.4899527835730757e-05,
|
1584 |
+
"loss": 0.0012,
|
1585 |
+
"step": 5600
|
1586 |
+
},
|
1587 |
+
{
|
1588 |
+
"epoch": 1.6676549066113253,
|
1589 |
+
"grad_norm": 0.025346050038933754,
|
1590 |
+
"learning_rate": 1.481717360272318e-05,
|
1591 |
+
"loss": 0.0002,
|
1592 |
+
"step": 5625
|
1593 |
+
},
|
1594 |
+
{
|
1595 |
+
"epoch": 1.6750667061962643,
|
1596 |
+
"grad_norm": 0.0028023323975503445,
|
1597 |
+
"learning_rate": 1.4734819369715602e-05,
|
1598 |
+
"loss": 0.0005,
|
1599 |
+
"step": 5650
|
1600 |
+
},
|
1601 |
+
{
|
1602 |
+
"epoch": 1.6824785057812037,
|
1603 |
+
"grad_norm": 0.0084083816036582,
|
1604 |
+
"learning_rate": 1.4652465136708028e-05,
|
1605 |
+
"loss": 0.0002,
|
1606 |
+
"step": 5675
|
1607 |
+
},
|
1608 |
+
{
|
1609 |
+
"epoch": 1.689890305366143,
|
1610 |
+
"grad_norm": 0.00158981594722718,
|
1611 |
+
"learning_rate": 1.4570110903700451e-05,
|
1612 |
+
"loss": 0.0002,
|
1613 |
+
"step": 5700
|
1614 |
+
},
|
1615 |
+
{
|
1616 |
+
"epoch": 1.6973021049510821,
|
1617 |
+
"grad_norm": 0.004132249392569065,
|
1618 |
+
"learning_rate": 1.4487756670692873e-05,
|
1619 |
+
"loss": 0.0003,
|
1620 |
+
"step": 5725
|
1621 |
+
},
|
1622 |
+
{
|
1623 |
+
"epoch": 1.7047139045360213,
|
1624 |
+
"grad_norm": 0.0054726507514715195,
|
1625 |
+
"learning_rate": 1.4405402437685297e-05,
|
1626 |
+
"loss": 0.0018,
|
1627 |
+
"step": 5750
|
1628 |
+
},
|
1629 |
+
{
|
1630 |
+
"epoch": 1.7121257041209605,
|
1631 |
+
"grad_norm": 0.006865357980132103,
|
1632 |
+
"learning_rate": 1.4323048204677722e-05,
|
1633 |
+
"loss": 0.0027,
|
1634 |
+
"step": 5775
|
1635 |
+
},
|
1636 |
+
{
|
1637 |
+
"epoch": 1.7195375037058998,
|
1638 |
+
"grad_norm": 0.07013662904500961,
|
1639 |
+
"learning_rate": 1.4240693971670144e-05,
|
1640 |
+
"loss": 0.0002,
|
1641 |
+
"step": 5800
|
1642 |
+
},
|
1643 |
+
{
|
1644 |
+
"epoch": 1.726949303290839,
|
1645 |
+
"grad_norm": 0.00439479760825634,
|
1646 |
+
"learning_rate": 1.4158339738662567e-05,
|
1647 |
+
"loss": 0.0001,
|
1648 |
+
"step": 5825
|
1649 |
+
},
|
1650 |
+
{
|
1651 |
+
"epoch": 1.7343611028757784,
|
1652 |
+
"grad_norm": 0.024386949837207794,
|
1653 |
+
"learning_rate": 1.4075985505654991e-05,
|
1654 |
+
"loss": 0.0007,
|
1655 |
+
"step": 5850
|
1656 |
+
},
|
1657 |
+
{
|
1658 |
+
"epoch": 1.7417729024607174,
|
1659 |
+
"grad_norm": 0.005310242995619774,
|
1660 |
+
"learning_rate": 1.3993631272647414e-05,
|
1661 |
+
"loss": 0.0047,
|
1662 |
+
"step": 5875
|
1663 |
+
},
|
1664 |
+
{
|
1665 |
+
"epoch": 1.7491847020456568,
|
1666 |
+
"grad_norm": 0.008037442341446877,
|
1667 |
+
"learning_rate": 1.3911277039639838e-05,
|
1668 |
+
"loss": 0.0001,
|
1669 |
+
"step": 5900
|
1670 |
+
},
|
1671 |
+
{
|
1672 |
+
"epoch": 1.7565965016305958,
|
1673 |
+
"grad_norm": 0.0005814226460643113,
|
1674 |
+
"learning_rate": 1.3828922806632262e-05,
|
1675 |
+
"loss": 0.0002,
|
1676 |
+
"step": 5925
|
1677 |
+
},
|
1678 |
+
{
|
1679 |
+
"epoch": 1.7640083012155352,
|
1680 |
+
"grad_norm": 0.0108102485537529,
|
1681 |
+
"learning_rate": 1.3746568573624685e-05,
|
1682 |
+
"loss": 0.0007,
|
1683 |
+
"step": 5950
|
1684 |
+
},
|
1685 |
+
{
|
1686 |
+
"epoch": 1.7714201008004744,
|
1687 |
+
"grad_norm": 0.03512908145785332,
|
1688 |
+
"learning_rate": 1.3664214340617107e-05,
|
1689 |
+
"loss": 0.0039,
|
1690 |
+
"step": 5975
|
1691 |
+
},
|
1692 |
+
{
|
1693 |
+
"epoch": 1.7788319003854136,
|
1694 |
+
"grad_norm": 0.001055323169566691,
|
1695 |
+
"learning_rate": 1.3581860107609532e-05,
|
1696 |
+
"loss": 0.0001,
|
1697 |
+
"step": 6000
|
1698 |
+
},
|
1699 |
+
{
|
1700 |
+
"epoch": 1.7862436999703528,
|
1701 |
+
"grad_norm": 0.0011812145821750164,
|
1702 |
+
"learning_rate": 1.3499505874601956e-05,
|
1703 |
+
"loss": 0.0001,
|
1704 |
+
"step": 6025
|
1705 |
+
},
|
1706 |
+
{
|
1707 |
+
"epoch": 1.793655499555292,
|
1708 |
+
"grad_norm": 0.004607312846928835,
|
1709 |
+
"learning_rate": 1.3417151641594378e-05,
|
1710 |
+
"loss": 0.0002,
|
1711 |
+
"step": 6050
|
1712 |
+
},
|
1713 |
+
{
|
1714 |
+
"epoch": 1.8010672991402312,
|
1715 |
+
"grad_norm": 0.01913565769791603,
|
1716 |
+
"learning_rate": 1.3334797408586801e-05,
|
1717 |
+
"loss": 0.0001,
|
1718 |
+
"step": 6075
|
1719 |
+
},
|
1720 |
+
{
|
1721 |
+
"epoch": 1.8084790987251704,
|
1722 |
+
"grad_norm": 4.993204116821289,
|
1723 |
+
"learning_rate": 1.3252443175579227e-05,
|
1724 |
+
"loss": 0.0016,
|
1725 |
+
"step": 6100
|
1726 |
+
},
|
1727 |
+
{
|
1728 |
+
"epoch": 1.8158908983101099,
|
1729 |
+
"grad_norm": 0.004902560729533434,
|
1730 |
+
"learning_rate": 1.3170088942571648e-05,
|
1731 |
+
"loss": 0.0004,
|
1732 |
+
"step": 6125
|
1733 |
+
},
|
1734 |
+
{
|
1735 |
+
"epoch": 1.8233026978950488,
|
1736 |
+
"grad_norm": 0.02545163407921791,
|
1737 |
+
"learning_rate": 1.3087734709564072e-05,
|
1738 |
+
"loss": 0.0003,
|
1739 |
+
"step": 6150
|
1740 |
+
},
|
1741 |
+
{
|
1742 |
+
"epoch": 1.8307144974799883,
|
1743 |
+
"grad_norm": 0.0022334237582981586,
|
1744 |
+
"learning_rate": 1.3005380476556495e-05,
|
1745 |
+
"loss": 0.0005,
|
1746 |
+
"step": 6175
|
1747 |
+
},
|
1748 |
+
{
|
1749 |
+
"epoch": 1.8381262970649273,
|
1750 |
+
"grad_norm": 0.020511144772171974,
|
1751 |
+
"learning_rate": 1.2923026243548919e-05,
|
1752 |
+
"loss": 0.0005,
|
1753 |
+
"step": 6200
|
1754 |
+
},
|
1755 |
+
{
|
1756 |
+
"epoch": 1.8455380966498667,
|
1757 |
+
"grad_norm": 0.00042026047594845295,
|
1758 |
+
"learning_rate": 1.2840672010541343e-05,
|
1759 |
+
"loss": 0.0004,
|
1760 |
+
"step": 6225
|
1761 |
+
},
|
1762 |
+
{
|
1763 |
+
"epoch": 1.8529498962348057,
|
1764 |
+
"grad_norm": 0.01851501315832138,
|
1765 |
+
"learning_rate": 1.2758317777533766e-05,
|
1766 |
+
"loss": 0.0036,
|
1767 |
+
"step": 6250
|
1768 |
+
},
|
1769 |
+
{
|
1770 |
+
"epoch": 1.860361695819745,
|
1771 |
+
"grad_norm": 0.0019784029573202133,
|
1772 |
+
"learning_rate": 1.2675963544526188e-05,
|
1773 |
+
"loss": 0.0002,
|
1774 |
+
"step": 6275
|
1775 |
+
},
|
1776 |
+
{
|
1777 |
+
"epoch": 1.8677734954046843,
|
1778 |
+
"grad_norm": 0.005374327767640352,
|
1779 |
+
"learning_rate": 1.2593609311518612e-05,
|
1780 |
+
"loss": 0.0006,
|
1781 |
+
"step": 6300
|
1782 |
+
},
|
1783 |
+
{
|
1784 |
+
"epoch": 1.8751852949896235,
|
1785 |
+
"grad_norm": 0.009900149889290333,
|
1786 |
+
"learning_rate": 1.2511255078511037e-05,
|
1787 |
+
"loss": 0.0001,
|
1788 |
+
"step": 6325
|
1789 |
+
},
|
1790 |
+
{
|
1791 |
+
"epoch": 1.8825970945745627,
|
1792 |
+
"grad_norm": 0.4626489579677582,
|
1793 |
+
"learning_rate": 1.2428900845503459e-05,
|
1794 |
+
"loss": 0.0022,
|
1795 |
+
"step": 6350
|
1796 |
+
},
|
1797 |
+
{
|
1798 |
+
"epoch": 1.890008894159502,
|
1799 |
+
"grad_norm": 0.012091380544006824,
|
1800 |
+
"learning_rate": 1.2346546612495882e-05,
|
1801 |
+
"loss": 0.0001,
|
1802 |
+
"step": 6375
|
1803 |
+
},
|
1804 |
+
{
|
1805 |
+
"epoch": 1.8974206937444411,
|
1806 |
+
"grad_norm": 0.01360028050839901,
|
1807 |
+
"learning_rate": 1.2264192379488306e-05,
|
1808 |
+
"loss": 0.0003,
|
1809 |
+
"step": 6400
|
1810 |
+
},
|
1811 |
+
{
|
1812 |
+
"epoch": 1.9048324933293803,
|
1813 |
+
"grad_norm": 0.010124334134161472,
|
1814 |
+
"learning_rate": 1.218183814648073e-05,
|
1815 |
+
"loss": 0.0001,
|
1816 |
+
"step": 6425
|
1817 |
+
},
|
1818 |
+
{
|
1819 |
+
"epoch": 1.9122442929143197,
|
1820 |
+
"grad_norm": 0.0006189885316416621,
|
1821 |
+
"learning_rate": 1.2099483913473153e-05,
|
1822 |
+
"loss": 0.001,
|
1823 |
+
"step": 6450
|
1824 |
+
},
|
1825 |
+
{
|
1826 |
+
"epoch": 1.9196560924992587,
|
1827 |
+
"grad_norm": 0.012544695287942886,
|
1828 |
+
"learning_rate": 1.2017129680465577e-05,
|
1829 |
+
"loss": 0.0005,
|
1830 |
+
"step": 6475
|
1831 |
+
},
|
1832 |
+
{
|
1833 |
+
"epoch": 1.9270678920841982,
|
1834 |
+
"grad_norm": 0.03128047287464142,
|
1835 |
+
"learning_rate": 1.1934775447457998e-05,
|
1836 |
+
"loss": 0.0002,
|
1837 |
+
"step": 6500
|
1838 |
+
},
|
1839 |
+
{
|
1840 |
+
"epoch": 1.9344796916691371,
|
1841 |
+
"grad_norm": 0.0022450904361903667,
|
1842 |
+
"learning_rate": 1.1852421214450424e-05,
|
1843 |
+
"loss": 0.0049,
|
1844 |
+
"step": 6525
|
1845 |
+
},
|
1846 |
+
{
|
1847 |
+
"epoch": 1.9418914912540766,
|
1848 |
+
"grad_norm": 0.012787646614015102,
|
1849 |
+
"learning_rate": 1.1770066981442847e-05,
|
1850 |
+
"loss": 0.0002,
|
1851 |
+
"step": 6550
|
1852 |
+
},
|
1853 |
+
{
|
1854 |
+
"epoch": 1.9493032908390158,
|
1855 |
+
"grad_norm": 0.0022699732799082994,
|
1856 |
+
"learning_rate": 1.1687712748435269e-05,
|
1857 |
+
"loss": 0.0009,
|
1858 |
+
"step": 6575
|
1859 |
+
},
|
1860 |
+
{
|
1861 |
+
"epoch": 1.956715090423955,
|
1862 |
+
"grad_norm": 0.001058919820934534,
|
1863 |
+
"learning_rate": 1.1605358515427693e-05,
|
1864 |
+
"loss": 0.0145,
|
1865 |
+
"step": 6600
|
1866 |
+
},
|
1867 |
+
{
|
1868 |
+
"epoch": 1.9641268900088942,
|
1869 |
+
"grad_norm": 0.002509496873244643,
|
1870 |
+
"learning_rate": 1.1523004282420118e-05,
|
1871 |
+
"loss": 0.0004,
|
1872 |
+
"step": 6625
|
1873 |
+
},
|
1874 |
+
{
|
1875 |
+
"epoch": 1.9715386895938334,
|
1876 |
+
"grad_norm": 0.0002950621419586241,
|
1877 |
+
"learning_rate": 1.144065004941254e-05,
|
1878 |
+
"loss": 0.0009,
|
1879 |
+
"step": 6650
|
1880 |
+
},
|
1881 |
+
{
|
1882 |
+
"epoch": 1.9789504891787726,
|
1883 |
+
"grad_norm": 0.007797342259436846,
|
1884 |
+
"learning_rate": 1.1358295816404963e-05,
|
1885 |
+
"loss": 0.0003,
|
1886 |
+
"step": 6675
|
1887 |
+
},
|
1888 |
+
{
|
1889 |
+
"epoch": 1.9863622887637118,
|
1890 |
+
"grad_norm": 17.98861312866211,
|
1891 |
+
"learning_rate": 1.1275941583397387e-05,
|
1892 |
+
"loss": 0.0053,
|
1893 |
+
"step": 6700
|
1894 |
+
},
|
1895 |
+
{
|
1896 |
+
"epoch": 1.9937740883486512,
|
1897 |
+
"grad_norm": 0.00034863411565311253,
|
1898 |
+
"learning_rate": 1.1193587350389809e-05,
|
1899 |
+
"loss": 0.0002,
|
1900 |
+
"step": 6725
|
1901 |
+
},
|
1902 |
+
{
|
1903 |
+
"epoch": 2.0,
|
1904 |
+
"eval_loss": 0.00123180216178298,
|
1905 |
+
"eval_runtime": 12.6622,
|
1906 |
+
"eval_samples_per_second": 111.908,
|
1907 |
+
"eval_steps_per_second": 7.029,
|
1908 |
+
"step": 6746
|
1909 |
+
},
|
1910 |
+
{
|
1911 |
+
"epoch": 2.00118588793359,
|
1912 |
+
"grad_norm": 0.00869747158139944,
|
1913 |
+
"learning_rate": 1.1111233117382234e-05,
|
1914 |
+
"loss": 0.0002,
|
1915 |
+
"step": 6750
|
1916 |
+
},
|
1917 |
+
{
|
1918 |
+
"epoch": 2.0085976875185296,
|
1919 |
+
"grad_norm": 0.021044636145234108,
|
1920 |
+
"learning_rate": 1.1028878884374658e-05,
|
1921 |
+
"loss": 0.0018,
|
1922 |
+
"step": 6775
|
1923 |
+
},
|
1924 |
+
{
|
1925 |
+
"epoch": 2.0160094871034686,
|
1926 |
+
"grad_norm": 0.12446173280477524,
|
1927 |
+
"learning_rate": 1.0946524651367081e-05,
|
1928 |
+
"loss": 0.0001,
|
1929 |
+
"step": 6800
|
1930 |
+
},
|
1931 |
+
{
|
1932 |
+
"epoch": 2.023421286688408,
|
1933 |
+
"grad_norm": 0.6716703772544861,
|
1934 |
+
"learning_rate": 1.0864170418359503e-05,
|
1935 |
+
"loss": 0.0084,
|
1936 |
+
"step": 6825
|
1937 |
+
},
|
1938 |
+
{
|
1939 |
+
"epoch": 2.030833086273347,
|
1940 |
+
"grad_norm": 0.022197945043444633,
|
1941 |
+
"learning_rate": 1.0781816185351928e-05,
|
1942 |
+
"loss": 0.0004,
|
1943 |
+
"step": 6850
|
1944 |
+
},
|
1945 |
+
{
|
1946 |
+
"epoch": 2.0382448858582864,
|
1947 |
+
"grad_norm": 0.020370731130242348,
|
1948 |
+
"learning_rate": 1.0699461952344352e-05,
|
1949 |
+
"loss": 0.0001,
|
1950 |
+
"step": 6875
|
1951 |
+
},
|
1952 |
+
{
|
1953 |
+
"epoch": 2.0456566854432254,
|
1954 |
+
"grad_norm": 0.0005291994893923402,
|
1955 |
+
"learning_rate": 1.0617107719336774e-05,
|
1956 |
+
"loss": 0.0001,
|
1957 |
+
"step": 6900
|
1958 |
+
},
|
1959 |
+
{
|
1960 |
+
"epoch": 2.053068485028165,
|
1961 |
+
"grad_norm": 0.8127829432487488,
|
1962 |
+
"learning_rate": 1.0534753486329197e-05,
|
1963 |
+
"loss": 0.0023,
|
1964 |
+
"step": 6925
|
1965 |
+
},
|
1966 |
+
{
|
1967 |
+
"epoch": 2.060480284613104,
|
1968 |
+
"grad_norm": 0.008397881872951984,
|
1969 |
+
"learning_rate": 1.0452399253321622e-05,
|
1970 |
+
"loss": 0.0006,
|
1971 |
+
"step": 6950
|
1972 |
+
},
|
1973 |
+
{
|
1974 |
+
"epoch": 2.0678920841980433,
|
1975 |
+
"grad_norm": 0.001353152678348124,
|
1976 |
+
"learning_rate": 1.0370045020314044e-05,
|
1977 |
+
"loss": 0.0002,
|
1978 |
+
"step": 6975
|
1979 |
+
},
|
1980 |
+
{
|
1981 |
+
"epoch": 2.0753038837829827,
|
1982 |
+
"grad_norm": 2.089665174484253,
|
1983 |
+
"learning_rate": 1.0287690787306468e-05,
|
1984 |
+
"loss": 0.0025,
|
1985 |
+
"step": 7000
|
1986 |
+
},
|
1987 |
+
{
|
1988 |
+
"epoch": 2.0827156833679217,
|
1989 |
+
"grad_norm": 0.004494812805205584,
|
1990 |
+
"learning_rate": 1.0205336554298891e-05,
|
1991 |
+
"loss": 0.0001,
|
1992 |
+
"step": 7025
|
1993 |
+
},
|
1994 |
+
{
|
1995 |
+
"epoch": 2.090127482952861,
|
1996 |
+
"grad_norm": 0.0007270874921232462,
|
1997 |
+
"learning_rate": 1.0122982321291315e-05,
|
1998 |
+
"loss": 0.0003,
|
1999 |
+
"step": 7050
|
2000 |
+
},
|
2001 |
+
{
|
2002 |
+
"epoch": 2.0975392825378,
|
2003 |
+
"grad_norm": 0.955007016658783,
|
2004 |
+
"learning_rate": 1.0040628088283739e-05,
|
2005 |
+
"loss": 0.0003,
|
2006 |
+
"step": 7075
|
2007 |
+
},
|
2008 |
+
{
|
2009 |
+
"epoch": 2.1049510821227395,
|
2010 |
+
"grad_norm": 0.002524587558582425,
|
2011 |
+
"learning_rate": 9.958273855276162e-06,
|
2012 |
+
"loss": 0.0004,
|
2013 |
+
"step": 7100
|
2014 |
+
},
|
2015 |
+
{
|
2016 |
+
"epoch": 2.1123628817076785,
|
2017 |
+
"grad_norm": 0.002211947925388813,
|
2018 |
+
"learning_rate": 9.875919622268584e-06,
|
2019 |
+
"loss": 0.0002,
|
2020 |
+
"step": 7125
|
2021 |
+
},
|
2022 |
+
{
|
2023 |
+
"epoch": 2.119774681292618,
|
2024 |
+
"grad_norm": 0.12918436527252197,
|
2025 |
+
"learning_rate": 9.793565389261008e-06,
|
2026 |
+
"loss": 0.0137,
|
2027 |
+
"step": 7150
|
2028 |
+
},
|
2029 |
+
{
|
2030 |
+
"epoch": 2.127186480877557,
|
2031 |
+
"grad_norm": 0.06649267673492432,
|
2032 |
+
"learning_rate": 9.711211156253433e-06,
|
2033 |
+
"loss": 0.0002,
|
2034 |
+
"step": 7175
|
2035 |
+
},
|
2036 |
+
{
|
2037 |
+
"epoch": 2.1345982804624963,
|
2038 |
+
"grad_norm": 0.007204731460660696,
|
2039 |
+
"learning_rate": 9.628856923245855e-06,
|
2040 |
+
"loss": 0.0001,
|
2041 |
+
"step": 7200
|
2042 |
+
},
|
2043 |
+
{
|
2044 |
+
"epoch": 2.1420100800474353,
|
2045 |
+
"grad_norm": 0.020881352946162224,
|
2046 |
+
"learning_rate": 9.546502690238278e-06,
|
2047 |
+
"loss": 0.0002,
|
2048 |
+
"step": 7225
|
2049 |
+
},
|
2050 |
+
{
|
2051 |
+
"epoch": 2.1494218796323747,
|
2052 |
+
"grad_norm": 0.0012391919735819101,
|
2053 |
+
"learning_rate": 9.464148457230702e-06,
|
2054 |
+
"loss": 0.0002,
|
2055 |
+
"step": 7250
|
2056 |
+
},
|
2057 |
+
{
|
2058 |
+
"epoch": 2.156833679217314,
|
2059 |
+
"grad_norm": 0.07225095480680466,
|
2060 |
+
"learning_rate": 9.381794224223125e-06,
|
2061 |
+
"loss": 0.0002,
|
2062 |
+
"step": 7275
|
2063 |
+
},
|
2064 |
+
{
|
2065 |
+
"epoch": 2.164245478802253,
|
2066 |
+
"grad_norm": 0.0023537089582532644,
|
2067 |
+
"learning_rate": 9.299439991215549e-06,
|
2068 |
+
"loss": 0.0002,
|
2069 |
+
"step": 7300
|
2070 |
+
},
|
2071 |
+
{
|
2072 |
+
"epoch": 2.1716572783871926,
|
2073 |
+
"grad_norm": 0.0023852228187024593,
|
2074 |
+
"learning_rate": 9.217085758207972e-06,
|
2075 |
+
"loss": 0.0001,
|
2076 |
+
"step": 7325
|
2077 |
+
},
|
2078 |
+
{
|
2079 |
+
"epoch": 2.1790690779721316,
|
2080 |
+
"grad_norm": 0.00434435810893774,
|
2081 |
+
"learning_rate": 9.134731525200394e-06,
|
2082 |
+
"loss": 0.0003,
|
2083 |
+
"step": 7350
|
2084 |
+
},
|
2085 |
+
{
|
2086 |
+
"epoch": 2.186480877557071,
|
2087 |
+
"grad_norm": 0.0017129664774984121,
|
2088 |
+
"learning_rate": 9.05237729219282e-06,
|
2089 |
+
"loss": 0.0002,
|
2090 |
+
"step": 7375
|
2091 |
+
},
|
2092 |
+
{
|
2093 |
+
"epoch": 2.19389267714201,
|
2094 |
+
"grad_norm": 0.031847961246967316,
|
2095 |
+
"learning_rate": 8.970023059185243e-06,
|
2096 |
+
"loss": 0.0002,
|
2097 |
+
"step": 7400
|
2098 |
+
},
|
2099 |
+
{
|
2100 |
+
"epoch": 2.2013044767269494,
|
2101 |
+
"grad_norm": 0.0012447929475456476,
|
2102 |
+
"learning_rate": 8.887668826177665e-06,
|
2103 |
+
"loss": 0.0003,
|
2104 |
+
"step": 7425
|
2105 |
+
},
|
2106 |
+
{
|
2107 |
+
"epoch": 2.2087162763118884,
|
2108 |
+
"grad_norm": 0.0008703101193532348,
|
2109 |
+
"learning_rate": 8.805314593170089e-06,
|
2110 |
+
"loss": 0.0109,
|
2111 |
+
"step": 7450
|
2112 |
+
},
|
2113 |
+
{
|
2114 |
+
"epoch": 2.216128075896828,
|
2115 |
+
"grad_norm": 0.0011485127033665776,
|
2116 |
+
"learning_rate": 8.722960360162514e-06,
|
2117 |
+
"loss": 0.0003,
|
2118 |
+
"step": 7475
|
2119 |
+
},
|
2120 |
+
{
|
2121 |
+
"epoch": 2.223539875481767,
|
2122 |
+
"grad_norm": 0.0031809755600988865,
|
2123 |
+
"learning_rate": 8.640606127154936e-06,
|
2124 |
+
"loss": 0.0006,
|
2125 |
+
"step": 7500
|
2126 |
+
},
|
2127 |
+
{
|
2128 |
+
"epoch": 2.230951675066706,
|
2129 |
+
"grad_norm": 0.025839926674962044,
|
2130 |
+
"learning_rate": 8.55825189414736e-06,
|
2131 |
+
"loss": 0.0009,
|
2132 |
+
"step": 7525
|
2133 |
+
},
|
2134 |
+
{
|
2135 |
+
"epoch": 2.2383634746516456,
|
2136 |
+
"grad_norm": 0.0020416686311364174,
|
2137 |
+
"learning_rate": 8.475897661139783e-06,
|
2138 |
+
"loss": 0.0004,
|
2139 |
+
"step": 7550
|
2140 |
+
},
|
2141 |
+
{
|
2142 |
+
"epoch": 2.2457752742365846,
|
2143 |
+
"grad_norm": 0.0024621151387691498,
|
2144 |
+
"learning_rate": 8.393543428132205e-06,
|
2145 |
+
"loss": 0.0004,
|
2146 |
+
"step": 7575
|
2147 |
+
},
|
2148 |
+
{
|
2149 |
+
"epoch": 2.253187073821524,
|
2150 |
+
"grad_norm": 0.00846477784216404,
|
2151 |
+
"learning_rate": 8.31118919512463e-06,
|
2152 |
+
"loss": 0.0002,
|
2153 |
+
"step": 7600
|
2154 |
+
},
|
2155 |
+
{
|
2156 |
+
"epoch": 2.260598873406463,
|
2157 |
+
"grad_norm": 0.0007214430370368063,
|
2158 |
+
"learning_rate": 8.228834962117053e-06,
|
2159 |
+
"loss": 0.0003,
|
2160 |
+
"step": 7625
|
2161 |
+
},
|
2162 |
+
{
|
2163 |
+
"epoch": 2.2680106729914025,
|
2164 |
+
"grad_norm": 0.0028224673587828875,
|
2165 |
+
"learning_rate": 8.146480729109475e-06,
|
2166 |
+
"loss": 0.0001,
|
2167 |
+
"step": 7650
|
2168 |
+
},
|
2169 |
+
{
|
2170 |
+
"epoch": 2.2754224725763414,
|
2171 |
+
"grad_norm": 0.0036359152290970087,
|
2172 |
+
"learning_rate": 8.064126496101899e-06,
|
2173 |
+
"loss": 0.0002,
|
2174 |
+
"step": 7675
|
2175 |
+
},
|
2176 |
+
{
|
2177 |
+
"epoch": 2.282834272161281,
|
2178 |
+
"grad_norm": 0.01586153358221054,
|
2179 |
+
"learning_rate": 7.981772263094324e-06,
|
2180 |
+
"loss": 0.0,
|
2181 |
+
"step": 7700
|
2182 |
+
},
|
2183 |
+
{
|
2184 |
+
"epoch": 2.29024607174622,
|
2185 |
+
"grad_norm": 0.0017704274505376816,
|
2186 |
+
"learning_rate": 7.899418030086748e-06,
|
2187 |
+
"loss": 0.0006,
|
2188 |
+
"step": 7725
|
2189 |
+
},
|
2190 |
+
{
|
2191 |
+
"epoch": 2.2976578713311593,
|
2192 |
+
"grad_norm": 0.0014685478527098894,
|
2193 |
+
"learning_rate": 7.81706379707917e-06,
|
2194 |
+
"loss": 0.0001,
|
2195 |
+
"step": 7750
|
2196 |
+
},
|
2197 |
+
{
|
2198 |
+
"epoch": 2.3050696709160983,
|
2199 |
+
"grad_norm": 0.0018168434035032988,
|
2200 |
+
"learning_rate": 7.734709564071593e-06,
|
2201 |
+
"loss": 0.0001,
|
2202 |
+
"step": 7775
|
2203 |
+
},
|
2204 |
+
{
|
2205 |
+
"epoch": 2.3124814705010377,
|
2206 |
+
"grad_norm": 0.0007384305936284363,
|
2207 |
+
"learning_rate": 7.652355331064018e-06,
|
2208 |
+
"loss": 0.0105,
|
2209 |
+
"step": 7800
|
2210 |
+
},
|
2211 |
+
{
|
2212 |
+
"epoch": 2.319893270085977,
|
2213 |
+
"grad_norm": 0.023864462971687317,
|
2214 |
+
"learning_rate": 7.57000109805644e-06,
|
2215 |
+
"loss": 0.0001,
|
2216 |
+
"step": 7825
|
2217 |
+
},
|
2218 |
+
{
|
2219 |
+
"epoch": 2.327305069670916,
|
2220 |
+
"grad_norm": 0.0014232024550437927,
|
2221 |
+
"learning_rate": 7.487646865048864e-06,
|
2222 |
+
"loss": 0.0001,
|
2223 |
+
"step": 7850
|
2224 |
+
},
|
2225 |
+
{
|
2226 |
+
"epoch": 2.334716869255855,
|
2227 |
+
"grad_norm": 0.009727663360536098,
|
2228 |
+
"learning_rate": 7.405292632041287e-06,
|
2229 |
+
"loss": 0.0001,
|
2230 |
+
"step": 7875
|
2231 |
+
},
|
2232 |
+
{
|
2233 |
+
"epoch": 2.3421286688407945,
|
2234 |
+
"grad_norm": 0.002969397697597742,
|
2235 |
+
"learning_rate": 7.322938399033711e-06,
|
2236 |
+
"loss": 0.0005,
|
2237 |
+
"step": 7900
|
2238 |
+
},
|
2239 |
+
{
|
2240 |
+
"epoch": 2.349540468425734,
|
2241 |
+
"grad_norm": 0.006611447781324387,
|
2242 |
+
"learning_rate": 7.240584166026134e-06,
|
2243 |
+
"loss": 0.0023,
|
2244 |
+
"step": 7925
|
2245 |
+
},
|
2246 |
+
{
|
2247 |
+
"epoch": 2.356952268010673,
|
2248 |
+
"grad_norm": 0.0021192070562392473,
|
2249 |
+
"learning_rate": 7.158229933018557e-06,
|
2250 |
+
"loss": 0.0,
|
2251 |
+
"step": 7950
|
2252 |
+
},
|
2253 |
+
{
|
2254 |
+
"epoch": 2.3643640675956124,
|
2255 |
+
"grad_norm": 0.010217389091849327,
|
2256 |
+
"learning_rate": 7.075875700010981e-06,
|
2257 |
+
"loss": 0.0002,
|
2258 |
+
"step": 7975
|
2259 |
+
},
|
2260 |
+
{
|
2261 |
+
"epoch": 2.3717758671805513,
|
2262 |
+
"grad_norm": 0.0004124463885091245,
|
2263 |
+
"learning_rate": 6.993521467003404e-06,
|
2264 |
+
"loss": 0.0003,
|
2265 |
+
"step": 8000
|
2266 |
+
},
|
2267 |
+
{
|
2268 |
+
"epoch": 2.3791876667654908,
|
2269 |
+
"grad_norm": 0.007629746571183205,
|
2270 |
+
"learning_rate": 6.911167233995827e-06,
|
2271 |
+
"loss": 0.0001,
|
2272 |
+
"step": 8025
|
2273 |
+
},
|
2274 |
+
{
|
2275 |
+
"epoch": 2.3865994663504297,
|
2276 |
+
"grad_norm": 0.060671769082546234,
|
2277 |
+
"learning_rate": 6.8288130009882514e-06,
|
2278 |
+
"loss": 0.0001,
|
2279 |
+
"step": 8050
|
2280 |
+
},
|
2281 |
+
{
|
2282 |
+
"epoch": 2.394011265935369,
|
2283 |
+
"grad_norm": 0.0004924974055029452,
|
2284 |
+
"learning_rate": 6.746458767980674e-06,
|
2285 |
+
"loss": 0.0001,
|
2286 |
+
"step": 8075
|
2287 |
+
},
|
2288 |
+
{
|
2289 |
+
"epoch": 2.401423065520308,
|
2290 |
+
"grad_norm": 0.0014034019550308585,
|
2291 |
+
"learning_rate": 6.664104534973098e-06,
|
2292 |
+
"loss": 0.0001,
|
2293 |
+
"step": 8100
|
2294 |
+
},
|
2295 |
+
{
|
2296 |
+
"epoch": 2.4088348651052476,
|
2297 |
+
"grad_norm": 0.006600458174943924,
|
2298 |
+
"learning_rate": 6.581750301965521e-06,
|
2299 |
+
"loss": 0.0,
|
2300 |
+
"step": 8125
|
2301 |
+
},
|
2302 |
+
{
|
2303 |
+
"epoch": 2.4162466646901866,
|
2304 |
+
"grad_norm": 0.001500199898146093,
|
2305 |
+
"learning_rate": 6.499396068957945e-06,
|
2306 |
+
"loss": 0.0002,
|
2307 |
+
"step": 8150
|
2308 |
+
},
|
2309 |
+
{
|
2310 |
+
"epoch": 2.423658464275126,
|
2311 |
+
"grad_norm": 0.07915302366018295,
|
2312 |
+
"learning_rate": 6.4170418359503675e-06,
|
2313 |
+
"loss": 0.0001,
|
2314 |
+
"step": 8175
|
2315 |
+
},
|
2316 |
+
{
|
2317 |
+
"epoch": 2.4310702638600654,
|
2318 |
+
"grad_norm": 0.0008610423537902534,
|
2319 |
+
"learning_rate": 6.334687602942792e-06,
|
2320 |
+
"loss": 0.0001,
|
2321 |
+
"step": 8200
|
2322 |
+
},
|
2323 |
+
{
|
2324 |
+
"epoch": 2.4384820634450044,
|
2325 |
+
"grad_norm": 0.005987235344946384,
|
2326 |
+
"learning_rate": 6.252333369935215e-06,
|
2327 |
+
"loss": 0.0001,
|
2328 |
+
"step": 8225
|
2329 |
+
},
|
2330 |
+
{
|
2331 |
+
"epoch": 2.445893863029944,
|
2332 |
+
"grad_norm": 0.0015195367159321904,
|
2333 |
+
"learning_rate": 6.169979136927638e-06,
|
2334 |
+
"loss": 0.0001,
|
2335 |
+
"step": 8250
|
2336 |
+
},
|
2337 |
+
{
|
2338 |
+
"epoch": 2.453305662614883,
|
2339 |
+
"grad_norm": 0.00758790411055088,
|
2340 |
+
"learning_rate": 6.087624903920062e-06,
|
2341 |
+
"loss": 0.0001,
|
2342 |
+
"step": 8275
|
2343 |
+
},
|
2344 |
+
{
|
2345 |
+
"epoch": 2.4607174621998222,
|
2346 |
+
"grad_norm": 0.000942397688049823,
|
2347 |
+
"learning_rate": 6.005270670912485e-06,
|
2348 |
+
"loss": 0.001,
|
2349 |
+
"step": 8300
|
2350 |
+
},
|
2351 |
+
{
|
2352 |
+
"epoch": 2.468129261784761,
|
2353 |
+
"grad_norm": 0.01316822413355112,
|
2354 |
+
"learning_rate": 5.922916437904908e-06,
|
2355 |
+
"loss": 0.0003,
|
2356 |
+
"step": 8325
|
2357 |
+
},
|
2358 |
+
{
|
2359 |
+
"epoch": 2.4755410613697006,
|
2360 |
+
"grad_norm": 0.004891947843134403,
|
2361 |
+
"learning_rate": 5.840562204897332e-06,
|
2362 |
+
"loss": 0.0003,
|
2363 |
+
"step": 8350
|
2364 |
+
},
|
2365 |
+
{
|
2366 |
+
"epoch": 2.4829528609546396,
|
2367 |
+
"grad_norm": 0.0005889868480153382,
|
2368 |
+
"learning_rate": 5.758207971889755e-06,
|
2369 |
+
"loss": 0.0001,
|
2370 |
+
"step": 8375
|
2371 |
+
},
|
2372 |
+
{
|
2373 |
+
"epoch": 2.490364660539579,
|
2374 |
+
"grad_norm": 0.004644739907234907,
|
2375 |
+
"learning_rate": 5.675853738882179e-06,
|
2376 |
+
"loss": 0.0009,
|
2377 |
+
"step": 8400
|
2378 |
+
},
|
2379 |
+
{
|
2380 |
+
"epoch": 2.497776460124518,
|
2381 |
+
"grad_norm": 0.009528938680887222,
|
2382 |
+
"learning_rate": 5.593499505874602e-06,
|
2383 |
+
"loss": 0.0011,
|
2384 |
+
"step": 8425
|
2385 |
+
},
|
2386 |
+
{
|
2387 |
+
"epoch": 2.5051882597094575,
|
2388 |
+
"grad_norm": 0.003150611650198698,
|
2389 |
+
"learning_rate": 5.511145272867025e-06,
|
2390 |
+
"loss": 0.0005,
|
2391 |
+
"step": 8450
|
2392 |
+
},
|
2393 |
+
{
|
2394 |
+
"epoch": 2.512600059294397,
|
2395 |
+
"grad_norm": 0.0655895322561264,
|
2396 |
+
"learning_rate": 5.428791039859449e-06,
|
2397 |
+
"loss": 0.0024,
|
2398 |
+
"step": 8475
|
2399 |
+
},
|
2400 |
+
{
|
2401 |
+
"epoch": 2.520011858879336,
|
2402 |
+
"grad_norm": 0.012722983956336975,
|
2403 |
+
"learning_rate": 5.346436806851872e-06,
|
2404 |
+
"loss": 0.0002,
|
2405 |
+
"step": 8500
|
2406 |
+
},
|
2407 |
+
{
|
2408 |
+
"epoch": 2.527423658464275,
|
2409 |
+
"grad_norm": 0.0908973291516304,
|
2410 |
+
"learning_rate": 5.264082573844296e-06,
|
2411 |
+
"loss": 0.0006,
|
2412 |
+
"step": 8525
|
2413 |
+
},
|
2414 |
+
{
|
2415 |
+
"epoch": 2.5348354580492143,
|
2416 |
+
"grad_norm": 0.0024270841386169195,
|
2417 |
+
"learning_rate": 5.181728340836719e-06,
|
2418 |
+
"loss": 0.0001,
|
2419 |
+
"step": 8550
|
2420 |
+
},
|
2421 |
+
{
|
2422 |
+
"epoch": 2.5422472576341537,
|
2423 |
+
"grad_norm": 0.015870608389377594,
|
2424 |
+
"learning_rate": 5.099374107829143e-06,
|
2425 |
+
"loss": 0.0001,
|
2426 |
+
"step": 8575
|
2427 |
+
},
|
2428 |
+
{
|
2429 |
+
"epoch": 2.5496590572190927,
|
2430 |
+
"grad_norm": 0.00794550683349371,
|
2431 |
+
"learning_rate": 5.0170198748215655e-06,
|
2432 |
+
"loss": 0.0003,
|
2433 |
+
"step": 8600
|
2434 |
+
},
|
2435 |
+
{
|
2436 |
+
"epoch": 2.557070856804032,
|
2437 |
+
"grad_norm": 0.0217802282422781,
|
2438 |
+
"learning_rate": 4.93466564181399e-06,
|
2439 |
+
"loss": 0.0007,
|
2440 |
+
"step": 8625
|
2441 |
+
},
|
2442 |
+
{
|
2443 |
+
"epoch": 2.564482656388971,
|
2444 |
+
"grad_norm": 0.00040505212382413447,
|
2445 |
+
"learning_rate": 4.852311408806413e-06,
|
2446 |
+
"loss": 0.0009,
|
2447 |
+
"step": 8650
|
2448 |
+
},
|
2449 |
+
{
|
2450 |
+
"epoch": 2.5718944559739105,
|
2451 |
+
"grad_norm": 0.0009371866472065449,
|
2452 |
+
"learning_rate": 4.769957175798836e-06,
|
2453 |
+
"loss": 0.0002,
|
2454 |
+
"step": 8675
|
2455 |
+
},
|
2456 |
+
{
|
2457 |
+
"epoch": 2.5793062555588495,
|
2458 |
+
"grad_norm": 0.0234972070902586,
|
2459 |
+
"learning_rate": 4.68760294279126e-06,
|
2460 |
+
"loss": 0.0001,
|
2461 |
+
"step": 8700
|
2462 |
+
},
|
2463 |
+
{
|
2464 |
+
"epoch": 2.586718055143789,
|
2465 |
+
"grad_norm": 0.0047541470266878605,
|
2466 |
+
"learning_rate": 4.605248709783683e-06,
|
2467 |
+
"loss": 0.0006,
|
2468 |
+
"step": 8725
|
2469 |
+
},
|
2470 |
+
{
|
2471 |
+
"epoch": 2.5941298547287284,
|
2472 |
+
"grad_norm": 0.0020951908081769943,
|
2473 |
+
"learning_rate": 4.522894476776106e-06,
|
2474 |
+
"loss": 0.0,
|
2475 |
+
"step": 8750
|
2476 |
+
},
|
2477 |
+
{
|
2478 |
+
"epoch": 2.6015416543136674,
|
2479 |
+
"grad_norm": 0.005374910309910774,
|
2480 |
+
"learning_rate": 4.4405402437685296e-06,
|
2481 |
+
"loss": 0.0002,
|
2482 |
+
"step": 8775
|
2483 |
+
},
|
2484 |
+
{
|
2485 |
+
"epoch": 2.6089534538986063,
|
2486 |
+
"grad_norm": 0.0005201067542657256,
|
2487 |
+
"learning_rate": 4.358186010760953e-06,
|
2488 |
+
"loss": 0.0,
|
2489 |
+
"step": 8800
|
2490 |
+
},
|
2491 |
+
{
|
2492 |
+
"epoch": 2.6163652534835458,
|
2493 |
+
"grad_norm": 0.01255794707685709,
|
2494 |
+
"learning_rate": 4.275831777753377e-06,
|
2495 |
+
"loss": 0.0004,
|
2496 |
+
"step": 8825
|
2497 |
+
},
|
2498 |
+
{
|
2499 |
+
"epoch": 2.623777053068485,
|
2500 |
+
"grad_norm": 0.020597679540514946,
|
2501 |
+
"learning_rate": 4.1934775447458e-06,
|
2502 |
+
"loss": 0.0001,
|
2503 |
+
"step": 8850
|
2504 |
+
},
|
2505 |
+
{
|
2506 |
+
"epoch": 2.631188852653424,
|
2507 |
+
"grad_norm": 0.0005912929191254079,
|
2508 |
+
"learning_rate": 4.111123311738223e-06,
|
2509 |
+
"loss": 0.0005,
|
2510 |
+
"step": 8875
|
2511 |
+
},
|
2512 |
+
{
|
2513 |
+
"epoch": 2.6386006522383636,
|
2514 |
+
"grad_norm": 0.005026647355407476,
|
2515 |
+
"learning_rate": 4.028769078730647e-06,
|
2516 |
+
"loss": 0.0002,
|
2517 |
+
"step": 8900
|
2518 |
+
},
|
2519 |
+
{
|
2520 |
+
"epoch": 2.6460124518233026,
|
2521 |
+
"grad_norm": 0.0007542280363850296,
|
2522 |
+
"learning_rate": 3.94641484572307e-06,
|
2523 |
+
"loss": 0.0001,
|
2524 |
+
"step": 8925
|
2525 |
+
},
|
2526 |
+
{
|
2527 |
+
"epoch": 2.653424251408242,
|
2528 |
+
"grad_norm": 0.0026307408697903156,
|
2529 |
+
"learning_rate": 3.864060612715494e-06,
|
2530 |
+
"loss": 0.0001,
|
2531 |
+
"step": 8950
|
2532 |
+
},
|
2533 |
+
{
|
2534 |
+
"epoch": 2.660836050993181,
|
2535 |
+
"grad_norm": 0.0027355581987649202,
|
2536 |
+
"learning_rate": 3.781706379707917e-06,
|
2537 |
+
"loss": 0.0001,
|
2538 |
+
"step": 8975
|
2539 |
+
},
|
2540 |
+
{
|
2541 |
+
"epoch": 2.6682478505781204,
|
2542 |
+
"grad_norm": 0.0014947542222216725,
|
2543 |
+
"learning_rate": 3.6993521467003403e-06,
|
2544 |
+
"loss": 0.0,
|
2545 |
+
"step": 9000
|
2546 |
+
},
|
2547 |
+
{
|
2548 |
+
"epoch": 2.67565965016306,
|
2549 |
+
"grad_norm": 0.004945519380271435,
|
2550 |
+
"learning_rate": 3.616997913692764e-06,
|
2551 |
+
"loss": 0.0004,
|
2552 |
+
"step": 9025
|
2553 |
+
},
|
2554 |
+
{
|
2555 |
+
"epoch": 2.683071449747999,
|
2556 |
+
"grad_norm": 0.0699424147605896,
|
2557 |
+
"learning_rate": 3.534643680685187e-06,
|
2558 |
+
"loss": 0.0002,
|
2559 |
+
"step": 9050
|
2560 |
+
},
|
2561 |
+
{
|
2562 |
+
"epoch": 2.690483249332938,
|
2563 |
+
"grad_norm": 0.02918228507041931,
|
2564 |
+
"learning_rate": 3.4522894476776106e-06,
|
2565 |
+
"loss": 0.0001,
|
2566 |
+
"step": 9075
|
2567 |
+
},
|
2568 |
+
{
|
2569 |
+
"epoch": 2.6978950489178772,
|
2570 |
+
"grad_norm": 0.0005474461358971894,
|
2571 |
+
"learning_rate": 3.369935214670034e-06,
|
2572 |
+
"loss": 0.0001,
|
2573 |
+
"step": 9100
|
2574 |
+
},
|
2575 |
+
{
|
2576 |
+
"epoch": 2.7053068485028167,
|
2577 |
+
"grad_norm": 0.005172941833734512,
|
2578 |
+
"learning_rate": 3.2875809816624577e-06,
|
2579 |
+
"loss": 0.0,
|
2580 |
+
"step": 9125
|
2581 |
+
},
|
2582 |
+
{
|
2583 |
+
"epoch": 2.7127186480877556,
|
2584 |
+
"grad_norm": 0.0016668111784383655,
|
2585 |
+
"learning_rate": 3.205226748654881e-06,
|
2586 |
+
"loss": 0.0002,
|
2587 |
+
"step": 9150
|
2588 |
+
},
|
2589 |
+
{
|
2590 |
+
"epoch": 2.720130447672695,
|
2591 |
+
"grad_norm": 0.009217623621225357,
|
2592 |
+
"learning_rate": 3.1228725156473044e-06,
|
2593 |
+
"loss": 0.0014,
|
2594 |
+
"step": 9175
|
2595 |
+
},
|
2596 |
+
{
|
2597 |
+
"epoch": 2.727542247257634,
|
2598 |
+
"grad_norm": 0.020931560546159744,
|
2599 |
+
"learning_rate": 3.040518282639728e-06,
|
2600 |
+
"loss": 0.0003,
|
2601 |
+
"step": 9200
|
2602 |
+
},
|
2603 |
+
{
|
2604 |
+
"epoch": 2.7349540468425735,
|
2605 |
+
"grad_norm": 0.0015090492088347673,
|
2606 |
+
"learning_rate": 2.958164049632151e-06,
|
2607 |
+
"loss": 0.0006,
|
2608 |
+
"step": 9225
|
2609 |
+
},
|
2610 |
+
{
|
2611 |
+
"epoch": 2.7423658464275125,
|
2612 |
+
"grad_norm": 0.0008311509736813605,
|
2613 |
+
"learning_rate": 2.8758098166245747e-06,
|
2614 |
+
"loss": 0.0002,
|
2615 |
+
"step": 9250
|
2616 |
+
},
|
2617 |
+
{
|
2618 |
+
"epoch": 2.749777646012452,
|
2619 |
+
"grad_norm": 0.002105722902342677,
|
2620 |
+
"learning_rate": 2.7934555836169982e-06,
|
2621 |
+
"loss": 0.001,
|
2622 |
+
"step": 9275
|
2623 |
+
},
|
2624 |
+
{
|
2625 |
+
"epoch": 2.7571894455973913,
|
2626 |
+
"grad_norm": 0.002964325714856386,
|
2627 |
+
"learning_rate": 2.7111013506094214e-06,
|
2628 |
+
"loss": 0.0002,
|
2629 |
+
"step": 9300
|
2630 |
+
},
|
2631 |
+
{
|
2632 |
+
"epoch": 2.7646012451823303,
|
2633 |
+
"grad_norm": 0.03895282372832298,
|
2634 |
+
"learning_rate": 2.628747117601845e-06,
|
2635 |
+
"loss": 0.0002,
|
2636 |
+
"step": 9325
|
2637 |
+
},
|
2638 |
+
{
|
2639 |
+
"epoch": 2.7720130447672693,
|
2640 |
+
"grad_norm": 0.0005878718220628798,
|
2641 |
+
"learning_rate": 2.5463928845942685e-06,
|
2642 |
+
"loss": 0.0021,
|
2643 |
+
"step": 9350
|
2644 |
+
},
|
2645 |
+
{
|
2646 |
+
"epoch": 2.7794248443522087,
|
2647 |
+
"grad_norm": 0.0044658612459897995,
|
2648 |
+
"learning_rate": 2.4640386515866916e-06,
|
2649 |
+
"loss": 0.0001,
|
2650 |
+
"step": 9375
|
2651 |
+
},
|
2652 |
+
{
|
2653 |
+
"epoch": 2.786836643937148,
|
2654 |
+
"grad_norm": 0.002798914909362793,
|
2655 |
+
"learning_rate": 2.3816844185791147e-06,
|
2656 |
+
"loss": 0.0033,
|
2657 |
+
"step": 9400
|
2658 |
+
},
|
2659 |
+
{
|
2660 |
+
"epoch": 2.794248443522087,
|
2661 |
+
"grad_norm": 0.008348217234015465,
|
2662 |
+
"learning_rate": 2.2993301855715383e-06,
|
2663 |
+
"loss": 0.0,
|
2664 |
+
"step": 9425
|
2665 |
+
},
|
2666 |
+
{
|
2667 |
+
"epoch": 2.8016602431070265,
|
2668 |
+
"grad_norm": 1.2814829349517822,
|
2669 |
+
"learning_rate": 2.216975952563962e-06,
|
2670 |
+
"loss": 0.0005,
|
2671 |
+
"step": 9450
|
2672 |
+
},
|
2673 |
+
{
|
2674 |
+
"epoch": 2.8090720426919655,
|
2675 |
+
"grad_norm": 0.0006835550884716213,
|
2676 |
+
"learning_rate": 2.134621719556385e-06,
|
2677 |
+
"loss": 0.0002,
|
2678 |
+
"step": 9475
|
2679 |
+
},
|
2680 |
+
{
|
2681 |
+
"epoch": 2.816483842276905,
|
2682 |
+
"grad_norm": 0.0062784478068351746,
|
2683 |
+
"learning_rate": 2.0522674865488086e-06,
|
2684 |
+
"loss": 0.0001,
|
2685 |
+
"step": 9500
|
2686 |
+
},
|
2687 |
+
{
|
2688 |
+
"epoch": 2.823895641861844,
|
2689 |
+
"grad_norm": 0.0010312970262020826,
|
2690 |
+
"learning_rate": 1.969913253541232e-06,
|
2691 |
+
"loss": 0.0002,
|
2692 |
+
"step": 9525
|
2693 |
+
},
|
2694 |
+
{
|
2695 |
+
"epoch": 2.8313074414467834,
|
2696 |
+
"grad_norm": 0.0003074634587392211,
|
2697 |
+
"learning_rate": 1.8875590205336553e-06,
|
2698 |
+
"loss": 0.0006,
|
2699 |
+
"step": 9550
|
2700 |
+
},
|
2701 |
+
{
|
2702 |
+
"epoch": 2.8387192410317224,
|
2703 |
+
"grad_norm": 0.012108029797673225,
|
2704 |
+
"learning_rate": 1.8052047875260788e-06,
|
2705 |
+
"loss": 0.0002,
|
2706 |
+
"step": 9575
|
2707 |
+
},
|
2708 |
+
{
|
2709 |
+
"epoch": 2.846131040616662,
|
2710 |
+
"grad_norm": 0.001735036144964397,
|
2711 |
+
"learning_rate": 1.7228505545185024e-06,
|
2712 |
+
"loss": 0.0001,
|
2713 |
+
"step": 9600
|
2714 |
+
},
|
2715 |
+
{
|
2716 |
+
"epoch": 2.8535428402016008,
|
2717 |
+
"grad_norm": 0.00708107789978385,
|
2718 |
+
"learning_rate": 1.6404963215109257e-06,
|
2719 |
+
"loss": 0.0,
|
2720 |
+
"step": 9625
|
2721 |
+
},
|
2722 |
+
{
|
2723 |
+
"epoch": 2.86095463978654,
|
2724 |
+
"grad_norm": 0.12342353910207748,
|
2725 |
+
"learning_rate": 1.5581420885033493e-06,
|
2726 |
+
"loss": 0.0001,
|
2727 |
+
"step": 9650
|
2728 |
+
},
|
2729 |
+
{
|
2730 |
+
"epoch": 2.8683664393714796,
|
2731 |
+
"grad_norm": 0.013508424162864685,
|
2732 |
+
"learning_rate": 1.4757878554957726e-06,
|
2733 |
+
"loss": 0.0003,
|
2734 |
+
"step": 9675
|
2735 |
+
},
|
2736 |
+
{
|
2737 |
+
"epoch": 2.8757782389564186,
|
2738 |
+
"grad_norm": 0.010019199922680855,
|
2739 |
+
"learning_rate": 1.3934336224881958e-06,
|
2740 |
+
"loss": 0.0011,
|
2741 |
+
"step": 9700
|
2742 |
+
},
|
2743 |
+
{
|
2744 |
+
"epoch": 2.8831900385413576,
|
2745 |
+
"grad_norm": 0.0018716283375397325,
|
2746 |
+
"learning_rate": 1.3110793894806193e-06,
|
2747 |
+
"loss": 0.0024,
|
2748 |
+
"step": 9725
|
2749 |
+
},
|
2750 |
+
{
|
2751 |
+
"epoch": 2.890601838126297,
|
2752 |
+
"grad_norm": 0.002642314415425062,
|
2753 |
+
"learning_rate": 1.2287251564730427e-06,
|
2754 |
+
"loss": 0.0002,
|
2755 |
+
"step": 9750
|
2756 |
+
},
|
2757 |
+
{
|
2758 |
+
"epoch": 2.8980136377112364,
|
2759 |
+
"grad_norm": 0.17313824594020844,
|
2760 |
+
"learning_rate": 1.1463709234654662e-06,
|
2761 |
+
"loss": 0.0001,
|
2762 |
+
"step": 9775
|
2763 |
+
},
|
2764 |
+
{
|
2765 |
+
"epoch": 2.9054254372961754,
|
2766 |
+
"grad_norm": 0.0017125840531662107,
|
2767 |
+
"learning_rate": 1.0640166904578896e-06,
|
2768 |
+
"loss": 0.0001,
|
2769 |
+
"step": 9800
|
2770 |
+
},
|
2771 |
+
{
|
2772 |
+
"epoch": 2.912837236881115,
|
2773 |
+
"grad_norm": 0.018445823341608047,
|
2774 |
+
"learning_rate": 9.81662457450313e-07,
|
2775 |
+
"loss": 0.0002,
|
2776 |
+
"step": 9825
|
2777 |
+
},
|
2778 |
+
{
|
2779 |
+
"epoch": 2.920249036466054,
|
2780 |
+
"grad_norm": 0.0010156502248719335,
|
2781 |
+
"learning_rate": 8.993082244427364e-07,
|
2782 |
+
"loss": 0.0001,
|
2783 |
+
"step": 9850
|
2784 |
+
},
|
2785 |
+
{
|
2786 |
+
"epoch": 2.9276608360509933,
|
2787 |
+
"grad_norm": 0.014282972551882267,
|
2788 |
+
"learning_rate": 8.169539914351597e-07,
|
2789 |
+
"loss": 0.0013,
|
2790 |
+
"step": 9875
|
2791 |
+
},
|
2792 |
+
{
|
2793 |
+
"epoch": 2.9350726356359322,
|
2794 |
+
"grad_norm": 0.0004315798287279904,
|
2795 |
+
"learning_rate": 7.345997584275832e-07,
|
2796 |
+
"loss": 0.0,
|
2797 |
+
"step": 9900
|
2798 |
+
},
|
2799 |
+
{
|
2800 |
+
"epoch": 2.9424844352208717,
|
2801 |
+
"grad_norm": 0.002567910123616457,
|
2802 |
+
"learning_rate": 6.522455254200066e-07,
|
2803 |
+
"loss": 0.0002,
|
2804 |
+
"step": 9925
|
2805 |
+
},
|
2806 |
+
{
|
2807 |
+
"epoch": 2.949896234805811,
|
2808 |
+
"grad_norm": 0.003978910855948925,
|
2809 |
+
"learning_rate": 5.6989129241243e-07,
|
2810 |
+
"loss": 0.0,
|
2811 |
+
"step": 9950
|
2812 |
+
},
|
2813 |
+
{
|
2814 |
+
"epoch": 2.95730803439075,
|
2815 |
+
"grad_norm": 0.009646277874708176,
|
2816 |
+
"learning_rate": 4.875370594048534e-07,
|
2817 |
+
"loss": 0.0,
|
2818 |
+
"step": 9975
|
2819 |
+
},
|
2820 |
+
{
|
2821 |
+
"epoch": 2.964719833975689,
|
2822 |
+
"grad_norm": 0.000439764786278829,
|
2823 |
+
"learning_rate": 4.0518282639727684e-07,
|
2824 |
+
"loss": 0.0002,
|
2825 |
+
"step": 10000
|
2826 |
+
},
|
2827 |
+
{
|
2828 |
+
"epoch": 2.9721316335606285,
|
2829 |
+
"grad_norm": 0.020473280921578407,
|
2830 |
+
"learning_rate": 3.2282859338970024e-07,
|
2831 |
+
"loss": 0.0001,
|
2832 |
+
"step": 10025
|
2833 |
+
},
|
2834 |
+
{
|
2835 |
+
"epoch": 2.979543433145568,
|
2836 |
+
"grad_norm": 0.0054059335961937904,
|
2837 |
+
"learning_rate": 2.4047436038212364e-07,
|
2838 |
+
"loss": 0.0001,
|
2839 |
+
"step": 10050
|
2840 |
+
},
|
2841 |
+
{
|
2842 |
+
"epoch": 2.986955232730507,
|
2843 |
+
"grad_norm": 0.0017266918439418077,
|
2844 |
+
"learning_rate": 1.5812012737454706e-07,
|
2845 |
+
"loss": 0.0,
|
2846 |
+
"step": 10075
|
2847 |
+
},
|
2848 |
+
{
|
2849 |
+
"epoch": 2.9943670323154463,
|
2850 |
+
"grad_norm": 0.026864001527428627,
|
2851 |
+
"learning_rate": 7.576589436697046e-08,
|
2852 |
+
"loss": 0.0002,
|
2853 |
+
"step": 10100
|
2854 |
+
},
|
2855 |
+
{
|
2856 |
+
"epoch": 3.0,
|
2857 |
+
"eval_loss": 0.0011504614958539605,
|
2858 |
+
"eval_runtime": 12.7367,
|
2859 |
+
"eval_samples_per_second": 111.254,
|
2860 |
+
"eval_steps_per_second": 6.988,
|
2861 |
+
"step": 10119
|
2862 |
+
}
|
2863 |
+
],
|
2864 |
+
"logging_steps": 25,
|
2865 |
+
"max_steps": 10119,
|
2866 |
+
"num_input_tokens_seen": 0,
|
2867 |
+
"num_train_epochs": 3,
|
2868 |
+
"save_steps": 500,
|
2869 |
+
"stateful_callbacks": {
|
2870 |
+
"EarlyStoppingCallback": {
|
2871 |
+
"args": {
|
2872 |
+
"early_stopping_patience": 5,
|
2873 |
+
"early_stopping_threshold": 0.01
|
2874 |
+
},
|
2875 |
+
"attributes": {
|
2876 |
+
"early_stopping_patience_counter": 0
|
2877 |
+
}
|
2878 |
+
},
|
2879 |
+
"TrainerControl": {
|
2880 |
+
"args": {
|
2881 |
+
"should_epoch_stop": false,
|
2882 |
+
"should_evaluate": false,
|
2883 |
+
"should_log": false,
|
2884 |
+
"should_save": true,
|
2885 |
+
"should_training_stop": true
|
2886 |
+
},
|
2887 |
+
"attributes": {}
|
2888 |
+
}
|
2889 |
+
},
|
2890 |
+
"total_flos": 0.0,
|
2891 |
+
"train_batch_size": 8,
|
2892 |
+
"trial_name": null,
|
2893 |
+
"trial_params": null
|
2894 |
+
}
|
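The log history above is plain JSON, so the loss and learning-rate trajectory can be pulled out with the standard library alone. A minimal sketch, assuming the checkpoint has been downloaded locally so that `checkpoint-10119/trainer_state.json` exists at the path shown in this commit:

```python
import json

# Assumes a local clone/download of this repo; the path mirrors the file tree above.
with open("checkpoint-10119/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; the end-of-epoch entry carries "eval_loss" instead.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

for e in train_logs[-3:]:
    print(f'step {e["step"]:>6}  lr {e["learning_rate"]:.3e}  loss {e["loss"]:.4f}')
for e in eval_logs:
    print(f'epoch {e["epoch"]}  eval_loss {e["eval_loss"]:.6f}')
```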
checkpoint-10119/training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f97648ab2ccbb7cbb3c9b98d0d6657cf08efa67a7046b1c06f57fd80342c95a1
+size 5496
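`training_args.bin`, like the other binary files in this commit, is checked in as a Git LFS pointer: three `key value` text lines giving the spec version, the SHA-256 of the real blob, and its size in bytes. A small sketch of parsing such a pointer (assuming you are looking at the pointer text rather than the resolved binary; requires Python 3.9+ for `str.removeprefix`):

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse the three 'key value' lines of a Git LFS pointer file."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {
        "version": fields["version"],
        "sha256": fields["oid"].removeprefix("sha256:"),
        "size_bytes": int(fields["size"]),
    }

pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:f97648ab2ccbb7cbb3c9b98d0d6657cf08efa67a7046b1c06f57fd80342c95a1
size 5496"""
print(parse_lfs_pointer(pointer))  # {'version': ..., 'sha256': 'f97648...', 'size_bytes': 5496}
```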
checkpoint-10119/vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.
config.json
ADDED
@@ -0,0 +1,58 @@
+{
+  "_name_or_path": "nomic-ai/nomic-embed-text-v1.5",
+  "activation_function": "swiglu",
+  "architectures": [
+    "NomicBertModel"
+  ],
+  "attn_pdrop": 0.0,
+  "auto_map": {
+    "AutoConfig": "nomic-ai/nomic-bert-2048--configuration_hf_nomic_bert.NomicBertConfig",
+    "AutoModel": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertModel",
+    "AutoModelForMaskedLM": "nomic-ai/nomic-bert-2048--modeling_hf_nomic_bert.NomicBertForPreTraining"
+  },
+  "bos_token_id": null,
+  "causal": false,
+  "dense_seq_output": true,
+  "embd_pdrop": 0.0,
+  "eos_token_id": null,
+  "fused_bias_fc": true,
+  "fused_dropout_add_ln": true,
+  "initializer_range": 0.02,
+  "layer_norm_epsilon": 1e-12,
+  "max_trained_positions": 2048,
+  "mlp_fc1_bias": false,
+  "mlp_fc2_bias": false,
+  "model_type": "nomic_bert",
+  "n_embd": 768,
+  "n_head": 12,
+  "n_inner": 3072,
+  "n_layer": 12,
+  "n_positions": 8192,
+  "pad_vocab_size_multiple": 64,
+  "parallel_block": false,
+  "parallel_block_tied_norm": false,
+  "prenorm": false,
+  "qkv_proj_bias": false,
+  "reorder_and_upcast_attn": false,
+  "resid_pdrop": 0.0,
+  "rotary_emb_base": 1000,
+  "rotary_emb_fraction": 1.0,
+  "rotary_emb_interleaved": false,
+  "rotary_emb_scale_base": null,
+  "rotary_scaling_factor": null,
+  "scale_attn_by_inverse_layer_idx": false,
+  "scale_attn_weights": true,
+  "summary_activation": null,
+  "summary_first_dropout": 0.0,
+  "summary_proj_to_labels": true,
+  "summary_type": "cls_index",
+  "summary_use_proj": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "use_flash_attn": true,
+  "use_rms_norm": false,
+  "use_xentropy": true,
+  "vocab_size": 30528
+}
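`config.json` routes `auto_map` to remote modeling code in `nomic-ai/nomic-bert-2048`, so loading the backbone through `transformers` requires `trust_remote_code=True`. A minimal sketch, assuming network access to the Hub and that you accept executing that remote code:

```python
from transformers import AutoConfig, AutoModel

# auto_map above dispatches these calls to nomic-bert-2048's custom
# NomicBertConfig / NomicBertModel classes, hence trust_remote_code=True.
config = AutoConfig.from_pretrained("nomic-ai/nomic-embed-text-v1.5",
                                    trust_remote_code=True)
print(config.n_layer, config.n_head, config.n_embd)  # 12 12 768

model = AutoModel.from_pretrained("nomic-ai/nomic-embed-text-v1.5",
                                  trust_remote_code=True)
```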
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
+{
+  "__version__": {
+    "sentence_transformers": "3.0.1",
+    "transformers": "4.44.1",
+    "pytorch": "2.3.0"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": null
+}
model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:759cbec716931ff76cdbd4a7871383f735a315ed6d81bcd06b39a1b086f4ffaf
+size 546938168
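The weights ship as a single ~547 MB `model.safetensors`; with float32 weights (per `torch_dtype` in `config.json` above) that works out to roughly 137 M parameters. A sketch of verifying that without instantiating the model, assuming the `safetensors` package and a local copy of the resolved file (not the LFS pointer):

```python
from safetensors import safe_open

# Iterate over tensor names in the file and sum element counts.
with safe_open("model.safetensors", framework="pt") as f:
    n_params = sum(f.get_tensor(name).numel() for name in f.keys())

print(f"{n_params / 1e6:.1f}M parameters")  # roughly 137M for this NomicBert config
```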
modules.json
ADDED
@@ -0,0 +1,14 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  }
+]
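`modules.json` declares the two-stage SentenceTransformer pipeline: module `0` is the raw Transformer backbone, and module `1` is the pooling layer whose mean-pooling configuration lives under `1_Pooling/`. A sketch of what that pooling stage computes, assuming `token_embeddings` and `attention_mask` tensors from the backbone's forward pass:

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Mean pooling as configured: average token embeddings, ignoring padding."""
    mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)  # (B, L, 1)
    summed = (token_embeddings * mask).sum(dim=1)                   # (B, H)
    counts = mask.sum(dim=1).clamp(min=1e-9)                        # (B, 1)
    return summed / counts

# Toy shapes: batch of 2, 4 tokens, hidden size 768 as in config.json above.
emb = torch.randn(2, 4, 768)
mask = torch.tensor([[1, 1, 1, 0], [1, 1, 0, 0]])
print(mean_pool(emb, mask).shape)  # torch.Size([2, 768])
```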
runs/Sep03_17-43-39_r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc/events.out.tfevents.1725385422.r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc.91.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:1d819e0eebd57384fd3a64934131684193f2456209109239fbe0cea4195876d8
+size 90699
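The two `events.out.tfevents.*` files are TensorBoard logs (the run used `"log": "tensorboard"`, per `training_params.json` below). They can be read programmatically; a sketch using TensorBoard's event-accumulator API, assuming the `runs/` directory has been downloaded and the `tensorboard` package is installed:

```python
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at the run directory holding the tfevents files.
acc = EventAccumulator("runs/Sep03_17-43-39_r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc")
acc.Reload()
print(acc.Tags()["scalars"])      # available scalar tags, e.g. a train loss series
# for event in acc.Scalars("train/loss"):  # tag name depends on the logger
#     print(event.step, event.value)
```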
runs/Sep03_17-43-39_r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc/events.out.tfevents.1725388175.r-olivernormand-playground-c5ew0tu8-cbc63-h8ipc.91.1
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2df92524d9adf5173926ab0a09123736f738fdf0127fcf1d73fff1dca6169626
+size 359
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
+{
+  "max_seq_length": 8192,
+  "do_lower_case": false
+}
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,55 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 8192,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}
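`tokenizer_config.json` pins a standard lower-casing `BertTokenizer` with an 8192-token `model_max_length` and the special-token IDs listed in `added_tokens_decoder`. A quick sketch of the resulting behaviour, assuming the tokenizer files are available locally or on the Hub:

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("nomic-ai/nomic-embed-text-v1.5")
ids = tok("search_query: Example Query")["input_ids"]
print(tok.convert_ids_to_tokens(ids))
# Roughly: ['[CLS]', 'search', '_', 'query', ':', 'example', 'query', '[SEP]']
# lower-cased per do_lower_case, wrapped in [CLS] (id 101) / [SEP] (id 102)
```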
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f97648ab2ccbb7cbb3c9b98d0d6657cf08efa67a7046b1c06f57fd80342c95a1
+size 5496
training_params.json
ADDED
@@ -0,0 +1,33 @@
+{
+  "data_path": "autotrain-7flvh-khn72/autotrain-data",
+  "model": "nomic-ai/nomic-embed-text-v1.5",
+  "lr": 3e-05,
+  "epochs": 3,
+  "max_seq_length": 128,
+  "batch_size": 8,
+  "warmup_ratio": 0.1,
+  "gradient_accumulation": 1,
+  "optimizer": "adamw_torch",
+  "scheduler": "linear",
+  "weight_decay": 0.0,
+  "max_grad_norm": 1.0,
+  "seed": 42,
+  "train_split": "train",
+  "valid_split": "validation",
+  "logging_steps": -1,
+  "project_name": "autotrain-7flvh-khn72",
+  "auto_find_batch_size": false,
+  "mixed_precision": "fp16",
+  "save_total_limit": 1,
+  "push_to_hub": true,
+  "eval_strategy": "epoch",
+  "username": "olivernormand",
+  "log": "tensorboard",
+  "early_stopping_patience": 5,
+  "early_stopping_threshold": 0.01,
+  "trainer": "pair",
+  "sentence1_column": "autotrain_sentence1",
+  "sentence2_column": "autotrain_sentence2",
+  "sentence3_column": "autotrain_sentence3",
+  "target_column": "autotrain_target"
+}
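Together with `trainer_state.json` above, these parameters pin down the shape of the run: `max_steps` 10119 over 3 epochs at batch size 8 implies a training split of roughly 27 k pairs, and `warmup_ratio` 0.1 with the linear scheduler puts the learning-rate peak near step 1012, after which the logged learning rates decay linearly toward zero. A back-of-the-envelope check, with the caveat that the exact warmup rounding is the trainer's, not ours:

```python
import math

max_steps = 10119      # trainer_state.json: "max_steps"
epochs = 3             # "epochs" above
batch_size = 8         # "batch_size" above
warmup_ratio = 0.1     # "warmup_ratio" above

steps_per_epoch = max_steps // epochs               # 3373
approx_pairs = steps_per_epoch * batch_size         # ~26,984 training pairs
warmup_steps = math.ceil(warmup_ratio * max_steps)  # ~1012
print(steps_per_epoch, approx_pairs, warmup_steps)
```

Note also that fine-tuning truncated inputs at `max_seq_length` 128, well below the 8192-token window advertised in `sentence_bert_config.json` above.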
vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.