Mihaiii committed on
Commit a739812 (1 parent: a87193b)

Upload 13 files

1_Pooling/config.json ADDED
{
  "word_embedding_dimension": 384,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": true,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": false,
  "include_prompt": true
}
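This pooling configuration selects plain mean pooling over the 384-dimensional token embeddings (CLS, max, weighted-mean and last-token modes are all disabled). As a rough illustration (a minimal sketch, not part of the uploaded files; the module name and call pattern assume a recent sentence-transformers release), it corresponds to a `sentence_transformers.models.Pooling` module like this:

```python
# Sketch only: how 1_Pooling/config.json maps onto a Pooling module.
import torch
from sentence_transformers import models

pooling = models.Pooling(
    word_embedding_dimension=384,
    pooling_mode_mean_tokens=True,  # mean pooling, as configured above
)

# Mean-pool a dummy batch of token embeddings (batch=2, seq_len=5, dim=384).
features = {
    "token_embeddings": torch.randn(2, 5, 384),
    "attention_mask": torch.ones(2, 5, dtype=torch.long),
}
out = pooling(features)
print(out["sentence_embedding"].shape)  # torch.Size([2, 384])
```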
README.md ADDED
---
library_name: sentence-transformers
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- transformers

---

# {MODEL_NAME}

This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for tasks like clustering or semantic search.

<!--- Describe your model here -->

## Usage (Sentence-Transformers)

Using this model is easy once you have [sentence-transformers](https://www.SBERT.net) installed:

```
pip install -U sentence-transformers
```

Then you can use the model like this:

```python
from sentence_transformers import SentenceTransformer
sentences = ["This is an example sentence", "Each sentence is converted"]

model = SentenceTransformer('{MODEL_NAME}')
embeddings = model.encode(sentences)
print(embeddings)
```
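For a sentence-similarity use case, the resulting embeddings can then be compared, for example with cosine similarity. This is a small illustrative addition (not from the original card) that relies on `sentence_transformers.util.cos_sim`:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('{MODEL_NAME}')
embeddings = model.encode(["This is an example sentence", "Each sentence is converted"])

# Cosine similarity between the two sentence embeddings
print(util.cos_sim(embeddings[0], embeddings[1]))
```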
## Usage (HuggingFace Transformers)

Without [sentence-transformers](https://www.SBERT.net), you can use the model like this: first, pass your input through the transformer model, then apply the right pooling operation on top of the contextualized word embeddings.

```python
from transformers import AutoTokenizer, AutoModel
import torch


# Mean pooling - take the attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)


# Sentences we want sentence embeddings for
sentences = ['This is an example sentence', 'Each sentence is converted']

# Load model from HuggingFace Hub
tokenizer = AutoTokenizer.from_pretrained('{MODEL_NAME}')
model = AutoModel.from_pretrained('{MODEL_NAME}')

# Tokenize sentences
encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')

# Compute token embeddings
with torch.no_grad():
    model_output = model(**encoded_input)

# Perform pooling. In this case, mean pooling.
sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

print("Sentence embeddings:")
print(sentence_embeddings)
```
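Since this model uses plain mean pooling with no extra normalization module, the two code paths should agree up to small numerical differences. A quick, illustrative sanity check (not from the original card; it continues the snippet above):

```python
import torch
from sentence_transformers import SentenceTransformer

# `sentence_embeddings` comes from the Transformers snippet above.
st_model = SentenceTransformer('{MODEL_NAME}')
st_embeddings = torch.tensor(
    st_model.encode(['This is an example sentence', 'Each sentence is converted'])
)

print(torch.allclose(st_embeddings, sentence_embeddings, atol=1e-5))  # expected: True
```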
## Evaluation Results

<!--- Describe how your model was evaluated -->

For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name={MODEL_NAME})


## Training
The model was trained with the parameters:

**DataLoader**:

`torch.utils.data.dataloader.DataLoader` of length 137553 with parameters:
```
{'batch_size': 64, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
```

**Loss**:

`sentence_transformers.losses.MSELoss.MSELoss`

Parameters of the `fit()` method:
```
{
    "epochs": 1,
    "evaluation_steps": 5000,
    "evaluator": "sentence_transformers.evaluation.SequentialEvaluator.SequentialEvaluator",
    "max_grad_norm": 1,
    "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
    "optimizer_params": {
        "eps": 1e-06,
        "lr": 0.0001
    },
    "scheduler": "WarmupLinear",
    "steps_per_epoch": null,
    "warmup_steps": 1000,
    "weight_decay": 0.01
}
```
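Taken together with the `MSELoss`, these parameters describe an embedding-distillation run. The sketch below is a hypothetical reconstruction of that setup using the classic sentence-transformers `fit()` API (version 2.2.x as recorded in config_sentence_transformers.json); the teacher model and the sentence corpus are placeholders, since neither is included in this commit:

```python
# Hypothetical reconstruction of the training setup described above.
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, InputExample, losses

student = SentenceTransformer("TaylorAI/gte-tiny")   # backbone named in config.json
teacher = SentenceTransformer("<teacher-model>")     # placeholder: teacher not part of the commit

corpus = ["example sentence 1", "example sentence 2"]  # placeholder corpus
train_examples = [
    InputExample(texts=[s], label=teacher.encode(s))   # teacher embedding as regression target
    for s in corpus
]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=64)
train_loss = losses.MSELoss(model=student)

student.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=1,
    warmup_steps=1000,
    evaluation_steps=5000,
    optimizer_params={"lr": 1e-4, "eps": 1e-6},
    weight_decay=0.01,
    max_grad_norm=1,
    scheduler="WarmupLinear",
)
```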
## Full Model Architecture
```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
```

## Citing & Authors

<!--- Describe where people can find more information -->
config.json ADDED
{
  "_name_or_path": "TaylorAI/gte-tiny",
  "architectures": [
    "BertModel"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 384,
  "initializer_range": 0.02,
  "intermediate_size": 1536,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "torch_dtype": "float32",
  "transformers_version": "4.39.3",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
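This config describes a 4-layer BERT backbone whose 384-wide hidden state is exactly what the pooling module above expects. As a small illustration (not part of the commit; it loads the base checkpoint named in `_name_or_path`):

```python
# Illustrative check: backbone hidden size matches the 384-dim pooling config.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("TaylorAI/gte-tiny")
print(config.model_type, config.num_hidden_layers, config.hidden_size)  # bert 4 384
assert config.hidden_size == 384
```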
config_sentence_transformers.json ADDED
{
  "__version__": {
    "sentence_transformers": "2.2.2",
    "transformers": "4.34.0",
    "pytorch": "2.0.1+cu118"
  },
  "prompts": {},
  "default_prompt_name": null
}
eval/mse_evaluation__results.csv ADDED
epoch,steps,MSE
0,5000,0.3714520949870348
0,10000,0.2896514721214771
0,15000,0.2547818934544921
0,20000,0.2361571416258812
0,25000,0.22217468358576298
0,30000,0.21193132270127535
0,35000,0.20398914348334074
0,40000,0.19615092314779758
0,45000,0.1920680166222155
0,50000,0.1865455531515181
0,55000,0.18320685485377908
0,60000,0.17893384210765362
0,65000,0.17589490162208676
0,70000,0.17232069512829185
0,75000,0.17010787269100547
0,80000,0.16879495233297348
0,85000,0.16547456616535783
0,90000,0.16345266485586762
0,95000,0.16111125005409122
0,100000,0.15953187830746174
0,105000,0.15859371051192284
0,110000,0.15645871171727777
0,115000,0.1554432325065136
0,120000,0.15427195467054844
0,125000,0.15326582361012697
0,130000,0.15227107796818018
0,135000,0.15172158600762486
0,-1,0.1515676500275731
eval/similarity_evaluation_sts-dev_results.csv ADDED
epoch,steps,cosine_pearson,cosine_spearman,euclidean_pearson,euclidean_spearman,manhattan_pearson,manhattan_spearman,dot_pearson,dot_spearman
0,5000,0.8529967485708335,0.8569314561462271,0.8577166497967803,0.8577977940599011,0.8566322923437525,0.8567334079668474,0.732106506118421,0.7274901370370027
0,10000,0.8570339422200822,0.8607591962077147,0.8615491771563923,0.8614868143960486,0.8606676740356645,0.8607253564512709,0.7430811494229096,0.7403112575696066
0,15000,0.85693145386498,0.8604363134223252,0.861443169622676,0.8609285621075196,0.8605198103549558,0.8599266941036979,0.749807823386899,0.74640510696535
0,20000,0.8572548809025391,0.8609120905659033,0.8614916188026426,0.8616359558341337,0.8604978866100457,0.8607469018398051,0.7499006962030211,0.7470575013174795
0,25000,0.8586005337778683,0.8619481428823176,0.862830513647908,0.8627031397217975,0.8619042940043502,0.8618378858123918,0.7474515494710098,0.7441266943629898
0,30000,0.8598611417471217,0.8626586494775316,0.8635225244048773,0.8633572800980257,0.8625353801847323,0.8624816806170656,0.7578415348062301,0.7553843622643686
0,35000,0.8599401905471226,0.862989364241445,0.8639590423041879,0.8637754590899872,0.862982229706849,0.8629706204562677,0.7521965082990318,0.7495420668656239
0,40000,0.8595887693374328,0.8627713558383334,0.8636878914216484,0.8635632197417432,0.8628197493287058,0.8629699310766079,0.7542878750454836,0.7530156620128997
0,45000,0.858815111214565,0.8622160352926816,0.8630412302856697,0.8628559228682168,0.8622122526214078,0.8621842739514861,0.7516044820739415,0.7494027129003479
0,50000,0.8591692293443106,0.8626720028124659,0.8636856189421704,0.8633673271357332,0.8628873908166421,0.8627424166849248,0.7517682473527867,0.7499709121346463
0,55000,0.8599231011287441,0.8629717638180847,0.8640072565887859,0.8638756139633755,0.8632278103470081,0.8633504036365096,0.7529712676503949,0.7507750821748368
0,60000,0.8600061470601383,0.8632667219617661,0.8638802964781517,0.8641460287407391,0.8629055786690929,0.8632943573344862,0.7567153464291951,0.7546862870501438
0,65000,0.8594735606520902,0.8629665349215968,0.8641143156747988,0.8639083801219839,0.8631510680497997,0.863166592121759,0.7511925285441974,0.7490839855882357
0,70000,0.8595644736281991,0.8629605900776313,0.8639180778631049,0.8636172285537442,0.8631040220568225,0.8631070867426152,0.7530389575635879,0.7513544733658849
0,75000,0.8590237203969155,0.8626500401638574,0.8633429756437757,0.863279435493683,0.8625049213056878,0.8626845892429333,0.7523925794252537,0.750830060860465
0,80000,0.8599369760095312,0.863066627639961,0.8642325309327606,0.8639599406557412,0.8633415678997264,0.8632086287500204,0.7522313280030851,0.7501891874195455
0,85000,0.8602686535569606,0.8636481308322942,0.8647274196744307,0.864492467713162,0.8640353877926166,0.8638898676537089,0.7552674537117355,0.7533815412689555
0,90000,0.8601860785121899,0.8634406259965246,0.8645443072696766,0.8642410974331506,0.8637995910835322,0.8638017003331554,0.7555013645436569,0.754138910789372
0,95000,0.8598979404829128,0.8631251241216111,0.8641833098370856,0.8638855473660336,0.8634420148625945,0.8633754196680082,0.7525492732723248,0.7512360718168141
0,100000,0.8602154737793726,0.8633154303027656,0.8642212049288928,0.8641583073405027,0.8634994101104654,0.8636863301468782,0.7536549654825723,0.7519325756349021
0,105000,0.860416424398357,0.8636345461813537,0.8646164466420421,0.8644228057434413,0.8637875187008419,0.8638777003855362,0.7533407147120194,0.7519276167780422
0,110000,0.8597471475617817,0.8630478864012983,0.8640193160737805,0.8637512732521366,0.8633857243389088,0.8634786706289426,0.7501362063768577,0.7484972220619358
0,115000,0.86060058319242,0.8637484839824034,0.8645957280919195,0.8643298643982453,0.8638993350708991,0.8639788649243099,0.7577072996536877,0.7562017425034627
0,120000,0.8600722214505151,0.8635456411936453,0.8643556855684674,0.8642583366474254,0.8636223168461076,0.8639433026221879,0.7521480812232234,0.7509521819411832
0,125000,0.859879180036456,0.8633341687498608,0.8641765769381247,0.8640498624259023,0.8634895646632745,0.8634976912268286,0.7548535401477029,0.7538702702539195
0,130000,0.8603000563754856,0.8638057865130517,0.86461961970605,0.8645731065587757,0.8639021403793129,0.8640831080004173,0.7541095213999719,0.7526320852357338
0,135000,0.8601450006819295,0.863479263156042,0.8643447846128551,0.864291360812989,0.863629030777422,0.8637706789594618,0.7537494563863082,0.7524298395445603
0,-1,0.8601785057284146,0.8635155735572102,0.8644453733732312,0.864338793230022,0.8637289931694778,0.8638641363192482,0.7533330470758138,0.7520381235293386
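To inspect these training curves, one option (an illustrative sketch, assuming pandas and matplotlib are available) is to plot the distillation MSE and the STS-dev Spearman correlation against training steps:

```python
# Illustrative sketch: plot the evaluation CSVs shipped in eval/.
import pandas as pd
import matplotlib.pyplot as plt

mse = pd.read_csv("eval/mse_evaluation__results.csv")
sts = pd.read_csv("eval/similarity_evaluation_sts-dev_results.csv")

# Drop the final rows with steps == -1 (end-of-training evaluation) for a clean x-axis.
mse, sts = mse[mse["steps"] > 0], sts[sts["steps"] > 0]

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
ax1.plot(mse["steps"], mse["MSE"])
ax1.set(title="Distillation MSE (lower is better)", xlabel="training steps", ylabel="MSE")
ax2.plot(sts["steps"], sts["cosine_spearman"])
ax2.set(title="STS-dev cosine Spearman (higher is better)", xlabel="training steps", ylabel="Spearman")
plt.tight_layout()
plt.show()
```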
model.safetensors ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:42f90b1885a8957f61f3ba4fd8844bbb90c44384cc59f421c4bf33066706afdd
size 76664936
modules.json ADDED
[
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  }
]
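modules.json declares a two-stage pipeline: a Transformer module stored at the repository root followed by the mean-pooling module in 1_Pooling. Roughly, and as an illustrative sketch rather than part of the commit (the model name is the card's `{MODEL_NAME}` placeholder), this corresponds to assembling the model by hand:

```python
# Illustrative sketch of the two modules listed in modules.json.
from sentence_transformers import SentenceTransformer, models

word_embedding_model = models.Transformer('{MODEL_NAME}', max_seq_length=512)
pooling_model = models.Pooling(
    word_embedding_model.get_word_embedding_dimension(),  # 384
    pooling_mode_mean_tokens=True,
)
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])
print(model)
```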
sentence_bert_config.json ADDED
{
  "max_seq_length": 512,
  "do_lower_case": false
}
special_tokens_map.json ADDED
{
  "additional_special_tokens": [
    "[PAD]",
    "[UNK]",
    "[CLS]",
    "[SEP]",
    "[MASK]"
  ],
  "cls_token": {
    "content": "[CLS]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "[MASK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "[PAD]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "[SEP]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "[UNK]",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
{
  "added_tokens_decoder": {
    "0": {
      "content": "[PAD]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "100": {
      "content": "[UNK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "101": {
      "content": "[CLS]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "102": {
      "content": "[SEP]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "103": {
      "content": "[MASK]",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "[PAD]",
    "[UNK]",
    "[CLS]",
    "[SEP]",
    "[MASK]"
  ],
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "mask_token": "[MASK]",
  "max_length": 128,
  "model_max_length": 512,
  "never_split": null,
  "pad_to_multiple_of": null,
  "pad_token": "[PAD]",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "[SEP]",
  "stride": 0,
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "[UNK]"
}
vocab.txt ADDED
The diff for this file is too large to render. See raw diff