Rolv-Arild
committed on
Commit
•
ef3bfb7
1
Parent(s):
fa98048
Training in progress, step 250
Browse files
- .run_speech_recognition_ctc.py.swp +0 -0
- added_tokens.json +1 -1
- config.json +2 -2
- pytorch_model.bin +2 -2
- run.sh +5 -5
- run_speech_recognition_ctc.py +1 -1
- runs/Jan30_14-45-48_ficino/events.out.tfevents.1643550375.ficino.242588.0 +2 -2
- runs/Jan31_10-51-54_ficino/1643622827.5155172/events.out.tfevents.1643622827.ficino.268894.1 +3 -0
- runs/Jan31_10-51-54_ficino/events.out.tfevents.1643622827.ficino.268894.0 +3 -0
- runs/Jan31_11-10-54_ficino/1643623929.218647/events.out.tfevents.1643623929.ficino.269307.1 +3 -0
- runs/Jan31_11-10-54_ficino/events.out.tfevents.1643623929.ficino.269307.0 +3 -0
- training_args.bin +1 -1
- vocab.json +1 -1
.run_speech_recognition_ctc.py.swp
DELETED
Binary file (1.02 kB)
|
|
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"<s>":
|
|
|
1 |
+
{"<s>": 32, "</s>": 33}
|
config.json
CHANGED
@@ -76,7 +76,7 @@
|
|
76 |
"num_hidden_layers": 24,
|
77 |
"num_negatives": 100,
|
78 |
"output_hidden_size": 1024,
|
79 |
-
"pad_token_id":
|
80 |
"proj_codevector_dim": 768,
|
81 |
"tdnn_dilation": [
|
82 |
1,
|
@@ -102,6 +102,6 @@
|
|
102 |
"torch_dtype": "float32",
|
103 |
"transformers_version": "4.17.0.dev0",
|
104 |
"use_weighted_layer_sum": false,
|
105 |
-
"vocab_size":
|
106 |
"xvector_output_dim": 512
|
107 |
}
|
|
|
76 |
"num_hidden_layers": 24,
|
77 |
"num_negatives": 100,
|
78 |
"output_hidden_size": 1024,
|
79 |
+
"pad_token_id": 31,
|
80 |
"proj_codevector_dim": 768,
|
81 |
"tdnn_dilation": [
|
82 |
1,
|
|
|
102 |
"torch_dtype": "float32",
|
103 |
"transformers_version": "4.17.0.dev0",
|
104 |
"use_weighted_layer_sum": false,
|
105 |
+
"vocab_size": 34,
|
106 |
"xvector_output_dim": 512
|
107 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69a40f7466b01425d4ab0b61aab89e537e0e8d41f960628770413ded8e104ebc
|
3 |
+
size 1262063089
|
run.sh
CHANGED
@@ -8,14 +8,14 @@ python run_speech_recognition_ctc.py \
|
|
8 |
--per_device_train_batch_size="16" \
|
9 |
--per_device_eval_batch_size="16" \
|
10 |
--gradient_accumulation_steps="4" \
|
11 |
-
--learning_rate="
|
12 |
-
--warmup_steps="
|
13 |
--length_column_name="input_length" \
|
14 |
--evaluation_strategy="steps" \
|
15 |
--text_column_name="text" \
|
16 |
-
--save_steps="
|
17 |
-
--eval_steps="
|
18 |
-
--logging_steps="
|
19 |
--layerdrop="0.0" \
|
20 |
--activation_dropout="0.1" \
|
21 |
--save_total_limit="3" \
|
|
|
8 |
--per_device_train_batch_size="16" \
|
9 |
--per_device_eval_batch_size="16" \
|
10 |
--gradient_accumulation_steps="4" \
|
11 |
+
--learning_rate="3e-5" \
|
12 |
+
--warmup_steps="500" \
|
13 |
--length_column_name="input_length" \
|
14 |
--evaluation_strategy="steps" \
|
15 |
--text_column_name="text" \
|
16 |
+
--save_steps="250" \
|
17 |
+
--eval_steps="250" \
|
18 |
+
--logging_steps="50" \
|
19 |
--layerdrop="0.0" \
|
20 |
--activation_dropout="0.1" \
|
21 |
--save_total_limit="3" \
|
run_speech_recognition_ctc.py
CHANGED
@@ -407,7 +407,7 @@ def main():
|
|
407 |
batch["text"] = re.sub('[ç]', 'c', batch["text"])
|
408 |
batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
|
409 |
batch["text"] = re.sub('\s', ' ', batch["text"])
|
410 |
-
|
411 |
batch["text"] = re.sub('<ee>', 'eee', batch["text"])
|
412 |
batch["text"] = re.sub('<qq>', 'qqq', batch["text"])
|
413 |
batch["text"] = re.sub('<mm>', 'mmm', batch["text"])
|
|
|
407 |
batch["text"] = re.sub('[ç]', 'c', batch["text"])
|
408 |
batch["text"] = re.sub('[úùüû]', 'u', batch["text"])
|
409 |
batch["text"] = re.sub('\s', ' ', batch["text"])
|
410 |
+
batch["text"] = re.sub(r'\\', '', batch["text"])
|
411 |
batch["text"] = re.sub('<ee>', 'eee', batch["text"])
|
412 |
batch["text"] = re.sub('<qq>', 'qqq', batch["text"])
|
413 |
batch["text"] = re.sub('<mm>', 'mmm', batch["text"])
|
runs/Jan30_14-45-48_ficino/events.out.tfevents.1643550375.ficino.242588.0
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65756a9e9f52a0bf8493cc453294a44feec30d15e24753277bad09167cef1b35
|
3 |
+
size 32377
|
runs/Jan31_10-51-54_ficino/1643622827.5155172/events.out.tfevents.1643622827.ficino.268894.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7502f1871641581204198fcad8ed3b4934654742670ef51be72521c585188815
|
3 |
+
size 4719
|
runs/Jan31_10-51-54_ficino/events.out.tfevents.1643622827.ficino.268894.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7633de4c7836b94d3d45c69df74ad0d38dcdaeb9d2de5122f6294358ad5aa3ff
|
3 |
+
size 4800
|
runs/Jan31_11-10-54_ficino/1643623929.218647/events.out.tfevents.1643623929.ficino.269307.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:544f6abd094e98beabe308d011fd44ddffbdcfe54f3429df6b259f6a9c9c165d
|
3 |
+
size 4719
|
runs/Jan31_11-10-54_ficino/events.out.tfevents.1643623929.ficino.269307.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b13e891e3bd563eeddfa27476cf83f9370cb72c19d304183812ef827d260ea18
|
3 |
+
size 5743
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2991
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc6cbb10c3c21f7e4c00ecbefa42f06369abcd99e39d4879ec6f4e6804ee755e
|
3 |
size 2991
|
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "q": 17, "r": 18, "s": 19, "t": 20, "u": 21, "v": 22, "w": 23, "x": 24, "y": 25, "z": 26, "å": 27, "æ": 28, "ø": 29, "|": 0, "[UNK]": 30, "[PAD]": 31}
|