Training in progress, step 500
Browse files- model.safetensors +1 -1
- test.ipynb +89 -0
- tokenizer.json +2 -2
- training_args.bin +1 -1
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1109845500
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33c2cc08a43997f63157156d4f38cf5f5b59ba2f092671cc61b27571000a547c
|
3 |
size 1109845500
|
test.ipynb
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "code",
|
5 |
+
"id": "initial_id",
|
6 |
+
"metadata": {
|
7 |
+
"collapsed": true,
|
8 |
+
"ExecuteTime": {
|
9 |
+
"end_time": "2024-09-24T11:11:31.348040Z",
|
10 |
+
"start_time": "2024-09-24T11:11:28.329888Z"
|
11 |
+
}
|
12 |
+
},
|
13 |
+
"source": [
|
14 |
+
"import os\n",
|
15 |
+
"import re\n",
|
16 |
+
"\n",
|
17 |
+
"# Function to capitalize the first letter of each sentence\n",
|
18 |
+
"def capitalize_sentences(text):\n",
|
19 |
+
" # Use regular expression to match sentence boundaries\n",
|
20 |
+
" return re.sub(r'(?<!\\w)([.!?]\\s+|^)(\\w)', lambda m: m.group(0).upper(), text)\n",
|
21 |
+
"\n",
|
22 |
+
"folder_path = \"/home/vahan/Downloads/data_news_preprocessed/data_news_preprocessed\" # Change this to your folder path\n",
|
23 |
+
"# Replace with the path to your folder\n",
|
24 |
+
"\n",
|
25 |
+
"# Loop through each file in the folder\n",
|
26 |
+
"for filename in os.listdir(folder_path):\n",
|
27 |
+
" if filename.endswith('.txt'):\n",
|
28 |
+
" file_path = os.path.join(folder_path, filename)\n",
|
29 |
+
"\n",
|
30 |
+
" # Open and read the content of the file\n",
|
31 |
+
" with open(file_path, 'r', encoding='utf-8') as file:\n",
|
32 |
+
" content = file.read()\n",
|
33 |
+
"\n",
|
34 |
+
" # Capitalize the first letter of each sentence\n",
|
35 |
+
" updated_content = capitalize_sentences(content)\n",
|
36 |
+
"\n",
|
37 |
+
" # Write the updated content back to the file\n",
|
38 |
+
" with open(file_path, 'w', encoding='utf-8') as file:\n",
|
39 |
+
" file.write(updated_content)\n",
|
40 |
+
"\n",
|
41 |
+
"print(\"All text files have been updated.\")\n",
|
42 |
+
"\n",
|
43 |
+
"\n",
|
44 |
+
"# Specify the folder containing txt files\n",
|
45 |
+
"folder_path = \"/home/vahan/Downloads/data_news_preprocessed/data_news_preprocessed\" # Change this to your folder path\n",
|
46 |
+
"\n"
|
47 |
+
],
|
48 |
+
"outputs": [
|
49 |
+
{
|
50 |
+
"name": "stdout",
|
51 |
+
"output_type": "stream",
|
52 |
+
"text": [
|
53 |
+
"All text files have been updated.\n"
|
54 |
+
]
|
55 |
+
}
|
56 |
+
],
|
57 |
+
"execution_count": 2
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"metadata": {},
|
61 |
+
"cell_type": "code",
|
62 |
+
"outputs": [],
|
63 |
+
"execution_count": null,
|
64 |
+
"source": "",
|
65 |
+
"id": "fe6f29ac4735fa3e"
|
66 |
+
}
|
67 |
+
],
|
68 |
+
"metadata": {
|
69 |
+
"kernelspec": {
|
70 |
+
"display_name": "Python 3",
|
71 |
+
"language": "python",
|
72 |
+
"name": "python3"
|
73 |
+
},
|
74 |
+
"language_info": {
|
75 |
+
"codemirror_mode": {
|
76 |
+
"name": "ipython",
|
77 |
+
"version": 2
|
78 |
+
},
|
79 |
+
"file_extension": ".py",
|
80 |
+
"mimetype": "text/x-python",
|
81 |
+
"name": "python",
|
82 |
+
"nbconvert_exporter": "python",
|
83 |
+
"pygments_lexer": "ipython2",
|
84 |
+
"version": "2.7.6"
|
85 |
+
}
|
86 |
+
},
|
87 |
+
"nbformat": 4,
|
88 |
+
"nbformat_minor": 5
|
89 |
+
}
|
tokenizer.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8373f9cd3d27591e1924426bcc1c8799bc5a9affc4fc857982c5d66668dd1f41
|
3 |
+
size 17082832
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5176
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9d480d2f17b5d1db32bad3f73705690368568b8ddf0ea590ce0ed2eefdfa965
|
3 |
size 5176
|