Vahan123 committed on
Commit
89b0168
1 Parent(s): 5feecc2

Training in progress, step 500

Browse files
Files changed (4) hide show
  1. model.safetensors +1 -1
  2. test.ipynb +89 -0
  3. tokenizer.json +2 -2
  4. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1e4b3d17a2ed0ece19292df25e2b8be673333ff43fcb2f8a423bd7965f53d07
3
  size 1109845500
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33c2cc08a43997f63157156d4f38cf5f5b59ba2f092671cc61b27571000a547c
3
  size 1109845500
test.ipynb ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "id": "initial_id",
6
+ "metadata": {
7
+ "collapsed": true,
8
+ "ExecuteTime": {
9
+ "end_time": "2024-09-24T11:11:31.348040Z",
10
+ "start_time": "2024-09-24T11:11:28.329888Z"
11
+ }
12
+ },
13
+ "source": [
14
+ "import os\n",
15
+ "import re\n",
16
+ "\n",
17
+ "# Function to capitalize the first letter of each sentence\n",
18
+ "def capitalize_sentences(text):\n",
19
+ " # Use regular expression to match sentence boundaries\n",
20
+ " return re.sub(r'(?<!\\w)([.!?]\\s+|^)(\\w)', lambda m: m.group(0).upper(), text)\n",
21
+ "\n",
22
+ "folder_path = \"/home/vahan/Downloads/data_news_preprocessed/data_news_preprocessed\" # Change this to your folder path\n",
23
+ "# Replace with the path to your folder\n",
24
+ "\n",
25
+ "# Loop through each file in the folder\n",
26
+ "for filename in os.listdir(folder_path):\n",
27
+ " if filename.endswith('.txt'):\n",
28
+ " file_path = os.path.join(folder_path, filename)\n",
29
+ "\n",
30
+ " # Open and read the content of the file\n",
31
+ " with open(file_path, 'r', encoding='utf-8') as file:\n",
32
+ " content = file.read()\n",
33
+ "\n",
34
+ " # Capitalize the first letter of each sentence\n",
35
+ " updated_content = capitalize_sentences(content)\n",
36
+ "\n",
37
+ " # Write the updated content back to the file\n",
38
+ " with open(file_path, 'w', encoding='utf-8') as file:\n",
39
+ " file.write(updated_content)\n",
40
+ "\n",
41
+ "print(\"All text files have been updated.\")\n",
42
+ "\n",
43
+ "\n",
44
+ "# Specify the folder containing txt files\n",
45
+ "folder_path = \"/home/vahan/Downloads/data_news_preprocessed/data_news_preprocessed\" # Change this to your folder path\n",
46
+ "\n"
47
+ ],
48
+ "outputs": [
49
+ {
50
+ "name": "stdout",
51
+ "output_type": "stream",
52
+ "text": [
53
+ "All text files have been updated.\n"
54
+ ]
55
+ }
56
+ ],
57
+ "execution_count": 2
58
+ },
59
+ {
60
+ "metadata": {},
61
+ "cell_type": "code",
62
+ "outputs": [],
63
+ "execution_count": null,
64
+ "source": "",
65
+ "id": "fe6f29ac4735fa3e"
66
+ }
67
+ ],
68
+ "metadata": {
69
+ "kernelspec": {
70
+ "display_name": "Python 3",
71
+ "language": "python",
72
+ "name": "python3"
73
+ },
74
+ "language_info": {
75
+ "codemirror_mode": {
76
+ "name": "ipython",
77
+ "version": 2
78
+ },
79
+ "file_extension": ".py",
80
+ "mimetype": "text/x-python",
81
+ "name": "python",
82
+ "nbconvert_exporter": "python",
83
+ "pygments_lexer": "ipython2",
84
+ "version": "2.7.6"
85
+ }
86
+ },
87
+ "nbformat": 4,
88
+ "nbformat_minor": 5
89
+ }
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
3
- size 17082734
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8373f9cd3d27591e1924426bcc1c8799bc5a9affc4fc857982c5d66668dd1f41
3
+ size 17082832
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:42e6c0ce7652b3d9a5035d0919b7e64314ab114aa806fabfcceec954840793a7
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d480d2f17b5d1db32bad3f73705690368568b8ddf0ea590ce0ed2eefdfa965
3
  size 5176