Training in progress, step 500

Browse files

Files changed (4) hide show

model.safetensors +1 -1
test.ipynb +89 -0
tokenizer.json +2 -2
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1e4b3d17a2ed0ece19292df25e2b8be673333ff43fcb2f8a423bd7965f53d07
 size 1109845500

 version https://git-lfs.github.com/spec/v1
+oid sha256:33c2cc08a43997f63157156d4f38cf5f5b59ba2f092671cc61b27571000a547c
 size 1109845500

test.ipynb ADDED Viewed

	@@ -0,0 +1,89 @@

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "id": "initial_id",
+   "metadata": {
+    "collapsed": true,
+    "ExecuteTime": {
+     "end_time": "2024-09-24T11:11:31.348040Z",
+     "start_time": "2024-09-24T11:11:28.329888Z"
+    }
+   },
+   "source": [
+    "import os\n",
+    "import re\n",
+    "\n",
+    "# Function to capitalize the first letter of each sentence\n",
+    "def capitalize_sentences(text):\n",
+    "    \"\"\"Return ``text`` with the first letter of each sentence upper-cased.\n",
+    "\n",
+    "    A sentence start is either the very first character of ``text`` (when it\n",
+    "    is a word character) or the first word character that follows ``.``,\n",
+    "    ``!`` or ``?`` plus at least one whitespace character.\n",
+    "\n",
+    "    Args:\n",
+    "        text: The string to process.\n",
+    "\n",
+    "    Returns:\n",
+    "        A new string; everything except the sentence-initial letters is\n",
+    "        left untouched.\n",
+    "    \"\"\"\n",
+    "    # No lookbehind in front of the punctuation: a '(?<!\\w)' there rejects every\n",
+    "    # terminator that directly follows a word, i.e. almost every sentence end.\n",
+    "    # Group 1 (boundary) is kept verbatim; only group 2 (the letter) is upper-cased.\n",
+    "    return re.sub(\n",
+    "        r'(^|[.!?]\\s+)(\\w)',\n",
+    "        lambda m: m.group(1) + m.group(2).upper(),\n",
+    "        text,\n",
+    "    )\n",
+    "\n",
+    "# Folder holding the .txt files that are rewritten in place.\n",
+    "folder_path = \"/home/vahan/Downloads/data_news_preprocessed/data_news_preprocessed\"  # Change this to your folder path\n",
+    "\n",
+    "# Loop through each entry in the folder; anything that is not a .txt file is skipped\n",
+    "for filename in os.listdir(folder_path):\n",
+    "    if not filename.endswith('.txt'):\n",
+    "        continue\n",
+    "    file_path = os.path.join(folder_path, filename)\n",
+    "\n",
+    "    # Read the whole file as UTF-8 text\n",
+    "    with open(file_path, 'r', encoding='utf-8') as file:\n",
+    "        content = file.read()\n",
+    "\n",
+    "    # Capitalize the first letter of each sentence\n",
+    "    updated_content = capitalize_sentences(content)\n",
+    "\n",
+    "    # Opening with 'w' truncates the file, so only write back when the text\n",
+    "    # actually changed; untouched files are left exactly as they were.\n",
+    "    if updated_content != content:\n",
+    "        with open(file_path, 'w', encoding='utf-8') as file:\n",
+    "            file.write(updated_content)\n",
+    "\n",
+    "print(\"All text files have been updated.\")\n"
+   ],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "All text files have been updated.\n"
+     ]
+    }
+   ],
+   "execution_count": 2
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "",
+   "id": "fe6f29ac4735fa3e"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3a56def25aa40facc030ea8b0b87f3688e4b3c39eb8b45d5702b3a1300fe2a20
-size 17082734

 version https://git-lfs.github.com/spec/v1
+oid sha256:8373f9cd3d27591e1924426bcc1c8799bc5a9affc4fc857982c5d66668dd1f41
+size 17082832

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42e6c0ce7652b3d9a5035d0919b7e64314ab114aa806fabfcceec954840793a7
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:b9d480d2f17b5d1db32bad3f73705690368568b8ddf0ea590ce0ed2eefdfa965
 size 5176