{ "cells": [ { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "#dependencies:\n", "import pandas as pd\n", "\n", "import torch\n", "from transformers import GPT2Tokenizer\n", "\n", "from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b8a22b8d60c0417eafbf554832398287", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Resolving data files: 0%| | 0/18 [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
upvote_ratiohistoryscore_Ascore_Bhuman_ref_Ahuman_ref_Blabelsscore_ratio
00.99In an interview right before receiving the 201...5254Currently wrapping up my PhD. There is a stark...It’s ironic to me that research has shown that...01.038462
10.95If any professor is reading this: please do no...517And when your teacher doesn't listen or pay at...I'm pretty strict on time, to the point where ...03.400000
20.95If any professor is reading this: please do no...57Profs can be oblivious? What’s new!This sounds like a problem with a specific pro...01.400000
30.95If any professor is reading this: please do no...75This sounds like a problem with a specific pro...And when your teacher doesn't listen or pay at...11.400000
40.95If any professor is reading this: please do no...67This would be totally unacceptable in my class...This sounds like a problem with a specific pro...01.166667
...........................
3487130.94Can I get in trouble for giving my neighbor hi...725Just put up a fence. Legally he isn't responsi...Whatever you do, don't cut his trees down.03.571429
3487140.94Can I get in trouble for giving my neighbor hi...225If OP pays someone to clean his yard, and then...Whatever you do, don't cut his trees down.012.500000
3487150.94Can I get in trouble for giving my neighbor hi...97My observation is that both of you are idiots...Are you Rand Paul's neighbor? https://www.gq....11.285714
3487160.94Can I get in trouble for giving my neighbor hi...97My observation is that both of you are idiots...Just put up a fence. Legally he isn't responsi...11.285714
3487170.94Can I get in trouble for giving my neighbor hi...72Capture his acts on camera. Collect and bag l...If OP pays someone to clean his yard, and then...13.500000
\n", "

348718 rows × 8 columns

\n", "" ], "text/plain": [ " upvote_ratio history \\\n", "0 0.99 In an interview right before receiving the 201... \n", "1 0.95 If any professor is reading this: please do no... \n", "2 0.95 If any professor is reading this: please do no... \n", "3 0.95 If any professor is reading this: please do no... \n", "4 0.95 If any professor is reading this: please do no... \n", "... ... ... \n", "348713 0.94 Can I get in trouble for giving my neighbor hi... \n", "348714 0.94 Can I get in trouble for giving my neighbor hi... \n", "348715 0.94 Can I get in trouble for giving my neighbor hi... \n", "348716 0.94 Can I get in trouble for giving my neighbor hi... \n", "348717 0.94 Can I get in trouble for giving my neighbor hi... \n", "\n", " score_A score_B human_ref_A \\\n", "0 52 54 Currently wrapping up my PhD. There is a stark... \n", "1 5 17 And when your teacher doesn't listen or pay at... \n", "2 5 7 Profs can be oblivious? What’s new! \n", "3 7 5 This sounds like a problem with a specific pro... \n", "4 6 7 This would be totally unacceptable in my class... \n", "... ... ... ... \n", "348713 7 25 Just put up a fence. Legally he isn't responsi... \n", "348714 2 25 If OP pays someone to clean his yard, and then... \n", "348715 9 7 My observation is that both of you are idiots... \n", "348716 9 7 My observation is that both of you are idiots... \n", "348717 7 2 Capture his acts on camera. Collect and bag l... \n", "\n", " human_ref_B labels score_ratio \n", "0 It’s ironic to me that research has shown that... 0 1.038462 \n", "1 I'm pretty strict on time, to the point where ... 0 3.400000 \n", "2 This sounds like a problem with a specific pro... 0 1.400000 \n", "3 And when your teacher doesn't listen or pay at... 1 1.400000 \n", "4 This sounds like a problem with a specific pro... 0 1.166667 \n", "... ... ... ... \n", "348713 Whatever you do, don't cut his trees down. 0 3.571429 \n", "348714 Whatever you do, don't cut his trees down. 
0 12.500000 \n", "348715 Are you Rand Paul's neighbor? https://www.gq.... 1 1.285714 \n", "348716 Just put up a fence. Legally he isn't responsi... 1 1.285714 \n", "348717 If OP pays someone to clean his yard, and then... 1 3.500000 \n", "\n", "[348718 rows x 8 columns]" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Disabled draft, kept for reference: tag each row 'long' or 'short' by the length of its `history` text.\n", "# df['response_length'] = df['history'].apply(len)\n", "# df['label'] = df['response_length'].apply(lambda x: 'long' if x > 100 else 'short')\n", "\n", "# DataFrame.drop returns a new frame and leaves `df` unchanged, so bind the result to a name;\n", "# otherwise the trimmed frame is only displayed and then lost.\n", "df_trimmed = df.drop(columns=['post_id', 'domain', 'c_root_id_A', 'c_root_id_B', 'created_at_utc_A', 'created_at_utc_B', 'seconds_difference'])\n", "df_trimmed" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/riddhib/.pyenv/versions/3.10.13/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. 
For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n" ] } ], "source": [ "# Load GPT-2 twice with a value head attached: `model` and `ref_model` come from the same checkpoint.\n", "# NOTE(review): presumably `model` is the policy to be trained with PPO and `ref_model` the frozen reference - confirm.\n", "model = AutoModelForCausalLMWithValueHead.from_pretrained(\"gpt2\")\n", "ref_model = AutoModelForCausalLMWithValueHead.from_pretrained(\"gpt2\")\n", "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", "# Pad with the end-of-sequence token.\n", "tokenizer.pad_token = tokenizer.eos_token" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "from trl_rlhf_data import runner, ScriptArguments\n", "import re\n", "from dataclasses import dataclass\n", "from typing import Dict, List, Optional\n", "\n", "from datasets import load_dataset\n", "from transformers import HfArgumentParser" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# runner() accepts no positional arguments: the earlier call runner(ScriptArguments) raised\n", "# TypeError (runner() takes 0 positional arguments but 1 was given), so call it without any.\n", "# NOTE(review): the signature of runner is not visible from this notebook - confirm in\n", "# trl_rlhf_data whether it needs keyword-only options or parses its own arguments.\n", "dataset = runner()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.13" } }, "nbformat": 4, "nbformat_minor": 2 }