diff --git "a/Capstone_1_SubjQATransformer.ipynb" "b/Capstone_1_SubjQATransformer.ipynb" new file mode 100644--- /dev/null +++ "b/Capstone_1_SubjQATransformer.ipynb" @@ -0,0 +1,5368 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "3238b837634a47db899e9151534dbde8": { + "model_module": "@jupyter-widgets/controls", + "model_name": "VBoxModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "VBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "VBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_017feac2ec2c414a9d9fed06a0236450", + "IPY_MODEL_f33a070eaa7a461799076726260c2810", + "IPY_MODEL_d6b917dc13614bb5b98e352ba7af0ded", + "IPY_MODEL_ef71f921a16541b0b5e29f6c0cddd1da" + ], + "layout": "IPY_MODEL_f28d5164aedb452db1c9e649b564ca06" + } + }, + "b0e8e35e9046496aaa5a7d2b1edf1958": { + "model_module": "@jupyter-widgets/controls", + "model_name": "HTMLModel", + "model_module_version": "1.5.0", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_9a00148607964bc6ab902316c6a94b3d", + "placeholder": "", + "style": "IPY_MODEL_59f55019601f4a898ec0f094686916ff", + "value": "
\n", + " | item_id | \n", + "domain | \n", + "nn_mod | \n", + "nn_asp | \n", + "query_mod | \n", + "query_asp | \n", + "q_review_id | \n", + "q_reviews_id | \n", + "question | \n", + "question_subj_level | \n", + "ques_subj_score | \n", + "is_ques_subjective | \n", + "review_id | \n", + "review | \n", + "human_ans_spans | \n", + "human_ans_indices | \n", + "answer_subj_level | \n", + "ans_subj_score | \n", + "is_ans_subjective | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "B00BVMXBDO | \n", + "movies | \n", + "addictive | \n", + "show | \n", + "full | \n", + "series | \n", + "d9a9615d45df2f6e6108db4ca46bfded | \n", + "399f1046fe6bd97990107f9d7aa86f4a | \n", + "Who is the author of this series? | \n", + "1 | \n", + "0.0 | \n", + "False | \n", + "090671369dddfeb02db9bf7125a47c79 | \n", + "Whether it be in her portrayal of a nerdy lesb... | \n", + "ANSWERNOTFOUND | \n", + "(251, 265) | \n", + "1 | \n", + "0.000 | \n", + "False | \n", + "
1 | \n", + "1404918051 | \n", + "movies | \n", + "enough simple | \n", + "film | \n", + "charming | \n", + "movie | \n", + "06ffe37a8023636a3ce00b020a517e87 | \n", + "42d9dd5b0c67150cac1e13308811cbb5 | \n", + "Can we enjoy the movie along with our family ? | \n", + "1 | \n", + "0.5 | \n", + "False | \n", + "a29821121e74d319cb93f77101e99c88 | \n", + "An outstanding romantic comedy, 13 Going on 30... | \n", + "ANSWERNOTFOUND | \n", + "(1195, 1209) | \n", + "1 | \n", + "0.000 | \n", + "False | \n", + "
2 | \n", + "B0000633ZP | \n", + "movies | \n", + "weak | \n", + "plot | \n", + "bad | \n", + "one | \n", + "3b625c68e91b9e6987a08b84a9a9d234 | \n", + "32d06ccf2132cda644aea791fa688c53 | \n", + "Does this one good? | \n", + "5 | \n", + "0.6 | \n", + "True | \n", + "12a1b821f761bd19a75be7b16cef4a7c | \n", + "To let the truth be known, I watched this movi... | \n", + "ANSWERNOTFOUND | \n", + "(1476, 1490) | \n", + "5 | \n", + "0.000 | \n", + "False | \n", + "
3 | \n", + "B0000AQS0F | \n", + "movies | \n", + "outstanding | \n", + "show | \n", + "wonderful | \n", + "series | \n", + "f3abfa98b011127e7cb49bcd07f8deeb | \n", + "e546636f0bb9f93d5f24b4ade9ebab45 | \n", + "Is this series good and excelent? | \n", + "1 | \n", + "0.6 | \n", + "True | \n", + "cd0f92322e67cc9d70de6674caace78c | \n", + "At the time of my review, there had been 910 c... | \n", + "this show is OUTSTANDING | \n", + "(296, 320) | \n", + "1 | \n", + "0.875 | \n", + "True | \n", + "
4 | \n", + "B003Y5H5FG | \n", + "movies | \n", + "great | \n", + "production design | \n", + "great | \n", + "costume design | \n", + "1b03744e764b257592c2c768345c14bc | \n", + "a0a97e460a194bcb3286fe68d20aadc2 | \n", + "How is the costume design? | \n", + "1 | \n", + "0.0 | \n", + "False | \n", + "f6b5024393ebc70287befdaf47a50b75 | \n", + "\"Fright Night\" is great! This is how the story... | \n", + "The costume design by Susan Matheson is great | \n", + "(1254, 1299) | \n", + "1 | \n", + "0.750 | \n", + "True | \n", + "
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -> None
\n", + " \n", + " Print a concise summary of a DataFrame.\n", + "\n", + "This method prints information about a DataFrame including\n", + "the index dtype and columns, non-null values and memory usage.\n", + "\n", + "Parameters\n", + "----------\n", + "verbose : bool, optional\n", + " Whether to print the full summary. By default, the setting in\n", + " ``pandas.options.display.max_info_columns`` is followed.\n", + "buf : writable buffer, defaults to sys.stdout\n", + " Where to send the output. By default, the output is printed to\n", + " sys.stdout. Pass a writable buffer if you need to further process\n", + " the output. max_cols : int, optional\n", + " When to switch from the verbose to the truncated output. If the\n", + " DataFrame has more than `max_cols` columns, the truncated output\n", + " is used. By default, the setting in\n", + " ``pandas.options.display.max_info_columns`` is used.\n", + "memory_usage : bool, str, optional\n", + " Specifies whether total memory usage of the DataFrame\n", + " elements (including the index) should be displayed. By default,\n", + " this follows the ``pandas.options.display.memory_usage`` setting.\n", + "\n", + " True always show memory usage. False never shows memory usage.\n", + " A value of 'deep' is equivalent to "True with deep introspection".\n", + " Memory usage is shown in human-readable units (base-2\n", + " representation). Without deep introspection a memory estimation is\n", + " made based in column dtype and number of rows assuming values\n", + " consume the same memory amount for corresponding dtypes. With deep\n", + " memory introspection, a real memory usage calculation is performed\n", + " at the cost of computational resources. See the\n", + " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", + " details.\n", + "show_counts : bool, optional\n", + " Whether to show the non-null counts. By default, this is shown\n", + " only if the DataFrame is smaller than\n", + " ``pandas.options.display.max_info_rows`` and\n", + " ``pandas.options.display.max_info_columns``. A value of True always\n", + " shows the counts, and False never shows the counts.\n", + "null_counts : bool, optional\n", + " .. deprecated:: 1.2.0\n", + " Use show_counts instead.\n", + "\n", + "Returns\n", + "-------\n", + "None\n", + " This method prints a summary of a DataFrame and returns None.\n", + "\n", + "See Also\n", + "--------\n", + "DataFrame.describe: Generate descriptive statistics of DataFrame\n", + " columns.\n", + "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", + "\n", + "Examples\n", + "--------\n", + ">>> int_values = [1, 2, 3, 4, 5]\n", + ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", + ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", + ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", + "... "float_col": float_values})\n", + ">>> df\n", + " int_col text_col float_col\n", + "0 1 alpha 0.00\n", + "1 2 beta 0.25\n", + "2 3 gamma 0.50\n", + "3 4 delta 0.75\n", + "4 5 epsilon 1.00\n", + "\n", + "Prints information of all columns:\n", + "\n", + ">>> df.info(verbose=True)\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 int_col 5 non-null int64\n", + " 1 text_col 5 non-null object\n", + " 2 float_col 5 non-null float64\n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n", + "\n", + "Prints a summary of columns count and its dtypes but not per column\n", + "information:\n", + "\n", + ">>> df.info(verbose=False)\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Columns: 3 entries, int_col to float_col\n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n", + "\n", + "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", + "buffer content and writes to a text file:\n", + "\n", + ">>> import io\n", + ">>> buffer = io.StringIO()\n", + ">>> df.info(buf=buffer)\n", + ">>> s = buffer.getvalue()\n", + ">>> with open("df_info.txt", "w",\n", + "... encoding="utf-8") as f: # doctest: +SKIP\n", + "... f.write(s)\n", + "260\n", + "\n", + "The `memory_usage` parameter allows deep introspection mode, specially\n", + "useful for big DataFrames and fine-tune memory optimization:\n", + "\n", + ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", + ">>> df = pd.DataFrame({\n", + "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", + "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", + "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", + "... })\n", + ">>> df.info()\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 1000000 entries, 0 to 999999\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 column_1 1000000 non-null object\n", + " 1 column_2 1000000 non-null object\n", + " 2 column_3 1000000 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 22.9+ MB\n", + "\n", + ">>> df.info(memory_usage='deep')\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 1000000 entries, 0 to 999999\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 column_1 1000000 non-null object\n", + " 1 column_2 1000000 non-null object\n", + " 2 column_3 1000000 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 165.9 MB
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -> None
\n", + " \n", + " Print a concise summary of a DataFrame.\n", + "\n", + "This method prints information about a DataFrame including\n", + "the index dtype and columns, non-null values and memory usage.\n", + "\n", + "Parameters\n", + "----------\n", + "verbose : bool, optional\n", + " Whether to print the full summary. By default, the setting in\n", + " ``pandas.options.display.max_info_columns`` is followed.\n", + "buf : writable buffer, defaults to sys.stdout\n", + " Where to send the output. By default, the output is printed to\n", + " sys.stdout. Pass a writable buffer if you need to further process\n", + " the output. max_cols : int, optional\n", + " When to switch from the verbose to the truncated output. If the\n", + " DataFrame has more than `max_cols` columns, the truncated output\n", + " is used. By default, the setting in\n", + " ``pandas.options.display.max_info_columns`` is used.\n", + "memory_usage : bool, str, optional\n", + " Specifies whether total memory usage of the DataFrame\n", + " elements (including the index) should be displayed. By default,\n", + " this follows the ``pandas.options.display.memory_usage`` setting.\n", + "\n", + " True always show memory usage. False never shows memory usage.\n", + " A value of 'deep' is equivalent to "True with deep introspection".\n", + " Memory usage is shown in human-readable units (base-2\n", + " representation). Without deep introspection a memory estimation is\n", + " made based in column dtype and number of rows assuming values\n", + " consume the same memory amount for corresponding dtypes. With deep\n", + " memory introspection, a real memory usage calculation is performed\n", + " at the cost of computational resources. See the\n", + " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", + " details.\n", + "show_counts : bool, optional\n", + " Whether to show the non-null counts. By default, this is shown\n", + " only if the DataFrame is smaller than\n", + " ``pandas.options.display.max_info_rows`` and\n", + " ``pandas.options.display.max_info_columns``. A value of True always\n", + " shows the counts, and False never shows the counts.\n", + "null_counts : bool, optional\n", + " .. deprecated:: 1.2.0\n", + " Use show_counts instead.\n", + "\n", + "Returns\n", + "-------\n", + "None\n", + " This method prints a summary of a DataFrame and returns None.\n", + "\n", + "See Also\n", + "--------\n", + "DataFrame.describe: Generate descriptive statistics of DataFrame\n", + " columns.\n", + "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", + "\n", + "Examples\n", + "--------\n", + ">>> int_values = [1, 2, 3, 4, 5]\n", + ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", + ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", + ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", + "... "float_col": float_values})\n", + ">>> df\n", + " int_col text_col float_col\n", + "0 1 alpha 0.00\n", + "1 2 beta 0.25\n", + "2 3 gamma 0.50\n", + "3 4 delta 0.75\n", + "4 5 epsilon 1.00\n", + "\n", + "Prints information of all columns:\n", + "\n", + ">>> df.info(verbose=True)\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 int_col 5 non-null int64\n", + " 1 text_col 5 non-null object\n", + " 2 float_col 5 non-null float64\n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n", + "\n", + "Prints a summary of columns count and its dtypes but not per column\n", + "information:\n", + "\n", + ">>> df.info(verbose=False)\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Columns: 3 entries, int_col to float_col\n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n", + "\n", + "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", + "buffer content and writes to a text file:\n", + "\n", + ">>> import io\n", + ">>> buffer = io.StringIO()\n", + ">>> df.info(buf=buffer)\n", + ">>> s = buffer.getvalue()\n", + ">>> with open("df_info.txt", "w",\n", + "... encoding="utf-8") as f: # doctest: +SKIP\n", + "... f.write(s)\n", + "260\n", + "\n", + "The `memory_usage` parameter allows deep introspection mode, specially\n", + "useful for big DataFrames and fine-tune memory optimization:\n", + "\n", + ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", + ">>> df = pd.DataFrame({\n", + "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", + "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", + "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", + "... })\n", + ">>> df.info()\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 1000000 entries, 0 to 999999\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 column_1 1000000 non-null object\n", + " 1 column_2 1000000 non-null object\n", + " 2 column_3 1000000 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 22.9+ MB\n", + "\n", + ">>> df.info(memory_usage='deep')\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 1000000 entries, 0 to 999999\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 column_1 1000000 non-null object\n", + " 1 column_2 1000000 non-null object\n", + " 2 column_3 1000000 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 165.9 MB
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -> None
\n", + " \n", + " Print a concise summary of a DataFrame.\n", + "\n", + "This method prints information about a DataFrame including\n", + "the index dtype and columns, non-null values and memory usage.\n", + "\n", + "Parameters\n", + "----------\n", + "verbose : bool, optional\n", + " Whether to print the full summary. By default, the setting in\n", + " ``pandas.options.display.max_info_columns`` is followed.\n", + "buf : writable buffer, defaults to sys.stdout\n", + " Where to send the output. By default, the output is printed to\n", + " sys.stdout. Pass a writable buffer if you need to further process\n", + " the output. max_cols : int, optional\n", + " When to switch from the verbose to the truncated output. If the\n", + " DataFrame has more than `max_cols` columns, the truncated output\n", + " is used. By default, the setting in\n", + " ``pandas.options.display.max_info_columns`` is used.\n", + "memory_usage : bool, str, optional\n", + " Specifies whether total memory usage of the DataFrame\n", + " elements (including the index) should be displayed. By default,\n", + " this follows the ``pandas.options.display.memory_usage`` setting.\n", + "\n", + " True always show memory usage. False never shows memory usage.\n", + " A value of 'deep' is equivalent to "True with deep introspection".\n", + " Memory usage is shown in human-readable units (base-2\n", + " representation). Without deep introspection a memory estimation is\n", + " made based in column dtype and number of rows assuming values\n", + " consume the same memory amount for corresponding dtypes. With deep\n", + " memory introspection, a real memory usage calculation is performed\n", + " at the cost of computational resources. See the\n", + " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", + " details.\n", + "show_counts : bool, optional\n", + " Whether to show the non-null counts. By default, this is shown\n", + " only if the DataFrame is smaller than\n", + " ``pandas.options.display.max_info_rows`` and\n", + " ``pandas.options.display.max_info_columns``. A value of True always\n", + " shows the counts, and False never shows the counts.\n", + "null_counts : bool, optional\n", + " .. deprecated:: 1.2.0\n", + " Use show_counts instead.\n", + "\n", + "Returns\n", + "-------\n", + "None\n", + " This method prints a summary of a DataFrame and returns None.\n", + "\n", + "See Also\n", + "--------\n", + "DataFrame.describe: Generate descriptive statistics of DataFrame\n", + " columns.\n", + "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", + "\n", + "Examples\n", + "--------\n", + ">>> int_values = [1, 2, 3, 4, 5]\n", + ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", + ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", + ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", + "... "float_col": float_values})\n", + ">>> df\n", + " int_col text_col float_col\n", + "0 1 alpha 0.00\n", + "1 2 beta 0.25\n", + "2 3 gamma 0.50\n", + "3 4 delta 0.75\n", + "4 5 epsilon 1.00\n", + "\n", + "Prints information of all columns:\n", + "\n", + ">>> df.info(verbose=True)\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 int_col 5 non-null int64\n", + " 1 text_col 5 non-null object\n", + " 2 float_col 5 non-null float64\n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n", + "\n", + "Prints a summary of columns count and its dtypes but not per column\n", + "information:\n", + "\n", + ">>> df.info(verbose=False)\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 5 entries, 0 to 4\n", + "Columns: 3 entries, int_col to float_col\n", + "dtypes: float64(1), int64(1), object(1)\n", + "memory usage: 248.0+ bytes\n", + "\n", + "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", + "buffer content and writes to a text file:\n", + "\n", + ">>> import io\n", + ">>> buffer = io.StringIO()\n", + ">>> df.info(buf=buffer)\n", + ">>> s = buffer.getvalue()\n", + ">>> with open("df_info.txt", "w",\n", + "... encoding="utf-8") as f: # doctest: +SKIP\n", + "... f.write(s)\n", + "260\n", + "\n", + "The `memory_usage` parameter allows deep introspection mode, specially\n", + "useful for big DataFrames and fine-tune memory optimization:\n", + "\n", + ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", + ">>> df = pd.DataFrame({\n", + "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", + "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", + "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", + "... })\n", + ">>> df.info()\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 1000000 entries, 0 to 999999\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 column_1 1000000 non-null object\n", + " 1 column_2 1000000 non-null object\n", + " 2 column_3 1000000 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 22.9+ MB\n", + "\n", + ">>> df.info(memory_usage='deep')\n", + "<class 'pandas.core.frame.DataFrame'>\n", + "RangeIndex: 1000000 entries, 0 to 999999\n", + "Data columns (total 3 columns):\n", + " # Column Non-Null Count Dtype\n", + "--- ------ -------------- -----\n", + " 0 column_1 1000000 non-null object\n", + " 1 column_2 1000000 non-null object\n", + " 2 column_3 1000000 non-null object\n", + "dtypes: object(3)\n", + "memory usage: 165.9 MB