{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "3238b837634a47db899e9151534dbde8": { "model_module": "@jupyter-widgets/controls", "model_name": "VBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "VBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "VBoxView", "box_style": "", "children": [ "IPY_MODEL_017feac2ec2c414a9d9fed06a0236450", "IPY_MODEL_f33a070eaa7a461799076726260c2810", "IPY_MODEL_d6b917dc13614bb5b98e352ba7af0ded", "IPY_MODEL_ef71f921a16541b0b5e29f6c0cddd1da" ], "layout": "IPY_MODEL_f28d5164aedb452db1c9e649b564ca06" } }, "b0e8e35e9046496aaa5a7d2b1edf1958": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9a00148607964bc6ab902316c6a94b3d", "placeholder": "", "style": "IPY_MODEL_59f55019601f4a898ec0f094686916ff", "value": "
\n", " | item_id | \n", "domain | \n", "nn_mod | \n", "nn_asp | \n", "query_mod | \n", "query_asp | \n", "q_review_id | \n", "q_reviews_id | \n", "question | \n", "question_subj_level | \n", "ques_subj_score | \n", "is_ques_subjective | \n", "review_id | \n", "review | \n", "human_ans_spans | \n", "human_ans_indices | \n", "answer_subj_level | \n", "ans_subj_score | \n", "is_ans_subjective | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "B00BVMXBDO | \n", "movies | \n", "addictive | \n", "show | \n", "full | \n", "series | \n", "d9a9615d45df2f6e6108db4ca46bfded | \n", "399f1046fe6bd97990107f9d7aa86f4a | \n", "Who is the author of this series? | \n", "1 | \n", "0.0 | \n", "False | \n", "090671369dddfeb02db9bf7125a47c79 | \n", "Whether it be in her portrayal of a nerdy lesb... | \n", "ANSWERNOTFOUND | \n", "(251, 265) | \n", "1 | \n", "0.000 | \n", "False | \n", "
1 | \n", "1404918051 | \n", "movies | \n", "enough simple | \n", "film | \n", "charming | \n", "movie | \n", "06ffe37a8023636a3ce00b020a517e87 | \n", "42d9dd5b0c67150cac1e13308811cbb5 | \n", "Can we enjoy the movie along with our family ? | \n", "1 | \n", "0.5 | \n", "False | \n", "a29821121e74d319cb93f77101e99c88 | \n", "An outstanding romantic comedy, 13 Going on 30... | \n", "ANSWERNOTFOUND | \n", "(1195, 1209) | \n", "1 | \n", "0.000 | \n", "False | \n", "
2 | \n", "B0000633ZP | \n", "movies | \n", "weak | \n", "plot | \n", "bad | \n", "one | \n", "3b625c68e91b9e6987a08b84a9a9d234 | \n", "32d06ccf2132cda644aea791fa688c53 | \n", "Does this one good? | \n", "5 | \n", "0.6 | \n", "True | \n", "12a1b821f761bd19a75be7b16cef4a7c | \n", "To let the truth be known, I watched this movi... | \n", "ANSWERNOTFOUND | \n", "(1476, 1490) | \n", "5 | \n", "0.000 | \n", "False | \n", "
3 | \n", "B0000AQS0F | \n", "movies | \n", "outstanding | \n", "show | \n", "wonderful | \n", "series | \n", "f3abfa98b011127e7cb49bcd07f8deeb | \n", "e546636f0bb9f93d5f24b4ade9ebab45 | \n", "Is this series good and excelent? | \n", "1 | \n", "0.6 | \n", "True | \n", "cd0f92322e67cc9d70de6674caace78c | \n", "At the time of my review, there had been 910 c... | \n", "this show is OUTSTANDING | \n", "(296, 320) | \n", "1 | \n", "0.875 | \n", "True | \n", "
4 | \n", "B003Y5H5FG | \n", "movies | \n", "great | \n", "production design | \n", "great | \n", "costume design | \n", "1b03744e764b257592c2c768345c14bc | \n", "a0a97e460a194bcb3286fe68d20aadc2 | \n", "How is the costume design? | \n", "1 | \n", "0.0 | \n", "False | \n", "f6b5024393ebc70287befdaf47a50b75 | \n", "\"Fright Night\" is great! This is how the story... | \n", "The costume design by Susan Matheson is great | \n", "(1254, 1299) | \n", "1 | \n", "0.750 | \n", "True | \n", "
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -> None
\n", " \n", " Print a concise summary of a DataFrame.\n", "\n", "This method prints information about a DataFrame including\n", "the index dtype and columns, non-null values and memory usage.\n", "\n", "Parameters\n", "----------\n", "verbose : bool, optional\n", " Whether to print the full summary. By default, the setting in\n", " ``pandas.options.display.max_info_columns`` is followed.\n", "buf : writable buffer, defaults to sys.stdout\n", " Where to send the output. By default, the output is printed to\n", " sys.stdout. Pass a writable buffer if you need to further process\n", " the output. max_cols : int, optional\n", " When to switch from the verbose to the truncated output. If the\n", " DataFrame has more than `max_cols` columns, the truncated output\n", " is used. By default, the setting in\n", " ``pandas.options.display.max_info_columns`` is used.\n", "memory_usage : bool, str, optional\n", " Specifies whether total memory usage of the DataFrame\n", " elements (including the index) should be displayed. By default,\n", " this follows the ``pandas.options.display.memory_usage`` setting.\n", "\n", " True always show memory usage. False never shows memory usage.\n", " A value of 'deep' is equivalent to "True with deep introspection".\n", " Memory usage is shown in human-readable units (base-2\n", " representation). Without deep introspection a memory estimation is\n", " made based in column dtype and number of rows assuming values\n", " consume the same memory amount for corresponding dtypes. With deep\n", " memory introspection, a real memory usage calculation is performed\n", " at the cost of computational resources. See the\n", " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", " details.\n", "show_counts : bool, optional\n", " Whether to show the non-null counts. By default, this is shown\n", " only if the DataFrame is smaller than\n", " ``pandas.options.display.max_info_rows`` and\n", " ``pandas.options.display.max_info_columns``. A value of True always\n", " shows the counts, and False never shows the counts.\n", "null_counts : bool, optional\n", " .. deprecated:: 1.2.0\n", " Use show_counts instead.\n", "\n", "Returns\n", "-------\n", "None\n", " This method prints a summary of a DataFrame and returns None.\n", "\n", "See Also\n", "--------\n", "DataFrame.describe: Generate descriptive statistics of DataFrame\n", " columns.\n", "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", "\n", "Examples\n", "--------\n", ">>> int_values = [1, 2, 3, 4, 5]\n", ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", "... "float_col": float_values})\n", ">>> df\n", " int_col text_col float_col\n", "0 1 alpha 0.00\n", "1 2 beta 0.25\n", "2 3 gamma 0.50\n", "3 4 delta 0.75\n", "4 5 epsilon 1.00\n", "\n", "Prints information of all columns:\n", "\n", ">>> df.info(verbose=True)\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 5 entries, 0 to 4\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 int_col 5 non-null int64\n", " 1 text_col 5 non-null object\n", " 2 float_col 5 non-null float64\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 248.0+ bytes\n", "\n", "Prints a summary of columns count and its dtypes but not per column\n", "information:\n", "\n", ">>> df.info(verbose=False)\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 5 entries, 0 to 4\n", "Columns: 3 entries, int_col to float_col\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 248.0+ bytes\n", "\n", "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", "buffer content and writes to a text file:\n", "\n", ">>> import io\n", ">>> buffer = io.StringIO()\n", ">>> df.info(buf=buffer)\n", ">>> s = buffer.getvalue()\n", ">>> with open("df_info.txt", "w",\n", "... encoding="utf-8") as f: # doctest: +SKIP\n", "... f.write(s)\n", "260\n", "\n", "The `memory_usage` parameter allows deep introspection mode, specially\n", "useful for big DataFrames and fine-tune memory optimization:\n", "\n", ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", ">>> df = pd.DataFrame({\n", "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", "... })\n", ">>> df.info()\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1000000 entries, 0 to 999999\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 column_1 1000000 non-null object\n", " 1 column_2 1000000 non-null object\n", " 2 column_3 1000000 non-null object\n", "dtypes: object(3)\n", "memory usage: 22.9+ MB\n", "\n", ">>> df.info(memory_usage='deep')\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1000000 entries, 0 to 999999\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 column_1 1000000 non-null object\n", " 1 column_2 1000000 non-null object\n", " 2 column_3 1000000 non-null object\n", "dtypes: object(3)\n", "memory usage: 165.9 MB
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -> None
\n", " \n", " Print a concise summary of a DataFrame.\n", "\n", "This method prints information about a DataFrame including\n", "the index dtype and columns, non-null values and memory usage.\n", "\n", "Parameters\n", "----------\n", "verbose : bool, optional\n", " Whether to print the full summary. By default, the setting in\n", " ``pandas.options.display.max_info_columns`` is followed.\n", "buf : writable buffer, defaults to sys.stdout\n", " Where to send the output. By default, the output is printed to\n", " sys.stdout. Pass a writable buffer if you need to further process\n", " the output. max_cols : int, optional\n", " When to switch from the verbose to the truncated output. If the\n", " DataFrame has more than `max_cols` columns, the truncated output\n", " is used. By default, the setting in\n", " ``pandas.options.display.max_info_columns`` is used.\n", "memory_usage : bool, str, optional\n", " Specifies whether total memory usage of the DataFrame\n", " elements (including the index) should be displayed. By default,\n", " this follows the ``pandas.options.display.memory_usage`` setting.\n", "\n", " True always show memory usage. False never shows memory usage.\n", " A value of 'deep' is equivalent to "True with deep introspection".\n", " Memory usage is shown in human-readable units (base-2\n", " representation). Without deep introspection a memory estimation is\n", " made based in column dtype and number of rows assuming values\n", " consume the same memory amount for corresponding dtypes. With deep\n", " memory introspection, a real memory usage calculation is performed\n", " at the cost of computational resources. See the\n", " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", " details.\n", "show_counts : bool, optional\n", " Whether to show the non-null counts. By default, this is shown\n", " only if the DataFrame is smaller than\n", " ``pandas.options.display.max_info_rows`` and\n", " ``pandas.options.display.max_info_columns``. A value of True always\n", " shows the counts, and False never shows the counts.\n", "null_counts : bool, optional\n", " .. deprecated:: 1.2.0\n", " Use show_counts instead.\n", "\n", "Returns\n", "-------\n", "None\n", " This method prints a summary of a DataFrame and returns None.\n", "\n", "See Also\n", "--------\n", "DataFrame.describe: Generate descriptive statistics of DataFrame\n", " columns.\n", "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", "\n", "Examples\n", "--------\n", ">>> int_values = [1, 2, 3, 4, 5]\n", ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", "... "float_col": float_values})\n", ">>> df\n", " int_col text_col float_col\n", "0 1 alpha 0.00\n", "1 2 beta 0.25\n", "2 3 gamma 0.50\n", "3 4 delta 0.75\n", "4 5 epsilon 1.00\n", "\n", "Prints information of all columns:\n", "\n", ">>> df.info(verbose=True)\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 5 entries, 0 to 4\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 int_col 5 non-null int64\n", " 1 text_col 5 non-null object\n", " 2 float_col 5 non-null float64\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 248.0+ bytes\n", "\n", "Prints a summary of columns count and its dtypes but not per column\n", "information:\n", "\n", ">>> df.info(verbose=False)\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 5 entries, 0 to 4\n", "Columns: 3 entries, int_col to float_col\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 248.0+ bytes\n", "\n", "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", "buffer content and writes to a text file:\n", "\n", ">>> import io\n", ">>> buffer = io.StringIO()\n", ">>> df.info(buf=buffer)\n", ">>> s = buffer.getvalue()\n", ">>> with open("df_info.txt", "w",\n", "... encoding="utf-8") as f: # doctest: +SKIP\n", "... f.write(s)\n", "260\n", "\n", "The `memory_usage` parameter allows deep introspection mode, specially\n", "useful for big DataFrames and fine-tune memory optimization:\n", "\n", ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", ">>> df = pd.DataFrame({\n", "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", "... })\n", ">>> df.info()\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1000000 entries, 0 to 999999\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 column_1 1000000 non-null object\n", " 1 column_2 1000000 non-null object\n", " 2 column_3 1000000 non-null object\n", "dtypes: object(3)\n", "memory usage: 22.9+ MB\n", "\n", ">>> df.info(memory_usage='deep')\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1000000 entries, 0 to 999999\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 column_1 1000000 non-null object\n", " 1 column_2 1000000 non-null object\n", " 2 column_3 1000000 non-null object\n", "dtypes: object(3)\n", "memory usage: 165.9 MB
pandas.core.frame.DataFrame.info
def info(verbose: bool | None=None, buf: WriteBuffer[str] | None=None, max_cols: int | None=None, memory_usage: bool | str | None=None, show_counts: bool | None=None, null_counts: bool | None=None) -> None
\n", " \n", " Print a concise summary of a DataFrame.\n", "\n", "This method prints information about a DataFrame including\n", "the index dtype and columns, non-null values and memory usage.\n", "\n", "Parameters\n", "----------\n", "verbose : bool, optional\n", " Whether to print the full summary. By default, the setting in\n", " ``pandas.options.display.max_info_columns`` is followed.\n", "buf : writable buffer, defaults to sys.stdout\n", " Where to send the output. By default, the output is printed to\n", " sys.stdout. Pass a writable buffer if you need to further process\n", " the output. max_cols : int, optional\n", " When to switch from the verbose to the truncated output. If the\n", " DataFrame has more than `max_cols` columns, the truncated output\n", " is used. By default, the setting in\n", " ``pandas.options.display.max_info_columns`` is used.\n", "memory_usage : bool, str, optional\n", " Specifies whether total memory usage of the DataFrame\n", " elements (including the index) should be displayed. By default,\n", " this follows the ``pandas.options.display.memory_usage`` setting.\n", "\n", " True always show memory usage. False never shows memory usage.\n", " A value of 'deep' is equivalent to "True with deep introspection".\n", " Memory usage is shown in human-readable units (base-2\n", " representation). Without deep introspection a memory estimation is\n", " made based in column dtype and number of rows assuming values\n", " consume the same memory amount for corresponding dtypes. With deep\n", " memory introspection, a real memory usage calculation is performed\n", " at the cost of computational resources. See the\n", " :ref:`Frequently Asked Questions <df-memory-usage>` for more\n", " details.\n", "show_counts : bool, optional\n", " Whether to show the non-null counts. By default, this is shown\n", " only if the DataFrame is smaller than\n", " ``pandas.options.display.max_info_rows`` and\n", " ``pandas.options.display.max_info_columns``. A value of True always\n", " shows the counts, and False never shows the counts.\n", "null_counts : bool, optional\n", " .. deprecated:: 1.2.0\n", " Use show_counts instead.\n", "\n", "Returns\n", "-------\n", "None\n", " This method prints a summary of a DataFrame and returns None.\n", "\n", "See Also\n", "--------\n", "DataFrame.describe: Generate descriptive statistics of DataFrame\n", " columns.\n", "DataFrame.memory_usage: Memory usage of DataFrame columns.\n", "\n", "Examples\n", "--------\n", ">>> int_values = [1, 2, 3, 4, 5]\n", ">>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']\n", ">>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]\n", ">>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,\n", "... "float_col": float_values})\n", ">>> df\n", " int_col text_col float_col\n", "0 1 alpha 0.00\n", "1 2 beta 0.25\n", "2 3 gamma 0.50\n", "3 4 delta 0.75\n", "4 5 epsilon 1.00\n", "\n", "Prints information of all columns:\n", "\n", ">>> df.info(verbose=True)\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 5 entries, 0 to 4\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 int_col 5 non-null int64\n", " 1 text_col 5 non-null object\n", " 2 float_col 5 non-null float64\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 248.0+ bytes\n", "\n", "Prints a summary of columns count and its dtypes but not per column\n", "information:\n", "\n", ">>> df.info(verbose=False)\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 5 entries, 0 to 4\n", "Columns: 3 entries, int_col to float_col\n", "dtypes: float64(1), int64(1), object(1)\n", "memory usage: 248.0+ bytes\n", "\n", "Pipe output of DataFrame.info to buffer instead of sys.stdout, get\n", "buffer content and writes to a text file:\n", "\n", ">>> import io\n", ">>> buffer = io.StringIO()\n", ">>> df.info(buf=buffer)\n", ">>> s = buffer.getvalue()\n", ">>> with open("df_info.txt", "w",\n", "... encoding="utf-8") as f: # doctest: +SKIP\n", "... f.write(s)\n", "260\n", "\n", "The `memory_usage` parameter allows deep introspection mode, specially\n", "useful for big DataFrames and fine-tune memory optimization:\n", "\n", ">>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", ">>> df = pd.DataFrame({\n", "... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", "... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),\n", "... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)\n", "... })\n", ">>> df.info()\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1000000 entries, 0 to 999999\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 column_1 1000000 non-null object\n", " 1 column_2 1000000 non-null object\n", " 2 column_3 1000000 non-null object\n", "dtypes: object(3)\n", "memory usage: 22.9+ MB\n", "\n", ">>> df.info(memory_usage='deep')\n", "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 1000000 entries, 0 to 999999\n", "Data columns (total 3 columns):\n", " # Column Non-Null Count Dtype\n", "--- ------ -------------- -----\n", " 0 column_1 1000000 non-null object\n", " 1 column_2 1000000 non-null object\n", " 2 column_3 1000000 non-null object\n", "dtypes: object(3)\n", "memory usage: 165.9 MB