Spaces:

miscjose
/

Nuanced_Recommendation_System

Sleeping

File size: 10,269 Bytes

699b928

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Jose\\Desktop\\Nuanced_Recommendation_System\\.venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "import json\n",
    "\n",
    "from sentence_transformers import SentenceTransformer\n",
    "from sklearn.metrics.pairwise import cosine_similarity\n",
    "\n",
    "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n",
    "\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_n_weighted_scores(embeddings, query, n, objective_weight, subjective_weight):\n",
    "    query = [model.encode(query)]\n",
    "\n",
    "    weighted_scores = []\n",
    "\n",
    "    for key, value in embeddings.items():\n",
    "        objective_embedding = value['objective_embedding']\n",
    "        subjective_embeddings = value['subjective_embeddings']\n",
    "        \n",
    "        objective_score = cosine_similarity(query, objective_embedding).item()\n",
    "        subjective_scores = cosine_similarity(query, subjective_embeddings)\n",
    "\n",
    "        max_score = 0\n",
    "        max_review_index = 0\n",
    "        for idx, score in enumerate(subjective_scores[0].tolist()):\n",
    "            weighted_score = ((objective_score * objective_weight)+(score * subjective_weight))\n",
    "            if weighted_score > max_score:\n",
    "                max_score = weighted_score\n",
    "                max_review_index = idx\n",
    "        \n",
    "        weighted_scores.append((key, max_score, max_review_index))\n",
    "    \n",
    "    return sorted(weighted_scores, key=lambda x: x[1], reverse=True)[:n]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def filter_anime(embeddings, genres, themes, rating):\n",
    "    genres = set(genres)\n",
    "    themes = set(themes)\n",
    "    rating = set(rating)\n",
    "\n",
    "    filtered_anime = embeddings.copy()\n",
    "    for key, anime in embeddings.items():\n",
    "\n",
    "        anime_genres = set(anime['genres'])\n",
    "        anime_themes = set(anime['themes'])\n",
    "        anime_rating = set([anime['rating']])\n",
    "\n",
    "        if genres.intersection(anime_genres) or 'ALL' in genres:\n",
    "            pass\n",
    "        else:\n",
    "            filtered_anime.pop(key)\n",
    "            continue\n",
    "        if themes.intersection(anime_themes) or 'ALL' in themes:\n",
    "            pass\n",
    "        else:\n",
    "            filtered_anime.pop(key)\n",
    "            continue\n",
    "        if rating.intersection(anime_rating) or 'ALL' in rating:\n",
    "            pass\n",
    "        else:\n",
    "            filtered_anime.pop(key)\n",
    "            continue\n",
    "        \n",
    "    return filtered_anime"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('./embeddings/data.json') as f:\n",
    "        data = json.load(f)\n",
    "        embeddings = data['embeddings']\n",
    "        filters = data['filters']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_recommendation(query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight):\n",
    "    filtered_anime = filter_anime(embeddings, genres, themes, rating)\n",
    "    results = []\n",
    "    weighted_scores = get_n_weighted_scores(filtered_anime, query, number_of_recommendations, float(objective_weight), float(subjective_weight))\n",
    "    for idx, (key, score, review_index) in enumerate(weighted_scores, start=1):\n",
    "        data = embeddings[key]\n",
    "        if not data['english']:\n",
    "            name = data['japanese']\n",
    "        else:\n",
    "            name = data['english']\n",
    "        description = data['description']\n",
    "        review = data['reviews'][review_index]['text']\n",
    "        image = data['image']\n",
    "\n",
    "        results.append(gr.Image(label=f\"Recommendation {idx}: {name}\",value=image, height=435, width=500, visible=True))\n",
    "        results.append(gr.Textbox(label=f\"Synopsis\", value=description, max_lines=7, visible=True))\n",
    "        results.append(gr.Textbox(label=f\"Most Relevant User Review\",value=review, max_lines=7, visible=True))\n",
    "\n",
    "    for _ in range(10-number_of_recommendations):\n",
    "        results.append(gr.Image(visible=False))\n",
    "        results.append(gr.Textbox(visible=False))\n",
    "        results.append(gr.Textbox(visible=False))\n",
    "    \n",
    "    return results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running on local URL:  http://127.0.0.1:7863\n",
      "\n",
      "To create a public link, set `share=True` in `launch()`.\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div><iframe src=\"http://127.0.0.1:7863/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\Jose\\Desktop\\Nuanced_Recommendation_System\\.venv\\Lib\\site-packages\\gradio\\analytics.py:106: UserWarning: IMPORTANT: You are using gradio version 4.44.1, however version 5.0.1 is available, please upgrade. \n",
      "--------\n",
      "  warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "with gr.Blocks(theme=gr.themes.Soft(primary_hue='red')) as demo:\n",
    "    with gr.Row():\n",
    "        with gr.Column():\n",
    "            gr.Markdown(\n",
    "                '''\n",
    "                # Welcome to the Nuanced Recommendation System!\n",
    "                ### This system **combines** both objective (synopsis, episode count, themes) and subjective (user reviews) data, in order to recommend the most approprate anime. Feel free to refine using the **optional** filters below! \n",
    "                '''\n",
    "            )\n",
    "        with gr.Column():\n",
    "            pass\n",
    "        \n",
    "\n",
    "    with gr.Row():\n",
    "        with gr.Column() as input_col:\n",
    "            query = gr.Textbox(label=\"What are you looking for?\")\n",
    "            number_of_recommendations = gr.Slider(label= \"# of Recommendations\", minimum=1, maximum=10, value=3, step=1)\n",
    "            genres = gr.Dropdown(label='Genres',multiselect=True,choices=filters['genres'], value=['ALL'])\n",
    "            themes = gr.Dropdown(label='Themes',multiselect=True,choices=filters['themes'], value=['ALL'])\n",
    "            rating = gr.Dropdown(label='Rating',multiselect=True,choices=filters['rating'], value=['ALL'])\n",
    "            objective_weight = gr.Slider(label= \"Objective Weight\", minimum=0, maximum=1, value=.5, step=.1)\n",
    "            subjective_weight = gr.Slider(label= \"Subjective Weight\", minimum=0, maximum=1, value=.5, step=.1)\n",
    "            submit_btn = gr.Button(\"Submit\")\n",
    "\n",
    "            examples = gr.Examples(\n",
    "                examples=[\n",
    "                    ['A sci-fi anime set in a future where AI and robots have become self-aware', 3, ['Action', 'Sci-Fi', 'Fantasy'], ['ALL'], ['PG-13 - Teens 13 or older'], .8, .2],\n",
    "                    ['An anime where a group of students form a band, and the story focuses on their personal growth and struggles with adulthood', 5, ['ALL'], ['Music'], ['PG-13 - Teens 13 or older', 'R - 17+ (violence & profanity)'], .3, .7],\n",
    "                    ['An anime where the main character starts as a villain but slowly redeems themselves', 3, ['Suspense', 'Action'], ['ALL'], ['PG-13 - Teens 13 or older', 'R - 17+ (violence & profanity)'], .2, .8],\n",
    "                ],\n",
    "                inputs=[query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight],\n",
    "            )\n",
    "\n",
    "        outputs = []\n",
    "        with gr.Column():\n",
    "            for i in range(10):\n",
    "                with gr.Row():\n",
    "                    with gr.Column():\n",
    "                        outputs.append(gr.Image(height=435, width=500, visible=False))\n",
    "                    with gr.Column():\n",
    "                        outputs.append(gr.Textbox(max_lines=7, visible=False))\n",
    "                        outputs.append(gr.Textbox(max_lines=7, visible=False))\n",
    "                        \n",
    "\n",
    "    submit_btn.click(\n",
    "        get_recommendation,\n",
    "        [query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight],\n",
    "        outputs\n",
    "    )\n",
    "\n",
    "    demo.launch()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": ".venv",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}