{ "cells": [ { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\Jose\\Desktop\\Nuanced_Recommendation_System\\.venv\\Lib\\site-packages\\transformers\\tokenization_utils_base.py:1617: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be deprecated in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884\n", " warnings.warn(\n" ] } ], "source": [ "import os\n", "import json\n", "\n", "from sentence_transformers import SentenceTransformer\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n", "\n", "import gradio as gr" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def get_n_weighted_scores(embeddings, query, n, objective_weight, subjective_weight):\n", " query = [model.encode(query)]\n", "\n", " weighted_scores = []\n", "\n", " for key, value in embeddings.items():\n", " objective_embedding = value['objective_embedding']\n", " subjective_embeddings = value['subjective_embeddings']\n", " \n", " objective_score = cosine_similarity(query, objective_embedding).item()\n", " subjective_scores = cosine_similarity(query, subjective_embeddings)\n", "\n", " max_score = 0\n", " max_review_index = 0\n", " for idx, score in enumerate(subjective_scores[0].tolist()):\n", " weighted_score = ((objective_score * objective_weight)+(score * subjective_weight))\n", " if weighted_score > max_score:\n", " max_score = weighted_score\n", " max_review_index = idx\n", " \n", " weighted_scores.append((key, max_score, max_review_index))\n", " \n", " return sorted(weighted_scores, key=lambda x: x[1], reverse=True)[:n]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "def filter_anime(embeddings, genres, themes, rating):\n", " genres = set(genres)\n", " themes = set(themes)\n", " rating = set(rating)\n", "\n", " filtered_anime = embeddings.copy()\n", " for key, anime in embeddings.items():\n", "\n", " anime_genres = set(anime['genres'])\n", " anime_themes = set(anime['themes'])\n", " anime_rating = set([anime['rating']])\n", "\n", " if genres.intersection(anime_genres) or 'ALL' in genres:\n", " pass\n", " else:\n", " filtered_anime.pop(key)\n", " continue\n", " if themes.intersection(anime_themes) or 'ALL' in themes:\n", " pass\n", " else:\n", " filtered_anime.pop(key)\n", " continue\n", " if rating.intersection(anime_rating) or 'ALL' in rating:\n", " pass\n", " else:\n", " filtered_anime.pop(key)\n", " continue\n", " \n", " return filtered_anime" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "with open('./embeddings/data.json') as f:\n", " data = json.load(f)\n", " embeddings = data['embeddings']\n", " filters = data['filters']" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "def get_recommendation(query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight):\n", " filtered_anime = filter_anime(embeddings, genres, themes, rating)\n", " results = []\n", " weighted_scores = get_n_weighted_scores(filtered_anime, query, number_of_recommendations, float(objective_weight), float(subjective_weight))\n", " for idx, (key, score, review_index) in enumerate(weighted_scores, start=1):\n", " data = embeddings[key]\n", " if not data['english']:\n", " name = data['japanese']\n", " else:\n", " name = data['english']\n", " description = data['description']\n", " review = data['reviews'][review_index]['text']\n", " image = data['image']\n", "\n", " results.append(gr.Image(label=f\"Recommendation {idx}: {name}\",value=image, height=435, width=500, visible=True))\n", " results.append(gr.Textbox(label=f\"Synopsis\", value=description, max_lines=7, visible=True))\n", " results.append(gr.Textbox(label=f\"Most Relevant User Review\",value=review, max_lines=7, visible=True))\n", "\n", " for _ in range(10-number_of_recommendations):\n", " results.append(gr.Image(visible=False))\n", " results.append(gr.Textbox(visible=False))\n", " results.append(gr.Textbox(visible=False))\n", " \n", " return results" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7863\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\Jose\\Desktop\\Nuanced_Recommendation_System\\.venv\\Lib\\site-packages\\gradio\\analytics.py:106: UserWarning: IMPORTANT: You are using gradio version 4.44.1, however version 5.0.1 is available, please upgrade. \n", "--------\n", " warnings.warn(\n" ] } ], "source": [ "with gr.Blocks(theme=gr.themes.Soft(primary_hue='red')) as demo:\n", " with gr.Row():\n", " with gr.Column():\n", " gr.Markdown(\n", " '''\n", " # Welcome to the Nuanced Recommendation System!\n", " ### This system **combines** both objective (synopsis, episode count, themes) and subjective (user reviews) data, in order to recommend the most approprate anime. Feel free to refine using the **optional** filters below! \n", " '''\n", " )\n", " with gr.Column():\n", " pass\n", " \n", "\n", " with gr.Row():\n", " with gr.Column() as input_col:\n", " query = gr.Textbox(label=\"What are you looking for?\")\n", " number_of_recommendations = gr.Slider(label= \"# of Recommendations\", minimum=1, maximum=10, value=3, step=1)\n", " genres = gr.Dropdown(label='Genres',multiselect=True,choices=filters['genres'], value=['ALL'])\n", " themes = gr.Dropdown(label='Themes',multiselect=True,choices=filters['themes'], value=['ALL'])\n", " rating = gr.Dropdown(label='Rating',multiselect=True,choices=filters['rating'], value=['ALL'])\n", " objective_weight = gr.Slider(label= \"Objective Weight\", minimum=0, maximum=1, value=.5, step=.1)\n", " subjective_weight = gr.Slider(label= \"Subjective Weight\", minimum=0, maximum=1, value=.5, step=.1)\n", " submit_btn = gr.Button(\"Submit\")\n", "\n", " examples = gr.Examples(\n", " examples=[\n", " ['A sci-fi anime set in a future where AI and robots have become self-aware', 3, ['Action', 'Sci-Fi', 'Fantasy'], ['ALL'], ['PG-13 - Teens 13 or older'], .8, .2],\n", " ['An anime where a group of students form a band, and the story focuses on their personal growth and struggles with adulthood', 5, ['ALL'], ['Music'], ['PG-13 - Teens 13 or older', 'R - 17+ (violence & profanity)'], .3, .7],\n", " ['An anime where the main character starts as a villain but slowly redeems themselves', 3, ['Suspense', 'Action'], ['ALL'], ['PG-13 - Teens 13 or older', 'R - 17+ (violence & profanity)'], .2, .8],\n", " ],\n", " inputs=[query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight],\n", " )\n", "\n", " outputs = []\n", " with gr.Column():\n", " for i in range(10):\n", " with gr.Row():\n", " with gr.Column():\n", " outputs.append(gr.Image(height=435, width=500, visible=False))\n", " with gr.Column():\n", " outputs.append(gr.Textbox(max_lines=7, visible=False))\n", " outputs.append(gr.Textbox(max_lines=7, visible=False))\n", " \n", "\n", " submit_btn.click(\n", " get_recommendation,\n", " [query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight],\n", " outputs\n", " )\n", "\n", " demo.launch()" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 2 }