def get_n_weighted_scores(embeddings, query, n, objective_weight, subjective_weight):
    """Rank entries by a weighted blend of objective and subjective similarity.

    The query text is encoded once with the notebook-level ``model``. Each
    entry is then scored as
    ``objective_similarity * objective_weight + subjective_similarity * subjective_weight``,
    using whichever of its subjective vectors yields the highest blended score.

    Args:
        embeddings: Mapping of entry key -> dict providing
            ``'objective_embedding'`` and ``'subjective_embeddings'``.
        query: Query text handed to ``model.encode``.
        n: Maximum number of results to return (used as a slice bound).
        objective_weight: Multiplier applied to the objective similarity.
        subjective_weight: Multiplier applied to each subjective similarity.

    Returns:
        A list of at most ``n`` tuples
        ``(key, best_weighted_score, best_subjective_index)``, sorted by
        score in descending order.
    """
    query_vector = [model.encode(query)]

    weighted_scores = []
    for key, entry in embeddings.items():
        # `.item()` only succeeds on a single-element result, so the objective
        # embedding is expected to hold exactly one vector.
        objective_score = cosine_similarity(query_vector, entry['objective_embedding']).item()
        # Row 0 is the single query vector; one column per subjective vector.
        subjective_scores = cosine_similarity(query_vector, entry['subjective_embeddings'])[0].tolist()

        # The objective contribution is the same for every subjective vector.
        objective_term = objective_score * objective_weight

        # Start below any attainable score. Cosine similarity (and therefore
        # the blend) can be zero or negative; starting from 0 would discard
        # the genuine best match and report a score of 0 with index 0.
        best_score = float('-inf')
        best_index = 0
        for idx, score in enumerate(subjective_scores):
            weighted_score = objective_term + (score * subjective_weight)
            if weighted_score > best_score:
                best_score = weighted_score
                best_index = idx

        weighted_scores.append((key, best_score, best_index))

    return sorted(weighted_scores, key=lambda item: item[1], reverse=True)[:n]
def filter_anime(embeddings, genres, themes, rating):
    """Return the entries of ``embeddings`` that satisfy all three filters.

    A filter is satisfied when its selection contains the literal ``'ALL'``
    or shares at least one value with the entry. Genres and themes are
    compared as collections; the entry's ``'rating'`` is treated as a single
    value. An entry must pass the genre, theme and rating filters together.

    Args:
        embeddings: Mapping of key -> entry dict with ``'genres'``,
            ``'themes'`` and ``'rating'`` fields.
        genres: Iterable of selected genres.
        themes: Iterable of selected themes.
        rating: Iterable of selected ratings.

    Returns:
        A new dict holding the surviving entries in their original order;
        ``embeddings`` itself is left untouched.
    """
    wanted_genres = set(genres)
    wanted_themes = set(themes)
    wanted_ratings = set(rating)

    def _selected(wanted, available):
        # 'ALL' disables the filter; otherwise any shared value is enough.
        return 'ALL' in wanted or not wanted.isdisjoint(available)

    def _passes(entry):
        # Read every field up front so a malformed entry fails regardless of
        # which filter would have rejected it first.
        entry_genres = set(entry['genres'])
        entry_themes = set(entry['themes'])
        entry_rating = {entry['rating']}
        return (
            _selected(wanted_genres, entry_genres)
            and _selected(wanted_themes, entry_themes)
            and _selected(wanted_ratings, entry_rating)
        )

    return {key: entry for key, entry in embeddings.items() if _passes(entry)}
def get_recommendation(query, number_of_recommendations, genres, themes, rating, objective_weight, subjective_weight, max_recommendations=10):
    """Build the component values shown for a recommendation request.

    Filters the module-level ``embeddings`` with ``filter_anime``, ranks the
    survivors with ``get_n_weighted_scores`` and emits three components per
    hit: an image labelled with the title, the synopsis, and the user review
    that matched the query best. Unused slots are filled with hidden
    components so the returned list always has ``3 * max_recommendations``
    items.

    Args:
        query: Free-text query describing what the user wants.
        number_of_recommendations: Requested number of results; coerced to an
            int and limited to the range ``0..max_recommendations``.
        genres: Selected genres, forwarded to ``filter_anime``.
        themes: Selected themes, forwarded to ``filter_anime``.
        rating: Selected ratings, forwarded to ``filter_anime``.
        objective_weight: Weight of the objective similarity (cast to float).
        subjective_weight: Weight of the review similarity (cast to float).
        max_recommendations: Number of result slots to fill. Defaults to 10.
            NOTE(review): presumably this must match the number of
            image/synopsis/review triples wired as outputs in the launch
            cell, which is not visible here - confirm.

    Returns:
        A flat list of ``gr.Image`` / ``gr.Textbox`` objects, visible for each
        hit and hidden for the remaining slots.
    """
    # Never ask for more hits than there are slots, and never a negative count.
    requested = max(0, min(int(number_of_recommendations), max_recommendations))

    filtered_anime = filter_anime(embeddings, genres, themes, rating)
    top_matches = get_n_weighted_scores(
        filtered_anime, query, requested, float(objective_weight), float(subjective_weight)
    )

    results = []
    for position, (key, _score, review_index) in enumerate(top_matches, start=1):
        entry = embeddings[key]
        # Fall back to the Japanese title when no English title is present.
        name = entry['english'] or entry['japanese']

        results.append(gr.Image(label=f"Recommendation {position}: {name}", value=entry['image'], height=435, width=500, visible=True))
        results.append(gr.Textbox(label="Synopsis", value=entry['description'], max_lines=7, visible=True))
        results.append(gr.Textbox(label="Most Relevant User Review", value=entry['reviews'][review_index]['text'], max_lines=7, visible=True))

    # Pad by the number of hits actually produced, not the number requested:
    # the filters may leave fewer entries than the user asked for, and the
    # returned list must still fill every slot.
    for _ in range(max_recommendations - len(top_matches)):
        results.append(gr.Image(visible=False))
        results.append(gr.Textbox(visible=False))
        results.append(gr.Textbox(visible=False))

    return results
" ], "text/plain": [ "