sairamn committed on
Commit
36dc14c
1 Parent(s): 8af8121

Changed Files for Adding Streamlit app for movie recommendation

Browse files
Files changed (2) hide show
  1. app.py +21 -23
  2. requirements.txt +2 -3
app.py CHANGED
@@ -4,18 +4,20 @@ import pandas as pd
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  import numpy as np
7
- import ipywidgets as widgets
8
- from IPython.display import display
9
 
10
  # Google Drive file IDs
11
  movies_file_id = "1HWlVK-nXM5JG4GfSDHyR-x8T1AlfQQYw"
12
  ratings_file_id = "1V2s1rpu4Gfjbt8z2a1Xml9IJr5KSozK1"
13
 
 
14
  # Download the files if they don't exist
15
  def download_file_from_google_drive(file_id, output):
16
  url = f"https://drive.google.com/uc?id={file_id}"
17
  gdown.download(url, output, quiet=False)
18
 
 
19
  if not os.path.exists("movies.csv"):
20
  download_file_from_google_drive(movies_file_id, "movies.csv")
21
 
@@ -26,18 +28,20 @@ if not os.path.exists("ratings.csv"):
26
  movies = pd.read_csv("movies.csv")
27
  ratings = pd.read_csv("ratings.csv")
28
 
 
29
  # Clean movie titles
30
- import re
31
  def clean_title(title):
32
  title = re.sub("[^a-zA-Z0-9 ]", "", title)
33
  return title
34
 
 
35
  movies["clean_title"] = movies["title"].apply(clean_title)
36
 
37
  # Vectorize the titles
38
  vectorizer = TfidfVectorizer(ngram_range=(1, 2))
39
  tfidf = vectorizer.fit_transform(movies["clean_title"])
40
 
 
41
  # Function to search for movies
42
  def search(title):
43
  title = clean_title(title)
@@ -45,9 +49,9 @@ def search(title):
45
  similarity = cosine_similarity(query_vec, tfidf).flatten()
46
  indices = np.argpartition(similarity, -5)[-5:]
47
  results = movies.iloc[indices].iloc[::-1]
48
-
49
  return results
50
 
 
51
  # Function to find similar movies
52
  def find_similar_movies(movie_id):
53
  similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()
@@ -65,24 +69,18 @@ def find_similar_movies(movie_id):
65
  return rec_percentages.head(10).merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]
66
 
67
 
68
- # Widgets for the UI
69
- movie_name_input = widgets.Text(
70
- value='Toy Story',
71
- description='Movie Title:',
72
- disabled=False
73
- )
74
- recommendation_list = widgets.Output()
75
-
76
- def on_type(data):
77
- with recommendation_list:
78
- recommendation_list.clear_output()
79
- title = data["new"]
80
- if len(title) > 5:
81
- results = search(title)
82
- movie_id = results.iloc[0]["movieId"]
83
- display(find_similar_movies(movie_id))
84
 
85
- movie_name_input.observe(on_type, names='value')
86
 
87
- # Display the widgets
88
- display(movie_name_input, recommendation_list)
 
 
 
 
 
 
 
 
 
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  import numpy as np
7
+ import re
8
+ import streamlit as st
9
 
10
  # Google Drive file IDs
11
  movies_file_id = "1HWlVK-nXM5JG4GfSDHyR-x8T1AlfQQYw"
12
  ratings_file_id = "1V2s1rpu4Gfjbt8z2a1Xml9IJr5KSozK1"
13
 
14
+
15
  # Download the files if they don't exist
16
  def download_file_from_google_drive(file_id, output):
17
  url = f"https://drive.google.com/uc?id={file_id}"
18
  gdown.download(url, output, quiet=False)
19
 
20
+
21
  if not os.path.exists("movies.csv"):
22
  download_file_from_google_drive(movies_file_id, "movies.csv")
23
 
 
28
  movies = pd.read_csv("movies.csv")
29
  ratings = pd.read_csv("ratings.csv")
30
 
31
+
32
  # Clean movie titles
 
33
  def clean_title(title):
34
  title = re.sub("[^a-zA-Z0-9 ]", "", title)
35
  return title
36
 
37
+
38
  movies["clean_title"] = movies["title"].apply(clean_title)
39
 
40
  # Vectorize the titles
41
  vectorizer = TfidfVectorizer(ngram_range=(1, 2))
42
  tfidf = vectorizer.fit_transform(movies["clean_title"])
43
 
44
+
45
  # Function to search for movies
46
  def search(title):
47
  title = clean_title(title)
 
49
  similarity = cosine_similarity(query_vec, tfidf).flatten()
50
  indices = np.argpartition(similarity, -5)[-5:]
51
  results = movies.iloc[indices].iloc[::-1]
 
52
  return results
53
 
54
+
55
  # Function to find similar movies
56
  def find_similar_movies(movie_id):
57
  similar_users = ratings[(ratings["movieId"] == movie_id) & (ratings["rating"] > 4)]["userId"].unique()
 
69
  return rec_percentages.head(10).merge(movies, left_index=True, right_on="movieId")[["score", "title", "genres"]]
70
 
71
 
72
+ # Streamlit UI
73
+ st.title("Movie Recommendation System")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
+ movie_name = st.text_input("Enter a movie title", "Toy Story")
76
 
77
+ if len(movie_name) > 5:
78
+ results = search(movie_name)
79
+ if not results.empty:
80
+ movie_id = results.iloc[0]["movieId"]
81
+ st.write(f"Top recommendations based on '{results.iloc[0]['title']}':")
82
+ recommendations = find_similar_movies(movie_id)
83
+ for index, row in recommendations.iterrows():
84
+ st.write(f"{row['title']} ({row['genres']}) - Score: {row['score']:.2f}")
85
+ else:
86
+ st.write("No movies found. Please try a different title.")
requirements.txt CHANGED
@@ -1,5 +1,4 @@
1
- streamlit
2
  pandas
3
  scikit-learn
4
- numpy
5
- gdown
 
1
+ gdown
2
  pandas
3
  scikit-learn
4
+ streamlit