test / app.py
sarahai's picture
Update app.py
c30d8ca verified
raw
history blame
No virus
2.54 kB
import streamlit as st
import pandas as pd
import zipfile
import requests
import io
from sklearn.metrics.pairwise import cosine_similarity
# Function to download and load data
def load_data():
    """Download the MovieLens "latest-small" archive and load its tables.

    Returns:
        A ``(data, movies)`` tuple of DataFrames: ``data`` is ``ratings.csv``
        merged with ``movies.csv`` on ``movieId``; ``movies`` is the raw
        ``movies.csv`` table.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
    # Bound the wait so a stalled connection cannot hang the app forever.
    response = requests.get(url, timeout=30)
    # Fail loudly on HTTP errors instead of trying to unzip an error page.
    response.raise_for_status()

    # Context managers make sure the archive and member handles are closed.
    with zipfile.ZipFile(io.BytesIO(response.content)) as zip_file:
        with zip_file.open('ml-latest-small/ratings.csv') as ratings_file:
            ratings = pd.read_csv(ratings_file)
        with zip_file.open('ml-latest-small/movies.csv') as movies_file:
            movies = pd.read_csv(movies_file)

    data = pd.merge(ratings, movies, on='movieId')
    return data, movies
# Function to build user-item matrix and similarity matrix
def build_matrices(data):
    """Build the user/title rating matrix and the user-user similarity table.

    Args:
        data: DataFrame providing ``userId``, ``title`` and ``rating`` columns.

    Returns:
        A ``(user_item_matrix, user_similarity_df)`` tuple. The first has one
        row per ``userId`` and one column per ``title``, with missing cells
        filled with 0. The second holds the pairwise cosine similarity of
        those rows, labelled by ``userId`` on both axes.
    """
    pivoted = data.pivot_table(index='userId', columns='title', values='rating')
    # Missing user/title combinations become 0 so the matrix is fully numeric.
    user_item_matrix = pivoted.fillna(0)

    user_ids = user_item_matrix.index
    user_similarity_df = pd.DataFrame(
        cosine_similarity(user_item_matrix),
        index=user_ids,
        columns=user_ids,
    )
    return user_item_matrix, user_similarity_df
# Function to get recommendations
def get_recommendations(selected_movies, user_item_matrix, num_recommendations=5):
    """Recommend titles rated highly by users who liked the selected movies.

    Each user is weighted by their mean value across the selected movies'
    columns, and every title is scored by the weighted sum of its column
    divided by the total weight.

    Args:
        selected_movies: Titles chosen by the user. Titles that are not
            columns of ``user_item_matrix`` are ignored.
        user_item_matrix: DataFrame with one row per user and one column per
            title (presumably 0 where a user has not rated a title; confirm
            against the caller).
        num_recommendations: Maximum number of titles to return.

    Returns:
        A pandas Series mapping title to score, sorted by descending score.
        The selected titles themselves are excluded. The Series is empty when
        none of the selected titles is a column of the matrix or when the
        user weights sum to zero.
    """
    # Skip titles without a column; indexing with them would raise KeyError.
    known_titles = [
        title for title in selected_movies if title in user_item_matrix.columns
    ]
    if not known_titles:
        return pd.Series(dtype=float)

    # Per-user weight: the mean rating over the selected movies.
    user_weights = user_item_matrix[known_titles].mean(axis=1)
    total_weight = user_weights.sum()
    if total_weight == 0:
        # Nobody rated the selection; dividing would yield NaN/inf scores.
        return pd.Series(dtype=float)

    # dot() aligns on the user index, so no explicit reordering is needed.
    scores = user_item_matrix.T.dot(user_weights) / total_weight

    # Do not recommend the movies the user has just selected.
    scores = scores[~scores.index.isin(known_titles)]

    return scores.sort_values(ascending=False).head(num_recommendations)
# Load data and build matrices
# Streamlit re-executes this script on every widget interaction; cache the
# download and matrix construction so they run once instead of on each rerun.
# NOTE(review): st.cache_data needs Streamlit >= 1.18 -- confirm the deployed version.
@st.cache_data
def _load_and_build():
    """Download the dataset and derive the matrices (cached across reruns)."""
    data, movies = load_data()
    user_item_matrix, user_similarity_df = build_matrices(data)
    return data, movies, user_item_matrix, user_similarity_df


data, movies, user_item_matrix, user_similarity_df = _load_and_build()

# Streamlit app
st.title("Collaborative Filtering Recommendation System")

# Let user select favorite movies. Only titles that have a column in the
# user-item matrix are offered: a title without ratings has no column there
# and cannot be used to look up ratings.
rated_titles = set(user_item_matrix.columns)
movie_options = [
    title for title in movies['title'].unique() if title in rated_titles
]
selected_movies = st.multiselect("Select your favorite movies", movie_options)

if st.button("Get Recommendations"):
    if selected_movies:
        recommendations = get_recommendations(selected_movies, user_item_matrix)
        if recommendations.empty:
            st.write("No recommendations found for the selected movies.")
        else:
            st.write("Top Recommendations:")
            for movie, score in recommendations.items():
                st.write(f"{movie}: {score:.2f}")
    else:
        st.write("Please select at least one movie.")