test / app.py
sarahai's picture
Update app.py
c30d8ca verified
raw
history blame contribute delete
No virus
2.54 kB
import streamlit as st
import pandas as pd
import zipfile
import requests
import io
from sklearn.metrics.pairwise import cosine_similarity
# Function to download and load data
def load_data():
    """Download the MovieLens "ml-latest-small" archive and load its tables.

    Returns:
        tuple: ``(data, movies)`` where ``movies`` is the content of
        ``movies.csv`` and ``data`` is ``ratings.csv`` merged with ``movies``
        on ``movieId``.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.RequestException: on connection failure or timeout.
    """
    url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
    # A timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of trying to unzip an error page.
    response.raise_for_status()
    # Context managers make sure the archive and its members are closed.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        with archive.open('ml-latest-small/ratings.csv') as ratings_file:
            ratings = pd.read_csv(ratings_file)
        with archive.open('ml-latest-small/movies.csv') as movies_file:
            movies = pd.read_csv(movies_file)
    data = pd.merge(ratings, movies, on='movieId')
    return data, movies
# Function to build user-item matrix and similarity matrix
def build_matrices(data):
    """Build the user-by-title rating matrix and the user-to-user similarity table.

    Args:
        data: DataFrame providing ``userId``, ``title`` and ``rating`` columns.

    Returns:
        tuple: ``(user_item_matrix, user_similarity_df)``. The first is the
        pivot of ratings (rows: ``userId``, columns: ``title``) with missing
        ratings filled with 0; the second holds the cosine similarity between
        every pair of rows of that matrix, labelled by ``userId`` on both axes.
    """
    ratings_by_user = data.pivot_table(values='rating', index='userId', columns='title')
    ratings_by_user = ratings_by_user.fillna(0)

    user_ids = ratings_by_user.index
    similarity_table = pd.DataFrame(
        cosine_similarity(ratings_by_user),
        index=user_ids,
        columns=user_ids,
    )
    return ratings_by_user, similarity_table
# Function to get recommendations
def get_recommendations(selected_movies, user_item_matrix, num_recommendations=5):
    """Recommend titles based on how users rated the selected movies.

    Each user is weighted by their mean rating over the selected movies; every
    title is then scored by the weighted sum of its ratings divided by the
    total weight.

    Args:
        selected_movies: iterable of movie titles chosen by the user.
        user_item_matrix: DataFrame of ratings (rows: users, columns: titles).
        num_recommendations: maximum number of titles to return.

    Returns:
        pandas.Series: scores indexed by title, sorted in descending order and
        never containing the selected titles. The series is empty when none of
        the selected titles is a column of the matrix or when nobody rated them.
    """
    # A selected title may have no column in the matrix (e.g. a movie that
    # nobody rated); indexing with it would raise KeyError, so it is ignored.
    known_titles = [
        title
        for title in dict.fromkeys(selected_movies)
        if title in user_item_matrix.columns
    ]
    if not known_titles:
        return pd.Series(dtype=float)

    # Weight of each user: their mean rating over the selected movies.
    user_weights = user_item_matrix[known_titles].mean(axis=1)
    total_weight = user_weights.sum()
    if total_weight == 0:
        # No user rated the selection; dividing would only produce NaN scores.
        return pd.Series(dtype=float)

    # dot() aligns on the user index, so no prior re-ordering of users is needed.
    scores = user_item_matrix.T.dot(user_weights) / total_weight

    # Do not recommend what the user has just picked.
    scores = scores.drop(labels=known_titles)

    return scores.sort_values(ascending=False).head(num_recommendations)
# Load data and build matrices
# Streamlit re-executes this script on every widget interaction, so the
# download and the matrix construction are cached instead of repeated per rerun.
@st.cache_data
def _load_app_state():
    """Return ``(data, movies, user_item_matrix, user_similarity_df)``."""
    data, movies = load_data()
    user_item_matrix, user_similarity_df = build_matrices(data)
    return data, movies, user_item_matrix, user_similarity_df


data, movies, user_item_matrix, user_similarity_df = _load_app_state()

# Streamlit app
st.title("Collaborative Filtering Recommendation System")

# Let user select favorite movies
selected_movies = st.multiselect("Select your favorite movies", movies['title'].unique())

if st.button("Get Recommendations"):
    if selected_movies:
        recommendations = get_recommendations(selected_movies, user_item_matrix)
        if recommendations.empty:
            # Avoid showing a heading followed by nothing.
            st.write("No recommendations could be computed for this selection.")
        else:
            st.write("Top Recommendations:")
            for movie, score in recommendations.items():
                st.write(f"{movie}: {score:.2f}")
    else:
        st.write("Please select at least one movie.")