File size: 2,538 Bytes
0cb44a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c30d8ca
0cb44a0
 
 
 
 
 
 
 
 
c30d8ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0cb44a0
c30d8ca
0cb44a0
 
 
c30d8ca
0cb44a0
 
 
 
 
c30d8ca
 
0cb44a0
 
c30d8ca
 
0cb44a0
 
 
 
c30d8ca
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import streamlit as st
import pandas as pd
import zipfile
import requests
import io
from sklearn.metrics.pairwise import cosine_similarity

# Function to download and load data
def load_data():
    """Download the MovieLens "ml-latest-small" archive and load its tables.

    Returns:
        tuple: ``(data, movies)`` where ``data`` is ratings.csv merged with
        movies.csv on ``movieId`` and ``movies`` is movies.csv as read.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: on connection failure or timeout.
        zipfile.BadZipFile: if the downloaded payload is not a valid zip.
    """
    url = "https://files.grouplens.org/datasets/movielens/ml-latest-small.zip"
    # A timeout keeps the app from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    # Fail loudly on 4xx/5xx instead of handing an error page to ZipFile.
    response.raise_for_status()

    # Context managers close the archive and its member handles deterministically.
    with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
        with archive.open('ml-latest-small/ratings.csv') as ratings_file:
            ratings = pd.read_csv(ratings_file)
        with archive.open('ml-latest-small/movies.csv') as movies_file:
            movies = pd.read_csv(movies_file)

    data = pd.merge(ratings, movies, on='movieId')
    return data, movies

# Function to build user-item matrix and similarity matrix
def build_matrices(data):
    """Build the user-by-title rating matrix and the user-user similarity table.

    Args:
        data: DataFrame with at least ``userId``, ``title`` and ``rating`` columns.

    Returns:
        tuple: ``(user_item_matrix, user_similarity_df)``. The first has one row
        per ``userId`` and one column per ``title``, with missing ratings
        filled with 0. The second is the cosine similarity between those rows,
        indexed and labelled by ``userId`` on both axes.
    """
    ratings_by_user = data.pivot_table(index='userId', columns='title', values='rating')
    ratings_by_user = ratings_by_user.fillna(0)

    user_ids = ratings_by_user.index
    similarity_frame = pd.DataFrame(
        cosine_similarity(ratings_by_user),
        index=user_ids,
        columns=user_ids,
    )
    return ratings_by_user, similarity_frame

# Function to get recommendations
def get_recommendations(selected_movies, user_item_matrix, num_recommendations=5):
    """Score movies for someone who likes ``selected_movies`` and return the best ones.

    Each user is weighted by their mean rating over the selected titles; every
    movie's score is the weight-averaged rating it received from all users.

    Args:
        selected_movies: iterable of titles the person likes.
        user_item_matrix: DataFrame of ratings, users as rows and titles as
            columns, with 0 for "not rated".
        num_recommendations: maximum number of results to return.

    Returns:
        pandas.Series mapping title -> score, sorted by descending score. The
        selected titles themselves are never returned. The Series is empty
        when none of the selected titles is in the matrix or when no user
        rated any of them.
    """
    # Ignore duplicates and titles without a column; indexing with an unknown
    # title would raise KeyError.
    known_titles = [
        title
        for title in dict.fromkeys(selected_movies)
        if title in user_item_matrix.columns
    ]
    if not known_titles:
        return pd.Series(dtype=float)

    # Per-user weight: how much this user liked the selected titles.
    user_weights = user_item_matrix[known_titles].mean(axis=1)
    total_weight = user_weights.sum()
    if total_weight == 0:
        # Nobody rated the selection; dividing would yield NaN/inf scores.
        return pd.Series(dtype=float)

    # dot() aligns on the user index, so no explicit re-ordering is needed.
    scores = user_item_matrix.T.dot(user_weights) / total_weight

    # Do not recommend what the person already picked.
    scores = scores.drop(index=known_titles)

    return scores.sort_values(ascending=False).head(num_recommendations)

# Load data and build matrices
# NOTE(review): Streamlit re-executes this script on every widget interaction,
# so the download and matrix build below run again each time — consider
# caching them (confirm which caching API the deployed Streamlit version has).
data, movies = load_data()
user_item_matrix, user_similarity_df = build_matrices(data)

# Streamlit app
st.title("Collaborative Filtering Recommendation System")

# Let user select favorite movies. Offer only titles that have a column in the
# rating matrix: movies.csv can contain titles absent from the matrix, and
# selecting one of those would make the column lookup in get_recommendations fail.
rated_titles = user_item_matrix.columns.tolist()
selected_movies = st.multiselect("Select your favorite movies", rated_titles)

if st.button("Get Recommendations"):
    if selected_movies:
        recommendations = get_recommendations(selected_movies, user_item_matrix)
        st.write("Top Recommendations:")
        for movie, score in recommendations.items():
            st.write(f"{movie}: {score:.2f}")
    else:
        st.write("Please select at least one movie.")