Spaces:

schibsted-presplit
/

ai-academy-2024-gr8-recommender-api

Runtime error

App Files Files Community

jrno committed on Apr 29

Commit

0bd1550

•

1 Parent(s): 22854cc

Only recommend tracks that model has seen

Browse files

Files changed (2) hide show

recommendation-api/recommender.py +1 -1
recommendation-api/tracks.py +17 -12

recommendation-api/recommender.py CHANGED Viewed

@@ -5,7 +5,7 @@ from tracks import get_unlistened_tracks_for_user, predictions_to_tracks
 def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
     not_listened_tracks = get_unlistened_tracks_for_user(user_id)
     # Get predictions for the tracks user hasn't listened yet
     input_dataframe = pd.DataFrame({'user_id': [user_id] * len(not_listened_tracks), 'entry': not_listened_tracks})
     test_dl = learn.dls.test_dl(input_dataframe)

 def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
     not_listened_tracks = get_unlistened_tracks_for_user(user_id)
     # Get predictions for the tracks user hasn't listened yet
     input_dataframe = pd.DataFrame({'user_id': [user_id] * len(not_listened_tracks), 'entry': not_listened_tracks})
     test_dl = learn.dls.test_dl(input_dataframe)

recommendation-api/tracks.py CHANGED Viewed

@@ -1,21 +1,26 @@
 import pandas as pd
-# Read the CSV files
 tracks_df = pd.read_csv('data/music_info.csv')
 tracks_df.fillna('', inplace=True)
 tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
-track_interactions_df = pd.read_csv('data/model.csv')[['user_id', 'track_id']]
-# Merge data on those two csvs
-dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')
-# Convert all columns to string type
-dataframe = dataframe.astype(str)
-# Create a history lookup dictionary by 'user_id'
 user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
-                              for user_id, group in dataframe.groupby('user_id')}
 def get_users_with_track_interactions(ascending=False, limit=10):
-    playcount_summary = track_interactions_df.groupby('user_id').size().reset_index(name='track_interactions')
     playcount_summary.sort_values(by='track_interactions', ascending=ascending, inplace=True)
     if limit is not None:
         playcount_summary = playcount_summary.head(limit)
@@ -28,10 +33,10 @@ def get_top_tracks_for_user(user_id: str, limit=10):
         sorted_tracks = sorted_tracks[:limit]
     return sorted_tracks
-def get_unlistened_tracks_for_user(user_id:str):
-    all_tracks = tracks_df['entry'].tolist()
     listened_tracks = [track['entry'] for track in user_to_track_history_dict.get(user_id, [])]
-    return list(set(all_tracks) - set(listened_tracks))
 def predictions_to_tracks(entries_and_predictions):
     tracks = []

 import pandas as pd
+# Read track infos and build the entry representation
 tracks_df = pd.read_csv('data/music_info.csv')
 tracks_df.fillna('', inplace=True)
 tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
+# Raw dataframe from the training set
+model_df = pd.read_csv('data/model.csv')
+model_interactions_df = model_df[['user_id', 'track_id']]
+model_tracks_df = model_df[['entry']].drop_duplicates()
+# Create a dictionary where user_id is the key and the user's full track history is the value
+user_to_track_history_df = pd.merge(tracks_df, model_interactions_df, on='track_id', how='left').astype(str)
 user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
+                              for user_id, group in user_to_track_history_df.groupby('user_id')}
+print("Count of tracks:", tracks_df.shape[0])
+print("Count of interactions (model):", model_interactions_df.shape[0])
+print("Count of tracks (model):", model_tracks_df.shape[0])
 def get_users_with_track_interactions(ascending=False, limit=10):
+    playcount_summary = model_interactions_df.groupby('user_id').size().reset_index(name='track_interactions')
     playcount_summary.sort_values(by='track_interactions', ascending=ascending, inplace=True)
     if limit is not None:
         playcount_summary = playcount_summary.head(limit)
         sorted_tracks = sorted_tracks[:limit]
     return sorted_tracks
+def get_unlistened_tracks_for_user(user_id: str):
+    possible_tracks = model_tracks_df['entry'].tolist()
     listened_tracks = [track['entry'] for track in user_to_track_history_dict.get(user_id, [])]
+    return list(set(possible_tracks) - set(listened_tracks))
 def predictions_to_tracks(entries_and_predictions):
     tracks = []