Spaces:
Sleeping
Sleeping
import pandas as pd

# Read the track catalog; missing values become empty strings so the string
# concatenation below yields text for every row.
tracks_df = pd.read_csv('data/music_info.csv')
tracks_df.fillna('', inplace=True)

# "name, artist, year" label used as the lookup key for a track elsewhere in
# this module.
tracks_df["entry"] = (
    tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
)

# Read the per-user listening history.
track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv')

# Left-join the history onto the catalog: every track is kept, and tracks
# without any listening row get missing values in the history columns.
dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')

# Remember which rows belong to a real user *before* stringifying: afterwards
# a missing user_id is the literal text 'nan' and would be grouped as if it
# were a user owning every unplayed track.
_has_listener = dataframe['user_id'].notna()

# Convert all columns to string type
dataframe = dataframe.astype(str)

# History lookup: user_id -> list of row dicts (all values are strings, the
# 'user_id' column itself is dropped from each record).
user_to_track_history_dict = {
    user_id: group.drop('user_id', axis=1).to_dict('records')
    for user_id, group in dataframe[_has_listener].groupby('user_id')
}
def get_users_with_track_interactions(ascending=False, limit=10):
    """Summarize how many listening-history rows each user has.

    Args:
        ascending: Sort direction for the row count; the default ``False``
            puts the users with the most rows first.
        limit: Maximum number of users to return, or ``None`` for all of them.

    Returns:
        A list of ``{'user_id': ..., 'track_interactions': ...}`` dicts sorted
        by ``track_interactions``.
    """
    rows_per_user = track_interactions_df.groupby('user_id').size()
    summary = rows_per_user.reset_index(name='track_interactions')
    summary = summary.sort_values(by='track_interactions', ascending=ascending)

    if limit is None:
        return summary.to_dict(orient='records')
    return summary.head(limit).to_dict(orient='records')
def _playcount_as_number(record):
    """Return the record's 'playcount' as a float, or 0.0 if it is unusable."""
    import math

    # float() rather than str.isdigit(): the history records are built from a
    # stringified merge, where counts can look like "5.0" (and missing ones
    # like "nan"); isdigit() rejects both and would flatten every key to 0.
    try:
        value = float(record.get('playcount', ''))
    except (TypeError, ValueError):
        return 0.0
    return value if math.isfinite(value) else 0.0


def get_top_tracks_for_user(user_id: str, limit=10):
    """Return a user's history records ordered by play count, highest first.

    Args:
        user_id: Key into ``user_to_track_history_dict``.
        limit: Maximum number of records to return, or ``None`` for all.

    Returns:
        A new list of the user's record dicts sorted by descending
        ``'playcount'``. Records whose play count is missing or not a finite
        number are treated as 0; ties keep their original relative order.
        Unknown users yield an empty list.
    """
    track_list = user_to_track_history_dict.get(user_id, [])
    sorted_tracks = sorted(track_list, key=_playcount_as_number, reverse=True)
    if limit is not None:
        sorted_tracks = sorted_tracks[:limit]
    return sorted_tracks
def get_unlistened_tracks_for_user(user_id: str):
    """Return the catalog entries that do not appear in a user's history.

    Args:
        user_id: Key into ``user_to_track_history_dict``; an unknown user is
            treated as having no history.

    Returns:
        A list of unique ``entry`` strings from ``tracks_df`` that are absent
        from the user's history records, in catalog order.
    """
    listened_entries = {
        track['entry'] for track in user_to_track_history_dict.get(user_id, [])
    }
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is reproducible across runs (a plain set difference is not,
    # because string hashing is randomized per interpreter start).
    catalog_entries = dict.fromkeys(tracks_df['entry'].tolist())
    return [entry for entry in catalog_entries if entry not in listened_entries]
def predictions_to_tracks(entries_and_predictions):
    """Attach prediction scores to the catalog rows they refer to.

    Args:
        entries_and_predictions: Iterable of ``(entry, score)`` pairs, where
            ``entry`` is matched against the ``entry`` column of ``tracks_df``.

    Returns:
        A list of dicts in input order, one per pair whose entry exists in
        ``tracks_df``. Each dict is the first matching catalog row plus a
        ``'score'`` key holding the score as a string. Pairs whose entry is
        not in the catalog are skipped.
    """
    # Materialize once: the input may be a one-shot iterator and is read twice.
    pairs = list(entries_and_predictions)
    requested_entries = list({entry for entry, _ in pairs})

    # A single vectorized pass over the catalog replaces one full scan per
    # prediction. Keeping the first duplicate mirrors "take the first match".
    matching_rows = tracks_df[tracks_df['entry'].isin(requested_entries)]
    matching_rows = matching_rows.drop_duplicates('entry')
    row_by_entry = {row['entry']: row for row in matching_rows.to_dict('records')}

    tracks = []
    for entry, score in pairs:
        row = row_by_entry.get(entry)
        if row is None:
            continue
        track_dict = dict(row)  # copy so repeated entries do not share a dict
        # str() works for plain Python numbers as well as NumPy scalars,
        # whereas score.astype(str) only exists on the latter.
        track_dict['score'] = str(score)
        tracks.append(track_dict)
    return tracks