Spaces:

schibsted-presplit
/

ai-academy-2024-gr8-recommender-api

Runtime error

App Files Files Community

ai-academy-2024-gr8-recommender-api / recommendation-api /tracks.py

jrno

Try out new version

aad08d3 6 months ago

raw

history blame

2.5 kB

	import pandas as pd

	# Import-time setup: loads the track catalogue and the training-set
	# interactions from CSV and derives the lookup structures used by the
	# functions in this module. Progress is reported with print().
	print("Initializing data")

	# Read track infos and build the entry representation
	tracks_df = pd.read_csv('data/music_info.csv')
	# Missing values become empty strings (presumably so the string
	# concatenation below never yields NaN -- TODO confirm).
	tracks_df.fillna('', inplace=True)
	# "entry" is the text key "<name>, <artist>, <year>" used to match tracks
	# against model_tracks_df and against prediction results.
	tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)

	print("Music info parsed")

	# Raw dataframe from the training set
	model_df = pd.read_csv('data/model.csv')
	# Only the (user_id, track_id) pairs are kept for interactions.
	# NOTE(review): get_top_tracks_for_user sorts on a 'playcount' key; that
	# key can only reach the history records if music_info.csv provides it,
	# because it is not selected here -- verify.
	model_interactions_df = model_df[['user_id', 'track_id']]
	# Distinct "entry" strings present in the training set.
	model_tracks_df = model_df[['entry']].drop_duplicates()

	print("Model data parsed")

	# Create a dictionary where user_id is the key and full track history value
	# The frame is cast to str as a whole, so the user_id keys and every field
	# of the per-track records are strings.
	# NOTE(review): with how='left', a track that has no interaction row keeps a
	# missing user_id, which astype(str) turns into the string 'nan'; such
	# tracks would be grouped under a 'nan' key -- confirm this is intended.
	user_to_track_history_df = pd.merge(tracks_df, model_interactions_df, on='track_id', how='left').astype(str)
	# Each value is a list of record dicts (one per merged row) without the
	# user_id column.
	user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
	for user_id, group in user_to_track_history_df.groupby('user_id')}

	# Row counts of the three frames, printed as a quick sanity check.
	print("Count of tracks:", tracks_df.shape[0])
	print("Count of interactions (model):", model_interactions_df.shape[0])
	print("Count of tracks (model):", model_tracks_df.shape[0])

	def get_users_with_track_interactions(ascending=False, limit=10):
	    """Rank users by their number of interaction rows in the model data.

	    Args:
	        ascending: Sort direction for the interaction count. The default
	            ``False`` lists the users with the most rows first.
	        limit: Maximum number of users to return; ``None`` returns all.

	    Returns:
	        A list of dicts, one per user, with the keys ``user_id`` and
	        ``track_interactions``.
	    """
	    # One row per user_id, counting that user's rows in model_interactions_df.
	    rows_per_user = model_interactions_df.groupby('user_id').size()
	    ranking = rows_per_user.reset_index(name='track_interactions').sort_values(
	        by='track_interactions', ascending=ascending
	    )
	    if limit is None:
	        return ranking.to_dict(orient='records')
	    return ranking.head(limit).to_dict(orient='records')

	def get_top_tracks_for_user(user_id: str, limit=10):
	    """Return a user's track history ordered by descending play count.

	    Args:
	        user_id: Key into ``user_to_track_history_dict``. An unknown id
	            yields an empty list.
	        limit: Maximum number of tracks to return; ``None`` returns all.

	    Returns:
	        A list of track record dicts. Records whose ``playcount`` is
	        missing or not a digit-only string are ranked as 0; ties keep
	        their original relative order.
	    """
	    def playcount_rank(track):
	        # Only digit-only strings are converted; anything else ranks as 0.
	        if 'playcount' not in track:
	            return 0
	        raw_count = track['playcount']
	        return int(raw_count) if raw_count.isdigit() else 0

	    history = user_to_track_history_dict.get(user_id, [])
	    ranked = sorted(history, key=playcount_rank, reverse=True)
	    return ranked if limit is None else ranked[:limit]

	def get_unlistened_tracks_for_user(user_id: str):
	    """List the model's track entries that are absent from a user's history.

	    Args:
	        user_id: Key into ``user_to_track_history_dict``. An unknown id is
	            treated as an empty history.

	    Returns:
	        A list of ``entry`` strings from ``model_tracks_df`` that do not
	        occur in the user's history. The order is that of a set and is
	        therefore not meaningful.
	    """
	    history = user_to_track_history_dict.get(user_id, [])
	    heard_entries = {record['entry'] for record in history}
	    candidate_entries = set(model_tracks_df['entry'].tolist())
	    return list(candidate_entries - heard_entries)

	def predictions_to_tracks(entries_and_predictions):
	    """Turn (entry, score) pairs into track records carrying their score.

	    Args:
	        entries_and_predictions: Iterable of ``(entry, score)`` pairs.
	            ``score`` must be indexable and its first element must offer
	            ``.astype`` (presumably a NumPy array of predictions --
	            TODO confirm against the caller).

	    Returns:
	        A list of dicts, one per pair whose entry exists in ``tracks_df``,
	        holding the first matching row's columns plus a ``score`` key with
	        the first score element converted to a string. Pairs with no
	        matching entry are skipped.
	    """
	    resolved = []
	    for entry, score in entries_and_predictions:
	        # tracks_df is filtered once per pair to find the rows for this entry.
	        matches = tracks_df[tracks_df['entry'] == entry]
	        if matches.empty:
	            continue
	        record = matches.to_dict('records')[0]
	        record['score'] = score[0].astype(str)
	        resolved.append(record)
	    return resolved