jrno committed
Commit b571090
1 Parent(s): b55b671
recommendation-api/{custom_models.py → learner.py} RENAMED
@@ -1,3 +1,4 @@
+from fastai.collab import load_learner
 from fastai.tabular.all import *
 
 def create_params(size):
@@ -11,10 +12,15 @@ class DotProductBias(Module):
         self.item_factors = create_params([n_items, n_factors])
         self.item_bias = create_params([n_items])
         self.y_range = y_range
-
+
     def forward(self, x):
-        users = self.user_factors[x[:,0]]
-        items = self.item_factors[x[:,1]]
+        users = self.user_factors[x[:, 0]]
+        items = self.item_factors[x[:, 1]]
         res = (users * items).sum(dim=1)
-        res += self.user_bias[x[:,0]] + self.item_bias[x[:,1]]
-        return sigmoid_range(res, *self.y_range)
+        res += self.user_bias[x[:, 0]] + self.item_bias[x[:, 1]]
+        return sigmoid_range(res, *self.y_range)
+
+async def setup_learner(model_filename: str):
+    learn = load_learner(model_filename)
+    learn.dls.device = 'cpu'
+    return learn
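The rename keeps the custom model class and the learner-loading helper in one module. As a quick illustration (not part of the commit), the exported learner could be loaded directly like this; it assumes model.pkl was produced with fastai's learn.export() and sits in the working directory, and that DotProductBias is imported so unpickling can resolve it:

    import asyncio
    from learner import setup_learner, DotProductBias  # DotProductBias must be importable so load_learner can unpickle the model

    learn = asyncio.run(setup_learner('model.pkl'))  # coroutine from learner.py; also pins the DataLoaders to CPU
    print(type(learn))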
recommendation-api/recommender.py CHANGED
@@ -1,21 +1,21 @@
 from fastai.learner import Learner
 import pandas as pd
 
-from tracks import get_unlistened_tracks_for_user, predictions_to_tracks, check_user_exists
+from tracks import get_unlistened_tracks_for_user, predictions_to_tracks
 
 def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
     not_listened_tracks = get_unlistened_tracks_for_user(user_id)
 
-    # Get predictions for tracks
+    # Get predictions for the tracks the user hasn't listened to yet
     input_dataframe = pd.DataFrame({'user_id': [user_id] * len(not_listened_tracks), 'entry': not_listened_tracks})
     test_dl = learn.dls.test_dl(input_dataframe)
     predictions = learn.get_preds(dl=test_dl)
 
-    # Associate track with prediction and sort by score
+    # Associate each track with its prediction score and sort
     tracks_with_predictions = list(zip(not_listened_tracks, predictions[0].numpy()))
     tracks_with_predictions.sort(key=lambda x: x[1], reverse=True)
 
-    # Convert predictions to full track entries with score
+    # Pick the top n and return them as full tracks
     recommendations = predictions_to_tracks(tracks_with_predictions[:limit])
 
     return {
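With a learner loaded, the recommender can also be exercised outside the API. A minimal sketch (not part of the commit): the user id below is a hypothetical placeholder for an id present in the listening-history CSV, and the exact shape of the returned dict is defined further down in recommender.py, outside this hunk.

    import asyncio
    from learner import setup_learner, DotProductBias  # DotProductBias needed for unpickling
    from recommender import get_recommendations_for_user

    learn = asyncio.run(setup_learner('model.pkl'))
    result = get_recommendations_for_user(learn, user_id='some-user-id', limit=5)  # placeholder user id
    print(result)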
recommendation-api/server.py CHANGED
@@ -1,43 +1,20 @@
-from fastai.collab import load_learner
 from fastapi import FastAPI, Query
-from fastapi.middleware.cors import CORSMiddleware
-from custom_models import DotProductBias
 import asyncio
 import uvicorn
 import os
 
-from tracks import get_top_tracks_for_user, get_users_with_track_interactions, check_user_exists
+from tracks import get_top_tracks_for_user, get_users_with_track_interactions
 from recommender import get_recommendations_for_user
+from learner import setup_learner, DotProductBias  # Note that DotProductBias must be imported into the global namespace
 
-# Get the absolute path of the directory where the python file resides
-dir_path = os.path.dirname(os.path.realpath(__file__))
-
-# FastAPI app
 app = FastAPI()
-
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-
-# Model filename
-model_filename = os.path.join(dir_path, 'model.pkl')
-
-async def setup_learner():
-    learn = load_learner(model_filename)
-    learn.dls.device = 'cpu'
-    return learn
-
+model_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model.pkl')
 learn = None
+
 @app.on_event("startup")
 async def startup_event():
-    """Setup the learner on server start"""
     global learn
-    loop = asyncio.get_event_loop() # get event loop
-    tasks = [asyncio.ensure_future(setup_learner())] # assign some task
+    tasks = [asyncio.ensure_future(setup_learner(model_filename))]  # schedule learner setup as a background task
     learn = (await asyncio.gather(*tasks))[0]
 
 @app.get("/users")
@@ -55,4 +32,3 @@ async def get_recommendations(user_id: str, limit: int = Query(5)):
 
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
-
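Once the server is started (python server.py, port 7860 by default), the endpoints can be smoke-tested from another process. A minimal sketch (not part of the commit) using the requests library; only the /users route is visible in this diff, and the path of the recommendations endpoint is defined outside the shown hunks, so it is omitted here.

    import requests

    resp = requests.get("http://localhost:7860/users")  # served by the /users route shown above
    resp.raise_for_status()
    print(resp.json())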
 
recommendation-api/tracks.py CHANGED
@@ -1,56 +1,44 @@
 import pandas as pd
+import logging
+
+logger = logging.getLogger(__name__)
 
 # Read the CSV files
-print("Reading tracks data from csv ...")
+logger.info("Reading tracks data from csv ...")
 tracks_df = pd.read_csv('data/music_info.csv')
+
+# Remove NaNs from the data and build the concatenated 'entry' format (matching the trained model)
 tracks_df.fillna('', inplace=True)
 tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
 
-print("Reading user listening history ...")
-track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv', nrows=1000)
+logger.info("Reading user listening history from csv ...")
+track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv')
 
-# Merge the dataframes on 'track_id'
+# Merge the data from the two csvs on 'track_id'
 dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')
-
-# Convert all NaN values to empty strings and all columns to string type
-# dataframe.fillna('', inplace=True)
+# Convert all columns to string type
 dataframe = dataframe.astype(str)
-
-# Group by 'user_id' and then create a list of dictionaries for each group
+# Create a history lookup dictionary keyed by 'user_id'
 user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
                               for user_id, group in dataframe.groupby('user_id')}
 
-def check_user_exists(user_id: str):
-    if (user_id not in user_to_track_history_dict):
-        raise ValueError(f"User {user_id} not found")
-
 def get_users_with_track_interactions(ascending=False, limit=10):
-    # Count the number of rows for each 'user_id'
     playcount_summary = track_interactions_df.groupby('user_id').size().reset_index(name='track_interactions')
-    # Sort the DataFrame based on 'track_interactions', either ascending or descending
     playcount_summary.sort_values(by='track_interactions', ascending=ascending, inplace=True)
-    # Limit the results if limit is specified
     if limit is not None:
         playcount_summary = playcount_summary.head(limit)
-    # Convert the DataFrame to a list of dictionaries
     return playcount_summary.to_dict(orient='records')
 
-def get_top_tracks_for_user(user_id: str, limit=20):
-    # Retrieve the user's track list from the lookup table or an empty list if not found
+def get_top_tracks_for_user(user_id: str, limit=10):
     track_list = user_to_track_history_dict.get(user_id, [])
-    # Sort the track list by 'playcount' in descending order (assuming 'playcount' is stored as a string)
     sorted_tracks = sorted(track_list, key=lambda x: int(x['playcount']) if 'playcount' in x and x['playcount'].isdigit() else 0, reverse=True)
-    # Apply the limit if specified
     if limit is not None:
         sorted_tracks = sorted_tracks[:limit]
     return sorted_tracks
 
 def get_unlistened_tracks_for_user(user_id:str):
-    # Get all tracks
     all_tracks = tracks_df['entry'].tolist()
-    # Get tracks user has listened to
     listened_tracks = [track['entry'] for track in user_to_track_history_dict.get(user_id, [])]
-    # Get unlistened tracks
     return list(set(all_tracks) - set(listened_tracks))
 
 def predictions_to_tracks(entries_and_predictions):
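Since tracks.py now logs through logging.getLogger(__name__) instead of print(), its startup messages only appear if the application configures logging (the root logger defaults to WARNING). A minimal, illustrative configuration (not part of the commit; level and format are arbitrary choices):

    import logging

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s %(name)s %(levelname)s %(message)s",
    )
    # Import tracks (directly or via recommender/server) only after configuring
    # logging, because its logger.info calls run at import time.
    import tracks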