jrno committed
Commit 63d0aa5
1 Parent(s): 4efe144

Dockerfile and recommendation api with example data

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.csv filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,4 @@
+ data/*
+ !data/music_info.csv
+ !data/user_listening_history_10k.csv
+ recommendation-api/__pycache__
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10.9-slim
+
+ WORKDIR /app
+
+ COPY recommendation-api .
+ COPY data/music_info.csv data/music_info.csv
+ COPY data/user_listening_history_10k.csv data/user_listening_history_10k.csv
+
+ # Install dependencies from the requirements.txt file copied in from the recommendation-api directory
+ RUN pip install -r requirements.txt
+
+ # Run the server; server.py was copied from the recommendation-api directory into /app
+ CMD ["python", "server.py"]
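
To try the image locally, it can be built from the repository root and run with the server's default port published; the image tag below is illustrative and not part of this commit:

docker build -t music-recommendation-api .
docker run -p 7860:7860 music-recommendation-api

server.py reads PORT from the environment and falls back to 7860, so the mapping above targets the default.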
data/music_info.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d930430f811ba3c77f217b3f456f2b6271c238b828d6d9ad76e889b5d725f187
+ size 14985870
data/user_listening_history_10k.csv ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47a82d52ec512f00bf1a3416ecbf153aaa478266e87f6d3c0c4bff85ce4e1d4a
+ size 620427
recommendation-api/learner.py ADDED
@@ -0,0 +1,35 @@
+ from fastai.collab import load_learner
+ from fastai.tabular.all import *
+
+ def create_params(size):
+     return nn.Parameter(torch.zeros(*size).normal_(0, 0.01))
+
+ class DotProductBias(Module):
+     def __init__(self, n_users, n_items, n_factors, y_range=(0, 1.5)):
+         super().__init__()
+         self.user_factors = create_params([n_users, n_factors])
+         self.user_bias = create_params([n_users])
+         self.item_factors = create_params([n_items, n_factors])
+         self.item_bias = create_params([n_items])
+         self.y_range = y_range
+
+     def forward(self, x):
+         users = self.user_factors[x[:, 0]]
+         items = self.item_factors[x[:, 1]]
+         res = (users * items).sum(dim=1)
+         res += self.user_bias[x[:, 0]] + self.item_bias[x[:, 1]]
+         return sigmoid_range(res, *self.y_range)
+
+ def custom_accuracy(prediction, target):
+     # Treat predictions above 0.95 as a positive prediction (1.0) before comparing with the target
+     prediction = torch.where(prediction > 0.95, torch.tensor(1.0), prediction)
+     # Reshape target from [batch_size, 1] to [batch_size]
+     target = target.squeeze(1)
+     correct = (prediction == target).float()
+     accuracy = correct.sum() / len(target)
+     return accuracy
+
+ async def setup_learner(model_filename: str):
+     learn = load_learner(model_filename)
+     learn.dls.device = 'cpu'
+     return learn
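
The training code is not part of this commit; model.pkl ships pre-trained. A minimal sketch of how such a model could be produced with fastai's collab tools is shown below. The target column ('rating'), its scaling into the model's y_range, and all hyperparameters are assumptions, not taken from this repository:

import pandas as pd
from fastai.collab import CollabDataLoaders
from fastai.learner import Learner
from fastai.losses import MSELossFlat

from learner import DotProductBias, custom_accuracy

# Build (user_id, entry, rating) rows from the two CSVs shipped in data/
tracks = pd.read_csv('data/music_info.csv').fillna('')
tracks['entry'] = tracks['name'] + ', ' + tracks['artist'] + ', ' + tracks['year'].astype(str)
history = pd.read_csv('data/user_listening_history_10k.csv').merge(tracks[['track_id', 'entry']], on='track_id')
history['rating'] = history['playcount'] / history['playcount'].max() * 1.5  # assumed scaling into y_range=(0, 1.5)

dls = CollabDataLoaders.from_df(history, user_name='user_id', item_name='entry', rating_name='rating', bs=64)
model = DotProductBias(len(dls.classes['user_id']), len(dls.classes['entry']), n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat(), metrics=custom_accuracy)
learn.fit_one_cycle(5, 5e-3)
learn.export('model.pkl')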
recommendation-api/model.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:572c13e535c670418a3391e69edfeaa6249964a247bf9dcf978ca15333b9b494
+ size 10347577
recommendation-api/recommender.py ADDED
@@ -0,0 +1,25 @@
+ from fastai.learner import Learner
+ import pandas as pd
+
+ from tracks import get_unlistened_tracks_for_user, predictions_to_tracks
+
+ def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
+     not_listened_tracks = get_unlistened_tracks_for_user(user_id)
+
+     # Get predictions for the tracks the user hasn't listened to yet
+     input_dataframe = pd.DataFrame({'user_id': [user_id] * len(not_listened_tracks), 'entry': not_listened_tracks})
+     test_dl = learn.dls.test_dl(input_dataframe)
+     predictions = learn.get_preds(dl=test_dl)
+
+     # Pair each track with its prediction score and sort, highest score first
+     tracks_with_predictions = list(zip(not_listened_tracks, predictions[0].numpy()))
+     tracks_with_predictions.sort(key=lambda x: x[1], reverse=True)
+
+     # Keep the top `limit` entries and expand them into full track records
+     recommendations = predictions_to_tracks(tracks_with_predictions[:limit])
+
+     return {
+         "user_id": user_id,
+         "limit": limit,
+         "recommendations": recommendations
+     }
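
A quick way to exercise get_recommendations_for_user outside the HTTP server, assuming the working directory is laid out like the container's /app (the recommendation-api files next to a data/ folder); the user id is a placeholder:

import asyncio
from learner import setup_learner, custom_accuracy, DotProductBias  # needed so the pickled learner can be resolved
from recommender import get_recommendations_for_user

learn = asyncio.run(setup_learner('model.pkl'))
print(get_recommendations_for_user(learn, user_id='<user_id from /users>', limit=5))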
recommendation-api/requirements.txt ADDED
@@ -0,0 +1,4 @@
+ fastai
+ fastapi
+ uvicorn
+ asyncio
recommendation-api/server.py ADDED
@@ -0,0 +1,34 @@
+ from fastapi import FastAPI, Query
+ import asyncio
+ import uvicorn
+ import os
+
+ from tracks import get_top_tracks_for_user, get_users_with_track_interactions
+ from recommender import get_recommendations_for_user
+ from learner import setup_learner, custom_accuracy, DotProductBias  # DotProductBias and custom_accuracy must be importable here so the pickled learner can be loaded
+
+ app = FastAPI()
+ model_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model.pkl')
+ learn = None
+
+ @app.on_event("startup")
+ async def startup_event():
+     global learn
+     tasks = [asyncio.ensure_future(setup_learner(model_filename))]  # schedule the learner load
+     learn = (await asyncio.gather(*tasks))[0]
+
+ @app.get("/users")
+ async def get_users(limit: int = Query(10)):
+     return get_users_with_track_interactions(limit=limit)
+
+ @app.get('/users/{user_id}')
+ async def get_user_track_history(user_id: str, limit: int = Query(5)):
+     user_history = get_top_tracks_for_user(user_id, limit)
+     return {"user_id": user_id, "history": user_history}
+
+ @app.get("/recommend/{user_id}")
+ async def get_recommendations(user_id: str, limit: int = Query(5)):
+     return get_recommendations_for_user(learn, user_id, limit)
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
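
With the container running (or python server.py started locally), the endpoints can be exercised with requests like the following; replace <user_id> with an id returned by /users:

curl "http://localhost:7860/users?limit=3"
curl "http://localhost:7860/users/<user_id>?limit=5"
curl "http://localhost:7860/recommend/<user_id>?limit=5"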
recommendation-api/tracks.py ADDED
@@ -0,0 +1,44 @@
+ import pandas as pd
+
+ # Read the track metadata and build a human-readable 'entry' key per track
+ tracks_df = pd.read_csv('data/music_info.csv')
+ tracks_df.fillna('', inplace=True)
+ tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
+ track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv')
+
+ # Merge the track metadata with the listening history on 'track_id'
+ dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')
+ # Convert all columns to string type
+ dataframe = dataframe.astype(str)
+ # Create a history lookup dictionary keyed by 'user_id'
+ user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
+                               for user_id, group in dataframe.groupby('user_id')}
+
+ def get_users_with_track_interactions(ascending=False, limit=10):
+     playcount_summary = track_interactions_df.groupby('user_id').size().reset_index(name='track_interactions')
+     playcount_summary.sort_values(by='track_interactions', ascending=ascending, inplace=True)
+     if limit is not None:
+         playcount_summary = playcount_summary.head(limit)
+     return playcount_summary.to_dict(orient='records')
+
+ def get_top_tracks_for_user(user_id: str, limit=10):
+     track_list = user_to_track_history_dict.get(user_id, [])
+     sorted_tracks = sorted(track_list, key=lambda x: int(x['playcount']) if 'playcount' in x and x['playcount'].isdigit() else 0, reverse=True)
+     if limit is not None:
+         sorted_tracks = sorted_tracks[:limit]
+     return sorted_tracks
+
+ def get_unlistened_tracks_for_user(user_id: str):
+     all_tracks = tracks_df['entry'].tolist()
+     listened_tracks = [track['entry'] for track in user_to_track_history_dict.get(user_id, [])]
+     return list(set(all_tracks) - set(listened_tracks))
+
+ def predictions_to_tracks(entries_and_predictions):
+     tracks = []
+     for entry, score in entries_and_predictions:
+         track_info = tracks_df[tracks_df['entry'] == entry]
+         if not track_info.empty:
+             track_dict = track_info.to_dict('records')[0]
+             track_dict['score'] = score.astype(str)
+             tracks.append(track_dict)
+     return tracks
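
tracks.py loads and joins both CSVs at import time, so it can be sanity-checked on its own from a directory that contains the data/ folder; a minimal sketch:

from tracks import get_users_with_track_interactions, get_top_tracks_for_user

# Users with the most recorded interactions, then the listening history of the first one
users = get_users_with_track_interactions(limit=3)
print(users)
print(get_top_tracks_for_user(users[0]['user_id'], limit=5))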