Spaces:
Runtime error
Runtime error
Dockerfile and recommendation API with example data
Browse files
- .gitattributes +1 -0
- .gitignore +4 -0
- Dockerfile +13 -0
- data/music_info.csv +3 -0
- data/user_listening_history_10k.csv +3 -0
- recommendation-api/learner.py +35 -0
- recommendation-api/model.pkl +3 -0
- recommendation-api/recommender.py +25 -0
- recommendation-api/requirements.txt +4 -0
- recommendation-api/server.py +34 -0
- recommendation-api/tracks.py +44 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
data/*
|
2 |
+
!data/music_info.csv
|
3 |
+
!data/user_listening_history_10k.csv
|
4 |
+
recommendation-api/__pycache__
|
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FROM python:3.10.9-slim

WORKDIR /app

# App code first, then the example datasets the API reads at startup.
COPY recommendation-api .
COPY data/music_info.csv data/music_info.csv
COPY data/user_listening_history_10k.csv data/user_listening_history_10k.csv

# Install dependencies from the requirements.txt file located within the
# recommendation-api directory; --no-cache-dir avoids baking the pip cache
# into the image layer and keeps the image smaller.
RUN pip install --no-cache-dir -r requirements.txt

# Command to run the server, assuming server.py is inside the recommendation-api directory
CMD ["python", "server.py"]
data/music_info.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d930430f811ba3c77f217b3f456f2b6271c238b828d6d9ad76e889b5d725f187
|
3 |
+
size 14985870
|
data/user_listening_history_10k.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47a82d52ec512f00bf1a3416ecbf153aaa478266e87f6d3c0c4bff85ce4e1d4a
|
3 |
+
size 620427
|
recommendation-api/learner.py
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastai.collab import load_learner
|
2 |
+
from fastai.tabular.all import *
|
3 |
+
|
def create_params(size):
    """Return a trainable parameter of the given shape, initialised N(0, 0.01)."""
    weights = torch.zeros(*size)
    weights.normal_(0, 0.01)  # in-place draw from a narrow normal
    return nn.Parameter(weights)
6 |
+
|
class DotProductBias(Module):
    """Collaborative-filtering model: dot product of user and item embedding
    vectors plus per-user and per-item bias terms, squashed into `y_range`
    with a scaled sigmoid."""

    def __init__(self, n_users, n_items, n_factors, y_range=(0, 1.5)):
        super().__init__()
        self.user_factors = create_params([n_users, n_factors])
        self.user_bias = create_params([n_users])
        self.item_factors = create_params([n_items, n_factors])
        self.item_bias = create_params([n_items])
        self.y_range = y_range

    def forward(self, x):
        # Column 0 of x holds user indices, column 1 item indices.
        user_idx = x[:, 0]
        item_idx = x[:, 1]
        dot = (self.user_factors[user_idx] * self.item_factors[item_idx]).sum(dim=1)
        score = dot + self.user_bias[user_idx] + self.item_bias[item_idx]
        return sigmoid_range(score, *self.y_range)
22 |
+
|
def custom_accuracy(prediction, target):
    """Accuracy metric for the implicit-feedback recommender.

    Any prediction above 0.95 is snapped to 1.0 (counted as a positive hit);
    all other predictions are compared to the target by exact equality.

    Args:
        prediction: tensor of shape [batch] with scores in the model's y_range.
        target: tensor of shape [batch, 1] with reference labels.

    Returns:
        Scalar float tensor: fraction of entries matching the target.
    """
    # torch.ones_like inherits dtype AND device from `prediction`; the
    # original torch.tensor(1.0) literal lives on the CPU and fails when
    # predictions are on the GPU (and can mismatch dtype).
    prediction = torch.where(prediction > 0.95, torch.ones_like(prediction), prediction)
    # Collapse target from shape [batch, 1] to [batch] so shapes line up.
    target = target.squeeze(1)
    correct = (prediction == target).float()
    return correct.sum() / len(target)
31 |
+
|
async def setup_learner(model_filename: str):
    """Load the exported fastai learner from `model_filename` and pin it to CPU.

    Returns the ready-to-use Learner instance.
    """
    learner = load_learner(model_filename)
    # Inference runs on CPU in this deployment.
    learner.dls.device = 'cpu'
    return learner
recommendation-api/model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:572c13e535c670418a3391e69edfeaa6249964a247bf9dcf978ca15333b9b494
|
3 |
+
size 10347577
|
recommendation-api/recommender.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastai.learner import Learner
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
from tracks import get_unlistened_tracks_for_user, predictions_to_tracks
|
5 |
+
|
def get_recommendations_for_user(learn: Learner, user_id: str, limit: int = 5):
    """Score every track the user has not listened to and return the best ones.

    Returns a dict with the user id, the requested limit, and the top-`limit`
    recommended tracks (full track records).
    """
    candidates = get_unlistened_tracks_for_user(user_id)

    # One row per candidate track; batch-score them all with the model.
    frame = pd.DataFrame({
        'user_id': [user_id] * len(candidates),
        'entry': candidates,
    })
    dl = learn.dls.test_dl(frame)
    scores = learn.get_preds(dl=dl)[0].numpy()

    # Pair each candidate with its score and rank best-first.
    ranked = sorted(zip(candidates, scores), key=lambda pair: pair[1], reverse=True)

    recommendations = predictions_to_tracks(ranked[:limit])

    return {
        "user_id": user_id,
        "limit": limit,
        "recommendations": recommendations
    }
recommendation-api/requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
fastai
fastapi
uvicorn
# NOTE: "asyncio" was removed from this list. asyncio ships with the Python
# standard library; the PyPI package named "asyncio" is an outdated Python 3.3
# snapshot that can shadow the stdlib module and break on modern Python.
recommendation-api/server.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI, Query
|
2 |
+
import asyncio
|
3 |
+
import uvicorn
|
4 |
+
import os
|
5 |
+
|
6 |
+
from tracks import get_top_tracks_for_user, get_users_with_track_interactions
|
7 |
+
from recommender import get_recommendations_for_user
|
8 |
+
from learner import setup_learner, custom_accuracy # Note that DotProductBias must be imported to global namespace
|
9 |
+
|
# FastAPI application instance; endpoints are registered below.
app = FastAPI()
# model.pkl sits next to this file — resolve its path independently of the CWD.
model_filename = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'model.pkl')
# Loaded lazily at startup (see the startup event handler in this module).
learn = None
13 |
+
|
@app.on_event("startup")
async def startup_event():
    """Load the recommendation model once when the server boots."""
    global learn
    # The original wrapped this single coroutine in asyncio.ensure_future +
    # asyncio.gather; awaiting it directly is equivalent and simpler.
    learn = await setup_learner(model_filename)
19 |
+
|
@app.get("/users")
async def get_users(limit: int = Query(10)):
    """List users with their track-interaction counts, most active first."""
    users = get_users_with_track_interactions(limit=limit)
    return users
23 |
+
|
@app.get('/users/{user_id}')
async def get_user_track_history(user_id: str, limit:int = Query(5)):
    """Return the user's most-played tracks, up to `limit` entries."""
    return {
        "user_id": user_id,
        "history": get_top_tracks_for_user(user_id, limit),
    }
28 |
+
|
@app.get("/recommend/{user_id}")
async def get_recommendations(user_id: str, limit: int = Query(5)):
    """Recommend up to `limit` previously-unheard tracks for the user."""
    result = get_recommendations_for_user(learn, user_id, limit)
    return result
32 |
+
|
if __name__ == "__main__":
    # Default to port 7860 (Hugging Face Spaces convention) unless PORT is set.
    port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)
recommendation-api/tracks.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import pandas as pd

# Load the track catalogue (one row per track) and the per-user listening
# history from the bundled CSV files; paths are relative to the CWD.
tracks_df = pd.read_csv('data/music_info.csv')
tracks_df.fillna('', inplace=True)
# "entry" is the human-readable key the model and API use for a track:
# "<name>, <artist>, <year>".
tracks_df["entry"] = tracks_df["name"] + ", " + tracks_df["artist"] + ", " + tracks_df["year"].astype(str)
track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv')

# Join listening history onto track metadata via track_id (left join keeps
# tracks with no recorded interactions).
dataframe = pd.merge(tracks_df, track_interactions_df, on='track_id', how='left')
# Convert all columns to string type (downstream code assumes str values,
# e.g. playcount parsing in get_top_tracks_for_user)
dataframe = dataframe.astype(str)
# Create a history lookup dictionary by 'user_id': each value is a list of
# track records (dicts) for that user, with the redundant user_id dropped
user_to_track_history_dict = {user_id: group.drop('user_id', axis=1).to_dict('records')
                              for user_id, group in dataframe.groupby('user_id')}
16 |
+
|
def get_users_with_track_interactions(ascending=False, limit=10):
    """Return users with their interaction counts, sorted by activity.

    Each element is {'user_id': ..., 'track_interactions': ...}; `limit=None`
    returns all users.
    """
    counts = (track_interactions_df
              .groupby('user_id')
              .size()
              .reset_index(name='track_interactions')
              .sort_values(by='track_interactions', ascending=ascending))
    if limit is not None:
        counts = counts.head(limit)
    return counts.to_dict(orient='records')
23 |
+
|
def get_top_tracks_for_user(user_id: str, limit=10):
    """Return the user's listening history sorted by play count, descending.

    Unknown users yield an empty list; `limit=None` returns the full history.
    """
    def playcount(record):
        # History records are all-string; missing or non-numeric playcounts
        # sort as zero.
        value = record.get('playcount', '')
        return int(value) if value.isdigit() else 0

    history = sorted(user_to_track_history_dict.get(user_id, []),
                     key=playcount, reverse=True)
    return history if limit is None else history[:limit]
30 |
+
|
def get_unlistened_tracks_for_user(user_id: str):
    """Return the 'entry' strings of all tracks the user has not listened to.

    The result follows catalogue order, deduplicated — the original
    set-difference returned the same elements in an arbitrary (hash-dependent)
    order, which made recommendation tie-breaking nondeterministic.
    """
    listened = {track['entry'] for track in user_to_track_history_dict.get(user_id, [])}
    # dict.fromkeys deduplicates while preserving first-seen order.
    return list(dict.fromkeys(
        entry for entry in tracks_df['entry'] if entry not in listened
    ))
35 |
+
|
def predictions_to_tracks(entries_and_predictions):
    """Expand (entry, score) pairs into full track records.

    Entries with no matching catalogue row are skipped; the score is attached
    to each record as a string under the 'score' key.
    """
    tracks = []
    for entry, score in entries_and_predictions:
        # An empty match produces an empty records list, which is skipped.
        matches = tracks_df[tracks_df['entry'] == entry].to_dict('records')
        if matches:
            record = matches[0]
            record['score'] = score.astype(str)
            tracks.append(record)
    return tracks