Spaces:
Sleeping
Sleeping
add csv data and endpoints to show user track history from it
Browse files- .gitignore +2 -0
- data/music_info.csv +3 -0
- data/user_listening_history_10k.csv +3 -0
- recommender.py +16 -0
- server.py +14 -22
- tracks.py +40 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
.idea
|
2 |
+
__pycache__
|
data/music_info.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d930430f811ba3c77f217b3f456f2b6271c238b828d6d9ad76e889b5d725f187
|
3 |
+
size 14985870
|
data/user_listening_history_10k.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:47a82d52ec512f00bf1a3416ecbf153aaa478266e87f6d3c0c4bff85ce4e1d4a
|
3 |
+
size 620427
|
recommender.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastai.learner import Learner
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
def get_recommendations_for_user(learn: "Learner", user_id: str, limit: int = 5):
    """Score candidate songs for ``user_id`` with the given learner.

    Args:
        learn: Learner used for inference; this function calls
            ``learn.dls.test_dl`` and ``learn.get_preds`` on it.
        user_id: User the predictions are computed for.
        limit: Requested number of recommendations. It is echoed back in the
            response; the candidate list is not yet ranked or truncated by it.

    Returns:
        A dict with the keys ``user_id``, ``limit`` and ``recommendations``.
        ``recommendations`` is the first element returned by
        ``learn.get_preds``, converted via ``.numpy().tolist()``, in the same
        order as the candidate songs.
    """
    # TODO: Fetch list of not listened songs as entries
    not_listened_songs = [
        "Revelry, Kings of Leon, 2008",
        "Gears, Miss May I, 2010",
        "Sexy Bitch, David Guetta, 2009",
    ]
    # One input row per candidate song, all attributed to the requested user,
    # so the scores actually depend on the ``user_id`` argument.
    input_dataframe = pd.DataFrame({
        'user_id': [user_id] * len(not_listened_songs),
        'entry': not_listened_songs,
    })
    test_dl = learn.dls.test_dl(input_dataframe)
    predictions = learn.get_preds(dl=test_dl)

    # TODO: Return recommendations in track format
    return {
        "user_id": user_id,
        "limit": limit,
        "recommendations": predictions[0].numpy().tolist(),
    }
|
server.py
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
from fastai.collab import load_learner
|
2 |
from fastapi import FastAPI, Query
|
3 |
from fastapi.middleware.cors import CORSMiddleware
|
4 |
-
from custom_models import DotProductBias
|
5 |
import asyncio
|
6 |
import uvicorn
|
7 |
-
import pandas as pd
|
8 |
import os
|
9 |
|
|
|
|
|
|
|
10 |
# FastAPI app
|
11 |
app = FastAPI()
|
12 |
|
@@ -35,28 +37,18 @@ async def startup_event():
|
|
35 |
tasks = [asyncio.ensure_future(setup_learner())] # assign some task
|
36 |
learn = (await asyncio.gather(*tasks))[0]
|
37 |
|
38 |
-
@app.get(
|
39 |
-
async def
|
40 |
-
return
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
]
|
47 |
-
}
|
48 |
|
49 |
@app.get("/recommend/{user_id}")
|
50 |
-
async def
|
51 |
-
|
52 |
-
print(user_id)
|
53 |
-
not_listened_songs = ["Revelry, Kings of Leon, 2008", "Gears, Miss May I, 2010", "Sexy Bitch, David Guetta, 2009"]
|
54 |
-
input_dataframe = pd.DataFrame({'user_id': ["440abe26940ae9d9268157222a4a3d5735d44ed8"] * len(not_listened_songs), 'entry': not_listened_songs})
|
55 |
-
test_dl = learn.dls.test_dl(input_dataframe)
|
56 |
-
predictions = learn.get_preds(dl=test_dl)
|
57 |
-
print(predictions)
|
58 |
-
#pred = learn.predict(file)
|
59 |
-
return {"result": predictions[0].numpy().tolist()}
|
60 |
|
61 |
if __name__ == "__main__":
|
62 |
uvicorn.run(app, host="0.0.0.0", port=int(os.getenv("PORT", 7860)))
|
|
|
1 |
from fastai.collab import load_learner
|
2 |
from fastapi import FastAPI, Query
|
3 |
from fastapi.middleware.cors import CORSMiddleware
|
4 |
+
from custom_models import DotProductBias
|
5 |
import asyncio
|
6 |
import uvicorn
|
|
|
7 |
import os
|
8 |
|
9 |
+
from tracks import get_top_tracks_for_user, get_users_with_track_interactions
|
10 |
+
from recommender import get_recommendations_for_user
|
11 |
+
|
12 |
# FastAPI app
|
13 |
app = FastAPI()
|
14 |
|
|
|
37 |
tasks = [asyncio.ensure_future(setup_learner())] # assign some task
|
38 |
learn = (await asyncio.gather(*tasks))[0]
|
39 |
|
40 |
+
@app.get("/users")
async def get_users(limit: int = Query(10)):
    """Return the result of ``get_users_with_track_interactions`` for ``limit``.

    ``limit`` is a query parameter that defaults to 10.
    """
    users = get_users_with_track_interactions(limit=limit)
    return users
|
43 |
+
|
44 |
+
@app.get('/users/{user_id}')
async def get_user_track_history(user_id: str, limit: int = Query(5)):
    """Return the track history of ``user_id`` as looked up by ``get_top_tracks_for_user``.

    ``limit`` is a query parameter (default 5) forwarded to the lookup. The
    response echoes the requested ``user_id`` next to the ``history`` list.
    """
    return {
        "user_id": user_id,
        "history": get_top_tracks_for_user(user_id, limit),
    }
|
|
|
|
|
48 |
|
49 |
@app.get("/recommend/{user_id}")
async def get_recommendations(user_id: str, num_recommendations: int = Query(5)):
    """Return recommendations for ``user_id`` computed with the module-level ``learn``.

    ``num_recommendations`` is a query parameter (default 5) passed through as
    the limit argument of ``get_recommendations_for_user``.
    """
    recommendations = get_recommendations_for_user(learn, user_id, num_recommendations)
    return recommendations
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
if __name__ == "__main__":
    # Listen on all interfaces; the port comes from the PORT environment
    # variable and falls back to 7860 when it is not set.
    listen_port = int(os.getenv("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=listen_port)
|
tracks.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
|
3 |
+
# Load the track metadata and the listening history from CSV
print("Reading music info csv ...")
tracks_df = pd.read_csv('data/music_info.csv')

print("Reading user listening history ...")
# Only the first 1000 history rows are read
track_interactions_df = pd.read_csv('data/user_listening_history_10k.csv', nrows=1000)

# Left join on 'track_id': every track row is kept, with or without interactions
dataframe = tracks_df.merge(track_interactions_df, on='track_id', how='left')

# Replace NaN with empty strings, then turn every column into strings
dataframe = dataframe.fillna('').astype(str)

# Map each 'user_id' to the list of its row dicts (without the 'user_id' column)
lookup_table = {
    user_id: group.drop(columns='user_id').to_dict('records')
    for user_id, group in dataframe.groupby('user_id')
}
|
20 |
+
|
21 |
+
def get_users_with_track_interactions(ascending=False, limit=10):
    """Summarize how many interaction rows each user has.

    Args:
        ascending: Sort direction for the ``track_interactions`` count;
            the default ``False`` puts the most active users first.
        limit: Maximum number of users to return; ``None`` returns all.

    Returns:
        A list of dicts, one per user, with the keys ``user_id`` and
        ``track_interactions``.
    """
    # Number of rows per 'user_id', as a two-column frame
    rows_per_user = track_interactions_df.groupby('user_id').size()
    summary = rows_per_user.reset_index(name='track_interactions')
    # Order by the interaction count in the requested direction
    summary = summary.sort_values(by='track_interactions', ascending=ascending)
    # Keep only the first `limit` rows unless no limit was requested
    selected = summary if limit is None else summary.head(limit)
    return selected.to_dict(orient='records')
|
31 |
+
|
32 |
+
def _playcount_value(track) -> float:
    """Return a track record's play count as a number, or 0.0 if missing or unparseable."""
    try:
        value = float(track.get('playcount', ''))
    except (TypeError, ValueError):
        return 0.0
    # NaN and infinities would make the sort order meaningless; treat them as 0.
    if value != value or value in (float('inf'), float('-inf')):
        return 0.0
    return value


def get_top_tracks_for_user(user_id: str, limit=20):
    """Return a user's tracks ordered by play count, highest first.

    Args:
        user_id: Key to look up in ``lookup_table``.
        limit: Maximum number of tracks to return; ``None`` returns all.

    Returns:
        A list of track dicts sorted by ``playcount`` in descending order.
        Unknown users yield an empty list. Tracks whose play count is missing
        or not numeric sort as if their play count were 0.
    """
    # Retrieve the user's track list from the lookup table or an empty list if not found
    track_list = lookup_table.get(user_id, [])
    # Play counts are stored as strings and may be float-formatted (e.g. "5.0"),
    # which ``str.isdigit`` rejects, so they are parsed numerically instead.
    sorted_tracks = sorted(track_list, key=_playcount_value, reverse=True)
    # Apply the limit if specified
    if limit is not None:
        sorted_tracks = sorted_tracks[:limit]
    return sorted_tracks
|