|
|
|
!pip install datasets |
|
|
|
from sklearn.decomposition import TruncatedSVD |
|
from scipy.sparse.linalg import svds |
|
|
|
import matplotlib.pyplot as plt |
|
import seaborn as sns |
|
import pandas as pd |
|
import numpy as np |
|
import warnings |
|
warnings.filterwarnings("ignore") |
|
from datasets import load_dataset |
|
|

"""## Data loading and preprocessing"""
|
|
|
|
|
# Load the place table from disk.
# The reader has to be reached through the `pd` namespace (a bare `read_csv`
# is not defined in this script), and `index=False` is a `DataFrame.to_csv`
# option that `pd.read_csv` does not accept, so it is dropped.
df = pd.read_csv('places.csv', encoding='utf-8')
|
|

"""# User rating data"""
|
# Load the user ratings table from disk (later code reads its `user_id`,
# `place_id` and `rating` columns).
# `read_csv` must be called via `pd`, and `index=False` is a `to_csv`-only
# option that `pd.read_csv` rejects, so it is dropped.
user_rating = pd.read_csv('user_rating_1000.csv', encoding='utf-8')
|
|

"""Build the item-feature data"""
|
|
|
# Keep only the place_id, type and place_name columns of the place table.
item_feature = df.loc[
    :,
    ['place_id', 'type', 'place_name'],
]

# Preview of the first rows (only rendered when run as a notebook cell).
item_feature.head()
|
|

"""Recommender system implementation"""
|
|
|
|
|
# Build the user x place rating matrix: one row per user_id, one column per
# place_id, each cell holding the rating.
#
# `user_place_data` is never defined in this script; the ratings frame
# `user_rating` carries the user_id / place_id / rating columns the pivot
# needs, so it is used as the source.
#
# (user, place) pairs without a rating come out of the pivot as NaN, which
# would propagate through the row means and break the SVD further down, so
# they are filled with 0. NOTE(review): 0 appears to be the "not rated"
# marker in this data set (rows with rating 0.0 are dropped later) -- confirm.
df_user_place_ratings = user_rating.pivot_table(
    index='user_id',
    columns='place_id',
    values='rating',
).fillna(0)

# Preview of the first rows (only rendered when run as a notebook cell).
df_user_place_ratings.head()
|
|

"""
Next steps

1) Convert the pivot table into a matrix.

2) Use np.mean(axis=1) to compute each user's average rating across places.

Subtract the means from step 2 from the matrix from step 1 to obtain the
user-mean-centred data.
"""
|
|
|
|
|
# Turn the place_id column labels into strings with surrounding whitespace
# removed.
df_user_place_ratings.columns = (
    df_user_place_ratings.columns.astype(str).str.strip()
)

# Plain NumPy array of the pivot: rows are users, columns are places.
matrix = df_user_place_ratings.to_numpy()

# Mean rating per user (one value per row of the matrix).
user_ratings_mean = matrix.mean(axis=1)

# Centre each user's ratings on that user's own mean; the means are turned
# into a column vector so they broadcast across the place columns.
matrix_user_mean = matrix - user_ratings_mean[:, np.newaxis]

# Preview of the centred matrix (only rendered when run as a notebook cell).
pd.DataFrame(
    matrix_user_mean,
    columns=df_user_place_ratings.columns,
).head()
|
|
|
|
|
|
|
# Truncated SVD of the mean-centred rating matrix, followed by the low-rank
# reconstruction that serves as the predicted rating for every (user, place).

# Number of singular values / latent factors requested.
N_LATENT_FACTORS = 12

# The default (ARPACK) solver of `svds` requires 1 <= k < min(matrix.shape).
# Cap k so that a small rating matrix does not abort with a ValueError; with
# a large enough matrix this is exactly the original k = 12.
n_factors = min(N_LATENT_FACTORS, min(matrix_user_mean.shape) - 1)

U, sigma, Vt = svds(matrix_user_mean, k=n_factors)

# `svds` returns the singular values as a 1-D array; expand them into a
# diagonal matrix so they can take part in the matrix product below.
sigma = np.diag(sigma)

# Shape check (only rendered when run as a notebook cell).
sigma.shape

# Rebuild the rank-k approximation and add each user's mean back, undoing the
# centring applied before the decomposition.
svd_user_predicted_ratings = (
    U @ sigma @ Vt + user_ratings_mean.reshape(-1, 1)
)

# Keep the pivot's row labels (user_id) and column labels (place_id) so every
# prediction stays attached to the user and place it belongs to. Positional
# access via `.iloc` is unaffected by the added index.
df_svd_preds = pd.DataFrame(
    svd_user_predicted_ratings,
    index=df_user_place_ratings.index,
    columns=df_user_place_ratings.columns,
)

# Previews (only rendered when run as notebook cells).
df_svd_preds.head()

df_svd_preds.shape
|
|
|
|
|
|
|
|
|
|
|
# Build the prediction ranking and the not-yet-rated candidate places for a
# single user.
user_id = 0

# Resolve the user's row through the pivot's user_id index instead of
# assuming that a user id equals its row position. That assumption silently
# picks another user's row whenever ids are not the contiguous range
# 0..n-1; an unknown id now raises KeyError instead.
user_row_number = df_user_place_ratings.index.get_loc(user_id)

# This user's predicted ratings, best first, as a two-column frame.
sorted_user_predictions = (
    df_svd_preds.iloc[user_row_number]
    .sort_values(ascending=False)
    .reset_index()
)
sorted_user_predictions.columns = ['place_id', 'predict_rating']

# The place_id labels were cast to str on the pivot columns; convert them
# back to integers -- presumably so they match item_feature['place_id'].
sorted_user_predictions['place_id'] = (
    sorted_user_predictions['place_id'].astype('int64')
)

# Ratings this user actually gave; rows with rating 0.0 are excluded.
user_data = user_rating[
    (user_rating['user_id'] == user_id) & (user_rating['rating'] != 0.0)
]

# Attach the place features to the user's ratings, highest rating first.
user_history = (
    user_data
    .merge(item_feature, on='place_id')
    .sort_values(['rating'], ascending=False)
)

# Candidate places: everything the user has not rated yet.
already_rated = item_feature['place_id'].isin(user_history['place_id'])
recommendations = item_feature[~already_rated]
|
|
|
|
|
|
|
|