Diego-0121
committed on
Commit
•
3e482dc
1
Parent(s):
08f7b1b
Create recomendation
Browse files- recomendation +95 -0
recomendation
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from vectorization import spotify_data
|
5 |
+
import json
|
6 |
+
import gradio as gr
|
7 |
+
from gradio.components import Textbox
|
8 |
+
from ast import literal_eval
|
9 |
+
# Load the preprocessed dataset. The rest of this module reads its 'song',
# 'artist' and 'song_vector' columns.
spotify_data_processed = pd.read_csv("dataset_modificado.csv")
|
10 |
+
|
11 |
+
def convert_string_to_array(str_vector, fallback_dim=100):
    """Convert a serialized vector into a 1-D NumPy float array.

    Args:
        str_vector: The value to convert. NumPy arrays are returned
            unchanged; lists and tuples are converted directly; strings are
            parsed as whitespace- and/or comma-separated numbers with
            optional square brackets (the form ``str(ndarray)`` produces).
        fallback_dim: Length of the zero vector returned when the value
            cannot be parsed. Defaults to 100, the size used originally.

    Returns:
        A NumPy array with the parsed values, or ``np.zeros((fallback_dim,))``
        when the input is empty, malformed, or not a supported type (for
        example a NaN cell read from a CSV).
    """
    # Already converted: hand it back untouched so the function is idempotent.
    if isinstance(str_vector, np.ndarray):
        return str_vector

    try:
        if isinstance(str_vector, (list, tuple)):
            return np.asarray(str_vector, dtype=float)

        # Brackets and carriage returns are dropped; newlines and commas are
        # separators, so they become spaces before splitting.
        translation = str.maketrans(
            {"[": None, "]": None, "\r": None, "\n": " ", ",": " "}
        )
        vector_elements = [
            float(item) for item in str_vector.translate(translation).split()
        ]
        if not vector_elements:
            # An empty vector would make the stacked matrix ragged later on.
            raise ValueError("empty vector string")
        return np.array(vector_elements)
    except (ValueError, AttributeError, TypeError) as e:
        # AttributeError/TypeError cover non-string cells such as NaN or None.
        print("Error:", e)
        return np.zeros((fallback_dim,))
|
23 |
+
|
24 |
+
|
25 |
+
# Parse every serialized entry of the 'song_vector' column with
# convert_string_to_array.
spotify_data_processed['song_vector'] = (
    spotify_data_processed['song_vector'].apply(convert_string_to_array)
)

# Sanity check: run the converter over the first rows and print the result.
sample_data = spotify_data_processed['song_vector'].head()
converted_vectors = sample_data.apply(convert_string_to_array)
print(converted_vectors)
|
32 |
+
|
33 |
+
|
34 |
+
|
35 |
+
def recommend_song(song_name, artist_name, spotify_data_processed, top_n=4):
    """Return the songs whose vectors are most similar to the given song.

    Args:
        song_name: Song title, matched by exact equality on the 'song' column.
        artist_name: Artist, matched by exact equality on the 'artist' column.
        spotify_data_processed: DataFrame with 'song', 'artist' and
            'song_vector' columns.
        top_n: Maximum number of recommendations to return. Values below
            zero are treated as zero.

    Returns:
        A DataFrame with the 'song' and 'artist' columns of up to ``top_n``
        rows, ordered from most to least similar by cosine similarity. If no
        row matches the song and artist, a one-row DataFrame with a single
        "Error" column is returned instead.
    """
    # Positional index of the matching rows; the first match is the query.
    is_query = (
        (spotify_data_processed['song'] == song_name)
        & (spotify_data_processed['artist'] == artist_name)
    ).to_numpy()
    query_positions = np.flatnonzero(is_query)

    if query_positions.size == 0:
        return pd.DataFrame({"Error": ["Canción no encontrada en la base de datos."]})

    query_pos = query_positions[0]
    song_vec = spotify_data_processed['song_vector'].iloc[query_pos]

    # The column may still hold serialized vectors; make sure this is an array.
    if isinstance(song_vec, str):
        song_vec = convert_string_to_array(song_vec)

    all_song_vectors = np.array(spotify_data_processed['song_vector'].tolist())
    similarities = cosine_similarity([song_vec], all_song_vectors)[0]

    # Rank every row from most to least similar, then drop the query row by
    # position. Skipping rank 0 is not enough: with tied scores (duplicate or
    # all-zero vectors) the query is not guaranteed to come first.
    ranked = np.argsort(similarities)[::-1]
    top_indices = ranked[ranked != query_pos][:max(top_n, 0)]

    return spotify_data_processed.iloc[top_indices][['song', 'artist']]
|
63 |
+
|
64 |
+
|
65 |
+
|
66 |
+
|
67 |
+
def _format_recommendations(recommendations_df, count=4):
    """Render 'song'/'artist' rows as exactly `count` display strings."""
    pairs = recommendations_df[['song', 'artist']].values.tolist()
    formatted = [f"{song} by {artist}" for song, artist in pairs][:count]
    # Pad with empty strings so the result always has `count` entries.
    formatted.extend([""] * (count - len(formatted)))
    return formatted


def recommend_song_interface(song_name, artist_name):
    """Return four display strings recommending songs similar to the input.

    Looks the song up in the module-level ``spotify_data_processed`` via
    ``recommend_song``. If that yields no usable recommendations (for example
    the song is not found), recommendations for a randomly sampled song from
    the dataset are returned instead.

    Args:
        song_name: Song title to look up.
        artist_name: Artist of the song.

    Returns:
        A list of exactly four strings of the form "<song> by <artist>",
        padded with empty strings when fewer recommendations are available.
    """
    recommendations_df = recommend_song(song_name, artist_name, spotify_data_processed)

    has_recommendations = (
        isinstance(recommendations_df, pd.DataFrame)
        and not recommendations_df.empty
        and {'song', 'artist'}.issubset(recommendations_df.columns)
    )
    if has_recommendations:
        return _format_recommendations(recommendations_df)

    # Nothing to sample from: DataFrame.sample() raises on an empty frame.
    if spotify_data_processed.empty:
        return [""] * 4

    # Fall back to recommendations for one randomly chosen row of the dataset.
    random_song = spotify_data_processed.sample()
    random_song_name = random_song['song'].iloc[0]
    random_artist_name = random_song['artist'].iloc[0]

    random_recommendations_df = recommend_song(
        random_song_name, random_artist_name, spotify_data_processed
    )
    return _format_recommendations(random_recommendations_df)
|
92 |
+
|
93 |
+
# Example usage.
# spotify_data_processed must be a valid pandas DataFrame with 'song' and
# 'artist' columns.
recommendations = recommend_song_interface(
    song_name="song_name",
    artist_name="artist_name",
)
|