Diego-0121 committed on
Commit
3e482dc
1 Parent(s): 08f7b1b

Create recomendation

Browse files
Files changed (1) hide show
  1. recomendation +95 -0
recomendation ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.metrics.pairwise import cosine_similarity
2
+ import pandas as pd
3
+ import numpy as np
4
+ from vectorization import spotify_data
5
+ import json
6
+ import gradio as gr
7
+ from gradio.components import Textbox
8
+ from ast import literal_eval
9
# Load the pre-processed dataset; the rest of this script reads its
# 'song', 'artist' and 'song_vector' columns.
spotify_data_processed = pd.read_csv(filepath_or_buffer='dataset_modificado.csv')
10
+
11
def convert_string_to_array(str_vector):
    """Convert the textual form of a vector into a NumPy array.

    The expected input looks like the ``str()`` of a NumPy array or a list,
    e.g. ``"[0.1 0.2\n 0.3]"`` or ``"[0.1, 0.2, 0.3]"``.

    Args:
        str_vector: The serialized vector. A value that is already a
            ``numpy.ndarray`` is returned unchanged.

    Returns:
        A 1-D float array with the parsed values. If the input is not a
        string (for example the float NaN that pandas yields for a missing
        CSV cell) or contains a token that is not a number, the error is
        printed and a zero vector of length 100 is returned instead.
    """
    # Already converted: hand it back untouched.
    if isinstance(str_vector, np.ndarray):
        return str_vector

    try:
        if not isinstance(str_vector, str):
            # Non-string cells have no .replace(); route them to the same
            # fallback as unparseable text instead of raising AttributeError.
            raise TypeError(
                "expected a string vector, got {}".format(type(str_vector).__name__)
            )
        # Drop the brackets and treat commas and line breaks as separators so
        # both NumPy-style and list-style representations are accepted.
        cleaned_str = (
            str_vector.replace('[', '')
            .replace(']', '')
            .replace(',', ' ')
            .replace('\n', ' ')
            .replace('\r', '')
            .strip()
        )
        vector_elements = [float(item) for item in cleaned_str.split()]
        return np.array(vector_elements)
    except (TypeError, ValueError) as e:
        print("Error:", e)
        return np.zeros((100,))
23
+
24
+
25
# Replace the serialized vectors in the 'song_vector' column with their
# parsed form.
spotify_data_processed['song_vector'] = (
    spotify_data_processed['song_vector'].apply(convert_string_to_array)
)

# Run the converter over the first rows and print the result as a quick
# visual check.
sample_data = spotify_data_processed['song_vector'].head()
converted_vectors = sample_data.apply(convert_string_to_array)
print(converted_vectors)
32
+
33
+
34
+
35
def recommend_song(song_name, artist_name, spotify_data_processed, top_n=4):
    """Return the songs whose vectors are most similar to a given song.

    Args:
        song_name: Value matched exactly against the 'song' column.
        artist_name: Value matched exactly against the 'artist' column.
        spotify_data_processed: DataFrame with 'song', 'artist' and
            'song_vector' columns.
        top_n: Maximum number of recommendations to return.

    Returns:
        A DataFrame holding the 'song' and 'artist' columns of the most
        similar rows, ordered from most to least similar. When no row
        matches the requested song and artist, a one-row DataFrame with a
        single 'Error' column is returned instead.
    """
    # Boolean mask of the rows that are the requested song itself.
    is_query_song = (
        (spotify_data_processed['song'] == song_name)
        & (spotify_data_processed['artist'] == artist_name)
    ).to_numpy()
    specific_song = spotify_data_processed[is_query_song]

    # The song is not in the dataset.
    if specific_song.empty:
        return pd.DataFrame({"Error": ["Canción no encontrada en la base de datos."]})

    # Use the vector of the first matching row as the query.
    song_vec = specific_song['song_vector'].iloc[0]

    # Make sure the query vector is a NumPy array.
    if isinstance(song_vec, str):
        song_vec = convert_string_to_array(song_vec)

    all_song_vectors = np.array(spotify_data_processed['song_vector'].tolist())

    # Similarity between the query and every song in the dataset.
    similarities = cosine_similarity([song_vec], all_song_vectors)[0]

    # Rank every row from most to least similar, then drop the query rows
    # explicitly. Skipping "position 0" is not reliable: duplicate entries,
    # identical vectors or a zero query vector can put another row first,
    # which would discard a valid song and recommend the query itself.
    ranked = np.argsort(similarities)[::-1]
    top_indices = ranked[~is_query_song[ranked]][:top_n]

    # Names and artists of the most similar songs.
    recommended_songs = spotify_data_processed.iloc[top_indices][['song', 'artist']]
    return recommended_songs
63
+
64
+
65
+
66
+
67
def _format_recommendations(recommendations_df):
    """Render a recommendations DataFrame as exactly four display strings."""
    rows = recommendations_df[['song', 'artist']].values.tolist()
    labels = [f"{song} by {artist}" for song, artist in rows]
    # Pad with empty strings when there are fewer than four recommendations.
    labels.extend([""] * (4 - len(labels)))
    return labels[:4]


def recommend_song_interface(song_name, artist_name):
    """Return four "<song> by <artist>" strings recommended for a song.

    Recommendations are looked up in the module-level
    ``spotify_data_processed`` DataFrame. When the lookup yields no usable
    result (an empty frame, or one without 'song' and 'artist' columns),
    a random row of the dataset is used as the query instead.

    Args:
        song_name: Song title to look up.
        artist_name: Artist of that song.

    Returns:
        A list of exactly four strings; missing entries are empty strings.
    """
    recommendations_df = recommend_song(song_name, artist_name, spotify_data_processed)

    # A usable result is a non-empty DataFrame carrying both display columns.
    has_results = (
        isinstance(recommendations_df, pd.DataFrame)
        and not recommendations_df.empty
        and {'song', 'artist'}.issubset(recommendations_df.columns)
    )
    if has_results:
        return _format_recommendations(recommendations_df)

    # Fallback: draw one random row from the dataset and recommend for it.
    random_song = spotify_data_processed.sample()
    random_song_name = random_song['song'].iloc[0]
    random_artist_name = random_song['artist'].iloc[0]

    fallback_df = recommend_song(random_song_name, random_artist_name, spotify_data_processed)
    return _format_recommendations(fallback_df)
92
+
93
# Example usage.
# Make sure spotify_data_processed is a valid pandas DataFrame with the
# 'song' and 'artist' columns.
recommendations = recommend_song_interface(
    song_name="song_name",
    artist_name="artist_name",
)