File size: 5,676 Bytes
afadb57
9045f37
 
 
 
112b798
9045f37
 
 
afadb57
 
 
 
e0fb16c
 
 
afadb57
 
 
 
 
 
 
 
e0fb16c
afadb57
e0fb16c
 
afadb57
 
 
e0fb16c
 
 
afadb57
 
 
 
 
 
 
 
 
e0fb16c
afadb57
e0fb16c
afadb57
9045f37
 
afadb57
e0fb16c
afadb57
 
 
e0fb16c
afadb57
 
9045f37
e0fb16c
9045f37
 
e0fb16c
 
 
9045f37
e0fb16c
 
 
 
 
9045f37
 
e0fb16c
 
9045f37
 
e0fb16c
9045f37
e0fb16c
 
 
9045f37
e0fb16c
9045f37
e0fb16c
 
9045f37
e0fb16c
9045f37
 
 
 
e0fb16c
 
 
 
9045f37
 
 
e0fb16c
9045f37
 
 
 
e0fb16c
9045f37
 
 
 
fdf715a
9045f37
 
e0fb16c
9045f37
e0fb16c
 
9045f37
 
 
e0fb16c
 
 
9045f37
 
 
e0fb16c
9045f37
 
 
 
 
e0fb16c
9045f37
 
 
 
 
 
afadb57
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from joblib import dump, load
from sklearn.preprocessing import normalize
import re

def get_next_version(file_prefix, folder='RecommendationFiles/'):
    """Return the path for the next (not yet existing) version of a joblib file.

    Scans ``folder`` for files named ``{file_prefix}_NNNN.joblib`` and returns
    ``{folder}/{file_prefix}_{max+1:04d}.joblib`` (``_0001`` when no version
    exists yet). Creates ``folder`` if it is missing.

    Args:
        file_prefix: Base name of the versioned artifact (e.g. 'count_matrix').
        folder: Directory that holds the versioned files.

    Returns:
        str: Full path of the next version's filename.
    """
    os.makedirs(folder, exist_ok=True)  # Ensure the folder exists

    # re.escape guards against regex metacharacters in the prefix, and
    # fullmatch anchors both ends so names like 'prefix_0001.joblib.bak'
    # are not miscounted as versions.
    pattern = re.compile(rf"{re.escape(file_prefix)}_(\d+)\.joblib")
    versions = []
    for fname in os.listdir(folder):
        match = pattern.fullmatch(fname)
        if match:
            versions.append(int(match.group(1)))

    # Determine the next version number (1 when no prior versions exist)
    next_version = max(versions, default=0) + 1

    # Return the next version filename with the folder path
    return os.path.join(folder, f"{file_prefix}_{next_version:04d}.joblib")

def get_latest_version(file_prefix, folder='RecommendationFiles/'):
    """Return the path of the highest existing version of a joblib file.

    Scans ``folder`` for files named ``{file_prefix}_NNNN.joblib`` and returns
    the path of the one with the largest version number.

    Args:
        file_prefix: Base name of the versioned artifact (e.g. 'count_matrix').
        folder: Directory that holds the versioned files.

    Returns:
        str: Full path of the latest version's filename.

    Raises:
        FileNotFoundError: If ``folder`` does not exist or no version of
            ``file_prefix`` is found in it.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(f"Folder '{folder}' does not exist")

    # re.escape guards against regex metacharacters in the prefix, and
    # fullmatch anchors both ends so names like 'prefix_0001.joblib.bak'
    # are not miscounted as versions.
    pattern = re.compile(rf"{re.escape(file_prefix)}_(\d+)\.joblib")
    versions = []
    for fname in os.listdir(folder):
        match = pattern.fullmatch(fname)
        if match:
            versions.append(int(match.group(1)))

    if versions:
        latest_version = max(versions)
        return os.path.join(folder, f"{file_prefix}_{latest_version:04d}.joblib")
    else:
        raise FileNotFoundError(f"No versions found for {file_prefix} in folder '{folder}'")


def recomienda_tf(new_basket, cestas, productos): 
    """Recommend up to five products based on similar historical baskets.

    Loads the latest persisted count matrix and vectorizer, compares the new
    basket (TF, L1-normalized) against every historical basket by cosine
    similarity, and aggregates products from the four most similar baskets,
    weighting each product by the similarity of the basket it came from.

    Args:
        new_basket: Iterable of product codes already in the customer's basket.
        cestas: DataFrame of historical baskets with a 'Cestas' column of
            space-separated product codes.
        productos: DataFrame with 'ARTICULO' and 'DESCRIPCION' columns used to
            attach a description to each recommended product.

    Returns:
        pandas.DataFrame: Up to 5 rows with columns 'ARTICULO', 'DESCRIPCION'
        and 'RELEVANCIA' (relative probability), sorted by relevance.
    """
    # Load the most recent persisted artifacts.
    tf_matrix = load(get_latest_version('count_matrix'))
    count = load(get_latest_version('count_vectorizer'))

    # Vectorize the incoming basket and L1-normalize it to term frequencies.
    basket_tf = normalize(count.transform([' '.join(new_basket)]), norm='l1')

    # Cosine similarity of the new basket against every historical basket.
    sims = cosine_similarity(basket_tf, tf_matrix)

    # Indices of the 4 most similar baskets (argsort is ascending).
    top_indices = sims.argsort()[0][-4:]

    # Accumulate a similarity-weighted score per candidate product.
    scores = {}
    sim_total = 0
    for idx in top_indices:
        weight = sims[0][idx]
        sim_total += weight  # Sum of similarities
        # De-duplicate products within a single basket before scoring.
        for raw_item in set(cestas.iloc[idx]['Cestas'].split()):
            item = raw_item.strip()
            # Avoid recommending items already in the basket.
            if item not in new_basket:
                scores[item] = scores.get(item, 0) + weight

    # Convert raw scores into relative probabilities.
    ranked = []
    if sim_total > 0:
        ranked = [(item, value / sim_total) for item, value in scores.items()]
    else:
        print("No se encontraron similitudes suficientes para calcular probabilidades.")

    # Highest relevance first.
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    # Attach descriptions; products without one are silently dropped.
    rows = []
    for item, score in ranked:
        description = productos.loc[productos['ARTICULO'] == item, 'DESCRIPCION']
        if not description.empty:
            rows.append({
                'ARTICULO': item,
                'DESCRIPCION': description.values[0],
                'RELEVANCIA': score
            })

    return pd.DataFrame(rows).head(5)

def retroalimentacion(cestas, cesta_nueva):
    """Add a new basket to the history and retrain/persist the TF artifacts.

    Appends ``cesta_nueva`` (if not already present) to the ``cestas``
    DataFrame, persists the updated history to CSV, then re-fits the
    CountVectorizer on all baskets and saves new versions of both the
    vectorizer and the L1-normalized count matrix.

    Args:
        cestas: DataFrame of historical baskets with a 'Cestas' column;
            mutated in place when the basket is new.
        cesta_nueva: Iterable of product codes forming the new basket.

    Returns:
        None
    """
    # Convert basket from list to string
    cesta_unida = ' '.join(cesta_nueva)

    # Add the new basket to the historical baskets if it doesn't already exist
    if not cestas['Cestas'].isin([cesta_unida]).any():
        cestas.loc[len(cestas)] = cesta_unida
        print("Cesta añadida.")

        # Re-save the updated baskets DataFrame
        cestas.to_csv('RecommendationFiles/cestas_final.csv', index=False)
    else:
        print("La cesta ya existe en el DataFrame.")

    # Re-vectorize the basket DataFrame (fit_transform == fit then transform)
    count_vectorizer = CountVectorizer()
    count_matrix = count_vectorizer.fit_transform(cestas['Cestas'])
    tf_matrix = normalize(count_matrix, norm='l1')

    # BUG FIX: the matrix was previously dumped under the 'tf_matrix' prefix,
    # but recomienda_tf() loads it via get_latest_version('count_matrix'),
    # so feedback retraining was never picked up. Save under 'count_matrix'
    # so the recommender sees the retrained matrix.
    count_vectorizer_file = get_next_version('count_vectorizer')
    tf_matrix_file = get_next_version('count_matrix')

    dump(count_vectorizer, count_vectorizer_file)
    dump(tf_matrix, tf_matrix_file)

    return None