File size: 1,585 Bytes
bbcc5b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import numpy as np

def euclidean_similarity(embedding1, embedding2):
    """Return a similarity score in (0, 1] derived from Euclidean distance.

    The Euclidean distance ``d`` between the two embeddings is mapped to
    ``1 / (1 + d)``: identical embeddings score 1.0 and the score shrinks
    towards 0 as the embeddings move further apart.

    Args:
        embedding1: Array-like of real numbers.
        embedding2: Array-like of real numbers, broadcast-compatible with
            ``embedding1``.

    Returns:
        The similarity score as a floating-point number.
    """
    # Cast to float before subtracting: integer arrays are subtracted in
    # their own dtype, so unsigned inputs would wrap around
    # (e.g. uint8 0 - 3 == 253) and grossly inflate the distance.
    vec1 = np.asarray(embedding1, dtype=float)
    vec2 = np.asarray(embedding2, dtype=float)
    distance = np.linalg.norm(vec1 - vec2)
    # Other distance-to-similarity transformations are possible; this one
    # keeps the result bounded and monotonically decreasing in the distance.
    return 1 / (1 + distance)

def cosine_similarity(embedding1, embedding2):
    """Return the cosine of the angle between two embeddings.

    Args:
        embedding1: Array-like of real numbers.
        embedding2: Array-like of real numbers with the same length as
            ``embedding1``.

    Returns:
        ``dot(embedding1, embedding2) / (|embedding1| * |embedding2|)``.
        If either embedding has zero norm the angle is undefined and 0.0 is
        returned instead of ``nan``.
    """
    # Work in float so that small or unsigned integer dtypes cannot overflow
    # inside the dot product.
    vec1 = np.asarray(embedding1, dtype=float)
    vec2 = np.asarray(embedding2, dtype=float)
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    if norm_product == 0:
        # A zero vector has no direction; avoid the 0/0 division, which
        # would yield nan and a RuntimeWarning.
        return 0.0
    return np.dot(vec1, vec2) / norm_product

def jaccard_similarity(embedding1, embedding2):
    """Return the Jaccard index of the distinct values in two embeddings.

    Each embedding is treated as a set of its values, so element order and
    repeated values are ignored.

    Args:
        embedding1: Iterable of hashable values.
        embedding2: Iterable of hashable values.

    Returns:
        ``|A & B| / |A | B|`` as a float in [0, 1]. When both inputs are
        empty the union is empty too; the two (empty) sets are identical, so
        1.0 is returned rather than dividing by zero.
    """
    # Build each set once and reuse it for both the intersection and union.
    values1 = set(embedding1)
    values2 = set(embedding2)
    union_size = len(values1 | values2)
    if union_size == 0:
        return 1.0
    return len(values1 & values2) / union_size

def hamming_similarity(embedding1, embedding2):
    """Return the fraction of positions at which two embeddings agree.

    Args:
        embedding1: Array-like of values.
        embedding2: Array-like of values with the same shape as
            ``embedding1``.

    Returns:
        ``1 - mismatches / number_of_elements`` as a float in [0, 1].
        Two empty embeddings are considered identical and score 1.0.

    Raises:
        ValueError: If the two embeddings do not have the same shape.
    """
    # Convert to arrays first: comparing two plain lists with ``!=`` yields a
    # single bool for the whole list rather than an element-wise comparison,
    # which would make the mismatch count either 0 or 1.
    vec1 = np.asarray(embedding1)
    vec2 = np.asarray(embedding2)
    if vec1.shape != vec2.shape:
        raise ValueError(
            "embeddings must have the same shape, got "
            f"{vec1.shape} and {vec2.shape}"
        )
    if vec1.size == 0:
        return 1.0
    mismatches = np.count_nonzero(vec1 != vec2)
    # Divide by the total element count (not len(), which is only the first
    # axis) so the result stays in [0, 1] for multi-dimensional input.
    return 1 - mismatches / vec1.size

def get_all_similarities(embedding1, embedding2):
    """Compute every similarity metric in this module for one embedding pair.

    Args:
        embedding1: First embedding, passed unchanged to each metric.
        embedding2: Second embedding, passed unchanged to each metric.

    Returns:
        A dict with the keys ``"euclidean"``, ``"cosine"``, ``"jaccard"`` and
        ``"hamming"``, each mapped to the score returned by the corresponding
        ``*_similarity`` function.
    """
    # (result key, metric function) pairs, evaluated in this order.
    metrics = (
        ("euclidean", euclidean_similarity),
        ("cosine", cosine_similarity),
        ("jaccard", jaccard_similarity),
        ("hamming", hamming_similarity),
    )
    return {label: metric(embedding1, embedding2) for label, metric in metrics}

# Example usage (left commented out so that it does not run on import).
# get_all_similarities returns a dict keyed by "euclidean", "cosine",
# "jaccard" and "hamming":
# embedding1 = [1, 2, 3]
# embedding2 = [4, 5, 6]
# similarities = get_all_similarities(embedding1, embedding2)
# print(similarities)

# NOTE: this statement sits at module top level without an
# ``if __name__ == "__main__"`` guard, so the message is printed whenever the
# module is imported, not only when it is run as a script.
print("Similarity score is working")