Update app.py

app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
 from transformers import pipeline
 import chromadb
 from sklearn.metrics.pairwise import euclidean_distances
+from sklearn.preprocessing import normalize
 
 # Load segmentation model
 segmenter = pipeline(model="mattmdjaga/segformer_b2_clothes")
@@ -110,53 +111,69 @@ def segment_clothing(img, clothes=["Hat", "Upper-clothes", "Skirt", "Pants", "Dr
 
 # return structured_results
 
-
-
-#
-
-
-#
-
-#
-# top_metadatas = results['metadatas'][0]
-# top_distances = results['distances'][0]
-#
-# structured_results = []
-# for metadata, distance in zip(top_metadatas, top_distances):
-#     structured_results.append({
-#         'info': metadata,
-#         'similarity': 1 - distance
-#     })
-#
-# return structured_results
-
-def find_similar_images(query_embedding, collection, top_k=5, batch_size=500):
-    query_embedding = query_embedding.reshape(1, -1)  # Reshape the query embedding to 2D
-
-    # Fetch all embeddings and metadata at once
-    all_data = collection.get(include=['embeddings', 'metadatas'])
-    all_embeddings = np.array(all_data['embeddings'])
-    all_metadatas = all_data['metadatas']
-
-    all_results = []
-
-    # Process the data in chunks of batch_size
-    for start in range(0, len(all_embeddings), batch_size):
-        end = start + batch_size
-        batch_embeddings = all_embeddings[start:end]
-        batch_metadatas = all_metadatas[start:end]
-
-        # Compute cosine similarity
-        similarities = cosine_similarity(query_embedding, batch_embeddings).flatten()
-
-        # Pair each similarity with its metadata and collect this batch
-        batch_results = [{'info': metadata, 'similarity': similarity} for similarity, metadata in zip(similarities, batch_metadatas)]
-        all_results.extend(batch_results)
-
-    # Keep only the top_k results, sorted by similarity in descending order
-    sorted_results = sorted(all_results, key=lambda x: x['similarity'], reverse=True)[:top_k]
-
-    return sorted_results
+def get_all_embeddings_from_collection(collection):
+    # Fetch all embedding vectors from the collection.
+    # Here the collection is queried with the embeddings field explicitly included.
+    all_embeddings_data = collection.get(include=['embeddings'])
+
+    # Convert all embedding vectors to a numpy array.
+    all_embeddings = np.array(all_embeddings_data['embeddings'])
+
+    return all_embeddings
+
+def find_similar_images(query_embedding, collection, top_k=5):
+    # Normalize the database embeddings
+    database_embeddings = get_all_embeddings_from_collection(collection)
+    database_embeddings = normalize(database_embeddings, axis=1)
+
+    # Normalize the query embedding
+    query_embedding = normalize(query_embedding.reshape(1, -1), axis=1)
+    #query_embedding = query_embedding.reshape(1, -1)  # Reshape to 2D array for ChromaDB
+    results = collection.query(
+        query_embeddings=query_embedding,
+        n_results=top_k,
+        include=['metadatas', 'distances']
+    )
+
+    top_metadatas = results['metadatas'][0]
+    top_distances = results['distances'][0]
+
+    structured_results = []
+    for metadata, distance in zip(top_metadatas, top_distances):
+        structured_results.append({
+            'info': metadata,
+            'similarity': 1 - distance
+        })
+
+    return structured_results
+
+#def find_similar_images(query_embedding, collection, top_k=5, batch_size=500):
+#    query_embedding = query_embedding.reshape(1, -1)  # Reshape the query embedding to 2D
+#
+#    # Fetch all embeddings and metadata at once
+#    all_data = collection.get(include=['embeddings', 'metadatas'])
+#    all_embeddings = np.array(all_data['embeddings'])
+#    all_metadatas = all_data['metadatas']
+#
+#    all_results = []
+#
+#    # Process the data in chunks of batch_size
+#    for start in range(0, len(all_embeddings), batch_size):
+#        end = start + batch_size
+#        batch_embeddings = all_embeddings[start:end]
+#        batch_metadatas = all_metadatas[start:end]
+#
+#        # Compute cosine similarity
+#        similarities = cosine_similarity(query_embedding, batch_embeddings).flatten()
+#
+#        # Pair each similarity with its metadata and collect this batch
+#        batch_results = [{'info': metadata, 'similarity': similarity} for similarity, metadata in zip(similarities, batch_metadatas)]
+#        all_results.extend(batch_results)
+#
+#    # Keep only the top_k results, sorted by similarity in descending order
+#    sorted_results = sorted(all_results, key=lambda x: x['similarity'], reverse=True)[:top_k]
+#
+#    return sorted_results
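A minimal usage sketch of the new query path, not part of the commit: it assumes the two functions added above are in scope, and that the chromadb collection was created with cosine distance, since 'similarity = 1 - distance' only reads as a cosine similarity in that case (chromadb collections default to L2). The collection name, metadata fields, and the 512-dim random vectors are illustrative stand-ins, not values from app.py.

import numpy as np
import chromadb

client = chromadb.Client()
collection = client.create_collection(
    name="clothes_demo",                # hypothetical name
    metadata={"hnsw:space": "cosine"},  # assumed: cosine distance, so 1 - distance is cosine similarity
)

# Add a few random 512-dim vectors as stand-ins for real image embeddings.
rng = np.random.default_rng(0)
vectors = rng.normal(size=(10, 512)).astype(np.float32)
collection.add(
    ids=[f"img_{i}" for i in range(10)],
    embeddings=vectors.tolist(),
    metadatas=[{"path": f"img_{i}.jpg"} for i in range(10)],
)

query = rng.normal(size=512).astype(np.float32)
for hit in find_similar_images(query, collection, top_k=3):
    print(hit['info'], round(hit['similarity'], 3))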