JoJosmin committed on
Commit
8392151
•
1 Parent(s): b5f066b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -45
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  from transformers import pipeline
10
  import chromadb
11
  from sklearn.metrics.pairwise import euclidean_distances
 
12
 
13
  # Load segmentation model
14
  segmenter = pipeline(model="mattmdjaga/segformer_b2_clothes")
@@ -110,53 +111,69 @@ def segment_clothing(img, clothes=["Hat", "Upper-clothes", "Skirt", "Pants", "Dr
110
 
111
  # return structured_results
112
 
113
- #def find_similar_images(query_embedding, collection, top_k=5):
114
- # query_embedding = query_embedding.reshape(1, -1) # Reshape to 2D array for ChromaDB
115
- # results = collection.query(
116
- # query_embeddings=query_embedding,
117
- # n_results=top_k,
118
- # include=['metadatas', 'distances']
119
- # )
120
- #
121
- # top_metadatas = results['metadatas'][0]
122
- # top_distances = results['distances'][0]
123
- #
124
- # structured_results = []
125
- # for metadata, distance in zip(top_metadatas, top_distances):
126
- # structured_results.append({
127
- # 'info': metadata,
128
- # 'similarity': 1 - distance
129
- # })
130
- #
131
- # return structured_results
132
-
133
- def find_similar_images(query_embedding, collection, top_k=5, batch_size=500):
134
- query_embedding = query_embedding.reshape(1, -1) # 쿼리 임베딩 차원 조정
135
-
136
- # 모든 임베딩과 메타데이터를 한 번에 가져옴
137
- all_data = collection.get(include=['embeddings', 'metadatas'])
138
- all_embeddings = np.array(all_data['embeddings'])
139
- all_metadatas = all_data['metadatas']
140
 
141
- all_results = []
142
-
143
- # ์ „์ฒด ๋ฐ์ดํ„ฐ๋ฅผ batch_size์”ฉ ๋‚˜๋ˆ„์–ด ์ฒ˜๋ฆฌ
144
- for start in range(0, len(all_embeddings), batch_size):
145
- end = start + batch_size
146
- batch_embeddings = all_embeddings[start:end]
147
- batch_metadatas = all_metadatas[start:end]
148
-
149
- # 코사인 유사도 계산
150
- similarities = cosine_similarity(query_embedding, batch_embeddings).flatten()
151
-
152
- # 현재 배치에서 유사도와 메타데이터를 쌍으로 묶어 추가
153
- batch_results = [{'info': metadata, 'similarity': similarity} for similarity, metadata in zip(similarities, batch_metadatas)]
154
- all_results.extend(batch_results)
155
-
156
- # 전체 결과 중에서 유사도가 높은 순서대로 top_k 개만 선택
157
- sorted_results = sorted(all_results, key=lambda x: x['similarity'], reverse=True)[:top_k]
158
 
159
- return sorted_results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
 
162
 
 
9
  from transformers import pipeline
10
  import chromadb
11
  from sklearn.metrics.pairwise import euclidean_distances
12
+ from sklearn.preprocessing import normalize
13
 
14
  # Load segmentation model
15
  segmenter = pipeline(model="mattmdjaga/segformer_b2_clothes")
 
111
 
112
  # return structured_results
113
 
114
+ def get_all_embeddings_from_collection(collection):
115
+ # 컬렉션에서 모든 임베딩 벡터를 가져옵니다.
116
+ # 이 경우 collection 객체는 embeddings 속성 포함을 지정해 호출합니다.
117
+ all_embeddings_data = collection.get(include=['embeddings'])
118
+
119
+ # 모든 임베딩 벡터를 numpy 배열로 변환합니다.
120
+ all_embeddings = np.array(all_embeddings_data['embeddings'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
+ return all_embeddings
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
+ def find_similar_images(query_embedding, collection, top_k=5):
125
+ # 데이터베이스 임베딩 정규화
126
+ database_embeddings = get_all_embeddings_from_collection(collection)
127
+ database_embeddings = normalize(database_embeddings, axis=1)
128
+
129
+ # 쿼리 임베딩 정규화
130
+ query_embedding = normalize(query_embedding.reshape(1, -1), axis=1)
131
+ #query_embedding = query_embedding.reshape(1, -1) # Reshape to 2D array for ChromaDB
132
+ results = collection.query(
133
+ query_embeddings=query_embedding,
134
+ n_results=top_k,
135
+ include=['metadatas', 'distances']
136
+ )
137
+
138
+ top_metadatas = results['metadatas'][0]
139
+ top_distances = results['distances'][0]
140
+
141
+ structured_results = []
142
+ for metadata, distance in zip(top_metadatas, top_distances):
143
+ structured_results.append({
144
+ 'info': metadata,
145
+ 'similarity': 1 - distance
146
+ })
147
+
148
+ return structured_results
149
+
150
+ #def find_similar_images(query_embedding, collection, top_k=5, batch_size=500):
151
+ # query_embedding = query_embedding.reshape(1, -1) # 쿼리 임베딩 차원 조정
152
+ #
153
+ # # 모든 임베딩과 메타데이터를 한 번에 가져옴
154
+ # all_data = collection.get(include=['embeddings', 'metadatas'])
155
+ # all_embeddings = np.array(all_data['embeddings'])
156
+ # all_metadatas = all_data['metadatas']
157
+ #
158
+ # all_results = []
159
+ #
160
+ # # ์ „์ฒด ๋ฐ์ดํ„ฐ๋ฅผ batch_size์”ฉ ๋‚˜๋ˆ„์–ด ์ฒ˜๋ฆฌ
161
+ # for start in range(0, len(all_embeddings), batch_size):
162
+ # end = start + batch_size
163
+ # batch_embeddings = all_embeddings[start:end]
164
+ # batch_metadatas = all_metadatas[start:end]
165
+ #
166
+ # # 코사인 유사도 계산
167
+ # similarities = cosine_similarity(query_embedding, batch_embeddings).flatten()
168
+ #
169
+ # # 현재 배치에서 유사도와 메타데이터를 쌍으로 묶어 추가
170
+ # batch_results = [{'info': metadata, 'similarity': similarity} for similarity, metadata in zip(similarities, batch_metadatas)]
171
+ # all_results.extend(batch_results)
172
+ #
173
+ # # 전체 결과 중에서 유사도가 높은 순서대로 top_k 개만 선택
174
+ # sorted_results = sorted(all_results, key=lambda x: x['similarity'], reverse=True)[:top_k]
175
+ #
176
+ # return sorted_results
177
 
178
 
179