Diangle committed on
Commit
b758449
·
1 Parent(s): 1b84ca8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -114,15 +114,14 @@ tokenizer = CLIPTokenizer.from_pretrained("Diangle/clip4clip-webvid")
114
  def search(search_sentence):
115
  inputs = tokenizer(text=search_sentence , return_tensors="pt", padding=True)
116
 
117
- outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], return_dict=False)
118
- text_projection = model.state_dict()['text_projection.weight']
119
- text_embeds = outputs[1] @ text_projection
120
- final_output = text_embeds[torch.arange(text_embeds.shape[0]), inputs["input_ids"].argmax(dim=-1)]
121
 
122
  # Normalization
123
- final_output = final_output / final_output.norm(dim=-1, keepdim=True)
124
- final_output = final_output.cpu().detach().numpy()
125
- sequence_output = final_output / np.sum(final_output**2, axis=1, keepdims=True)
126
 
127
  nn_search = NearestNeighbors(n_neighbors=5, metric='binary', rerank_from=100)
128
  nn_search.fit(np.packbits((ft_visual_features_database > 0.0).astype(bool), axis=1), o_data=ft_visual_features_database)
 
114
  def search(search_sentence):
115
  inputs = tokenizer(text=search_sentence , return_tensors="pt", padding=True)
116
 
117
+ outputs = model(input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"])
118
+ # text_projection = model.state_dict()['text_projection.weight']
119
+ # text_embeds = outputs[1] @ text_projection
120
+ # final_output = text_embeds[torch.arange(text_embeds.shape[0]), inputs["input_ids"].argmax(dim=-1)]
121
 
122
  # Normalization
123
+ final_output = outputs[1] / outputs[1].norm(dim=-1, keepdim=True)
124
+ sequence_output = final_output.cpu().detach().numpy()
 
125
 
126
  nn_search = NearestNeighbors(n_neighbors=5, metric='binary', rerank_from=100)
127
  nn_search.fit(np.packbits((ft_visual_features_database > 0.0).astype(bool), axis=1), o_data=ft_visual_features_database)