knysfh committed on
Commit
d6fc33f
1 Parent(s): c445d96

Add onnx mean pool function

Browse files
Files changed (1) hide show
  1. README.md +12 -0
README.md CHANGED
@@ -25206,6 +25206,15 @@ import onnxruntime
25206
  import numpy as np
25207
  from transformers import AutoTokenizer, PretrainedConfig
25208
 
 
 
 
 
 
 
 
 
 
25209
  # Load tokenizer and model config
25210
  tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v3')
25211
  config = PretrainedConfig.from_pretrained('jinaai/jina-embeddings-v3')
@@ -25230,6 +25239,9 @@ inputs = {
25230
  outputs = session.run(None, inputs)[0]
25231
 
25232
  # Apply mean pooling to 'outputs' to get a single representation of each text
 
 
 
25233
  ```
25234
 
25235
  </p>
 
25206
  import numpy as np
25207
  from transformers import AutoTokenizer, PretrainedConfig
25208
 
25209
def mean_pooling(model_output: np.ndarray, attention_mask: np.ndarray):
    """Mean-pool token embeddings into one vector per sequence.

    Tokens are weighted by `attention_mask` so padding positions
    contribute nothing; the mask total is clipped at 1e-9 to avoid
    dividing by zero for all-padding rows.
    """
    # Expand the (batch, seq) mask to the embeddings' (batch, seq, dim) shape.
    mask = np.broadcast_to(
        np.expand_dims(attention_mask, axis=-1), model_output.shape
    )
    summed = (model_output * mask).sum(axis=1)
    counts = np.clip(mask.sum(axis=1), a_min=1e-9, a_max=None)
    return summed / counts
25218
  # Load tokenizer and model config
25219
  tokenizer = AutoTokenizer.from_pretrained('jinaai/jina-embeddings-v3')
25220
  config = PretrainedConfig.from_pretrained('jinaai/jina-embeddings-v3')
 
25239
  outputs = session.run(None, inputs)[0]
25240
 
25241
  # Apply mean pooling to 'outputs' to get a single representation of each text
25242
+ embeddings = mean_pooling(outputs, input_text["attention_mask"])
25243
+ norm = np.linalg.norm(embeddings, ord=2, axis=1, keepdims=True)
25244
+ embeddings = embeddings / norm
25245
  ```
25246
 
25247
  </p>