Tonic committed on
Commit
7b924b8
1 Parent(s): 64d5d0e

add system prompt for intention mapper

Browse files
Files changed (1) hide show
  1. app.py +111 -1
app.py CHANGED
@@ -1,11 +1,121 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
 
4
+ import spaces
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from torch.nn import DataParallel
8
+ from torch import Tensor
9
+ from transformers import AutoTokenizer, AutoModel
10
+ import threading
11
+ import queue
12
+ import os
13
+ import json
14
+ import numpy as np
15
+
16
  """
17
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
18
  """
 
19
 
20
+ ## Global Variables
21
+
22
+ title = """
23
+ # 👋🏻Welcome to 🙋🏻‍♂️Tonic's 📽️Nvidia 🛌🏻Embed V-1 !"""
24
+
25
+ description = """
26
+ You can use this Space to test out the current model [nvidia/NV-Embed-v1](https://huggingface.co/nvidia/NV-Embed-v1). 🐣a generalist embedding model that ranks No. 1 on the Massive Text Embedding Benchmark (MTEB benchmark)(as of May 24, 2024), with 56 tasks, encompassing retrieval, reranking, classification, clustering, and semantic textual similarity tasks.
27
+ You can also use 📽️Nvidia 🛌🏻Embed V-1 by cloning this space. 🧬🔬🔍 Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/NV-Embed?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a></h3>
28
+ Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [MultiTonic](https://github.com/MultiTonic) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
29
+ """
30
+
31
+ tasks = {
32
+ 'ClimateFEVER': 'Given a claim about climate change, retrieve documents that support or refute the claim',
33
+ 'DBPedia': 'Given a query, retrieve relevant entity descriptions from DBPedia',
34
+ 'FEVER': 'Given a claim, retrieve documents that support or refute the claim',
35
+ 'FiQA2018': 'Given a financial question, retrieve user replies that best answer the question',
36
+ 'HotpotQA': 'Given a multi-hop question, retrieve documents that can help answer the question',
37
+ 'MSMARCO': 'Given a web search query, retrieve relevant passages that answer the query',
38
+ 'NFCorpus': 'Given a question, retrieve relevant documents that best answer the question',
39
+ 'NQ': 'Given a question, retrieve Wikipedia passages that answer the question',
40
+ 'QuoraRetrieval': 'Given a question, retrieve questions that are semantically equivalent to the given question',
41
+ 'SCIDOCS': 'Given a scientific paper title, retrieve paper abstracts that are cited by the given paper',
42
+ }
43
+
44
+ intention_prompt= """
45
+ "type": "object",
46
+ "properties": {
47
+ "ClimateFEVER": {
48
+ "type": "boolean",
49
+ "description" : "select this for climate science related text"
50
+ },
51
+ "DBPedia": {
52
+ "type": "boolean",
53
+ "description" : "select this for encyclopedic related knowledge"
54
+ },
55
+ "FEVER": {
56
+ "type": "boolean",
57
+ "description": "select this to verify a claim or embed a claim"
58
+ },
59
+ "FiQA2018": {
60
+ "type": "boolean",
61
+ "description" : "select this for financial questions or topics"
62
+ },
63
+ "HotpotQA": {
64
+ "type": "boolean",
65
+ "description" : "select this for a multi-hop question or for texts that provide multihop claims"
66
+ },
67
+ "MSMARCO": {
68
+ "type": "boolean",
69
+ "description": "Given a web search query, retrieve relevant passages that answer the query"
70
+ },
71
+ "NFCorpus": {
72
+ "type": "boolean",
73
+ "description" : "Given a question, retrieve relevant documents that best answer the question"
74
+ },
75
+ "NQ": {
76
+ "type": "boolean",
77
+ "description" : "Given a question, retrieve Wikipedia passages that answer the question"
78
+ },
79
+ "QuoraRetrieval": {
80
+ "type": "boolean",
81
+ "description": "Given a question, retrieve questions that are semantically equivalent to the given question"
82
+ },
83
+ "SCIDOCS": {
84
+ "type": "boolean",
85
+ "description": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper"
86
+ }
87
+ },
88
+ "required": [
89
+ "ClimateFEVER",
90
+ "DBPedia",
91
+ "FEVER",
92
+ "FiQA2018",
93
+ "HotpotQA",
94
+ "MSMARCO",
95
+ "NFCorpus",
96
+ "NQ",
97
+ "QuoraRetrieval",
98
+ "SCIDOCS",
99
+ ]
100
+ produce a complete json schema."
101
+
102
+ you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""
103
+
104
+ ## add chroma vector store
105
+
106
+
107
+ ## use instruct embeddings
108
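+ # Load the tokenizer and model. NOTE(review): `device` is not defined in the lines visible above; it must be defined (e.g. from torch.cuda.is_available()) before `.to(device)` runs, otherwise this raises NameError at import.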
109
+ tokenizer = AutoTokenizer.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True)
110
+ model = AutoModel.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True).to(device)
111
+
112
+
113
+ ## Make intention Mapper
114
+
115
+
116
+
117
+ ## Change to Yi API Client
118
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
119
 
120
  def respond(
121
  message,