import json
import os
import queue
import threading
from collections.abc import Iterator

import gradio as gr
import numpy as np
import spaces
import torch
import torch.nn.functional as F
from huggingface_hub import InferenceClient
from torch import Tensor
from torch.nn import DataParallel
from transformers import AutoTokenizer, AutoModel

# For more information on `huggingface_hub` Inference API support, please check
# the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference

## Global Variables

# Markdown heading text. NOTE(review): not referenced by the code visible in
# this file -- presumably intended for the UI; confirm before removing.
title = """
# 👋🏻Welcome to 🙋🏻‍♂️Tonic's 📽️Nvidia 🛌🏻Embed V-1 !"""

# Markdown description text. NOTE(review): not referenced by the code visible
# in this file either.
description = """
You can use this Space to test out the current model [nvidia/NV-Embed-v1](https://huggingface.co/nvidia/NV-Embed-v1). 🐣a generalist embedding model that ranks No. 1 on the Massive Text Embedding Benchmark (MTEB benchmark)(as of May 24, 2024), with 56 tasks, encompassing retrieval, reranking, classification, clustering, and semantic textual similarity tasks.
You can also use 📽️Nvidia 🛌🏻Embed V-1 by cloning this space. 🧬🔬🔍 Simply click here: Duplicate Space
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to 🌟 [MultiTonic](https://github.com/MultiTonic) 🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""

# Task key -> natural-language retrieval instruction for that task.
tasks = {
    'ClimateFEVER': 'Given a claim about climate change, retrieve documents that support or refute the claim',
    'DBPedia': 'Given a query, retrieve relevant entity descriptions from DBPedia',
    'FEVER': 'Given a claim, retrieve documents that support or refute the claim',
    'FiQA2018': 'Given a financial question, retrieve user replies that best answer the question',
    'HotpotQA': 'Given a multi-hop question, retrieve documents that can help answer the question',
    'MSMARCO': 'Given a web search query, retrieve relevant passages that answer the query',
    'NFCorpus': 'Given a question, retrieve relevant documents that best answer the question',
    'NQ': 'Given a question, retrieve Wikipedia passages that answer the question',
    'QuoraRetrieval': 'Given a question, retrieve questions that are semantically equivalent to the given question',
    'SCIDOCS': 'Given a scientific paper title, retrieve paper abstracts that are cited by the given paper',
}

# Prompt text asking a chat model to classify a text against a JSON schema
# whose properties mirror the keys of `tasks`. The wording is runtime text and
# is kept verbatim. NOTE(review): not referenced by the code visible here.
intention_prompt = """
"type": "object",
"properties": {
  "ClimateFEVER": {
    "type": "boolean",
    "description" : "select this for climate science related text"
  },
  "DBPedia": {
    "type": "boolean",
    "description" : "select this for encyclopedic related knowledge"
  },
  "FEVER": {
    "type": "boolean",
    "description": "select this to verify a claim or embed a claim"
  },
  "FiQA2018": {
    "type": "boolean",
    "description" : "select this for financial questions or topics"
  },
  "HotpotQA": {
    "type": "boolean",
    "description" : "select this for a multi-hop question or for texts that provide multihop claims"
  },
  "MSMARCO": {
    "type": "boolean",
    "description": "Given a web search query, retrieve relevant passages that answer the query"
  },
  "NFCorpus": {
    "type": "boolean",
    "description" : "Given a question, retrieve relevant documents that best answer the question"
  },
  "NQ": {
    "type": "boolean",
    "description" : "Given a question, retrieve Wikipedia passages that answer the question"
  },
  "QuoraRetrieval": {
    "type": "boolean",
    "description": "Given a question, retrieve questions that are semantically equivalent to the given question"
  },
  "SCIDOCS": {
    "type": "boolean",
    "description": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper"
  }
},
"required": [
  "ClimateFEVER",
  "DBPedia",
  "FEVER",
  "FiQA2018",
  "HotpotQA",
  "MSMARCO",
  "NFCorpus",
  "NQ",
  "QuoraRetrieval",
  "SCIDOCS",
]
produce a complete json schema."

you will recieve a text , classify the text according to the schema above. ONLY PROVIDE THE FINAL JSON , DO NOT PRODUCE ANY ADDITION INSTRUCTION :"""

## add chroma vector store
## use instruct embeddings

# `device` was referenced below without ever being defined, which raised a
# NameError at import time. Use the GPU when torch reports one, else the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True)
model = AutoModel.from_pretrained('nvidia/NV-Embed-v1', trust_remote_code=True).to(device)

## Make intention Mapper
## Change to Yi API Client
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream a chat completion for *message*, yielding the reply so far.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; an
            empty/falsy side of a pair is skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated response text, once per streamed chunk that carries
        at least one choice.
    """
    messages = [{"role": "system", "content": system_message}]
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called `message`, so the parameter
    # of that name is not clobbered while streaming.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            # Nothing to read from this chunk; indexing [0] would raise.
            continue
        token = chunk.choices[0].delta.content
        # A chunk may carry no text (content is None); `response += None`
        # would raise TypeError, so only append real text.
        if token:
            response += token
        yield response


# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    # Passed to `respond` after (message, history), in this order.
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()