# trinity / app.py
# captainkyd's picture
# Update app.py
# 72adf3e verified
# raw
# history blame
# 6.12 kB
import spaces
import gradio as gr
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import accelerate
import os
# Markdown header rendered at the top of the UI via gr.Markdown(title).
# The model link previously contained a double slash after the host and the
# second line ended with an unmatched </h3> closing tag; both are fixed here.
title = """# Welcome to 🌟Tonic's🐇🥷🏻Trinity
You can build with this endpoint using🐇🥷🏻Trinity available here : [WhiteRabbitNeo/Trinity-13B](https://huggingface.co/WhiteRabbitNeo/Trinity-13B). You can also use 🐇🥷🏻Trinity by cloning this space. Simply click here: <a style="display:inline-block" href="https://huggingface.co/spaces/Tonic/trinity?duplicate=true"><img src="https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14" alt="Duplicate Space"></a>
Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻 [![Join us on Discord](https://img.shields.io/discord/1109943800132010065?label=Discord&logo=discord&style=flat-square)](https://discord.gg/GWpVpekp) On 🤗Huggingface:[MultiTransformer](https://huggingface.co/MultiTransformer) Math 🔍 [introspector](https://huggingface.co/introspector) On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [SciTonic](https://github.com/Tonic-AI/scitonic)🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# System prompt used by generate_text() when the user leaves the custom
# prompt box empty. It asks the model for a "Tree of Thoughts" style answer.
# The text is sent to the model verbatim, so edits here change model output.
default_system_prompt = """
Answer the Question by exploring multiple reasoning paths as follows:
- First, carefully analyze the question to extract the key information components and break it down into logical sub-questions. This helps set up the framework for reasoning. The goal is to construct an internal search tree.
- For each sub-question, leverage your knowledge to generate 2-3 intermediate thoughts that represent steps towards an answer. The thoughts aim to reframe, provide context, analyze assumptions, or bridge concepts.
- Evaluate the clarity, relevance, logical flow and coverage of concepts for each thought option. Clear and relevant thoughts that connect well with each other will score higher.
- Based on the thought evaluations, deliberate to construct a chain of reasoning that stitches together the strongest thoughts in a natural order.
- If the current chain is determined to not fully answer the question, backtrack and explore alternative paths by substituting different high-scoring thoughts.
- Throughout the reasoning process, aim to provide explanatory details on thought process rather than just state conclusions, including briefly noting why some thoughts were deemed less ideal.
- Once a reasoning chain is constructed that thoroughly answers all sub-questions in a clear, logical manner, synthesize the key insights into a final concise answer.
- Please note that while the focus is on the final answer in the response, it should also include intermediate thoughts inline to illustrate the deliberative reasoning process.
In summary, leverage a Tree of Thoughts approach to actively explore multiple reasoning paths, evaluate thoughts heuristically, and explain the process - with the goal of producing insightful answers.
"""
# Load the weights in 4-bit with double quantization and run compute in
# bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model_path = "WhiteRabbitNeo/Trinity-13B"

# The Hub token is mandatory for this app; fail fast with a clear message
# rather than part-way through a download.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")

# Pass the token explicitly: previously it was validated above but never
# handed to from_pretrained(), so it had no effect on the downloads.
# NOTE(review): trust_remote_code=True allows code shipped in the model
# repository to run locally -- confirm this is still required for this model.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=quantization_config,
    token=hf_token,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
    token=hf_token,
)
@spaces.GPU
def generate_text(custom_prompt, user_input, temperature, generate_len, top_p, top_k):
    """Generate the assistant's reply to ``user_input``.

    Args:
        custom_prompt: System prompt text; when empty, ``default_system_prompt``
            is used instead.
        user_input: The user's question/instruction.
        temperature: Sampling temperature forwarded to ``model.generate``.
        generate_len: Maximum number of tokens to generate after the prompt.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.
        top_k: Top-k sampling cutoff forwarded to ``model.generate``.

    Returns:
        The generated continuation only (prompt excluded), stripped of
        surrounding whitespace.
    """
    system_prompt = custom_prompt if custom_prompt else default_system_prompt
    llm_prompt = f"{system_prompt} \nUSER: {user_input} \nASSISTANT: "

    # Tokenize via __call__ so the attention mask is produced too, and follow
    # the model's own device instead of hard-coding "cuda".
    encoded = tokenizer(llm_prompt, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]
    prompt_len = input_ids.shape[1]

    with torch.no_grad():
        output = model.generate(
            input_ids=input_ids,
            attention_mask=encoded.get("attention_mask"),
            # Bound only the newly generated part; UI sliders may deliver
            # floats, while generate() expects integers here.
            max_new_tokens=int(generate_len),
            # Without do_sample=True, temperature/top_p/top_k are ignored and
            # decoding is greedy, so the UI controls would have no effect.
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            top_k=int(top_k),
            num_return_sequences=1,
        )

    # Decode only the tokens produced after the prompt. Slicing the decoded
    # text by len(llm_prompt) is unreliable because decoding is not guaranteed
    # to reproduce the prompt string character-for-character.
    new_tokens = output[0][prompt_len:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
def gradio_app():
    """Assemble the Gradio Blocks UI, wire the button to generate_text, and launch it."""
    with gr.Blocks() as ui:
        gr.Markdown(title)

        # Row 1: free-text inputs.
        with gr.Row():
            system_box = gr.Textbox(
                label="Custom System Prompt (optional)",
                placeholder="Leave blank to use the default prompt...",
            )
            question_box = gr.Textbox(
                label="Your Instruction",
                placeholder="Type your question here...",
            )

        # Row 2: generation settings.
        with gr.Row():
            temperature_slider = gr.Slider(
                label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.5
            )
            length_slider = gr.Slider(
                label="Generate Length", minimum=100, maximum=1024, step=10, value=100
            )
            top_p_slider = gr.Slider(
                label="Top P", minimum=0.0, maximum=1.0, step=0.01, value=1.0
            )
            top_k_slider = gr.Slider(
                label="Top K", minimum=0, maximum=100, step=1, value=50
            )

        # Row 3: trigger and result.
        with gr.Row():
            run_button = gr.Button("Generate")
            answer_box = gr.Textbox(
                label="Generated Text",
                lines=10,
                placeholder="Generated answer will appear here...",
            )

        # Order must match generate_text's positional parameters.
        controls = [
            system_box,
            question_box,
            temperature_slider,
            length_slider,
            top_p_slider,
            top_k_slider,
        ]
        run_button.click(fn=generate_text, inputs=controls, outputs=answer_box)

    ui.launch()


# Start the UI only when executed as a script, not when imported.
if __name__ == "__main__":
    gradio_app()