"""
Gradio chatbot that answers questions using context retrieved from a txtai
embeddings index and an LLM prompted for Chain of Thought (CoT) with reflection.
"""

import re

from txtai import Embeddings, LLM
import gradio as gr

# Captures everything after ``<output>`` up to ``</output>``. The ``$``
# alternative keeps a response usable when the closing tag is missing
# (for example, a truncated generation).
OUTPUT_PATTERN = re.compile(r"<output>(.*?)(?:</output>|$)", re.DOTALL)


def cot(system, user):
    """
    Runs an LLM query using a Chain of Thought + reflection system prompt.

    Args:
        system: base system prompt, the CoT instructions are appended to it
        user: user message

    Returns:
        stripped text found inside the <output> tags of the LLM response,
        or the unmodified response when it has no <output> tag
    """

    # The tag names used in this prompt must stay in sync with OUTPUT_PATTERN
    system = f"""
    {system}

    You are an AI assistant that uses a Chain of Thought (CoT) approach with reflection to answer queries. Follow these steps:

    1. Think through the problem step by step within the <thinking> tags.
    2. Reflect on your thinking to check for any errors or improvements within the <reflection> tags.
    3. Make any necessary adjustments based on your reflection.
    4. Provide your final, concise answer within the <output> tags.

    Important: The <thinking> and <reflection> sections are for your internal reasoning process only.
    Do not include any part of the final answer in these sections.
    The actual response to the query must be entirely contained within the <output> tags.

    Use the following format for your response:
    <thinking>
    [Your step-by-step reasoning goes here. This is your internal thought process, not the final answer.]
    <reflection>
    [Your reflection on your reasoning, checking for errors or improvements]
    </reflection>
    [Any adjustments to your thinking based on your reflection]
    </thinking>
    <output>
    [Your final, concise answer to the query. This is the only part that will be shown to the user.]
    </output>
    """

    response = llm(
        [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        maxlength=4096,
    )

    # Only the <output> section is returned; fall back to the full response
    # when the model did not follow the requested format.
    match = OUTPUT_PATTERN.search(response)
    return match.group(1).strip() if match else response


def rag(question):
    """
    Answers a question with retrieval augmented generation (RAG).

    Searches the embeddings index with the question, joins the "text" field of
    each result into a context block and passes the filled prompt to cot().

    Args:
        question: user question

    Returns:
        answer returned by cot()
    """

    prompt = """
    Answer the following question using only the context below. Only include information specifically discussed.

    question: {question}
    context: {context}
    """

    system = "You are a friendly assistant. You answer questions from users."

    # One search result per line
    context = "\n".join(x["text"] for x in embeddings.search(question))

    return cot(system, prompt.format(question=question, context=context))


# Module-level resources used by cot() and rag(). Both are created once at
# startup, before the interface is launched.
embeddings = Embeddings()
embeddings.load(provider="huggingface-hub", container="neuml/txtai-wikipedia")

llm = LLM("hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4", gpu=True)


def predict(message, history):
    """
    Chat callback passed to gr.ChatInterface.

    Args:
        message: latest user message
        history: prior conversation, not used - each message is answered independently

    Returns:
        answer for message
    """

    return rag(message)


gr.ChatInterface(
    predict,
    title="txtai Reflection Chatbot",
    description="A chatbot that uses Chain of Thought (CoT) with self-reflection to answer queries.",
).launch()