Spaces:

ajaynagotha
/

bhagvad-gita-llm-v2

Sleeping

App Files Files Community

ajaynagotha committed on Nov 23, 2024

Commit

702d4ed

•

1 Parent(s): 0f56688

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -25

app.py CHANGED Viewed

@@ -3,38 +3,25 @@ from datasets import load_dataset
 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 import torch
 import logging
-import sys
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
-file_handler = logging.FileHandler('app.log')
-file_handler.setLevel(logging.INFO)
-file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
-logger.addHandler(file_handler)
-logger.info("Starting the application")
-try:
-    logger.info("Loading the dataset")
-    ds = load_dataset("adarshxs/gita")
-    logger.info("Dataset loaded successfully")
-except Exception as e:
-    logger.error(f"Error loading dataset: {str(e)}")
-    sys.exit(1)
-try:
-    logger.info("Loading the model and tokenizer")
-    model_name = "deepset/roberta-large-squad2"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    model = AutoModelForQuestionAnswering.from_pretrained(model_name)
-    logger.info("Model and tokenizer loaded successfully")
-except Exception as e:
-    logger.error(f"Error loading model or tokenizer: {str(e)}")
-    sys.exit(1)
 def clean_answer(answer):
     special_tokens = set(tokenizer.all_special_tokens)
@@ -45,7 +32,7 @@ def answer_question(question):
     logger.info(f"Received question: {question}")
     try:
         logger.info("Combining text from dataset")
-        context = " ".join([item['Text'] for item in ds['train']])
         logger.info(f"Combined context length: {len(context)} characters")
         logger.info("Tokenizing input")
         inputs = tokenizer.encode_plus(question, context, return_tensors="pt", max_length=512, truncation=True)
@@ -68,8 +55,10 @@ def answer_question(question):
         logger.error(f"Error in answer_question function: {str(e)}")
         return "I'm sorry, but an error occurred while processing your question. Please try again later."
 app = FastAPI()
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -96,6 +85,7 @@ async def predict(question: Question):
         logger.error(f"Error in predict function: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
 iface = gr.Interface(
     fn=answer_question,
     inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
@@ -104,8 +94,10 @@ iface = gr.Interface(
     description="Ask a question about the Bhagavad Gita, and get an answer based on the dataset."
 )
 app = gr.mount_gradio_app(app, iface, path="/")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)

 from transformers import AutoTokenizer, AutoModelForQuestionAnswering
 import torch
 import logging
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 from fastapi.middleware.cors import CORSMiddleware
+# Set up logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
+# Load dataset
+logger.info("Loading the dataset")
+ds = load_dataset("adarshxs/gita")
+logger.info("Dataset loaded successfully")
+# Load model and tokenizer
+logger.info("Loading the model and tokenizer")
+model_name = "deepset/roberta-large-squad2"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForQuestionAnswering.from_pretrained(model_name)
+logger.info("Model and tokenizer loaded successfully")
 def clean_answer(answer):
     special_tokens = set(tokenizer.all_special_tokens)
     logger.info(f"Received question: {question}")
     try:
         logger.info("Combining text from dataset")
+        context = " ".join([item.get('Text', '') for item in ds['train']])
         logger.info(f"Combined context length: {len(context)} characters")
         logger.info("Tokenizing input")
         inputs = tokenizer.encode_plus(question, context, return_tensors="pt", max_length=512, truncation=True)
         logger.error(f"Error in answer_question function: {str(e)}")
         return "I'm sorry, but an error occurred while processing your question. Please try again later."
+# FastAPI setup
 app = FastAPI()
+# Add CORS middleware
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
         logger.error(f"Error in predict function: {str(e)}")
         raise HTTPException(status_code=500, detail=str(e))
+# Gradio interface
 iface = gr.Interface(
     fn=answer_question,
     inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
     description="Ask a question about the Bhagavad Gita, and get an answer based on the dataset."
 )
+# Mount Gradio app to FastAPI
 app = gr.mount_gradio_app(app, iface, path="/")
+# For local development and testing
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)