darpan-jain committed on
Commit
ae9cafc
1 Parent(s): 1e89e38

Add logging

Browse files
Files changed (1) hide show
  1. app_chat.py +19 -6
app_chat.py CHANGED
@@ -4,6 +4,13 @@ import torch
4
  import transformers
5
  import gradio as gr
6
  import time
 
 
 
 
 
 
 
7
 
8
  MODEL = "decapoda-research/llama-7b-hf"
9
  LORA_WEIGHTS = "tloen/alpaca-lora-7b"
@@ -11,11 +18,13 @@ device = "cpu"
11
  print(f"Model device = {device}", flush=True)
12
 
13
  def load_model():
 
14
  tokenizer = LlamaTokenizer.from_pretrained(MODEL)
15
  model = LlamaForCausalLM.from_pretrained(MODEL, device_map={"": device}, low_cpu_mem_usage=True)
16
  model = PeftModel.from_pretrained(model, LORA_WEIGHTS, device_map={"": device}, torch_dtype=torch.float16)
17
  model.eval()
18
 
 
19
  return model, tokenizer
20
 
21
  def generate_prompt(input):
@@ -66,26 +75,30 @@ def eval_prompt(
66
  return bot_response
67
 
68
  def run_app(model, tokenizer):
 
 
 
69
  with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=True) as chat:
70
  chatbot = gr.Chatbot(label = "Alpaca Demo")
71
  msg = gr.Textbox(show_label = False, placeholder = "Enter your text here")
72
  clear = gr.Button("Clear")
73
- temparature = gr.Slider(minimum=0, maximum=1, value=0.8, label="Temparature")
74
 
75
  def user(user_msg, history):
 
76
  return "", history + [[user_msg, None]]
77
 
78
  def bot(history):
79
- print("Processing user input for Alpaca response...")
80
  last_input = history[-1][0]
81
- print(f"User input = {last_input}")
82
 
83
  tick = time.time()
84
  bot_response = eval_prompt(model, tokenizer, last_input)
85
- print(f"Inference time = {time.time() - tick} seconds")
86
 
87
  history[-1][1] = bot_response
88
- print("Response generated and added to history.\n")
 
89
  return history
90
 
91
  msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
@@ -94,12 +107,12 @@ def run_app(model, tokenizer):
94
 
95
  clear.click(lambda: None, None, chatbot, queue=False)
96
 
97
-
98
  chat.queue()
99
  chat.launch(share=True)
100
 
101
 
102
  if __name__ == "__main__":
 
103
  model, tokenizer = load_model()
104
 
105
  # Run the actual gradio app
 
4
  import transformers
5
  import gradio as gr
6
  import time
7
+ import logging
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+
11
+ # Dump logs to a file
12
+ logging.getLogger().addHandler(logging.FileHandler("app_chat.log"))
13
+
14
 
15
  MODEL = "decapoda-research/llama-7b-hf"
16
  LORA_WEIGHTS = "tloen/alpaca-lora-7b"
 
18
  print(f"Model device = {device}", flush=True)
19
 
20
  def load_model():
21
+ logging.info("Loading model...")
22
  tokenizer = LlamaTokenizer.from_pretrained(MODEL)
23
  model = LlamaForCausalLM.from_pretrained(MODEL, device_map={"": device}, low_cpu_mem_usage=True)
24
  model = PeftModel.from_pretrained(model, LORA_WEIGHTS, device_map={"": device}, torch_dtype=torch.float16)
25
  model.eval()
26
 
27
+ logging.info("Model loaded.")
28
  return model, tokenizer
29
 
30
  def generate_prompt(input):
 
75
  return bot_response
76
 
77
  def run_app(model, tokenizer):
78
+
79
+ logging.info("Starting chat app...")
80
+
81
  with gr.Blocks(theme=gr.themes.Soft(), analytics_enabled=True) as chat:
82
  chatbot = gr.Chatbot(label = "Alpaca Demo")
83
  msg = gr.Textbox(show_label = False, placeholder = "Enter your text here")
84
  clear = gr.Button("Clear")
 
85
 
86
  def user(user_msg, history):
87
+ logging.info("User input received.")
88
  return "", history + [[user_msg, None]]
89
 
90
  def bot(history):
91
+ logging.info("Processing user input for Alpaca response...")
92
  last_input = history[-1][0]
93
+ logging.info(f"User input = {last_input}")
94
 
95
  tick = time.time()
96
  bot_response = eval_prompt(model, tokenizer, last_input)
97
+ logging.info(f"Inference time = {time.time() - tick} seconds")
98
 
99
  history[-1][1] = bot_response
100
+ logging.info("Response generated and added to history.\n")
101
+
102
  return history
103
 
104
  msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
 
107
 
108
  clear.click(lambda: None, None, chatbot, queue=False)
109
 
 
110
  chat.queue()
111
  chat.launch(share=True)
112
 
113
 
114
  if __name__ == "__main__":
115
+
116
  model, tokenizer = load_model()
117
 
118
  # Run the actual gradio app