amstrongzyf committed on
Commit
986b2b2
1 Parent(s): 38c55e2

Update app.py

Files changed (1)
  1. app.py +93 -60
app.py CHANGED
@@ -1,112 +1,145 @@
 import time
 from threading import Thread
+import copy
 
 import gradio as gr
 import torch
-from PIL import Image
-from transformers import AutoProcessor, LlavaForConditionalGeneration, TextIteratorStreamer, TextStreamer
-
-import spaces
-import argparse
+from transformers import AutoProcessor, LlavaForConditionalGeneration, TextIteratorStreamer
 
 from llava_llama3.model.builder import load_pretrained_model
-from llava_llama3.constants import IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN
-from llava_llama3.conversation import conv_templates, SeparatorStyle
-from llava_llama3.utils import disable_torch_init
-from llava_llama3.mm_utils import process_images, tokenizer_image_token, get_model_name_from_path
 from llava_llama3.serve.cli import chat_llava
 
-import requests
-from io import BytesIO
-import base64
 import os
-import glob
-import pandas as pd
-from tqdm import tqdm
-import json
+import argparse
 
+# Set environment variables
 root_path = os.path.dirname(os.path.abspath(__file__))
 print(f'\033[92m{root_path}\033[0m')
 os.environ['GRADIO_TEMP_DIR'] = root_path
 
-parser = argparse.ArgumentParser()
-parser.add_argument("--model-path", type=str, default="TheFinAI/FinLLaVA")
-parser.add_argument("--device", type=str, default="cuda")
-parser.add_argument("--conv-mode", type=str, default="llama_3")
-parser.add_argument("--temperature", type=float, default=0.7)
-parser.add_argument("--max-new-tokens", type=int, default=512)
-parser.add_argument("--load-8bit", action="store_true")
-parser.add_argument("--load-4bit", action="store_true")
-args = parser.parse_args()
+# Create a default arguments object
+default_args = argparse.Namespace(
+    model_path="TheFinAI/FinLLaVA",
+    device="cuda",
+    conv_mode="llama_3",
+    temperature=0.7,
+    max_new_tokens=512,
+    load_8bit=False,
+    load_4bit=False
+)
 
-# Load model
+# Load the model
 tokenizer, llava_model, image_processor, context_len = load_pretrained_model(
-    args.model_path,
+    default_args.model_path,
     None,
     'llava_llama3',
-    args.load_8bit,
-    args.load_4bit,
-    device=args.device)
+    default_args.load_8bit,
+    default_args.load_4bit,
+    device=default_args.device
+)
 
-@spaces.GPU
-def bot_streaming(message, history):
-    print(message)
+def bot_streaming(message, history, temperature, max_new_tokens):
     image_file = None
     if message["files"]:
-        if type(message["files"][-1]) == dict:
+        if isinstance(message["files"][-1], dict):
             image_file = message["files"][-1]["path"]
         else:
             image_file = message["files"][-1]
     else:
         for hist in history:
-            if type(hist[0]) == tuple:
+            if isinstance(hist[0], tuple):
                 image_file = hist[0][0]
 
     if image_file is None:
         gr.Error("You need to upload an image for LLaVA to work.")
         return
 
+    args = copy.deepcopy(default_args)
+    args.temperature = temperature
+    args.max_new_tokens = max_new_tokens
+
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
     def generate():
         print('\033[92mRunning chat\033[0m')
-        output = chat_llava(
-            args=args,
-            image_file=image_file,
-            text=message['text'],
-            tokenizer=tokenizer,
-            model=llava_model,
-            image_processor=image_processor,
-            context_len=context_len,
-            streamer=streamer)
-        return output
+        return chat_llava(
+            args=args,
+            image_file=image_file,
+            text=message['text'],
+            tokenizer=tokenizer,
+            model=llava_model,
+            image_processor=image_processor,
+            context_len=context_len,
+            streamer=streamer
+        )
 
     thread = Thread(target=generate)
     thread.start()
-    # thread.join()
 
     buffer = ""
-    # output = generate()
     for new_text in streamer:
         buffer += new_text
-        generated_text_without_prompt = buffer
         time.sleep(0.06)
-        yield generated_text_without_prompt
+        yield buffer
+
+# Define CSS styles
+css = """
+body {
+    font-family: Arial, sans-serif;
+}
+.gradio-container {
+    max-width: 800px;
+    margin: auto;
+}
+.chatbot {
+    height: 400px;
+    overflow-y: auto;
+}
+"""
 
-chatbot = gr.Chatbot(scale=1)
-chat_input = gr.MultimodalTextbox(interactive=True, file_types=["image"], placeholder="Enter message or upload file...", show_label=False)
-with gr.Blocks(fill_height=True) as demo:
+# Create interface using gr.Blocks
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# FinLLaVA Demo")
+
+    chatbot = gr.Chatbot(scale=1)
+    chat_input = gr.MultimodalTextbox(
+        interactive=True,
+        file_types=["image"],
+        placeholder="Enter message or upload file...",
+        show_label=False
+    )
+
+    with gr.Accordion("Advanced Settings", open=False):
+        temperature = gr.Slider(
+            label="Temperature",
+            minimum=0.1,
+            maximum=2.0,
+            step=0.1,
+            value=default_args.temperature
+        )
+        max_new_tokens = gr.Slider(
+            label="Max New Tokens",
+            minimum=1,
+            maximum=1024,
+            step=1,
+            value=default_args.max_new_tokens
+        )
+
     gr.ChatInterface(
         fn=bot_streaming,
-        title="FinLLaVA Demo",
+        chatbot=chatbot,
+        textbox=chat_input,
+        additional_inputs=[temperature, max_new_tokens],
         examples=[
-            {"text": "What is in this picture?", "files": ["http://images.cocodataset.org/val2017/000000039769.jpg"]},
+            {"text": "What's in this image?", "files": ["http://images.cocodataset.org/val2017/000000039769.jpg"]},
         ],
+        title="",
         description="",
-        stop_btn="Stop Generation",
-        multimodal=True,
-        textbox=chat_input,
-        chatbot=chatbot,
+        theme="soft",
+        retry_btn="Retry",
+        undo_btn="Undo",
+        clear_btn="Clear",
    )
 
-demo.queue(api_open=False)
-demo.launch(show_api=False, share=False)
+if __name__ == "__main__":
+    demo.queue(api_open=False).launch(share=False, debug=True)
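Both the old and new app.py drive streaming the same way: generation runs on a background Thread, decoded text lands in a TextIteratorStreamer, and the Gradio handler drains the streamer while yielding a growing buffer so the chat bubble updates in place. A minimal sketch of that pattern, swapping in the small public gpt2 checkpoint for FinLLaVA so it runs without the llava_llama3 package:

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# gpt2 is a stand-in for FinLLaVA so this sketch runs anywhere.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

def stream_reply(prompt):
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer(prompt, return_tensors="pt")
    # generate() blocks, so it runs on a worker thread; the streamer hands
    # decoded text fragments back to this thread as they are produced.
    thread = Thread(target=model.generate,
                    kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64))
    thread.start()
    buffer = ""
    for new_text in streamer:  # iteration ends when generation finishes
        buffer += new_text
        yield buffer           # each yield re-renders the chat message
    thread.join()

for partial in stream_reply("Streaming generation works by"):
    print(partial)

The time.sleep(0.06) in the app only throttles UI repaints; the loop works without it, since iterating over the streamer already blocks until new text arrives.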
 
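The other structural change is dropping argparse.ArgumentParser, whose parse_args() can choke on the unexpected sys.argv a hosted runtime passes, in favor of a module-level argparse.Namespace of defaults that each request copies and then overrides with the slider values, leaving the chat_llava(args=...) signature untouched. A stripped-down sketch of that idiom:

import argparse
import copy

# Module-level defaults; a Namespace mimics what parse_args() would return,
# so code expecting an `args` object keeps working unchanged.
default_args = argparse.Namespace(temperature=0.7, max_new_tokens=512)

def per_request_args(temperature, max_new_tokens):
    # Copy first so concurrent requests never mutate the shared defaults.
    args = copy.deepcopy(default_args)
    args.temperature = temperature
    args.max_new_tokens = max_new_tokens
    return args

args = per_request_args(0.2, 128)
print(args.temperature, default_args.temperature)  # 0.2 0.7

Since every field here is a scalar, argparse.Namespace(**vars(default_args)) would be an equally safe and cheaper copy than deepcopy.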
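The UI follows the Gradio 4.x idiom of embedding gr.ChatInterface inside a larger gr.Blocks layout with pre-built components passed in (retry_btn, undo_btn, and clear_btn were removed in Gradio 5, so the version needs pinning accordingly). Below is a reduced sketch with a stub handler standing in for bot_streaming; note that multimodal=True, present in the old version but absent from the new gr.ChatInterface call, is what makes the handler receive the {"text": ..., "files": [...]} dict it indexes:

import gradio as gr

def echo_stream(message, history, temperature, max_new_tokens):
    # With multimodal=True, `message` is the dict bot_streaming expects.
    reply = f"[t={temperature}, n={int(max_new_tokens)}] {message['text']}"
    partial = ""
    for ch in reply:
        partial += ch
        yield partial

with gr.Blocks() as demo:
    gr.Markdown("# FinLLaVA Demo")
    chatbot = gr.Chatbot(scale=1)
    chat_input = gr.MultimodalTextbox(file_types=["image"], show_label=False)
    with gr.Accordion("Advanced Settings", open=False):
        temperature = gr.Slider(0.1, 2.0, value=0.7, step=0.1, label="Temperature")
        max_new_tokens = gr.Slider(1, 1024, value=512, step=1, label="Max New Tokens")
    gr.ChatInterface(
        fn=echo_stream,
        chatbot=chatbot,
        textbox=chat_input,
        multimodal=True,  # restores the dict-style message the handler indexes
        additional_inputs=[temperature, max_new_tokens],
    )

if __name__ == "__main__":
    demo.queue(api_open=False).launch(share=False)

One quirk the diff leaves untouched as a context line: gr.Error(...) is instantiated but never raised, so the "upload an image" warning is silently discarded; the working idiom is raise gr.Error("You need to upload an image for LLaVA to work.").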