Spaces: lixin4ever / VideoLLaMA2

Running on Zero

App Files Community

ClownRat committed on Jun 13

Commit 8c55b8f • 1 Parent(s): 9b4dadd

update demo.

Browse files

Files changed (1) hide show

app.py +104 -102

app.py CHANGED Viewed

@@ -61,13 +61,12 @@ The service is a research preview intended for non-commercial use only, subject
 class Chat:
-    def __init__(self, model_path, conv_mode, model_base=None, load_8bit=False, load_4bit=False, device='cuda'):
         # disable_torch_init()
         model_name = get_model_name_from_path(model_path)
         self.tokenizer, self.model, processor, context_len = load_pretrained_model(
             model_path, model_base, model_name,
             load_8bit, load_4bit,
-            device=device,
             offload_folder="save_folder")
         self.processor = processor
         self.conv_mode = conv_mode
@@ -193,7 +192,9 @@ def generate(image, video, first_run, state, state_, textbox_in, tensor, modals,
     state.append_message(state.roles[1], textbox_out)
     return (gr.update(value=image if os.path.exists(image) else None, interactive=True), gr.update(value=video if os.path.exists(video) else None, interactive=True),
-            state.to_gradio_chatbot(), False, state, state_, gr.update(value=None, interactive=True), tensor, modals)
 def regenerate(state, state_, textbox, tensor, modals):
@@ -216,103 +217,104 @@ def clear_history(state, state_, tensor, modals):
             True, state, state_, gr.update(value=None, interactive=True), [], [])
-if __name__ == '__main__':
-    conv_mode = "llama_2"
-    model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
-    def find_cuda():
-        # Check if CUDA_HOME or CUDA_PATH environment variables are set
-        cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
-        if cuda_home and os.path.exists(cuda_home):
-            return cuda_home
-        # Search for the nvcc executable in the system's PATH
-        nvcc_path = shutil.which('nvcc')
-        if nvcc_path:
-            # Remove the 'bin/nvcc' part to get the CUDA installation path
-            cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
-            return cuda_path
-        return None
-    cuda_path = find_cuda()
-    if cuda_path:
-        print(f"CUDA installation found at: {cuda_path}")
-    else:
-        print("CUDA installation not found")
-    device = torch.device("cuda")
-    handler = Chat(model_path, conv_mode=conv_mode, load_8bit=False, load_4bit=True)
-    # handler.model.to(dtype=torch.float16)
-    # handler = handler.model.to(device)
-    if not os.path.exists("temp"):
-        os.makedirs("temp")
-    textbox = gr.Textbox(
-        show_label=False, placeholder="Enter text and press ENTER", container=False
-    )
-    with gr.Blocks(title='VideoLLaMA2🚀', theme=gr.themes.Default(), css=block_css) as demo:
-        gr.Markdown(title_markdown)
-        state = gr.State()
-        state_ = gr.State()
-        first_run = gr.State()
-        tensor = gr.State()
-        modals = gr.State()
-        with gr.Row():
-            with gr.Column(scale=3):
-                image = gr.Image(label="Input Image", type="filepath")
-                video = gr.Video(label="Input Video")
-                cur_dir = os.path.dirname(os.path.abspath(__file__))
-                gr.Examples(
-                    examples=[
-                        [
-                            f"{cur_dir}/examples/extreme_ironing.jpg",
-                            "What is unusual about this image?",
-                        ],
-                        [
-                            f"{cur_dir}/examples/waterview.jpg",
-                            "What are the things I should be cautious about when I visit here?",
-                        ],
-                        [
-                            f"{cur_dir}/examples/desert.jpg",
-                            "If there are factual errors in the questions, point it out; if not, proceed answering the question. What’s happening in the desert?",
-                        ],
                     ],
-                    inputs=[image, textbox],
-                )
-            with gr.Column(scale=7):
-                chatbot = gr.Chatbot(label="VideoLLaMA2", bubble_full_width=True, height=750)
-                with gr.Row():
-                    with gr.Column(scale=8):
-                        textbox.render()
-                    with gr.Column(scale=1, min_width=50):
-                        submit_btn = gr.Button(value="Send", variant="primary", interactive=True)
-                with gr.Row(elem_id="buttons") as button_row:
-                    upvote_btn = gr.Button(value="👍  Upvote", interactive=True)
-                    downvote_btn = gr.Button(value="👎  Downvote", interactive=True)
-                    # flag_btn = gr.Button(value="⚠️  Flag", interactive=True)
-                    # stop_btn = gr.Button(value="⏹️  Stop Generation", interactive=False)
-                    regenerate_btn = gr.Button(value="🔄  Regenerate", interactive=True)
-                    clear_btn = gr.Button(value="🗑️  Clear history", interactive=True)
-        gr.Markdown(tos_markdown)
-        gr.Markdown(learn_more_markdown)
-        submit_btn.click(generate, [image, video, first_run, state, state_, textbox, tensor, modals],
-                        [image, video, chatbot, first_run, state, state_, textbox, tensor, modals])
-        regenerate_btn.click(regenerate, [state, state_, textbox, tensor, modals], [state, state_, textbox, chatbot, first_run, tensor, modals]).then(
-            generate, [image, video, first_run, state, state_, textbox, tensor, modals], [image, video, chatbot, first_run, state, state_, textbox, tensor, modals])
-        clear_btn.click(clear_history, [state, state_, tensor, modals],
-                        [image, video, chatbot, first_run, state, state_, textbox, tensor, modals])
-    demo.launch()

 class Chat:
+    def __init__(self, model_path, conv_mode, model_base=None, load_8bit=False, load_4bit=False):
         # disable_torch_init()
         model_name = get_model_name_from_path(model_path)
         self.tokenizer, self.model, processor, context_len = load_pretrained_model(
             model_path, model_base, model_name,
             load_8bit, load_4bit,
             offload_folder="save_folder")
         self.processor = processor
         self.conv_mode = conv_mode
     state.append_message(state.roles[1], textbox_out)
     return (gr.update(value=image if os.path.exists(image) else None, interactive=True), gr.update(value=video if os.path.exists(video) else None, interactive=True),
+            state.to_gradio_chatbot(), False, state, state_, gr.update(value=None, interactive=True),
+            # tensor, modals
+            )
 def regenerate(state, state_, textbox, tensor, modals):
             True, state, state_, gr.update(value=None, interactive=True), [], [])
+conv_mode = "llama_2"
+model_path = 'DAMO-NLP-SG/VideoLLaMA2-7B'
+def find_cuda():
+    # Check if CUDA_HOME or CUDA_PATH environment variables are set
+    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
+    if cuda_home and os.path.exists(cuda_home):
+        return cuda_home
+    # Search for the nvcc executable in the system's PATH
+    nvcc_path = shutil.which('nvcc')
+    if nvcc_path:
+        # Remove the 'bin/nvcc' part to get the CUDA installation path
+        cuda_path = os.path.dirname(os.path.dirname(nvcc_path))
+        return cuda_path
+    return None
+cuda_path = find_cuda()
+if cuda_path:
+    print(f"CUDA installation found at: {cuda_path}")
+else:
+    print("CUDA installation not found")
+device = torch.device("cuda")
+handler = Chat(model_path, conv_mode=conv_mode, load_8bit=False, load_4bit=True)
+# handler.model.to(dtype=torch.float16)
+# handler = handler.model.to(device)
+if not os.path.exists("temp"):
+    os.makedirs("temp")
+textbox = gr.Textbox(
+    show_label=False, placeholder="Enter text and press ENTER", container=False
+)
+with gr.Blocks(title='VideoLLaMA2🚀', theme=gr.themes.Default(), css=block_css) as demo:
+    gr.Markdown(title_markdown)
+    state = gr.State()
+    state_ = gr.State()
+    first_run = gr.State()
+    tensor = gr.State()
+    modals = gr.State()
+    with gr.Row():
+        with gr.Column(scale=3):
+            image = gr.Image(label="Input Image", type="filepath")
+            video = gr.Video(label="Input Video")
+            cur_dir = os.path.dirname(os.path.abspath(__file__))
+            gr.Examples(
+                examples=[
+                    [
+                        f"{cur_dir}/examples/extreme_ironing.jpg",
+                        "What is unusual about this image?",
                     ],
+                    [
+                        f"{cur_dir}/examples/waterview.jpg",
+                        "What are the things I should be cautious about when I visit here?",
+                    ],
+                    [
+                        f"{cur_dir}/examples/desert.jpg",
+                        "If there are factual errors in the questions, point it out; if not, proceed answering the question. What’s happening in the desert?",
+                    ],
+                ],
+                inputs=[image, textbox],
+            )
+        with gr.Column(scale=7):
+            chatbot = gr.Chatbot(label="VideoLLaMA2", bubble_full_width=True, height=750)
+            with gr.Row():
+                with gr.Column(scale=8):
+                    textbox.render()
+                with gr.Column(scale=1, min_width=50):
+                    submit_btn = gr.Button(value="Send", variant="primary", interactive=True)
+            with gr.Row(elem_id="buttons") as button_row:
+                upvote_btn = gr.Button(value="👍  Upvote", interactive=True)
+                downvote_btn = gr.Button(value="👎  Downvote", interactive=True)
+                # flag_btn = gr.Button(value="⚠️  Flag", interactive=True)
+                # stop_btn = gr.Button(value="⏹️  Stop Generation", interactive=False)
+                regenerate_btn = gr.Button(value="🔄  Regenerate", interactive=True)
+                clear_btn = gr.Button(value="🗑️  Clear history", interactive=True)
+    gr.Markdown(tos_markdown)
+    gr.Markdown(learn_more_markdown)
+    submit_btn.click(generate, [image, video, first_run, state, state_, textbox, tensor, modals],
+                    [image, video, chatbot, first_run, state, state_, textbox,
+                    #  tensor, modals
+                     ])
+    regenerate_btn.click(regenerate, [state, state_, textbox, tensor, modals], [state, state_, textbox, chatbot, first_run, tensor, modals]).then(
+        generate, [image, video, first_run, state, state_, textbox, tensor, modals], [image, video, chatbot, first_run, state, state_, textbox, tensor, modals])
+    clear_btn.click(clear_history, [state, state_, tensor, modals],
+                    [image, video, chatbot, first_run, state, state_, textbox, tensor, modals])
+demo.launch()