praeclarumjj3 committed on
Commit
2621850
·
verified ·
1 Parent(s): 1105730

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -370,7 +370,7 @@ title = "<h1 style='margin-bottom: -10px; text-align: center'>OLA-VLM: Elevating
370
  description = "<p style='font-size: 16px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://praeclarumjj3.github.io/' style='text-decoration:none' target='_blank'>Jitesh Jain</a> &nbsp;&nbsp <a href='https://zyang-ur.github.io/' style='text-decoration:none' target='_blank'>Zhengyuan Yang</a> &nbsp;&nbsp <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Humphrey Shi<sup>*</sup></a> &nbsp;&nbsp <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Jianfeng Gao<sup>*</sup></a> &nbsp;&nbsp <a href='https://jwyang.github.io/' style='text-decoration:none' target='_blank'>Jianwei Yang<sup>*</sup></a></p>" \
371
  + "<p style='font-size: 12px; margin: 5px; font-weight: w300; text-align: center'><sup>*</sup>Equal Advising</p>" \
372
  + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/ola_vlm/' target='_blank'>Project Page</a> | <a href='https://youtu.be/' target='_blank'>Video</a> | <a href='https://arxiv.org/abs/2412.09585' target='_blank'>ArXiv</a> | <a href='https://github.com/SHI-Labs/OLA-VLM' target='_blank'>Github</a></p>" \
373
- + "<p style='text-align: left; font-size: 16px; margin: 5px; font-weight: w300;'>OLA-VLM introduces a new approach to distilling vision knowledge into the hidden representations of LLMs, utilizing target visual representations to advance visual perception in multimodal LLMs. In the demo, along with the standard VQA setting, you can also visualize the intermediate representations from selected layers in OLA-VLM by clicking on the <code>✨ Visualize</code> button!</p>" \
374
  + "<ul style='text-align: left; font-size: 16px; margin: 5px; font-weight: w300; padding: 0;'> \
375
  <li><b>depth</b>: Visualizes the depth information in the representations using the decoder from the <a href='https://github.com/DepthAnything/Depth-Anything-V2' target='_blank'>Depth-Anything-v2 model</a>.</li> \
376
  <li><b>seg</b>: Visualizes the segmentation information in the representations using the decoder from the <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>OneFormer model</a>.</li> \
@@ -435,7 +435,7 @@ with gr.Blocks(title="OLA-VLM", theme=gr.themes.Default(), css=block_css) as dem
435
  submit_btn = gr.Button(value="Send", variant="primary")
436
 
437
  # with gr.Accordion("Representations from selected layers of the LLM (expects only a single image input)", open=False) as interm_out:
438
- inter_vis_btn = gr.Button(value="✨ Visualize")
439
  with gr.Row():
440
  depth_box = gr.Image(label="depth", type="pil", visible=True)
441
  seg_box = gr.Image(label="seg", type="pil", visible=True)
 
370
  description = "<p style='font-size: 16px; margin: 5px; font-weight: w300; text-align: center'> <a href='https://praeclarumjj3.github.io/' style='text-decoration:none' target='_blank'>Jitesh Jain</a> &nbsp;&nbsp <a href='https://zyang-ur.github.io/' style='text-decoration:none' target='_blank'>Zhengyuan Yang</a> &nbsp;&nbsp <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Humphrey Shi<sup>*</sup></a> &nbsp;&nbsp <a href='https://www.humphreyshi.com/home' style='text-decoration:none' target='_blank'>Jianfeng Gao<sup>*</sup></a> &nbsp;&nbsp <a href='https://jwyang.github.io/' style='text-decoration:none' target='_blank'>Jianwei Yang<sup>*</sup></a></p>" \
371
  + "<p style='font-size: 12px; margin: 5px; font-weight: w300; text-align: center'><sup>*</sup>Equal Advising</p>" \
372
  + "<p style='font-size: 16px; margin: 5px; font-weight: w600; text-align: center'> <a href='https://praeclarumjj3.github.io/ola_vlm/' target='_blank'>Project Page</a> | <a href='https://youtu.be/' target='_blank'>Video</a> | <a href='https://arxiv.org/abs/2412.09585' target='_blank'>ArXiv</a> | <a href='https://github.com/SHI-Labs/OLA-VLM' target='_blank'>Github</a></p>" \
373
+ + "<p style='text-align: left; font-size: 16px; margin: 5px; font-weight: w300;'>OLA-VLM introduces a new approach to distilling vision knowledge into the hidden representations of LLMs, utilizing target visual representations to advance visual perception in multimodal LLMs. In the demo, along with the standard VQA setting, you can also visualize the intermediate representations from selected layers in OLA-VLM by clicking on the <code>✨ Visualize Intermediate Representations</code> button! Note that our demo only supports single image input currently.</p>" \
374
  + "<ul style='text-align: left; font-size: 16px; margin: 5px; font-weight: w300; padding: 0;'> \
375
  <li><b>depth</b>: Visualizes the depth information in the representations using the decoder from the <a href='https://github.com/DepthAnything/Depth-Anything-V2' target='_blank'>Depth-Anything-v2 model</a>.</li> \
376
  <li><b>seg</b>: Visualizes the segmentation information in the representations using the decoder from the <a href='https://github.com/SHI-Labs/OneFormer' target='_blank'>OneFormer model</a>.</li> \
 
435
  submit_btn = gr.Button(value="Send", variant="primary")
436
 
437
  # with gr.Accordion("Representations from selected layers of the LLM (expects only a single image input)", open=False) as interm_out:
438
+ inter_vis_btn = gr.Button(value="✨ Visualize Intermediate Representations")
439
  with gr.Row():
440
  depth_box = gr.Image(label="depth", type="pil", visible=True)
441
  seg_box = gr.Image(label="seg", type="pil", visible=True)