# edge_vlm/app.py
import os
import shutil
import subprocess
# Clone the model repo and install its dependencies (clone is skipped if it already exists)
print("Cloning the repository...")
if not os.path.isdir("edge_vlm"):
    subprocess.run(["git", "clone", "https://huggingface.co/irotem98/edge_vlm"], check=True)
print("Installing dependencies...")
subprocess.run(["pip", "install", "-r", "edge_vlm/requirements.txt"], check=True)
subprocess.run(["pip", "install", "sentencepiece"], check=True)
# Import the heavy dependencies only after the requirements above are installed
import torch
import gradio as gr

# Copy all files from edge_vlm into the current directory so that
# `from model import MoondreamModel` below resolves
print("Copying files...")
source_dir = "edge_vlm"
destination_dir = "."

for item in os.listdir(source_dir):
    source_item = os.path.join(source_dir, item)
    destination_item = os.path.join(destination_dir, item)
    if os.path.isdir(source_item):
        if os.path.exists(destination_item):
            shutil.rmtree(destination_item)
        shutil.copytree(source_item, destination_item)
    else:
        shutil.copy(source_item, destination_item)
print("Files copied successfully.")
# Now import the model from the copied files
from model import MoondreamModel
# Load the model and tokenizer
print("Loading model...")
model = MoondreamModel.load_model()
print("Model loaded.")
print("Loading tokenizer...")
tokenizer = MoondreamModel.load_tokenizer()
print("Tokenizer loaded.")
# Define the default question
default_question = "Describe the image."
# Function to handle image and return generated caption
def generate_caption_with_default(image):
    """Preprocess the uploaded PIL image and return the generated caption."""
    print("Preprocessing image...")
    preprocessed_image = MoondreamModel.preprocess_image(image)
    print("Image preprocessed.")
    print("Generating caption...")
    caption = MoondreamModel.generate_caption(model, preprocessed_image, tokenizer)
    print("Caption generated.")
    return caption
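# Quick local smoke test (sketch; assumes an image file at ./example.jpg, which
# is not part of the repo):
#   from PIL import Image
#   print(generate_caption_with_default(Image.open("example.jpg")))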
# Create Gradio interface
print("Setting up Gradio interface...")
interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
    title="Image Caption Generator",
    description=(
        f"The default question is: '{default_question}'.\n\n"
        "Please note that inference may take up to 200 seconds due to long captions and CPU limitations.\n\n"
        "[![Hugging Face Model](https://img.shields.io/badge/Hugging%20Face-Model-blue)](https://huggingface.co/irotem98/edge_vlm) "
        "[![GitHub Repo](https://img.shields.io/badge/GitHub-Repo-green)](https://github.com/rotem154154/edge_vlm)"
    )
)
# Launch the interface
print("Launching interface...")
interface.launch()
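# On a Hugging Face Space the default launch() is enough. When running the app
# elsewhere, Gradio's standard options can be passed instead (sketch):
#   interface.launch(server_name="0.0.0.0", server_port=7860)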