import os
import shutil
import subprocess
import sys

source_dir = "edge_vlm"
destination_dir = "."


def _pip_install(*pip_args):
    """Run ``pip install`` with *pip_args* in the current interpreter.

    ``sys.executable -m pip`` is used so the packages land in the same
    environment that later imports them. A failing install is reported on
    stderr but does not abort the script, because the packages may already
    be available in the environment.
    """
    command = [sys.executable, "-m", "pip", "install", *pip_args]
    completed = subprocess.run(command)
    if completed.returncode != 0:
        print(
            f"Warning: {' '.join(command)} exited with code {completed.returncode}",
            file=sys.stderr,
        )


# Clone and install dependencies
print("Cloning the repository...")
if os.path.isdir(source_dir):
    # `git clone` refuses to write into an existing non-empty directory, so a
    # second run of this script reuses the checkout from the first run.
    print("Repository already present, skipping clone.")
else:
    # Nothing below can work without the checkout, so fail loudly here.
    subprocess.run(
        ["git", "clone", "https://huggingface.co/irotem98/edge_vlm", source_dir],
        check=True,
    )

print("Installing dependencies...")
_pip_install("-r", os.path.join(source_dir, "requirements.txt"))
_pip_install("sentencepiece")

# These imports stay below the install step on purpose: they are placed after
# the dependencies have been installed (presumably the requirements file
# provides them -- verify against edge_vlm/requirements.txt).
import torch
import gradio as gr

# Copy all files from edge_vlm to current directory
print("Copying files...")
for item in os.listdir(source_dir):
    if item == ".git":
        # Copying the clone's git metadata would delete and replace any
        # `.git` directory that already exists in the destination.
        continue
    source_item = os.path.join(source_dir, item)
    destination_item = os.path.join(destination_dir, item)
    if os.path.isdir(source_item):
        # copytree requires a non-existent target, so drop a stale copy first.
        if os.path.exists(destination_item):
            shutil.rmtree(destination_item)
        shutil.copytree(source_item, destination_item)
    else:
        shutil.copy(source_item, destination_item)

print("Files copied successfully.")

# Now import the model from the copied files
from model import MoondreamModel

# Load the model and tokenizer
print("Loading model...")
model = MoondreamModel.load_model()
print("Model loaded.")

print("Loading tokenizer...")
tokenizer = MoondreamModel.load_tokenizer()
print("Tokenizer loaded.")

# Define the default question
default_question = "Describe the image."
def generate_caption_with_default(image):
    """Caption an uploaded image using the module-level model and tokenizer.

    Args:
        image: The image handed over by the Gradio input component, which is
            configured below with ``type="pil"``.

    Returns:
        The value returned by ``MoondreamModel.generate_caption``; Gradio
        renders it in the interface's text output.
    """
    # NOTE(review): `default_question` is only shown in the UI description and
    # is not passed to generate_caption here -- presumably the model applies
    # it internally; confirm against model.py.
    print("Preprocessing image...")
    model_input = MoondreamModel.preprocess_image(image)
    print("Image preprocessed.")

    print("Generating caption...")
    result = MoondreamModel.generate_caption(model, model_input, tokenizer)
    print("Caption generated.")
    return result


# Build the Gradio interface
print("Setting up Gradio interface...")

# Description shown under the title: the default prompt, a latency warning,
# and badge links to the model and source repositories.
interface_description = "".join(
    [
        f"The default question is: '{default_question}'.\n\n",
        "Please note that the inference may take up to 200 seconds due to long captions and CPU limitations.\n\n",
        "[![Hugging Face Model](https://img.shields.io/badge/Hugging%20Face-Model-blue)](https://huggingface.co/irotem98/edge_vlm) ",
        "[![GitHub Repo](https://img.shields.io/badge/GitHub-Repo-green)](https://github.com/rotem154154/edge_vlm)",
    ]
)
image_input = gr.Image(type="pil", label="Upload an Image")

interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=image_input,
    outputs="text",
    title="Image Caption Generator",
    description=interface_description,
)

# Start serving the interface
print("Launching interface...")
interface.launch()