Spaces:

umichVision
/

virtex-redcaps

Runtime error

App Files Files Community

zamborg committed on Nov 13, 2021

Commit

5281471

•

1 Parent(s): 49c0315

updated things

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +52 -91
virtex/requirements.txt +0 -18

.gitignore CHANGED Viewed

@@ -2,3 +2,4 @@
 *.pth
 *.yaml
 *ipynb_checkpoints

 *.pth
 *.yaml
 *ipynb_checkpoints
+__pycache__

app.py CHANGED Viewed

@@ -1,98 +1,59 @@
 import streamlit as st
-from huggingface_hub import snapshot_download
-from PIL import Image
-import argparse
-import json
-import os
-from typing import Any, Dict, List
-from loguru import logger
-import torch
-import torchvision
-from torch.utils.data import DataLoader
-from tqdm import tqdm
-import wordsegment as ws
-from virtex.config import Config
-from virtex.data import ImageDirectoryDataset
-from virtex.factories import TokenizerFactory, PretrainingModelFactory
-from virtex.utils.checkpointing import CheckpointManager
-from virtex.utils.common import common_parser
-CONFIG_PATH = "config.yaml"
-MODEL_PATH = "checkpoint_last5.pth"
 # x = st.slider("Select a value")
 # st.write(x, "squared is", x * x)
-class ImageLoader():
-    def __init__(self):
-        self.transformer = torchvision.transforms.Compose([torchvision.transforms.Resize(256),
-                               torchvision.transforms.CenterCrop(224),
-                               torchvision.transforms.ToTensor()])
-    def load(self, im_path, prompt):
-        im = torch.FloatTensor(self.transformer(Image.open(im_path))).unsqueeze(0)
-        return {"image": im, "decode_prompt": prompt}
-class VirTexModel():
-    def __init__(self):
-        self.config = Config(CONFIG_PATH)
-        ws.load()
-        self.device = 'cpu'
-        self.tokenizer = TokenizerFactory.from_config(self.config)
-        self.model = PretrainingModelFactory.from_config(self.config).to(self.device)
-        CheckpointManager(model=self.model).load("./checkpoint_last5.pth")
-        self.model.eval()
-        self.loader = ImageLoader()
-    def predict(self, im_path):
-        subreddit_tokens = torch.tensor([self.model.sos_index], device=self.device).long()
-        predictions: List[Dict[str, Any]] = []
-        image = self.loader.load(im_path, subreddit_tokens) # should be of shape 1, 3, 224, 224
-        output_dict = self.model(image)
-        caption = output_dict["predictions"][0] #only one prediction
-        caption = caption.tolist()
-        if self.tokenizer.token_to_id("[SEP]") in caption: # this is just the 0 index actually
-            sos_index = caption.index(self.tokenizer.token_to_id("[SEP]"))
-            caption[sos_index] = self.tokenizer.token_to_id("::")
-        caption = self.tokenizer.decode(caption)
-        # Separate out subreddit from the rest of caption.
-        if "⁇" in caption: # "⁇" is the token decode equivalent of "::"
-            subreddit, rest_of_caption = caption.split("⁇")
-            subreddit = "".join(subreddit.split())
-            rest_of_caption = rest_of_caption.strip()
-        else:
-            subreddit, rest_of_caption = "", caption
-        return subreddit, rest_of_caption
-def load_models():
-    #download model files
-    download_files = [CONFIG_PATH, MODEL_PATH]
-    for f in download_files:
-        fp = cached_download(hf_hub_url("zamborg/redcaps", filename=f))
-        os.system(f"cp {fp} ./{f}")
-# load a virtex model
-from huggingface_hub import hf_hub_url, cached_download
-# #download model files
-download_files = [CONFIG_PATH, MODEL_PATH]
-for f in download_files:
-    fp = cached_download(hf_hub_url("zamborg/redcaps", filename=f))
-    os.system(f"cp {fp} ./{f}")
-#inference on test.jpg
-virtexModel = VirTexModel()
-subreddit, caption = virtexModel.predict("./test.jpg")
-print(subreddit)
-print(caption)

 import streamlit as st
+import io
 # x = st.slider("Select a value")
 # st.write(x, "squared is", x * x)
+# Streamlit front end for the VirTex / RedCaps image-captioning demo.
+# The page has a sidebar for choosing an image (sample list, random sample,
+# or upload) and a main area that shows the image and its predicted caption.
+st.title("Image Captioning Demo from Redcaps")
+st.sidebar.markdown(
+    """
+    Image Captioning Model from VirTex trained on Redcaps
+    """
+)
+with st.spinner("Loading Model"):
+    # NOTE(review): the star import is presumably what brings `glob`, `Image`,
+    # `get_rand_img`, `download_files`, `VirTexModel` and `ImageLoader` into
+    # scope -- confirm against model.py and prefer explicit imports there.
+    from model import *
+    sample_images = glob.glob("./samples/*.jpg")
+    download_files()
+    virtexModel = VirTexModel()
+    imageLoader = ImageLoader()
+# Fallback image, used only when the dropdown selection is cleared below.
+random_image = get_rand_img(sample_images)
+st.sidebar.title("Select a sample image")
+sample_image = st.sidebar.selectbox(
+    "",
+    sample_images
+)
+if st.sidebar.button("Random Sample Image"):
+    random_image = get_rand_img(sample_images)
+    # Clearing the dropdown choice makes the random image win further down.
+    sample_image = None
+uploaded_image = None
+with st.sidebar.form("file-uploader-form", clear_on_submit=True):
+    uploaded_file = st.file_uploader("Choose a file")
+    submitted = st.form_submit_button("Submit")
+    if uploaded_file is not None and submitted:
+        # Fix: the uploaded file exposes `getvalue()` (BytesIO API); the
+        # previous `get_values()` call raised AttributeError.
+        uploaded_image = Image.open(io.BytesIO(uploaded_file.getvalue()))
+if uploaded_image is None and submitted:
+    st.write("Please select a file to upload")
+else:
+    # Priority: uploaded image > dropdown sample > random sample.
+    image_file = sample_image if sample_image is not None else random_image
+    # Fix: `Image.open()` was called without a path; open the chosen sample.
+    image = uploaded_image if uploaded_image is not None else Image.open(image_file)
+    image_dict = imageLoader.transform(image)
+    # Fix: `show` was never defined; render the image once with its caption.
+    # NOTE(review): assumes image_dict["image"] is in a form st.image accepts
+    # -- confirm against ImageLoader.transform.
+    st.image(image_dict["image"], "Target Image")
+    with st.spinner("Generating Caption"):
+        subreddit, caption = virtexModel.predict(image_dict)
+        st.header("Predicted Caption:\n\n")
+        st.subheader(f"Subreddit: {subreddit}\n")
+        st.subheader(f"Caption: {caption}\n")
+    image.close()

virtex/requirements.txt DELETED Viewed

@@ -1,18 +0,0 @@
-albumentations>=0.5.0
-Cython>=0.25
-ftfy==5.8
-future==0.18.0
-lmdb==0.97
-loguru==0.3.2
-mypy_extensions==0.4.1
-lvis==0.5.3
-numpy>=1.17
-opencv-python==4.1.2.30
-scikit-learn==0.21.3
-sentencepiece>=0.1.90
-torch==1.7.0
-torchvision==0.8
-tqdm>=4.50.0
-wordsegment==1.3.1
-git+git://github.com/facebookresearch/fvcore.git#egg=fvcore
-git+git://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI