Spaces:

Marqo
/

Marqo-FashionSigLIP-Classification

Running

App Files Files Community

Ellie Sleightholm committed 22 days ago

Commit

8e62182

•

1 Parent(s): d1591db

adding basic space structure

Browse files

Files changed (10) hide show

app.py +176 -0
images/blouse.jpg +0 -0
images/cargo.jpg +0 -0
images/dress.jpg +0 -0
images/hat.jpg +0 -0
images/polo-shirt.jpg +0 -0
images/sunglasses.jpg +0 -0
images/sweatpants.jpg +0 -0
images/t-shirt.jpg +0 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,176 @@

+import gradio as gr
+import open_clip
+import torch
+import requests
+import numpy as np
+from PIL import Image
+# Sidebar content
+sidebar_markdown = """
+We have several resources related to our new fashion models.
+## Documentation
+📚 [Blog Post](https://www.marqo.ai/blog/search-model-for-fashion)
+📝 [Use Case Blog Post](https://www.marqo.ai/blog/ecommerce-image-classification-with-marqo-fashionclip)
+## Code
+💻 [GitHub Repo](https://github.com/marqo-ai/marqo-FashionCLIP)
+🤝 [Google Colab](https://colab.research.google.com/drive/1nq978xFJjJcnyrJ2aE5l82GHAXOvTmfd?usp=sharing)
+🤗 [Hugging Face Collection](https://huggingface.co/collections/Marqo/marqo-fashionclip-and-marqo-fashionsiglip-66b43f2d09a06ad2368d4af6)
+## Citation
+If you use Marqo-FashionSigLIP or Marqo-FashionCLIP, please cite us:
+```
+@software{Jung_Marqo-FashionCLIP_and_Marqo-FashionSigLIP_2024,
+author = {Jung, Myong Chol and Clark, Jesse},
+month = aug,
+title = {{Marqo-FashionCLIP and Marqo-FashionSigLIP}},
+url = {https://github.com/marqo-ai/marqo-FashionCLIP},
+version = {1.0.0},
+year = {2024}
+```
+"""
+# List of fashion items
+items = [
+    "leggings", "jogger", "palazzo", "cargo", "dresspants", "chinos",
+    "dress", "blouse", "t-shirt", "jeans", "skirt", "shorts",
+    "sweater", "cardigan", "tank top", "hoodie", "coat",
+    "jacket", "polo shirt", "crop top", "romper",
+    "overalls", "blazer", "sweatpants", "vest",
+    "dungarees", "poncho", "bodysuit", "maxi dress",
+    "hat", "sunglasses", "glasses", "shoes", "sandals", "heels", "trainers", "belt", "tie", "dress shirt", "boots",
+    "slippers",
+    "sneakers",
+    "insoles",
+    "socks",
+    "insulated jacket",
+    "fleece",
+    "rain jacket",
+    "running jacket",
+    "windbreaker",
+    "shirt",
+    "t-shirt",
+    "tank top",
+    "graphic top",
+    "sweater",
+    "sweatshirt",
+    "vest",
+    "pant",
+    "legging",
+    "short",
+    "dress",
+    "skirt",
+    "skort",
+    "brief",
+    "sports bra",
+    "base layer top",
+    "base layer bottom",
+    "swimsuit",
+    "rashguard",
+    "water shorts",
+    "cover up",
+    "goggle",
+    "hat",
+    "sun hat",
+    "glove",
+    "mitten",
+    "belt",
+    "leg gaiter",
+]
+# Initialize the model and tokenizer
+model_name = 'hf-hub:Marqo/marqo-fashionSigLIP'
+model, preprocess_train, preprocess_val = open_clip.create_model_and_transforms(model_name)
+tokenizer = open_clip.get_tokenizer(model_name)
+# Generate descriptions dynamically
+def generate_description(item):
+    if "pants" in item or item in ["leggings", "jogger", "cargo", "chinos", "palazzo"]:
+        return f"a pair of {item} pants"
+    elif item in ["dress", "blouse", "t-shirt", "tank top", "sweater", "cardigan", "hoodie", "coat", "jacket", "polo shirt", "crop top", "romper", "blazer", "vest", "bodysuit", "maxi dress"]:
+        return f"a {item}"
+    elif item in ["hat", "sunglasses", "glasses"]:
+        return f"a {item} worn on the head or face"
+    elif item in ["shoes", "sandals", "heels", "trainers"]:
+        return f"a pair of {item} worn on the feet"
+    elif item in ["jeans", "skirt", "shorts", "sweatpants", "dungarees", "poncho", "overalls", "dress shirt"]:
+        return f"a {item} piece of clothing"
+    else:
+        return f"a fashion item called {item}"
+items_desc = [generate_description(item) for item in items]
+text = tokenizer(items_desc)
+# Encode text features
+with torch.no_grad(), torch.cuda.amp.autocast():
+    text_features = model.encode_text(text)
+    text_features /= text_features.norm(dim=-1, keepdim=True)
+# Prediction function
+def predict(inp):
+    image = preprocess_val(inp).unsqueeze(0)
+    with torch.no_grad(), torch.cuda.amp.autocast():
+        image_features = model.encode_image(image)
+        image_features /= image_features.norm(dim=-1, keepdim=True)
+        text_probs = (100 * image_features @ text_features.T).softmax(dim=-1)
+        # Sort the confidences and get the top 10
+        sorted_confidences = sorted(
+            {items[i]: float(text_probs[0, i]) for i in range(len(items))}.items(),
+            key=lambda x: x[1],
+            reverse=True
+        )
+        top_10_confidences = dict(sorted_confidences[:10])
+    return top_10_confidences
+# Gradio interface
+title = "Fashion Item Classifier with Marqo-FashionSigLIP"
+description = "Upload an image of a fashion item and classify it using [Marqo-FashionSigLIP](https://huggingface.co/Marqo/marqo-fashionSigLIP)!"
+# Example image paths with thumbnails
+examples = [
+    ["images/dress.jpg", "Dress"],
+    ["images/sweatpants.jpg", "Sweatpants"],
+    ["images/t-shirt.jpg", "T-Shirt"],
+    ["images/hat.jpg", "Hat"],
+    ["images/blouse.jpg", "Blouse"],
+    ["images/cargo.jpg", "Cargos"],
+    ["images/sunglasses.jpg", "Sunglasses"],
+    ["images/polo-shirt.jpg", "Polo Shirt"],
+]
+# Assemble the UI: one row with an information column (scale=1) and an
+# interaction column (scale=2). The custom CSS enlarges elements carrying the
+# "remove-btn" class (the "cross button" per the CSS comments); nothing in this
+# file assigns that class, so it presumably targets a button rendered by Gradio
+# itself -- TODO confirm.
+with gr.Blocks(css="""
+    .remove-btn {
+        font-size: 24px !important; /* Increase the font size of the cross button */
+        line-height: 24px !important;
+        width: 30px !important; /* Increase the width */
+        height: 30px !important; /* Increase the height */
+    }
+""") as demo:
+    with gr.Row():
+        # Left column: title, description and the resources/citation sidebar.
+        with gr.Column(scale=1):
+            gr.Markdown(f"# {title}")
+            gr.Markdown(description)
+            gr.Markdown(sidebar_markdown)
+            gr.Markdown(" ", elem_id="vertical-line")  # Empty Markdown with a custom ID; the CSS above defines no rule for it
+        # Right column: image upload, classify button, examples and the result.
+        with gr.Column(scale=2):
+            input_image = gr.Image(type="pil", label="Upload Fashion Item Image", height=312)
+            predict_button = gr.Button("Classify")
+            gr.Markdown("Or click on one of the images below to classify it:")
+            # Only the image component is wired as an input for the examples.
+            gr.Examples(examples=examples, inputs=input_image)
+            # NOTE: a separate output column (gr.Column(scale=3)) is disabled,
+            # so the label below is rendered inside this scale=2 column.
+            # predict() returns up to 10 labels; num_top_classes=6 is the
+            # display setting passed to the Label component.
+            output_label = gr.Label(num_top_classes=6)
+            predict_button.click(predict, inputs=input_image, outputs=output_label)
+# Launch the interface
+# NOTE(review): share=True requests a public share link; this app appears to be
+# deployed as a Hugging Face Space, where that may be unnecessary -- confirm.
+demo.launch(share=True)