Spaces:

lep1
/

braille-recognition-model

Runtime error

App Files Files Community

lep1 committed 22 days ago

Commit

faa2a07

•

1 Parent(s): 8ca85bc

Upload 4 files

Browse files

Files changed (4) hide show

README (1).md +13 -0
app (1).py +112 -0
convert.py +53 -0
requirements (1).txt +111 -0

README (1).md ADDED Viewed

	@@ -0,0 +1,13 @@

+---
+title: Braille Detection
+emoji: 🕶
+colorFrom: blue
+colorTo: yellow
+sdk: streamlit
+# Kept equal to the streamlit pin in requirements.txt (streamlit==1.20.0) so
+# the Space runtime and the pinned dependency do not disagree.
+sdk_version: 1.20.0
+# NOTE(review): this commit uploads the entry point as "app (1).py"; app_file
+# must name a file that actually exists in the repository - confirm.
+app_file: app.py
+pinned: true
+license: mit
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app (1).py ADDED Viewed

	@@ -0,0 +1,112 @@

+"""
+Reference
+- https://docs.streamlit.io/library/api-reference/layout
+- https://github.com/CodingMantras/yolov8-streamlit-detection-tracking/blob/master/app.py
+- https://huggingface.co/keremberke/yolov8m-valorant-detection/tree/main
+- https://docs.ultralytics.com/usage/python/
+"""
+import time
+import PIL
+import streamlit as st
+import torch
+from ultralyticsplus import YOLO, render_result
+from convert import convert_to_braille_unicode, parse_xywh_and_class
+def load_model(model_path):
+    """Build and return a ``YOLO`` detector for ``model_path``.
+
+    ``model_path`` is handed to the ``YOLO`` constructor unchanged; this
+    script calls it with the identifier "snoop2head/yolov8m-braille".
+    """
+    return YOLO(model_path)
+def load_image(image_path):
+    """Open an image with Pillow and return the resulting image object.
+
+    Args:
+        image_path: Whatever ``PIL.Image.open`` accepts. This script passes
+            either a filesystem path string or the object returned by
+            ``st.sidebar.file_uploader``.
+
+    Returns:
+        The object returned by ``PIL.Image.open``.
+    """
+    # A bare ``import PIL`` does not load the ``PIL.Image`` submodule; the
+    # attribute only exists if some other package imported it first. Import
+    # it explicitly so this function does not depend on that side effect.
+    from PIL import Image
+
+    return Image.open(image_path)
+# ---------------------------------------------------------------------------
+# Streamlit page: pick thresholds, load the detector, run it on an uploaded
+# (or bundled example) image, then print one line of Braille per detected row.
+# ---------------------------------------------------------------------------
+# title
+st.title("Braille Pattern Detection")
+# sidebar: sliders work in whole percent and are rescaled to the 0-1 range
+st.sidebar.header("Detection Config")
+conf = float(st.sidebar.slider("Class Confidence", 10, 75, 15)) / 100
+iou = float(st.sidebar.slider("IoU Threshold", 10, 75, 15)) / 100
+model_path = "snoop2head/yolov8m-braille"
+try:
+    model = load_model(model_path)
+    model.overrides["conf"] = conf  # NMS confidence threshold
+    model.overrides["iou"] = iou  # NMS IoU threshold
+    model.overrides["agnostic_nms"] = False  # NMS class-agnostic
+    model.overrides["max_det"] = 1000  # maximum number of detections per image
+except Exception as ex:
+    print(ex)
+    st.write(f"Unable to load model. Check the specified path: {model_path}")
+    # Nothing below can work without a model; end this script run here rather
+    # than hitting a NameError on ``model`` further down.
+    st.stop()
+source_img = st.sidebar.file_uploader(
+    "Choose an image...", type=("jpg", "jpeg", "png", "bmp", "webp")
+)
+col1, col2 = st.columns(2)
+# left column of the page body: show the input image
+with col1:
+    if source_img is None:
+        default_image_path = "./images/alpha-numeric.jpeg"
+        image = load_image(default_image_path)
+        st.image(
+            default_image_path, caption="Example Input Image", use_column_width=True
+        )
+    else:
+        image = load_image(source_img)
+        st.image(source_img, caption="Uploaded Image", use_column_width=True)
+# Bound up front so the code after the columns never sees an undefined name
+# when inference fails.
+IMAGE_DOWNLOAD_PATH = "runs/detect/predict/image0.jpg"
+list_boxes = None  # stays None unless inference succeeds
+elapsed = 0.0
+# right column of the page body: run detection and show the annotated image
+with col2:
+    # The spinner now wraps the prediction itself, not just the timer start.
+    with st.spinner("Wait for it..."):
+        start_time = time.time()
+        try:
+            with torch.no_grad():
+                res = model.predict(
+                    image, save=True, save_txt=True, exist_ok=True, conf=conf
+                )
+            boxes = res[0].boxes  # first image
+            # reverse the channel axis of the plotted array before display
+            res_plotted = res[0].plot()[:, :, ::-1]
+            list_boxes = parse_xywh_and_class(boxes)
+            elapsed = time.time() - start_time
+            st.image(res_plotted, caption="Detected Image", use_column_width=True)
+        except Exception as ex:
+            print(ex)
+            st.write("Please upload image with types of JPG, JPEG, PNG ...")
+# Report results only when inference actually produced boxes; previously the
+# "Done!" banner was shown before the failure was discovered.
+if list_boxes is not None:
+    st.success(f"Done! Inference time: {elapsed:.2f} seconds")
+    st.subheader("Detected Braille Patterns")
+    try:
+        for box_line in list_boxes:
+            # last column of each row holds the class index
+            box_classes = box_line[:, -1]
+            str_left_to_right = "".join(
+                convert_to_braille_unicode(model.names[int(each_class)])
+                for each_class in box_classes
+            )
+            st.write(str_left_to_right)
+    except Exception as ex:
+        print(ex)
+        st.write("Please try again with images with types of JPG, JPEG, PNG ...")
+    # Offer the download only after a successful run, so a stale file from an
+    # earlier run is never served, and tolerate a missing output file.
+    try:
+        with open(IMAGE_DOWNLOAD_PATH, "rb") as fl:
+            st.download_button(
+                "Download object-detected image",
+                data=fl,
+                file_name="image0.jpg",
+                mime="image/jpeg",  # "image/jpg" is not a registered MIME type
+            )
+    except OSError as ex:
+        print(ex)
+        st.write("Annotated image is not available for download.")

convert.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import json
+import numpy as np
+import torch
+def convert_to_braille_unicode(str_input: str, path: str = "./braille_map.json") -> str:
+    """Look up ``str_input`` in the JSON mapping stored at ``path``.
+
+    The JSON file is opened and parsed on every call.
+
+    Args:
+        str_input: Key to translate.
+        path: Path of a JSON object mapping keys to output strings
+            (presumably Unicode Braille characters - confirm against the
+            shipped ``braille_map.json``).
+
+    Returns:
+        The mapped value, or ``str_input`` unchanged when the key has no
+        entry. The earlier version raised ``UnboundLocalError`` in that case
+        because the result variable was only assigned for known keys.
+
+    Raises:
+        OSError: If ``path`` cannot be opened.
+        json.JSONDecodeError: If the file is not valid JSON.
+    """
+    # JSON text is UTF-8; do not depend on the platform default encoding.
+    with open(path, "r", encoding="utf-8") as fl:
+        data = json.load(fl)
+    return data.get(str_input, str_input)
+def _as_numpy(values) -> np.ndarray:
+    """Return ``values`` as a NumPy array, moving torch tensors to the CPU first."""
+    if isinstance(values, torch.Tensor):
+        # ``Tensor.numpy()`` refuses tensors that live on an accelerator.
+        return values.detach().cpu().numpy()
+    return np.asarray(values)
+
+
+def parse_xywh_and_class(boxes: torch.Tensor) -> list:
+    """Group detection boxes into text lines, each ordered left to right.
+
+    Args:
+        boxes: Detection result exposing ``xywh`` (one row of four values per
+            box), ``conf`` and ``cls`` (one value per box). Each attribute
+            may be a torch tensor on any device or a NumPy array.
+
+    Returns:
+        A list of float arrays, one per detected line, ordered by increasing
+        y. Each array has six columns - x, y, w, h, confidence, class - and
+        its rows are sorted by x. With no boxes the list holds one empty
+        ``(0, 6)`` array, matching the earlier behaviour.
+    """
+    xywh = _as_numpy(boxes.xywh)
+    # Always six columns: callers read the class from the last column, so the
+    # layout must not grow if the detection object carries extra channels.
+    new_boxes = np.zeros((len(xywh), 6))
+    new_boxes[:, :4] = xywh  # first 4 channels are xywh
+    new_boxes[:, 4] = _as_numpy(boxes.conf).reshape(-1)  # 5th channel is confidence
+    new_boxes[:, 5] = _as_numpy(boxes.cls).reshape(-1)  # 6th (last) channel is class
+    if len(new_boxes) == 0:
+        # Skip the statistics below: the mean of an empty array is NaN and
+        # emits a RuntimeWarning.
+        return [new_boxes]
+    # sort according to y coordinate
+    new_boxes = new_boxes[new_boxes[:, 1].argsort()]
+    # A new line starts wherever consecutive y values jump by more than half
+    # of the mean box height (floored).
+    y_threshold = np.mean(new_boxes[:, 3]) // 2
+    boxes_diff = np.diff(new_boxes[:, 1])
+    threshold_index = np.where(boxes_diff > y_threshold)[0]
+    # cluster according to threshold_index, then sort each line by x
+    boxes_clustered = np.split(new_boxes, threshold_index + 1)
+    return [cluster[cluster[:, 0].argsort()] for cluster in boxes_clustered]

requirements (1).txt ADDED Viewed

	@@ -0,0 +1,111 @@

+absl-py==1.4.0
+altair==4.2.2
+antlr4-python3-runtime==4.9.3
+appnope==0.1.3
+asttokens==2.2.1
+attrs==22.2.0
+backcall==0.2.0
+backports.zoneinfo==0.2.1
+blinker==1.5
+cachetools==5.3.0
+certifi==2022.12.7
+charset-normalizer==3.1.0
+click==8.0.4
+contourpy==1.0.7
+cycler==0.11.0
+decorator==5.1.1
+entrypoints==0.4
+executing==1.2.0
+filelock==3.10.4
+fire==0.5.0
+fonttools==4.39.2
+gitdb==4.0.10
+GitPython==3.1.31
+google-auth==2.16.3
+google-auth-oauthlib==0.4.6
+grpcio==1.51.3
+huggingface-hub==0.13.3
+hydra-core==1.3.2
+idna==3.4
+importlib-metadata==6.1.0
+importlib-resources==5.12.0
+ipython==8.11.0
+jedi==0.18.2
+Jinja2==3.1.2
+jsonschema==4.17.3
+kiwisolver==1.4.4
+Markdown==3.4.3
+markdown-it-py==2.2.0
+MarkupSafe==2.1.2
+matplotlib==3.7.1
+matplotlib-inline==0.1.6
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.0
+numpy==1.24.2
+oauthlib==3.2.2
+omegaconf==2.3.0
+opencv-python==4.6.0.66
+packaging==23.0
+pandas==1.5.3
+parso==0.8.3
+pexpect==4.8.0
+pickleshare==0.7.5
+Pillow==9.4.0
+pkgutil_resolve_name==1.3.10
+prompt-toolkit==3.0.38
+protobuf==3.20.3
+psutil==5.9.4
+ptyprocess==0.7.0
+pure-eval==0.2.2
+pyarrow==11.0.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pybboxes==0.1.6
+pydeck==0.8.0
+Pygments==2.14.0
+Pympler==1.0.1
+pyparsing==3.0.9
+pyrsistent==0.19.3
+python-dateutil==2.8.2
+pytz==2023.2
+pytz-deprecation-shim==0.1.0.post0
+PyYAML==6.0
+requests==2.28.2
+requests-oauthlib==1.3.1
+rich==13.3.2
+rsa==4.9
+sahi==0.11.13
+scipy==1.10.1
+seaborn==0.12.2
+semver==2.13.0
+sentry-sdk==1.17.0
+shapely==2.0.1
+six==1.16.0
+smmap==5.0.0
+stack-data==0.6.2
+streamlit==1.20.0
+sympy==1.11.1
+tensorboard==2.12.0
+tensorboard-data-server==0.7.0
+tensorboard-plugin-wit==1.8.1
+termcolor==2.2.0
+terminaltables==3.1.10
+thop==0.1.1.post2209072238
+toml==0.10.2
+toolz==0.12.0
+torch==2.0.0
+torchvision==0.15.1
+tornado==6.2
+tqdm==4.65.0
+traitlets==5.9.0
+typing_extensions==4.5.0
+tzdata==2023.2
+tzlocal==4.3
+ultralytics==8.0.43
+ultralyticsplus==0.0.28
+urllib3==1.26.15
+validators==0.20.0
+wcwidth==0.2.6
+Werkzeug==2.2.3
+zipp==3.15.0