Upload 14 files
- .gitattributes +6 -0
- README.md +6 -6
- alphabet_map.json +28 -0
- app.py +112 -0
- braille_map.json +65 -0
- convert.py +73 -0
- image/alpha-numeric.jpeg +0 -0
- image/gray_image.jpg +3 -0
- image/img_41.jpg +0 -0
- image/test_1.jpg +3 -0
- image/test_2.jpg +3 -0
- image/test_3.jpg +3 -0
- image/test_4.jpg +3 -0
- image/test_5.jpg +3 -0
- number_map.json +66 -0
.gitattributes
CHANGED
@@ -33,3 +33,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+image/gray_image.jpg filter=lfs diff=lfs merge=lfs -text
+image/test_1.jpg filter=lfs diff=lfs merge=lfs -text
+image/test_2.jpg filter=lfs diff=lfs merge=lfs -text
+image/test_3.jpg filter=lfs diff=lfs merge=lfs -text
+image/test_4.jpg filter=lfs diff=lfs merge=lfs -text
+image/test_5.jpg filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,13 +1,13 @@
 ---
-title: Braille
+title: Braille Detection
-emoji:
+emoji: 🕶
 colorFrom: blue
-colorTo:
+colorTo: yellow
 sdk: streamlit
-sdk_version: 1.
+sdk_version: 1.17.0
 app_file: app.py
-pinned:
+pinned: true
 license: mit
 ---

-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
alphabet_map.json
ADDED
@@ -0,0 +1,28 @@
+{
+    "a": "100000",
+    "b": "110000",
+    "c": "100100",
+    "d": "100110",
+    "e": "100010",
+    "f": "110100",
+    "g": "110110",
+    "h": "110010",
+    "i": "010100",
+    "j": "010110",
+    "k": "101000",
+    "l": "111000",
+    "m": "101100",
+    "n": "101110",
+    "o": "101010",
+    "p": "111100",
+    "q": "111110",
+    "r": "111010",
+    "s": "011100",
+    "t": "011110",
+    "u": "101001",
+    "v": "111001",
+    "w": "010111",
+    "x": "101101",
+    "y": "101111",
+    "z": "101011"
+}
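Each value in this map is a six-character bit string in which character i stands for braille dot i+1 (dots 1-3 run down the left column of a cell, dots 4-6 down the right). Chained with braille_map.json, added later in this commit, it renders plain text as braille; a minimal sketch, assuming both JSON files sit at the repo root:

    import json

    # load the two maps added in this commit
    with open("alphabet_map.json") as fl:
        alphabet = json.load(fl)
    with open("braille_map.json") as fl:
        braille = json.load(fl)

    # letter -> dot string -> unicode braille cell
    word = "braille"
    print("".join(braille[alphabet[ch]] for ch in word))  # ⠃⠗⠁⠊⠇⠇⠑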
app.py
ADDED
@@ -0,0 +1,112 @@
+"""
+Reference
+- https://docs.streamlit.io/library/api-reference/layout
+- https://github.com/CodingMantras/yolov8-streamlit-detection-tracking/blob/master/app.py
+- https://huggingface.co/keremberke/yolov8m-valorant-detection/tree/main
+- https://docs.ultralytics.com/usage/python/
+"""
+import time
+import PIL
+
+import streamlit as st
+import torch
+from ultralyticsplus import YOLO, render_result
+
+from convert import convert_to_braille_unicode, parse_xywh_and_class
+
+
+def load_model(model_path):
+    """Load a YOLO model from a local path or hub id."""
+    model = YOLO(model_path)
+    return model
+
+
+def load_image(image_path):
+    """Load an image from a path or an uploaded file object."""
+    image = PIL.Image.open(image_path)
+    return image
+
+
+# title
+st.title("Braille Pattern Detection")
+
+# sidebar
+st.sidebar.header("Detection Config")
+
+conf = float(st.sidebar.slider("Class Confidence", 10, 75, 15)) / 100
+iou = float(st.sidebar.slider("IoU Threshold", 10, 75, 15)) / 100
+
+model_path = "snoop2head/yolov8m-braille"
+
+try:
+    model = load_model(model_path)
+    model.overrides["conf"] = conf  # NMS confidence threshold
+    model.overrides["iou"] = iou  # NMS IoU threshold
+    model.overrides["agnostic_nms"] = False  # class-agnostic NMS
+    model.overrides["max_det"] = 1000  # maximum number of detections per image
+
+except Exception as ex:
+    print(ex)
+    st.write(f"Unable to load model. Check the specified path: {model_path}")
+
+source_img = None
+
+source_img = st.sidebar.file_uploader(
+    "Choose an image...", type=("jpg", "jpeg", "png", "bmp", "webp")
+)
+col1, col2 = st.columns(2)
+
+# left column of the page body
+with col1:
+    if source_img is None:
+        default_image_path = "./image/alpha-numeric.jpeg"  # repo folder is image/, not images/
+        image = load_image(default_image_path)
+        st.image(
+            default_image_path, caption="Example Input Image", use_column_width=True
+        )
+    else:
+        image = load_image(source_img)
+        st.image(source_img, caption="Uploaded Image", use_column_width=True)
+
+# right column of the page body
+with col2:
+    with st.spinner("Wait for it..."):
+        start_time = time.time()
+        try:
+            with torch.no_grad():
+                res = model.predict(
+                    image, save=True, save_txt=True, exist_ok=True, conf=conf
+                )
+                boxes = res[0].boxes  # boxes of the first image
+                res_plotted = res[0].plot()[:, :, ::-1]  # BGR -> RGB
+
+                list_boxes = parse_xywh_and_class(boxes)
+
+            st.image(res_plotted, caption="Detected Image", use_column_width=True)
+            IMAGE_DOWNLOAD_PATH = "runs/detect/predict/image0.jpg"
+
+        except Exception as ex:
+            st.write("Please upload an image of type JPG, JPEG or PNG.")
+
+
+        try:
+            st.success(f"Done! Inference time: {time.time() - start_time:.2f} seconds")
+            st.subheader("Detected Braille Patterns")
+            for box_line in list_boxes:
+                str_left_to_right = ""
+                box_classes = box_line[:, -1]
+                for each_class in box_classes:
+                    str_left_to_right += convert_to_braille_unicode(
+                        model.names[int(each_class)]
+                    )
+                st.write(str_left_to_right)
+        except Exception as ex:
+            st.write("Please try again with an image of type JPG, JPEG or PNG.")
+
+with open(IMAGE_DOWNLOAD_PATH, "rb") as fl:
+    st.download_button(
+        "Download object-detected image",
+        data=fl,
+        file_name="image0.jpg",
+        mime="image/jpeg",
+    )
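Stripped of the Streamlit layers, the inference path above reduces to a few lines; a minimal sketch of the same flow, using the defaults the sliders start at and the example image committed here:

    import PIL
    from ultralyticsplus import YOLO

    from convert import convert_to_braille_unicode, parse_xywh_and_class

    model = YOLO("snoop2head/yolov8m-braille")
    model.overrides["conf"] = 0.15  # slider defaults: 15 / 100
    model.overrides["iou"] = 0.15

    res = model.predict(PIL.Image.open("./image/alpha-numeric.jpeg"))
    for line in parse_xywh_and_class(res[0].boxes):
        # one braille string per detected text line, left to right
        print("".join(convert_to_braille_unicode(model.names[int(c)]) for c in line[:, -1]))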
braille_map.json
ADDED
@@ -0,0 +1,65 @@
+{
+    "000001": "⠠",
+    "000010": "⠐",
+    "000011": "⠰",
+    "000100": "⠈",
+    "000101": "⠨",
+    "000110": "⠘",
+    "000111": "⠸",
+    "001000": "⠄",
+    "001001": "⠤",
+    "001010": "⠔",
+    "001011": "⠴",
+    "001100": "⠌",
+    "001101": "⠬",
+    "001110": "⠜",
+    "001111": "⠼",
+    "010000": "⠂",
+    "010001": "⠢",
+    "010010": "⠒",
+    "010011": "⠲",
+    "010100": "⠊",
+    "010101": "⠪",
+    "010110": "⠚",
+    "010111": "⠺",
+    "011000": "⠆",
+    "011001": "⠦",
+    "011010": "⠖",
+    "011011": "⠶",
+    "011100": "⠎",
+    "011101": "⠮",
+    "011110": "⠞",
+    "011111": "⠾",
+    "100000": "⠁",
+    "100001": "⠡",
+    "100010": "⠑",
+    "100011": "⠱",
+    "100100": "⠉",
+    "100101": "⠩",
+    "100110": "⠙",
+    "100111": "⠹",
+    "101000": "⠅",
+    "101001": "⠥",
+    "101010": "⠕",
+    "101011": "⠵",
+    "101100": "⠍",
+    "101101": "⠭",
+    "101110": "⠝",
+    "101111": "⠽",
+    "110000": "⠃",
+    "110001": "⠣",
+    "110010": "⠓",
+    "110011": "⠳",
+    "110100": "⠋",
+    "110101": "⠫",
+    "110110": "⠛",
+    "110111": "⠻",
+    "111000": "⠇",
+    "111001": "⠧",
+    "111010": "⠗",
+    "111011": "⠷",
+    "111100": "⠏",
+    "111101": "⠯",
+    "111110": "⠟",
+    "111111": "⠿"
+}
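This table is exactly the Unicode braille block laid out by dot weights: for a dot string s, the codepoint is U+2800 plus the sum of int(s[i]) * 2**i, since dot i+1 occupies bit i of the pattern offset. A small sanity check over the whole file, assuming it sits at the repo root:

    import json

    with open("braille_map.json") as fl:
        braille = json.load(fl)

    for dots, ch in braille.items():
        # dot i+1 contributes bit i of the offset into the U+2800 block
        assert ch == chr(0x2800 + sum(int(bit) << i for i, bit in enumerate(dots)))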
convert.py
ADDED
@@ -0,0 +1,73 @@
+import json
+import numpy as np
+import torch
+
+
+def convert_to_braille_unicode(str_input: str, path: str = "./number_map.json") -> str:
+    with open(path, "r") as fl:
+        data = json.load(fl)
+
+    # fall back to the raw class name if the pattern has no mapping
+    str_output = data.get(str_input, str_input)
+    return str_output
+
+
+def parse_xywh_and_class(boxes: torch.Tensor) -> list:
+    """
+    Split detected boxes into lines of braille text.
+
+    boxes (ultralytics Boxes): detection results with shape (num_boxes, 6):
+    xywh in the first four channels, then confidence and class.
+    The Boxes object also exposes, as torch tensors or numpy arrays:
+        xyxy: the boxes in xyxy format.
+        conf: the confidence values of the boxes.
+        cls: the class values of the boxes.
+        xywh: the boxes in xywh format.
+        xyxyn: xyxy normalized by the original image size.
+        xywhn: xywh normalized by the original image size.
+    """
+
+    # copy values from the awkward "boxes" object into a plain numpy array
+    new_boxes = np.zeros(boxes.shape)
+    new_boxes[:, :4] = boxes.xywh.cpu().numpy()  # first 4 channels are xywh
+    new_boxes[:, 4] = boxes.conf.cpu().numpy()  # 5th channel is confidence
+    new_boxes[:, 5] = boxes.cls.cpu().numpy()  # 6th and last channel is class
+
+    # sort according to y coordinate
+    new_boxes = new_boxes[new_boxes[:, 1].argsort()]
+
+    # find threshold indices at which to break lines
+    y_threshold = np.mean(new_boxes[:, 3]) // 2
+    boxes_diff = np.diff(new_boxes[:, 1])
+    threshold_index = np.where(boxes_diff > y_threshold)[0]
+
+    # cluster according to threshold_index
+    boxes_clustered = np.split(new_boxes, threshold_index + 1)
+    boxes_return = []
+    for cluster in boxes_clustered:
+        # sort each line according to x coordinate
+        cluster = cluster[cluster[:, 0].argsort()]
+        boxes_return.append(cluster)
+
+    return boxes_return
+
+
+def arrange_braille_to_2x3(box_classes: list) -> list:
+    """
+    Rearrange a flat list of detected braille dot classes into 2x3 dot matrices.
+    :param box_classes: detected braille dot classes (length must be a multiple of 6)
+    :return: list of 2x3 braille dot matrices
+    """
+    # the input length must be a multiple of 6
+    if len(box_classes) % 6 != 0:
+        raise ValueError("the braille class array length must be a multiple of 6")
+
+    braille_2x3_list = []
+
+    # take six classes at a time and arrange each group into a 2x3 cell
+    for i in range(0, len(box_classes), 6):
+        # reshape to a 3x2 matrix, then transpose to 2x3
+        braille_char = np.array(box_classes[i : i + 6]).reshape(3, 2).T
+        braille_2x3_list.append(braille_char)
+
+    return braille_2x3_list
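parse_xywh_and_class only touches the xywh, conf, cls and shape members of the ultralytics Boxes object, so its line-splitting logic can be exercised without running a model; a toy check (the FakeBoxes class is hypothetical, purely for illustration):

    import torch
    from convert import parse_xywh_and_class

    class FakeBoxes:
        """Hypothetical stand-in exposing only what parse_xywh_and_class reads."""
        def __init__(self, xywh, conf, cls):
            self.xywh = torch.tensor(xywh, dtype=torch.float32)
            self.conf = torch.tensor(conf, dtype=torch.float32)
            self.cls = torch.tensor(cls, dtype=torch.float32)
            self.shape = (len(cls), 6)

    # three boxes: two on one text line (y around 10), one on the next (y = 60)
    boxes = FakeBoxes(
        xywh=[[30, 10, 12, 20], [10, 11, 12, 20], [10, 60, 12, 20]],
        conf=[0.9, 0.8, 0.95],
        cls=[1, 0, 2],
    )
    lines = parse_xywh_and_class(boxes)
    assert len(lines) == 2                      # split into two text lines
    assert list(lines[0][:, -1]) == [0.0, 1.0]  # first line re-sorted left to right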
image/alpha-numeric.jpeg
ADDED
image/gray_image.jpg
ADDED (Git LFS)
image/img_41.jpg
ADDED
image/test_1.jpg
ADDED (Git LFS)
image/test_2.jpg
ADDED (Git LFS)
image/test_3.jpg
ADDED (Git LFS)
image/test_4.jpg
ADDED (Git LFS)
image/test_5.jpg
ADDED (Git LFS)
number_map.json
ADDED
@@ -0,0 +1,66 @@
+{
+    "000001": "⠠",
+    "000010": "⠐",
+    "000011": "⠰",
+    "000100": "⠈",
+    "000101": "⠨",
+    "000110": "⠘",
+    "000111": "⠸",
+    "001000": "⠄",
+    "001001": "⠤",
+    "001010": "⠔",
+    "001011": "⠴",
+    "001100": "⠌",
+    "001101": "⠬",
+    "001110": "⠜",
+    "001111": "floor",
+    "010000": "⠂",
+    "010001": "⠢",
+    "010010": "⠒",
+    "010011": "⠲",
+    "010100": "9",
+    "010101": "⠪",
+    "010110": "0",
+    "010111": "⠺",
+    "011000": "⠆",
+    "011001": "⠦",
+    "011010": "⠖",
+    "011011": "⠶",
+    "011100": "⠎",
+    "011101": "⠮",
+    "011110": "⠞",
+    "011111": "⠾",
+    "100000": "1",
+    "100001": "⠡",
+    "100010": "5",
+    "100011": "⠱",
+    "100100": "3",
+    "100101": "⠩",
+    "100110": "4",
+    "100111": "⠹",
+    "101000": "⠅",
+    "101001": "⠥",
+    "101010": "⠕",
+    "101011": "⠵",
+    "101100": "⠍",
+    "101101": "⠭",
+    "101110": "⠝",
+    "101111": "⠽",
+    "110000": "2",
+    "110001": "⠣",
+    "110010": "8",
+    "110011": "⠳",
+    "110100": "6",
+    "110101": "⠫",
+    "110110": "7",
+    "110111": "⠻",
+    "111000": "⠇",
+    "111001": "⠧",
+    "111010": "⠗",
+    "111011": "⠷",
+    "111100": "⠏",
+    "111101": "⠯",
+    "111110": "⠟",
+    "111111": "⠿"
+}
+
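number_map.json differs from braille_map.json only where braille digits live: 1-9 and 0 reuse the dot patterns of the letters a-j, so the digit rows can be cross-checked against alphabet_map.json; a small sketch, assuming both files sit at the repo root:

    import json

    with open("alphabet_map.json") as fl:
        alphabet = json.load(fl)
    with open("number_map.json") as fl:
        numbers = json.load(fl)

    # braille digits 1-9 and 0 borrow the dot patterns of letters a-j
    for digit, letter in zip("1234567890", "abcdefghij"):
        assert numbers[alphabet[letter]] == digit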