Spaces:

dibahadie
/

KeychainSegmentation

Running

File size: 10,632 Bytes

a712780

import math

import cv2
import imutils
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from ultralytics import YOLO

from models.birefnet import BiRefNet
from util.utils import check_state_dict
from PIL import Image
import torch
from torchvision import transforms
from openvino.runtime import Core

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"


# Load the pretrained segmentation model once at import time, move it to the
# selected device and switch it to inference mode.
# NOTE(review): neither `AutoModelForImageSegmentation` nor
# `usage_to_weights_file` is imported or defined in the lines visible in this
# file, so this statement presumably raises NameError on import - confirm
# where these names are supposed to come from.
model = AutoModelForImageSegmentation.from_pretrained('/'.join(('zhengpeng7', usage_to_weights_file['General'])), trust_remote_code=True)
model.to(device)
model.eval()


# Input Data
# Preprocessing applied to every image before it is fed to `model`:
# resize to 1024x1024, convert to a tensor, then normalise the three channels
# with the given per-channel mean and standard deviation.
transform_image = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

import torch
from PIL import Image
import torchvision.transforms as transforms



def pred_segmentation(imagepath='../DIS-VD-11#Furniture#17#Table#4317824734_63b46ff6e6_o.jpg', box=[-1, -1, -1, -1]):
    """Predict a foreground mask for an image, optionally restricted to a box.

    Args:
        imagepath: Path of the image file to open, or an already opened PIL
            image (used as-is, without touching the file system).
        box: ``[left, top, right, bottom]`` in pixel coordinates of the image.
            Every entry equal to ``-1`` is replaced by the matching image
            border (``0``, ``0``, width, height). The argument itself is never
            modified, so the shared default list stays ``[-1, -1, -1, -1]``.

    Returns:
        A PIL image built from a canvas that has the same shape as the model
        prediction: the box region (rescaled to canvas coordinates) holds the
        sigmoid prediction, everything outside the box is zero.
    """
    print('predicting segmentation...')
    # box: left, top, right, bottom
    if isinstance(imagepath, Image.Image):
        image = imagepath
    else:
        image = Image.open(imagepath)
    # The normalisation in `transform_image` is defined for three channels,
    # so grayscale / RGBA / palette inputs are converted first.
    image = image.convert('RGB')
    w, h = image.size[:2]

    # Work on a fresh list: filling the -1 placeholders in place would corrupt
    # the default argument (and the caller's list) for every later call.
    full_frame = (0, 0, w, h)
    box = [full_frame[idx] if coord_value == -1 else coord_value
           for idx, coord_value in enumerate(box)]

    image_crop = image.crop(box)
    # The model lives on `device`, so the input batch has to be moved there too.
    input_images = transform_image(image_crop).unsqueeze(0).to(device)

    model.eval()
    # Prediction
    with torch.no_grad():
        preds = model(input_images)[-1].sigmoid()
    pred = preds[0].squeeze()

    # Paste the prediction back at the box position, expressed in the
    # coordinate system of the prediction-sized canvas.
    canvas = torch.zeros_like(pred)
    scale_x = canvas.shape[-1] / w
    scale_y = canvas.shape[-2] / h
    box_to_canvas = [int(round(coord_value * (scale_x, scale_y)[idx % 2]))
                     for idx, coord_value in enumerate(box)]
    pred = torch.nn.functional.interpolate(
        pred.unsqueeze(0).unsqueeze(0),
        size=(box_to_canvas[3] - box_to_canvas[1], box_to_canvas[2] - box_to_canvas[0]),
        mode='bilinear',
        align_corners=True
    ).squeeze()
    canvas[box_to_canvas[1]:box_to_canvas[3], box_to_canvas[0]:box_to_canvas[2]] = pred

    # Show Results
    pred_pil = transforms.ToPILImage()(canvas.cpu())
    return pred_pil


def pred_bbox(image_path):
    """Detect the object in an image file and return its bounding box.

    Args:
        image_path: Path of the image, read with ``cv2.imread``.

    Returns:
        ``[x1, y1, x2, y2]``: the first box reported by the detector, with
        each coordinate converted to ``int``.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
        IndexError: If the detector reports no box for the image.
    """
    print('predicting bounding box...')
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        # Handing None to the detector would not fail at this point
        # (NOTE(review): Ultralytics appears to fall back to a default source
        # when none is given), so stop here with a clear error.
        raise FileNotFoundError(f'could not read image: {image_path!r}')
    model = YOLO('models/weights/yolo_finetuned.pt')

    # Perform prediction
    results = model(image)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    if len(boxes) == 0:
        # Same exception type as the bare `[0]` lookup, but with context.
        raise IndexError(f'no bounding box detected in {image_path!r}')

    # Extract the bounding box coordinates
    x1, y1, x2, y2 = map(int, boxes[0])
    return [x1, y1, x2, y2]


def get_kps_from_pil(pil_image):
    """Turn a mask image into integer contour points scaled to 0..299.

    The largest external contour (by area) of the mask is taken, each of its
    two coordinate columns is min-max scaled independently, multiplied by 299
    and truncated to ``int32``.

    Args:
        pil_image: Mask image; it is converted with ``np.array`` and handed to
            ``cv2.findContours``.

    Returns:
        An ``int32`` array with one ``(x, y)`` row per contour point.
    """
    print('converting keypoints...')
    mask = np.array(pil_image)

    # Outer contours only, with straight runs compressed to their end points.
    found, _hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    biggest = np.array(max(found, key=cv2.contourArea))

    # Every contour entry wraps its point in one extra axis; unwrap it.
    points = [entry[0] for entry in biggest]

    unit_points = np.array(MinMaxScaler().fit_transform(points))
    return np.int32(unit_points * 299)


def _crossing_profile(contour, scan_axis, pick, size=300):
    """Build one directional profile of a contour.

    For every integer ``level`` in ``range(size)`` the contour is walked in
    order and each time it crosses (or touches) ``level`` along ``scan_axis``
    the coordinate on the other axis is recorded:

    * a point lying exactly on the level records its own coordinate;
    * a jump from one side of the level to the other records the floored
      midpoint of the current and the previous point.

    The first point only decides the starting side and never records a value.
    If exactly one value was recorded, the floored midpoint of the first and
    last contour points is added as a second candidate. The profile entry is
    ``pick`` of the recorded values, or ``-1`` when nothing was recorded.

    Args:
        contour: Sequence of ``(x, y)`` points.
        scan_axis: ``0`` to scan over x levels, ``1`` to scan over y levels.
        pick: Reducer applied to the recorded values (``max`` or ``min``).
        size: Number of levels to scan, starting at 0.

    Returns:
        A list with ``size`` entries.
    """
    value_axis = 1 - scan_axis
    profile = []
    for level in range(size):
        side = 0  # 0: undecided, 1: at/above the level, -1: below the level
        hits = []
        previous = None
        for point in contour:
            coord = point[scan_axis]
            if side == 0:
                # A point exactly on the level starts on the positive side.
                if coord < level:
                    side = -1
                elif coord >= level:
                    side = 1
            elif coord == level:
                hits.append(point[value_axis])
                side = -side
            elif (coord > level and side == -1) or (coord < level and side == 1):
                # `previous` is always set here: the side is only decided
                # after at least one point has been visited.
                hits.append((point[value_axis] + previous[value_axis]) // 2)
                side = -side
            previous = point

        if not hits:
            profile.append(-1)
            continue
        if len(hits) == 1:
            hits.append((contour[0][value_axis] + contour[-1][value_axis]) // 2)
        profile.append(pick(hits))

    return profile


def get_features_up(contour):
    """Return, for each x level 0..299, the largest recorded y (``-1`` if none)."""
    return _crossing_profile(contour, 0, max)


def get_features_down(contour):
    """Return, for each x level 0..299, the smallest recorded y (``-1`` if none)."""
    return _crossing_profile(contour, 0, min)


def get_features_right(contour):
    """Return, for each y level 0..299, the smallest recorded x (``-1`` if none)."""
    return _crossing_profile(contour, 1, min)


def get_features_left(contour):
    """Return, for each y level 0..299, the largest recorded x (``-1`` if none)."""
    return _crossing_profile(contour, 1, max)


def extract_features(contour):
    """Concatenate the four directional profiles of a contour.

    The profiles are computed and joined in the order down, up, right, left.

    Args:
        contour: Sequence of ``(x, y)`` points passed to each profile function.

    Returns:
        One flat list holding the four profiles back to back.
    """
    print('extracting features...')
    combined = []
    for profile in (get_features_down, get_features_up, get_features_right, get_features_left):
        combined.extend(profile(contour))
    return combined


def final_features(image_path):
    """Run the whole pipeline on an image file and return its feature list.

    Steps: open the image, rotate it with ``rotate_image``, detect the
    bounding box on the rotated image, segment inside that box, extract the
    scaled contour points of the mask and build the directional features.

    Args:
        image_path: Path of the input image.

    Returns:
        The list produced by ``extract_features`` for the mask contour.
    """
    # Function-scope imports: only this function needs them.
    import os
    import tempfile

    image = Image.open(image_path)
    image = rotate_image(image)

    # `pred_bbox` and `pred_segmentation` both read their image from a path,
    # and the box must be expressed in the coordinates of the image that gets
    # segmented. Write the rotated image to a temporary file and use that
    # file for both steps.
    with tempfile.TemporaryDirectory() as work_dir:
        rotated_path = os.path.join(work_dir, 'rotated.png')
        image.convert('RGB').save(rotated_path)
        box = pred_bbox(rotated_path)
        pil_image = pred_segmentation(rotated_path, box)

    contour = get_kps_from_pil(pil_image)
    return extract_features(contour)


def predict_kps(image):
    """Return the outline points of the first mask predicted for an image.

    Args:
        image: Image handed directly to the YOLO model.

    Returns:
        ``masks.xy[0]`` of the first result.
    """
    # The weights are loaded from disk on every call.
    detector = YOLO('models/weights/yolo_finetuned.pt')
    first_result = detector(image)[0]
    return first_result.masks.xy[0]


def calculate_angle(p1, p2):
    """Return the direction of the vector from ``p1`` to ``p2`` in degrees.

    Args:
        p1: Start point; index 0 is x, index 1 is y.
        p2: End point; index 0 is x, index 1 is y.

    Returns:
        The ``arctan2`` angle of ``p2 - p1`` converted to degrees.
    """
    run = p2[0] - p1[0]
    rise = p2[1] - p1[1]
    radians = np.arctan2(rise, run)
    return math.degrees(radians)


# Function to compute the minimum-area bounding rectangle of the detected object
def calculate_square(img):
    """Return the corners of the minimum-area rectangle around the object.

    The image is converted to an array (RGB to BGR when it has three
    dimensions), the outline is predicted with ``predict_kps`` and the
    rectangle is fitted with ``cv2.minAreaRect``.

    Args:
        img: Image convertible with ``np.array``.

    Returns:
        The four corner points from ``cv2.boxPoints``, cast to ``int32``.
    """
    pixels = np.array(img)
    # Three-dimensional arrays are treated as RGB and reordered to BGR for
    # OpenCV; two-dimensional (grayscale) arrays are used unchanged.
    cv_image = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR) if pixels.ndim == 3 else pixels

    outline = predict_kps(cv_image)
    corners = cv2.boxPoints(cv2.minAreaRect(outline))
    return np.int32(corners)


def rotate_image(image):
    """Rotate an image by the angle of the longest side of its object box.

    Args:
        image: PIL image; it is passed to ``calculate_square`` and rotated
            with its ``rotate`` method.

    Returns:
        The rotated image.
    """
    corners = calculate_square(image)

    # Lengths of the sides between consecutive corners (the closing side from
    # the last corner back to the first is not measured).
    edge_lengths = [np.linalg.norm(start - end) for start, end in zip(corners[:-1], corners[1:])]

    # The first longest side wins on ties.
    longest = np.argmax(edge_lengths)
    first_corner = corners[longest]
    second_corner = corners[longest + 1]

    # Angle of that side relative to the horizontal axis, in degrees.
    tilt = calculate_angle(first_corner, second_corner)

    return image.rotate(tilt)