"""Furniture-contour feature extraction pipeline.

Segments an object with BiRefNet, crops it to a YOLO-predicted bounding
box, rotates the image so the object's longest side is horizontal, and
converts the largest contour of the segmentation mask into a fixed-length
(4 x 300) scan-line feature vector.
"""

import math
from functools import lru_cache

import cv2
import imutils  # noqa: F401  -- kept from original; may be relied on elsewhere
import numpy as np
import torch
from PIL import Image
from sklearn.preprocessing import MinMaxScaler
from torchvision import transforms
# NOTE(review): the original referenced AutoModelForImageSegmentation without
# importing it; this import matches the public BiRefNet model-card usage.
from transformers import AutoModelForImageSegmentation
from ultralytics import YOLO
from openvino.runtime import Core  # noqa: F401  -- kept from original

from models.birefnet import BiRefNet  # noqa: F401  -- kept from original
from util.utils import check_state_dict  # noqa: F401  -- kept from original

# NOTE(review): `usage_to_weights_file` was referenced but never defined in
# the original file; this mapping follows the BiRefNet repo -- TODO confirm.
usage_to_weights_file = {'General': 'BiRefNet'}

device = "cuda" if torch.cuda.is_available() else "cpu"

# Segmentation model (loaded once at import, matching the original's behavior).
model = AutoModelForImageSegmentation.from_pretrained(
    '/'.join(('zhengpeng7', usage_to_weights_file['General'])),
    trust_remote_code=True,
)
model.to(device)
model.eval()

# Pre-processing for every segmentation input (ImageNet mean/std).
transform_image = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

YOLO_WEIGHTS = 'models/weights/yolo_finetuned.pt'


@lru_cache(maxsize=1)
def _get_yolo():
    """Load the fine-tuned YOLO model once and reuse it across calls.

    The original re-read the weights from disk on every prediction.
    """
    return YOLO(YOLO_WEIGHTS)


def pred_segmentation(imagepath='../DIS-VD-11#Furniture#17#Table#4317824734_63b46ff6e6_o.jpg',
                      box=None):
    """Segment a crop of an image and paste the mask onto a blank canvas.

    Args:
        imagepath: Path to an image file, or an already-opened PIL image
            (``final_features`` passes the latter).
        box: ``[left, top, right, bottom]`` crop in source-image pixels;
            entries equal to -1 default to the corresponding image edge.
            Defaults to the whole image. The caller's list is not mutated
            (the original both used a mutable default and mutated it).

    Returns:
        A PIL image (canvas-sized, 1024x1024) with the predicted mask at
        the rescaled crop location and zeros elsewhere.
    """
    print('predicting segmentation...')
    image = imagepath if isinstance(imagepath, Image.Image) else Image.open(imagepath)
    w, h = image.size[:2]

    full_image_box = [0, 0, w, h]
    box = list(box) if box is not None else [-1, -1, -1, -1]
    box = [full_image_box[i] if v == -1 else v for i, v in enumerate(box)]

    input_images = transform_image(image.crop(box)).unsqueeze(0)

    with torch.no_grad():
        # Move input to the model's device (the original left it on CPU,
        # crashing on CUDA machines) and bring the result back for PIL.
        preds = model(input_images.to(device))[-1].sigmoid().cpu()
    pred = preds[0].squeeze()

    # Map the crop box from source-image pixels to canvas pixels:
    # even indices are x coordinates, odd indices are y coordinates.
    canvas = torch.zeros_like(pred)
    scale = (canvas.shape[-1] / w, canvas.shape[-2] / h)
    box_to_canvas = [int(round(v * scale[i % 2])) for i, v in enumerate(box)]

    pred = torch.nn.functional.interpolate(
        pred.unsqueeze(0).unsqueeze(0),
        size=(box_to_canvas[3] - box_to_canvas[1],
              box_to_canvas[2] - box_to_canvas[0]),
        mode='bilinear',
        align_corners=True,
    ).squeeze()
    canvas[box_to_canvas[1]:box_to_canvas[3],
           box_to_canvas[0]:box_to_canvas[2]] = pred

    return transforms.ToPILImage()(canvas)


def pred_bbox(image_path):
    """Return ``[x1, y1, x2, y2]`` of the first YOLO detection in the image.

    Raises:
        ValueError: if YOLO finds no object (the original crashed with an
            opaque IndexError).
    """
    print('predicting bounding box...')
    image = cv2.imread(image_path)
    results = _get_yolo()(image)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    if len(boxes) == 0:
        raise ValueError(f'no object detected in {image_path}')
    x1, y1, x2, y2 = map(int, boxes[0])
    return [x1, y1, x2, y2]


def get_kps_from_pil(pil_image):
    """Extract the largest contour of a mask image, scaled to [0, 299].

    Args:
        pil_image: Single-channel PIL mask image.

    Returns:
        ``np.int32`` array of shape (N, 2): contour points min-max
        normalized per axis and scaled to the 0..299 grid.

    Raises:
        ValueError: if the mask contains no contour.
    """
    print('converting keypoints...')
    image_array = np.array(pil_image)
    contours, _ = cv2.findContours(image_array, cv2.RETR_EXTERNAL,
                                   cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        raise ValueError('no contours found in mask image')
    largest_contour = max(contours, key=cv2.contourArea)
    # OpenCV contours are (N, 1, 2); drop the middle axis.
    contour = np.asarray(largest_contour).reshape(-1, 2)
    kps = MinMaxScaler().fit_transform(contour) * 299
    return np.int32(kps)


def _scanline_crossings(contour, scan_axis, pick):
    """Shared scan-line feature extractor (factors out the 4x duplicated loop).

    For each scan line ``i`` in [0, 300), walk the closed contour tracking
    which side of the line we are on (``position``: 0 unknown, +1 above,
    -1 below). Each time the contour crosses the line, record the
    coordinate on the *other* axis (midpoint of the crossing segment, or
    the exact coordinate when a vertex lies on the line).

    Args:
        contour: Sequence of (x, y) integer points.
        scan_axis: 0 to scan over x (collect y), 1 to scan over y (collect x).
        pick: ``max`` or ``min`` -- which crossing becomes the feature.

    Returns:
        List of 300 ints; -1 where the scan line never crosses the contour.
    """
    other = 1 - scan_axis
    features = []
    for i in range(300):
        position = 0
        crossings = []
        for j, point in enumerate(contour):
            prev_point = contour[j - 1] if j != 0 else point
            coord = point[scan_axis]
            if coord > i and position == 0:
                position = 1
            elif coord < i and position == 0:
                position = -1
            elif coord > i and position == -1:
                crossings.append((point[other] + prev_point[other]) // 2)
                position = 1
            elif coord < i and position == 1:
                position = -1
                crossings.append((point[other] + prev_point[other]) // 2)
            elif coord == i and position == 1:
                crossings.append(point[other])
                position = -1
            elif coord == i and position == -1:
                crossings.append(point[other])
                position = 1
            elif coord == i and position == 0:
                position = 1
        if crossings:
            if len(crossings) == 1:
                # Close the contour: pair the lone crossing with the
                # midpoint of the first/last points' other-axis coords.
                crossings.append((contour[0][other] + contour[-1][other]) // 2)
            crossings.sort()
            features.append(pick(crossings))
        else:
            features.append(-1)
    return features


def get_features_up(contour):
    """Per vertical scan line x=i, the largest-y crossing of the contour."""
    return _scanline_crossings(contour, scan_axis=0, pick=max)


def get_features_down(contour):
    """Per vertical scan line x=i, the smallest-y crossing of the contour."""
    return _scanline_crossings(contour, scan_axis=0, pick=min)


def get_features_right(contour):
    """Per horizontal scan line y=i, the smallest-x crossing of the contour."""
    return _scanline_crossings(contour, scan_axis=1, pick=min)


def get_features_left(contour):
    """Per horizontal scan line y=i, the largest-x crossing of the contour."""
    return _scanline_crossings(contour, scan_axis=1, pick=max)


def extract_features(contour):
    """Concatenate the four 300-long scan-line profiles (down, up, right, left)."""
    print('extracting features...')
    return (get_features_down(contour) + get_features_up(contour)
            + get_features_right(contour) + get_features_left(contour))


def final_features(image_path):
    """Full pipeline: rotate upright, segment, contour, scan-line features."""
    image = Image.open(image_path)
    image = rotate_image(image)
    # NOTE(review): the bounding box is predicted on the *un-rotated* file on
    # disk but applied to the rotated image -- confirm this is intended.
    pil_image = pred_segmentation(image, pred_bbox(image_path))
    contour = get_kps_from_pil(pil_image)
    return extract_features(contour)


def predict_kps(image):
    """Return the polygon (xy points) of the first YOLO segmentation mask."""
    results = _get_yolo()(image)
    return results[0].masks.xy[0]


def calculate_angle(p1, p2):
    """Angle in degrees of the vector p1 -> p2 relative to the x-axis."""
    return math.degrees(math.atan2(p2[1] - p1[1], p2[0] - p1[0]))


def calculate_square(img):
    """Minimum-area rotated rectangle (4 corner points) around the YOLO mask.

    Args:
        img: PIL image (RGB) or grayscale array-like.

    Returns:
        ``np.int32`` array of shape (4, 2): the box corners.
    """
    np_image = np.array(img)
    # PIL gives RGB; OpenCV expects BGR for color images. Grayscale needs
    # no conversion.
    if np_image.ndim == 3:
        cv_image = cv2.cvtColor(np_image, cv2.COLOR_RGB2BGR)
    else:
        cv_image = np_image
    rect = cv2.minAreaRect(predict_kps(cv_image))
    return np.int32(cv2.boxPoints(rect))


def rotate_image(image):
    """Rotate a PIL image so the longest side of its min-area box is horizontal."""
    square = calculate_square(image)
    n = len(square)
    # Include the closing edge (last corner back to the first): the original
    # dropped it, so the true longest side could be missed.
    side_lengths = [np.linalg.norm(square[i] - square[(i + 1) % n])
                    for i in range(n)]
    max_index = int(np.argmax(side_lengths))
    p1, p2 = square[max_index], square[(max_index + 1) % n]
    angle = calculate_angle(p1, p2)
    return image.rotate(angle)