Spaces:
Running
Running
import math | |
import cv2 | |
import imutils | |
import numpy as np | |
from sklearn.preprocessing import MinMaxScaler | |
from ultralytics import YOLO | |
from models.birefnet import BiRefNet | |
from util.utils import check_state_dict | |
from PIL import Image | |
import torch | |
from torchvision import transforms | |
from openvino.runtime import Core | |
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Segmentation model shared by pred_segmentation(); switched to inference mode.
# NOTE(review): AutoModelForImageSegmentation and usage_to_weights_file are not
# imported or defined in the visible part of this file -- confirm where they
# are supposed to come from.
model = AutoModelForImageSegmentation.from_pretrained(
    '/'.join(['zhengpeng7', usage_to_weights_file['General']]),
    trust_remote_code=True,
)
model.to(device)
model.eval()

# Input Data
# Preprocessing applied to every crop before it is fed to the model:
# resize to 1024x1024, convert to a tensor, normalise each channel.
transform_image = transforms.Compose([
    transforms.Resize(size=(1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])
import torch | |
from PIL import Image | |
import torchvision.transforms as transforms | |
def pred_segmentation(imagepath='../DIS-VD-11#Furniture#17#Table#4317824734_63b46ff6e6_o.jpg', box=(-1, -1, -1, -1)):
    """Predict a foreground mask for the region ``box`` of an image.

    Args:
        imagepath: Anything ``PIL.Image.open`` accepts (path or file object),
            or an already opened ``PIL.Image.Image``.
        box: ``(left, top, right, bottom)`` in image pixels. A coordinate of
            ``-1`` is replaced by the matching image border, so the default
            selects the whole image. The argument is never modified.

    Returns:
        A PIL image built from a canvas that has the shape of the model
        output (not the shape of the input image). The prediction is pasted
        at the position of ``box`` scaled to that canvas; everything else
        is zero.

    Raises:
        ValueError: If the box is empty once clamped to the image and scaled
            to the canvas.
    """
    print('predicting segmentation...')
    if isinstance(imagepath, Image.Image):
        image = imagepath
    else:
        image = Image.open(imagepath)
    # The normalisation in transform_image is defined for three channels.
    image = image.convert('RGB')
    w, h = image.size

    # Resolve the -1 placeholders into a *new* list: the original wrote into
    # `box` itself, which corrupted the shared default and the caller's list.
    borders = (0, 0, w, h)
    resolved = [borders[i] if value == -1 else value for i, value in enumerate(box)]
    # Keep the box inside the image so the paste below cannot overrun the canvas.
    limits = (w, h, w, h)
    resolved = [min(max(value, 0), limit) for value, limit in zip(resolved, limits)]

    image_crop = image.crop(resolved)
    # The input has to live on the same device as the model weights.
    model_device = next(model.parameters()).device
    input_images = transform_image(image_crop).unsqueeze(0).to(model_device)
    model.eval()

    # Prediction
    with torch.no_grad():
        preds = model(input_images)[-1].sigmoid()
    pred = preds[0].squeeze()

    canvas = torch.zeros_like(pred)
    # Even indices are x coordinates (scaled by width), odd ones y (height).
    scale = (canvas.shape[-1] / w, canvas.shape[-2] / h)
    left, top, right, bottom = (
        int(round(value * scale[i % 2])) for i, value in enumerate(resolved)
    )
    if right <= left or bottom <= top:
        raise ValueError(f'empty bounding box after scaling: {resolved}')

    pred = torch.nn.functional.interpolate(
        pred.unsqueeze(0).unsqueeze(0),
        size=(bottom - top, right - left),
        mode='bilinear',
        align_corners=True,
    ).squeeze(0).squeeze(0)
    canvas[top:bottom, left:right] = pred

    # Show Results
    return transforms.ToPILImage()(canvas.cpu())
def pred_bbox(image_path):
    """Return the first detected bounding box for the image at ``image_path``.

    Args:
        image_path: Path of the image file, read with ``cv2.imread``.

    Returns:
        ``[x1, y1, x2, y2]`` as ints, taken from the first box of the first
        result. When nothing is detected, ``[-1, -1, -1, -1]`` is returned;
        pred_segmentation() treats that value as "use the whole image".

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    print('predicting bounding box...')
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure with None instead of raising.
        raise FileNotFoundError(f'could not read image: {image_path}')

    # Named `detector` so it does not shadow the module-level segmentation model.
    detector = YOLO('models/weights/yolo_finetuned.pt')
    # Perform prediction
    results = detector(image)
    boxes = results[0].boxes.xyxy.cpu().numpy()
    if len(boxes) == 0:
        return [-1, -1, -1, -1]

    # Extract the bounding box coordinates
    x1, y1, x2, y2 = (int(value) for value in boxes[0])
    return [x1, y1, x2, y2]
def get_kps_from_pil(pil_image):
    """Turn a mask image into integer contour keypoints.

    The external contours of the mask are found with OpenCV, the one with the
    largest area is kept, and its points are passed through ``MinMaxScaler``
    (default settings), multiplied by 299 and converted to ``int32``.

    Args:
        pil_image: Mask image; it is converted with ``np.array`` and passed
            to ``cv2.findContours`` unchanged.

    Returns:
        An ``int32`` array with one row of two values per contour point.
    """
    print('converting keypoints...')
    mask = np.array(pil_image)

    # Only the outermost outlines, with straight runs collapsed to end points.
    found, _hierarchy = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    biggest = np.array(max(found, key=cv2.contourArea))

    # Each contour entry wraps its point in an extra dimension; unwrap it.
    outline = [entry[0] for entry in biggest]

    normalised = MinMaxScaler().fit_transform(outline)
    return np.int32(np.array(normalised) * 299)
def _scanline_features(contour, scan_axis, pick, size):
    """Sweep scan lines over a contour and keep one crossing value per line.

    For every ``level`` in ``range(size)`` the contour points are walked in
    order while tracking on which side of the line ``coord == level`` the walk
    currently is (``coord`` being ``point[scan_axis]``). Each change of side
    records a value on the other axis:

    * stepping over the line between two points records the floor of the mean
      of the two points' other-axis values;
    * a point lying exactly on the line records its own other-axis value and
      flips the side, except when no side has been decided yet, in which case
      it only sets the side.

    If exactly one value was recorded, the floor of the mean of the first and
    last contour points' other-axis values is added as a second one.

    Args:
        contour: Sequence of two-element points (lists, tuples or array rows).
        scan_axis: Index of the coordinate compared against the scan line
            (0 or 1); the other index supplies the recorded values.
        pick: Callable reducing the recorded values of one line to a single
            value (``min`` or ``max``).
        size: Number of scan lines.

    Returns:
        A list of ``size`` entries: ``pick`` of the recorded values, or ``-1``
        for lines without any recorded value.
    """
    value_axis = 1 - scan_axis
    features = []
    for level in range(size):
        side = 0  # 0: undecided, 1 / -1: the two sides of the scan line
        crossings = []
        for index, point in enumerate(contour):
            # The first point has no predecessor and is paired with itself.
            previous = contour[index - 1] if index else point
            coord = point[scan_axis]
            if coord == level:
                if side == 0:
                    side = 1
                else:
                    crossings.append(point[value_axis])
                    side = -side
            else:
                current = 1 if coord > level else -1
                if side == 0:
                    side = current
                elif current != side:
                    crossings.append((point[value_axis] + previous[value_axis]) // 2)
                    side = current
        if not crossings:
            features.append(-1)
            continue
        if len(crossings) == 1:
            # The walk never visits the edge from the last point back to the
            # first, so a lone crossing is paired with that edge's midpoint.
            crossings.append((contour[0][value_axis] + contour[-1][value_axis]) // 2)
        features.append(pick(crossings))
    return features


def get_features_up(contour, size=300):
    """Per x in ``range(size)``: largest recorded y crossing, or -1 if none."""
    return _scanline_features(contour, 0, max, size)


def get_features_down(contour, size=300):
    """Per x in ``range(size)``: smallest recorded y crossing, or -1 if none."""
    return _scanline_features(contour, 0, min, size)


def get_features_right(contour, size=300):
    """Per y in ``range(size)``: smallest recorded x crossing, or -1 if none."""
    return _scanline_features(contour, 1, min, size)


def get_features_left(contour, size=300):
    """Per y in ``range(size)``: largest recorded x crossing, or -1 if none."""
    return _scanline_features(contour, 1, max, size)
def extract_features(contour):
    """Concatenate the four directional feature lists of ``contour``.

    The order of the result is: down, up, right, left.
    """
    print('extracting features...')
    combined = []
    for extractor in (
        get_features_down,
        get_features_up,
        get_features_right,
        get_features_left,
    ):
        combined.extend(extractor(contour))
    return combined
def final_features(image_path):
    """Run the whole pipeline for one image file and return its features.

    Steps: open the image, rotate it with rotate_image(), predict a bounding
    box and a segmentation mask on the rotated image, convert the mask to
    contour keypoints and extract the directional features.

    Args:
        image_path: Path of the input image.

    Returns:
        The list produced by extract_features() for the mask contour.
    """
    import os
    import tempfile

    with Image.open(image_path) as source:
        # RGB keeps the PNG save below valid for every input mode.
        rotated = rotate_image(source).convert('RGB')

    # pred_bbox() reads its input from disk and pred_segmentation() opens its
    # argument with Image.open, so the rotated image is handed to both through
    # a temporary file. This also makes the box and the mask refer to the same
    # (rotated) pixels; previously the box came from the unrotated file.
    with tempfile.TemporaryDirectory() as workdir:
        rotated_path = os.path.join(workdir, 'rotated.png')
        rotated.save(rotated_path)
        pil_image = pred_segmentation(rotated_path, pred_bbox(rotated_path))

    contour = get_kps_from_pil(pil_image)
    return extract_features(contour)
def predict_kps(image):
    """Return the outline points of the first mask predicted for ``image``.

    The YOLO weights at ``models/weights/yolo_finetuned.pt`` are loaded on
    every call; the value returned is ``masks.xy[0]`` of the first result.
    """
    detector = YOLO('models/weights/yolo_finetuned.pt')
    # Perform prediction
    first_result = detector(image)[0]
    return first_result.masks.xy[0]
def calculate_angle(p1, p2):
    """Return the angle, in degrees, of the vector from ``p1`` to ``p2``.

    The angle is ``arctan2(dy, dx)`` converted to degrees, where ``dx`` and
    ``dy`` are the differences of the first and second coordinates.
    """
    rise = p2[1] - p1[1]
    run = p2[0] - p1[0]
    radians = np.arctan2(rise, run)
    return math.degrees(radians)
# Compute the minimum-area bounding rectangle of the predicted outline
def calculate_square(img):
    """Return the four corners of the minimum-area rectangle around the object.

    The image is converted to an array (RGB is reordered to BGR for OpenCV,
    arrays that are not three-dimensional are used as they are), its outline
    is predicted with predict_kps(), and the corner points of
    ``cv2.minAreaRect`` of that outline are returned as ``int32``.
    """
    pixels = np.array(img)
    if pixels.ndim == 3:
        # Three dimensions means a colour image: PIL order is RGB, OpenCV wants BGR.
        frame = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)
    else:
        # Grayscale needs no channel reordering.
        frame = pixels

    outline = predict_kps(frame)
    corners = cv2.boxPoints(cv2.minAreaRect(outline))
    return np.int32(corners)
def rotate_image(image):
    """Rotate ``image`` by the angle of the longest side of its bounding box.

    The corners come from calculate_square(); the sides between consecutive
    corners are measured (the side closing the box from the last corner back
    to the first is not), and the image is rotated by the angle that
    calculate_angle() reports for the longest of them.
    """
    corners = calculate_square(image)

    # Length of each side between neighbouring corners.
    edge_lengths = []
    for start, end in zip(corners[:-1], corners[1:]):
        edge_lengths.append(np.linalg.norm(start - end))

    # End points of the longest measured side.
    longest = np.argmax(edge_lengths)
    first_corner = corners[longest]
    second_corner = corners[longest + 1]

    # Angle between that side and the horizontal axis.
    tilt = calculate_angle(first_corner, second_corner)
    return image.rotate(tilt)