Spaces:

dibahadie
/

KeychainSegmentation

Running

App Files Files Community

KeychainSegmentation / segment_key.py

dibahadie

Update segment_key.py

a712780 verified 5 days ago

raw

history blame

10.6 kB

	import math

	import cv2
	import imutils
	import numpy as np
	from sklearn.preprocessing import MinMaxScaler
	from ultralytics import YOLO

	from models.birefnet import BiRefNet
	from util.utils import check_state_dict
	from PIL import Image
	import torch
	from torchvision import transforms
	from openvino.runtime import Core

	# Run inference on the GPU when torch reports one, otherwise on the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"


	# Load the segmentation network once at import time, move it to the chosen
	# device and switch it to inference mode.
	# NOTE(review): neither `AutoModelForImageSegmentation` nor
	# `usage_to_weights_file` is imported or defined in the visible part of this
	# file, so this line looks like it raises NameError on import -- confirm where
	# these names are supposed to come from.
	model = AutoModelForImageSegmentation.from_pretrained('/'.join(('zhengpeng7', usage_to_weights_file['General'])), trust_remote_code=True)
	model.to(device)
	model.eval()


	# Preprocessing applied to every crop before it is fed to `model`:
	# resize to 1024x1024, convert to a tensor, then normalise each of the three
	# channels (the constants look like the usual ImageNet mean/std -- TODO confirm
	# they match what the weights were trained with).
	transform_image = transforms.Compose([
	transforms.Resize((1024, 1024)),
	transforms.ToTensor(),
	transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
	])

	import torch
	from PIL import Image
	import torchvision.transforms as transforms



	def pred_segmentation(imagepath='../DIS-VD-11#Furniture#17#Table#4317824734_63b46ff6e6_o.jpg', box=(-1, -1, -1, -1)):
	    """Predict a foreground mask for (a region of) an image.

	    Args:
	        imagepath: Anything ``PIL.Image.open`` accepts (path or binary file
	            object), or an already opened ``PIL.Image.Image``.
	        box: ``(left, top, right, bottom)`` crop in image pixels. Any entry
	            equal to ``-1`` is replaced by the matching image bound
	            ``(0, 0, width, height)``. ``None`` means the whole image. The
	            caller's sequence is never modified.

	    Returns:
	        A PIL image holding the sigmoid mask. It has the spatial size of the
	        model output, with the prediction for the crop pasted into the
	        proportionally scaled ``box`` region and zeros elsewhere.
	    """
	    print('predicting segmentation...')
	    # Accept an in-memory image as well as a path so callers that already
	    # transformed the picture do not have to write it back to disk.
	    image = imagepath if isinstance(imagepath, Image.Image) else Image.open(imagepath)
	    w, h = image.size[:2]

	    # Build a private, resolved copy of the box. The original code wrote the
	    # resolved values back into the (shared, mutable) default argument, which
	    # leaked the first image's size into every later call.
	    if box is None:
	        box = (-1, -1, -1, -1)
	    full_frame = (0, 0, w, h)
	    box = [full_frame[idx] if coord == -1 else coord for idx, coord in enumerate(box)]

	    # Normalize() above expects exactly three channels.
	    image_crop = image.crop(tuple(box)).convert('RGB')
	    # The model lives on `device`, so the input has to be moved there too.
	    input_images = transform_image(image_crop).unsqueeze(0).to(device)

	    model.eval()
	    # Prediction
	    with torch.no_grad():
	        preds = model(input_images)[-1].sigmoid()
	    pred = preds[0].squeeze().cpu()

	    # Paste the prediction into an empty canvas at the position of the box,
	    # with box coordinates rescaled from image pixels to canvas pixels
	    # (even indices are x values, odd indices are y values).
	    canvas = torch.zeros_like(pred)
	    scale = (canvas.shape[-1] / w, canvas.shape[-2] / h)
	    left, top, right, bottom = [
	        int(round(coord * scale[idx % 2])) for idx, coord in enumerate(box)
	    ]
	    pred = torch.nn.functional.interpolate(
	        pred.unsqueeze(0).unsqueeze(0),
	        size=(bottom - top, right - left),
	        mode='bilinear',
	        align_corners=True
	    ).squeeze()
	    canvas[top:bottom, left:right] = pred

	    return transforms.ToPILImage()(canvas)


	def pred_bbox(image_path):
	    """Detect the object in an image file and return its bounding box.

	    Args:
	        image_path: Path of the image, read with ``cv2.imread``.

	    Returns:
	        ``[x1, y1, x2, y2]`` of the first detection, truncated to ints.

	    Raises:
	        FileNotFoundError: If the image cannot be read.
	        IndexError: If the detector reports no boxes.
	    """
	    print('predicting bounding box...')
	    image = cv2.imread(image_path)
	    # cv2.imread signals failure by returning None instead of raising; fail
	    # here rather than handing None to the detector.
	    if image is None:
	        raise FileNotFoundError(f'could not read image: {image_path!r}')

	    # NOTE: the weights are reloaded on every call; cache the detector at
	    # module level if this function is called repeatedly.
	    detector = YOLO('models/weights/yolo_finetuned.pt')

	    # Perform prediction
	    results = detector(image)
	    boxes = results[0].boxes.xyxy.cpu().numpy()
	    if len(boxes) == 0:
	        # Same exception type as the original bare `[0]` lookup, but with
	        # enough context to diagnose the failure.
	        raise IndexError(f'no bounding box detected in {image_path!r}')

	    # Extract the bounding box coordinates of the first detection
	    return [int(value) for value in boxes[0]]


	def get_kps_from_pil(pil_image):
	    """Turn a mask image into outline points on a 0..299 integer grid.

	    Args:
	        pil_image: Mask image (or array) that ``cv2.findContours`` can
	            process once converted with ``np.array``.

	    Returns:
	        ``int32`` array of ``(x, y)`` points of the largest external contour.
	        Each axis is min-max scaled independently and mapped to ``0..299``.

	    Raises:
	        ValueError: If the mask contains no contour.
	    """
	    print('converting keypoints...')
	    image_array = np.array(pil_image)

	    # Find contours using OpenCV
	    contours, _ = cv2.findContours(image_array, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    if len(contours) == 0:
	        # max() on an empty sequence raises ValueError as well, just without
	        # saying what went wrong.
	        raise ValueError('no contour found in segmentation mask')

	    # Keep only the largest contour by area and drop the singleton middle
	    # axis so the points form an (N, 2) array.
	    largest_contour = np.asarray(max(contours, key=cv2.contourArea))
	    points = largest_contour[:, 0]

	    # Scale both axes to [0, 1], stretch to the 300-line feature grid and
	    # truncate to integers.
	    scaled = MinMaxScaler().fit_transform(points)
	    return np.int32(scaled * 299)


	# Number of scan lines evaluated per direction (lines 0 .. 299).
	_FEATURE_GRID_SIZE = 300


	def _scan_line_crossings(contour, scan_axis, line):
	    """Return the other-axis coordinates where `contour` meets a scan line.

	    The contour is walked once in order. `scan_axis` (0 or 1) selects the
	    coordinate compared against `line`; the recorded values come from the
	    other coordinate.
	    """
	    value_axis = 1 - scan_axis
	    crossings = []
	    # 0: side not known yet, 1: at/after the line, -1: before the line.
	    side = 0
	    previous = None
	    for point in contour:
	        coord = point[scan_axis]
	        if side == 0:
	            # The first point only fixes the starting side; a point lying
	            # exactly on the line counts as "after" and records nothing.
	            side = -1 if coord < line else 1
	        elif coord == line:
	            # A point on the line is recorded as-is and flips the side.
	            crossings.append(point[value_axis])
	            side = -side
	        elif (coord > line) != (side == 1):
	            # The walk jumped over the line: record the midpoint of the two
	            # points surrounding the jump.
	            crossings.append((point[value_axis] + previous[value_axis]) // 2)
	            side = -side
	        previous = point

	    # A single crossing is paired with the midpoint between the first and
	    # last contour points (the edge the walk above never visits).
	    if len(crossings) == 1:
	        crossings.append((contour[0][value_axis] + contour[-1][value_axis]) // 2)
	    return crossings


	def _directional_features(contour, scan_axis, pick):
	    """Reduce every scan line to one value with `pick`, or -1 if untouched."""
	    features = []
	    for line in range(_FEATURE_GRID_SIZE):
	        crossings = _scan_line_crossings(contour, scan_axis, line)
	        features.append(pick(crossings) if crossings else -1)
	    return features


	def get_features_up(contour):
	    """For each line ``x = 0..299`` return the largest crossing ``y``, or -1.

	    Args:
	        contour: Sequence of ``(x, y)`` points in walking order.

	    Returns:
	        List of 300 values, one per scan line.
	    """
	    return _directional_features(contour, 0, max)


	def get_features_down(contour):
	    """For each line ``x = 0..299`` return the smallest crossing ``y``, or -1.

	    Args:
	        contour: Sequence of ``(x, y)`` points in walking order.

	    Returns:
	        List of 300 values, one per scan line.
	    """
	    return _directional_features(contour, 0, min)


	def get_features_right(contour):
	    """For each line ``y = 0..299`` return the smallest crossing ``x``, or -1.

	    Args:
	        contour: Sequence of ``(x, y)`` points in walking order.

	    Returns:
	        List of 300 values, one per scan line.
	    """
	    return _directional_features(contour, 1, min)


	def get_features_left(contour):
	    """For each line ``y = 0..299`` return the largest crossing ``x``, or -1.

	    Args:
	        contour: Sequence of ``(x, y)`` points in walking order.

	    Returns:
	        List of 300 values, one per scan line.
	    """
	    return _directional_features(contour, 1, max)


	def extract_features(contour):
	    """Concatenate the down, up, right and left feature lists of a contour.

	    Args:
	        contour: Sequence of ``(x, y)`` points passed unchanged to each of
	            the four directional extractors.

	    Returns:
	        One flat list: down features, then up, then right, then left.
	    """
	    print('extracting features...')
	    combined = []
	    for extractor in (get_features_down, get_features_up, get_features_right, get_features_left):
	        combined.extend(extractor(contour))
	    return combined


	def final_features(image_path):
	    """Run the whole pipeline on an image file and return its feature list.

	    The image is rotated with ``rotate_image``, then the bounding box and the
	    segmentation mask are both computed on that rotated picture, and the
	    mask outline is converted into features.

	    Args:
	        image_path: Path of the input image.

	    Returns:
	        The list produced by ``extract_features`` for the mask outline.
	    """
	    import os
	    import tempfile

	    with Image.open(image_path) as source:
	        rotated = rotate_image(source)

	    # pred_bbox reads its input from disk, and the box it returns is only
	    # meaningful for the picture it was computed on. Write the rotated image
	    # to a temporary file so that detection and segmentation see the same
	    # pixels (previously the box came from the unrotated file and a PIL
	    # image was passed where a path was expected).
	    with tempfile.TemporaryDirectory() as workdir:
	        rotated_path = os.path.join(workdir, 'rotated.png')
	        rotated.convert('RGB').save(rotated_path)
	        pil_image = pred_segmentation(rotated_path, pred_bbox(rotated_path))

	    contour = get_kps_from_pil(pil_image)
	    return extract_features(contour)


	def predict_kps(image):
	    """Return the outline points of the first mask predicted for `image`.

	    Args:
	        image: Input handed directly to the YOLO model.

	    Returns:
	        ``masks.xy[0]`` of the first prediction result.
	    """
	    # The fine-tuned weights are loaded from disk on every call.
	    segmenter = YOLO('models/weights/yolo_finetuned.pt')
	    first_result = segmenter(image)[0]
	    return first_result.masks.xy[0]


	def calculate_angle(p1, p2):
	    """Return the direction of the vector from `p1` to `p2` in degrees.

	    Args:
	        p1: Start point, indexable as ``(x, y)``.
	        p2: End point, indexable as ``(x, y)``.

	    Returns:
	        ``atan2(dy, dx)`` converted to degrees, as a float.
	    """
	    rise = p2[1] - p1[1]
	    run = p2[0] - p1[0]
	    radians = np.arctan2(rise, run)
	    return math.degrees(radians)


	# Bounding rectangle of the outline predicted for an image
	def calculate_square(img):
	    """Return the corners of the minimum-area rectangle around the outline.

	    Args:
	        img: Image convertible with ``np.array`` (e.g. a PIL image).

	    Returns:
	        ``int32`` array with the four corner points from ``cv2.boxPoints``.
	    """
	    pixels = np.array(img)
	    # Three-dimensional arrays are treated as RGB and reordered to BGR for
	    # OpenCV; two-dimensional (grayscale) arrays are used unchanged.
	    cv_image = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR) if pixels.ndim == 3 else pixels

	    outline = predict_kps(cv_image)
	    corners = cv2.boxPoints(cv2.minAreaRect(outline))
	    return np.int32(corners)


	def rotate_image(image):
	    """Rotate `image` by the angle of the longest side of its bounding box.

	    Args:
	        image: PIL image; it is passed to ``calculate_square`` and then
	            rotated with its own ``rotate`` method.

	    Returns:
	        The rotated image.
	    """
	    corners = calculate_square(image)

	    # Length of each side between consecutive corners (the side closing the
	    # rectangle from the last corner back to the first is not measured).
	    lengths = [
	        np.linalg.norm(start - end)
	        for start, end in zip(corners[:-1], corners[1:])
	    ]

	    # End points of the longest measured side.
	    longest = np.argmax(lengths)
	    start, end = corners[longest], corners[longest + 1]

	    # Rotate by the angle between that side and the horizontal axis.
	    return image.rotate(calculate_angle(start, end))