import gradio as gr
import cv2
import mediapipe as mp
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
from PIL import Image
import torch
import torchvision.transforms as T
from huggingface_hub import hf_hub_download

cropped_image = []
analyzed_image = []

finetuned_classes = [
    'iris',
]

# Pipeline:
# 1. Take a photo.
# 2. Run the face landmarker on it to crop the eye region.
# 3. Run our model on the crop.
# 4. Display the results.

# Create a FaceLandmarker object.
base_options = python.BaseOptions(model_asset_path='face_landmarker_v2_with_blendshapes.task')
options = vision.FaceLandmarkerOptions(base_options=base_options,
                                       output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)
detector = vision.FaceLandmarker.create_from_options(options)

# Load the DETR backbone without pretrained weights, then restore the
# fine-tuned iris checkpoint from the Hugging Face Hub.
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50',
                       pretrained=False, num_classes=1)
hf_hub_download(repo_id="zivpollak/ECXV001", filename="checkpoint.pth", local_dir='.')
checkpoint = torch.load('checkpoint.pth', map_location='cpu')
model.load_state_dict(checkpoint['model'], strict=False)
model.eval()


def video_identity(video):
    # Unused; left over from a webcam-video variant of the demo.
    return video


# Standard PyTorch mean-std input image normalization (ImageNet statistics).
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
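# Optional sanity check (a hedged sketch, not part of the app flow):
# T.Resize(800) maps the *shorter* image edge to 800 px, so a 640x480 input
# becomes a 3x800x1066 tensor. Uncomment to verify locally:
#
# _dummy = Image.new("RGB", (640, 480))
# assert tuple(transform(_dummy).shape) == (3, 800, 1066)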
def handle_image(input_image):
    global cropped_image, analyzed_image
    # Debug dump of the incoming frame (Gradio hands us RGB; OpenCV writes BGR).
    cv2.imwrite("image.jpg", cv2.cvtColor(input_image, cv2.COLOR_RGB2BGR))
    # image = mp.Image.create_from_file("image.jpg")
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(input_image))
    cropped_image = image.numpy_view().copy()
    analyzed_image = image.numpy_view().copy()

    detection_result = detector.detect(image)
    face_landmarks_list = detection_result.face_landmarks
    if not face_landmarks_list:
        # No face detected: return the input unchanged rather than crashing.
        return input_image

    # Convert the landmarks of the first face to protobuf form.
    face_landmarks = face_landmarks_list[0]
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
        landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
        for landmark in face_landmarks
    ])

    # Crop between mesh landmarks 70 and 346, which bracket the eye region;
    # the filled circles are debug markers drawn on the input frame.
    height, width, _ = cropped_image.shape
    p1 = [int(face_landmarks_proto.landmark[70].x * width),
          int(face_landmarks_proto.landmark[70].y * height)]
    cv2.circle(input_image, (p1[0], p1[1]), 10, (0, 0, 255), -1)
    p2 = [int(face_landmarks_proto.landmark[346].x * width),
          int(face_landmarks_proto.landmark[346].y * height)]
    cv2.circle(input_image, (p2[0], p2[1]), 10, (0, 0, 255), -1)
    cropped_image = cropped_image[p1[1]:p2[1], p1[0]:p2[0]]

    output_image = run_workflow(cropped_image, model)
    return output_image


def filter_bboxes_from_outputs(img, outputs, threshold=0.7):
    # Keep only predictions with confidence above the threshold
    # (the last logit is DETR's "no object" class).
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold
    probas_to_keep = probas[keep]

    # Convert boxes from [0, 1] coordinates to image scale.
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], img.size)
    return probas_to_keep, bboxes_scaled


def plot_finetuned_results(img, prob=None, boxes=None):
    if prob is not None and boxes is not None:
        for _p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
            cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)),
                          (0, 255, 255), 1)
    return img


def rescale_bboxes(out_bbox, size):
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b


def box_cxcywh_to_xyxy(x):
    # (center_x, center_y, width, height) -> (x_min, y_min, x_max, y_max)
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)


def run_workflow(my_image, my_model):
    # Round-trip through disk so the model sees a PIL image; convert the RGB
    # crop to BGR before writing so the file on disk has correct colors.
    cv2.imwrite("img1.jpg", cv2.cvtColor(my_image, cv2.COLOR_RGB2BGR))
    my_image = Image.open("img1.jpg")

    # Mean-std normalize the input image (batch size 1) and run the model.
    img = transform(my_image).unsqueeze(0)
    with torch.no_grad():
        outputs = my_model(img)

    # Draw boxes for detections above a 0.4 confidence threshold.
    output_image = cv2.imread("img1.jpg")
    probas_to_keep, bboxes_scaled = filter_bboxes_from_outputs(my_image, outputs,
                                                               threshold=0.4)
    output_image = plot_finetuned_results(output_image, probas_to_keep, bboxes_scaled)
    # OpenCV returned BGR; convert back to RGB for Gradio display.
    return cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Iris detection
        """)
    # video1 = gr.Video(height=200, width=200)  # source="webcam"
    image1 = gr.Image()
    b = gr.Button("Analyze")
    gr.Markdown(
        """
        # Cropped image
        """)
    # cropped_display = gr.Gallery(
    #     label="cropped", show_label=False, elem_id="cropped"
    # )
    # Named cropped_display so it does not shadow the global cropped_image.
    cropped_display = gr.Image()
    out = [cropped_display]
    b.click(fn=handle_image, inputs=image1, outputs=out)

demo.launch()
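# Headless smoke test (a hedged sketch; "face.jpg" is an assumed local file
# containing one face, and the name is illustrative, not part of the app).
# Run these lines instead of demo.launch() above:
#
#   frame = np.array(Image.open("face.jpg").convert("RGB"))
#   annotated = handle_image(frame)
#   cv2.imwrite("annotated.jpg", cv2.cvtColor(annotated, cv2.COLOR_RGB2BGR))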