import gradio as gr
import pandas as pd
import cv2
import mediapipe as mp
import os
from statistics import mean
import numpy as np
from mediapipe.tasks import python
from mediapipe.tasks.python import vision
from mediapipe.framework.formats import landmark_pb2
from mediapipe import solutions
from PIL import Image
import torch, torchvision
import torchvision.transforms as T
from huggingface_hub import hf_hub_download
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
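# Module-level buffers holding the most recent crop / analysis result
# (reassigned inside the Gradio callback below).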
cropped_image = []
analyzed_image = []
finetuned_classes = [
    'iris',
]
# Pipeline: take a photo,
# run the face landmarker on it to crop the image,
# run our model on the crop,
# and display the results.

# Create a FaceLandmarker object.
base_options = python.BaseOptions(model_asset_path='face_landmarker_v2_with_blendshapes.task')
options = vision.FaceLandmarkerOptions(base_options=base_options,
                                       output_face_blendshapes=True,
                                       output_facial_transformation_matrixes=True,
                                       num_faces=1)
detector = vision.FaceLandmarker.create_from_options(options)
# Loading the model
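# DETR (ResNet-50 backbone) from torch.hub with a single-class head;
# the fine-tuned weights are downloaded from the Hugging Face Hub and
# loaded with strict=False so non-matching keys are skipped.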
model = torch.hub.load('facebookresearch/detr', 'detr_resnet50', pretrained=False, num_classes=1)
hf_hub_download(repo_id="zivpollak/ECXV001", filename="checkpoint.pth", local_dir='.')
checkpoint = torch.load('checkpoint.pth', map_location='cpu')
model.load_state_dict(checkpoint['model'], strict=False)
model.eval()
def video_identity(video):
    return video
# standard PyTorch mean-std input image normalization
transform = T.Compose([
    T.Resize(800),
    T.ToTensor(),
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
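# Gradio callback: receives the uploaded image as an RGB numpy array,
# crops it between two face landmarks, then runs iris detection on the crop.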
def handle_image(input_image):
    global cropped_image, analyzed_image
    cv2.imwrite("image.jpg", input_image)
    #image = mp.Image.create_from_file("image.jpg")
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(input_image))
    cropped_image = image.numpy_view().copy()
    analyzed_image = image.numpy_view().copy()

    detection_result = detector.detect(image)
    face_landmarks_list = detection_result.face_landmarks

    # Draw the face landmarks.
    face_landmarks = face_landmarks_list[0]
    face_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
    face_landmarks_proto.landmark.extend([
        landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z) for landmark in face_landmarks
    ])

    # Use landmarks 70 and 346 as the two corners of the crop: convert their
    # normalized coordinates to pixels and mark them on the input image.
    height, width, _ = cropped_image.shape
    p1 = [int(face_landmarks_proto.landmark[70].x * width), int(face_landmarks_proto.landmark[70].y * height)]
    cv2.circle(input_image, (p1[0], p1[1]), 10, (0, 0, 255), -1)
    p2 = [int(face_landmarks_proto.landmark[346].x * width), int(face_landmarks_proto.landmark[346].y * height)]
    cv2.circle(input_image, (p2[0], p2[1]), 10, (0, 0, 255), -1)
    cropped_image = cropped_image[p1[1]:p2[1], p1[0]:p2[0]]

    output_image = run_worflow(cropped_image, model)
    return output_image
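# Keep the DETR predictions whose class confidence exceeds the threshold
# and rescale their normalized boxes to pixel coordinates.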
def filter_bboxes_from_outputs(img, outputs, threshold=0.7):
    # keep only predictions with confidence above threshold
    probas = outputs['pred_logits'].softmax(-1)[0, :, :-1]
    keep = probas.max(-1).values > threshold
    probas_to_keep = probas[keep]

    # convert boxes from [0; 1] to image scales
    bboxes_scaled = rescale_bboxes(outputs['pred_boxes'][0, keep], img.size)
    return probas_to_keep, bboxes_scaled
def plot_finetuned_results(img, prob=None, boxes=None):
    if prob is not None and boxes is not None:
        for p, (xmin, ymin, xmax, ymax) in zip(prob, boxes.tolist()):
            print("adding rectangle")
            cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 255), 1)
    return img
def rescale_bboxes(out_bbox, size):
    print(size)
    img_w, img_h = size
    b = box_cxcywh_to_xyxy(out_bbox)
    b = b * torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float32)
    return b
def box_cxcywh_to_xyxy(x):
    # convert DETR's (center_x, center_y, width, height) boxes to (x_min, y_min, x_max, y_max)
    x_c, y_c, w, h = x.unbind(1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=1)
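# Full inference pass: normalize the crop, run DETR, filter low-confidence
# detections, and draw the surviving boxes on a copy of the crop.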
def run_worflow(my_image, my_model):
    # Write image to disk and read it back as PIL so the torchvision transforms get a PIL image.
    cv2.imwrite("img1.jpg", my_image)
    my_image = Image.open("img1.jpg")

    # mean-std normalize the input image (batch-size: 1)
    img = transform(my_image).unsqueeze(0)

    # propagate through the model (no gradients needed at inference time)
    with torch.no_grad():
        outputs = my_model(img)

    output_image = cv2.imread("img1.jpg")
    # single confidence threshold for drawing boxes
    for threshold in [0.4]:
        probas_to_keep, bboxes_scaled = filter_bboxes_from_outputs(my_image,
                                                                   outputs,
                                                                   threshold=threshold)
        print(bboxes_scaled)
        output_image = plot_finetuned_results(output_image,
                                              probas_to_keep,
                                              bboxes_scaled)
    return output_image
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Iris detection
        """)
    #video1 = gr.Video(height=200, width=200)#source="webcam")
    image1 = gr.Image()
    b = gr.Button("Analyze")
    gr.Markdown(
        """
        # Cropped image
        """)
    #cropped_image = gr.Gallery(
    #    label="cropped", show_label=False, elem_id="cropped"
    #)
    cropped_image = gr.Image()
    out = [cropped_image]
    b.click(fn=handle_image, inputs=image1, outputs=out)

demo.launch()