initial commit
faceplugin committed · Commit 901e379 · 0 parent(s)

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
- .gitattributes +5 -0
- app.py +53 -0
- face_recognition/__pycache__/extract.cpython-310.pyc +0 -0
- face_recognition/__pycache__/match.cpython-310.pyc +0 -0
- face_recognition/app.py +94 -0
- face_recognition/extract.py +88 -0
- face_recognition/face_detect/__pycache__/detect_imgs.cpython-310.pyc +0 -0
- face_recognition/face_detect/check_gt_box.py +59 -0
- face_recognition/face_detect/create_fd_result.py +99 -0
- face_recognition/face_detect/detect_imgs.py +65 -0
- face_recognition/face_detect/models/pretrained/version-RFB-320.pth +3 -0
- face_recognition/face_detect/models/pretrained/version-RFB-640.pth +3 -0
- face_recognition/face_detect/models/pretrained/version-slim-320.pth +3 -0
- face_recognition/face_detect/models/pretrained/version-slim-640.pth +3 -0
- face_recognition/face_detect/models/voc-model-labels.txt +2 -0
- face_recognition/face_detect/requirements.txt +11 -0
- face_recognition/face_detect/vision/__init__.py +0 -0
- face_recognition/face_detect/vision/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/datasets/__init__.py +0 -0
- face_recognition/face_detect/vision/datasets/caffe_pb2.py +0 -0
- face_recognition/face_detect/vision/datasets/voc_dataset.py +146 -0
- face_recognition/face_detect/vision/nn/__init__.py +0 -0
- face_recognition/face_detect/vision/nn/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/nn/__pycache__/mb_tiny.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/nn/__pycache__/mb_tiny_RFB.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/nn/mb_tiny.py +51 -0
- face_recognition/face_detect/vision/nn/mb_tiny_RFB.py +118 -0
- face_recognition/face_detect/vision/nn/multibox_loss.py +46 -0
- face_recognition/face_detect/vision/ssd/__init__.py +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/data_preprocessing.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_RFB_fd.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_fd.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/predictor.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/ssd.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/config/__init__.py +0 -0
- face_recognition/face_detect/vision/ssd/config/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/config/__pycache__/fd_config.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/config/fd_config.py +41 -0
- face_recognition/face_detect/vision/ssd/data_preprocessing.py +61 -0
- face_recognition/face_detect/vision/ssd/mb_tiny_RFB_fd.py +64 -0
- face_recognition/face_detect/vision/ssd/mb_tiny_fd.py +64 -0
- face_recognition/face_detect/vision/ssd/predictor.py +70 -0
- face_recognition/face_detect/vision/ssd/ssd.py +166 -0
- face_recognition/face_detect/vision/transforms/__init__.py +0 -0
- face_recognition/face_detect/vision/transforms/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/transforms/__pycache__/transforms.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/transforms/transforms.py +541 -0
- face_recognition/face_detect/vision/utils/__init__.py +1 -0
- face_recognition/face_detect/vision/utils/__pycache__/__init__.cpython-310.pyc +0 -0
.gitattributes
ADDED
@@ -0,0 +1,5 @@
*.npy filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,53 @@
import sys
sys.path.append('.')
sys.path.append('./face_recognition')
import os
import io
import cv2
import base64
import json
import gradio as gr
import requests
import numpy as np
from io import BytesIO
import configparser
import numpy as np
from PIL import Image

from face_recognition.match import match_1_1

def face_recognition_on_file(file1, file2):
    img1 = cv2.imread(file1)
    img2 = cv2.imread(file2)

    response = match_1_1(img1, img2)

    return response


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # FacePlugin Online Demo

        """
    )

    with gr.TabItem("Face Recognition"):
        with gr.Row():
            with gr.Column():
                first_input = gr.Image(type='filepath')
                gr.Examples(['images/rec_5.jpg', 'images/rec_1.jpg', 'images/9.png', 'images/rec_3.jpg'],
                            inputs=first_input)
                start_button = gr.Button("Run")
            with gr.Column():
                second_input = gr.Image(type='filepath')
                gr.Examples(['images/rec_6.jpg', 'images/rec_2.jpg', 'images/10.jpg', 'images/rec_4.jpg'],
                            inputs=second_input)

            with gr.Column():
                app_output = [gr.JSON()]

    start_button.click(face_recognition_on_file, inputs=[first_input, second_input], outputs=app_output)

demo.queue().launch(share=True)
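For a quick local sanity check of the comparison function above, outside the Gradio UI, something like the following would work; the two image paths come from the example lists in the demo and are assumed to exist in the repository's images/ directory:

    # Hedged sketch: calls face_recognition_on_file() directly, bypassing the Gradio UI.
    # Assumes the example images shipped with this commit are present on disk.
    result = face_recognition_on_file('images/rec_1.jpg', 'images/rec_2.jpg')
    print(result)  # match_1_1 returns the JSON-serializable result rendered by gr.JSON in the UI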
face_recognition/__pycache__/extract.cpython-310.pyc
ADDED
Binary file (2.03 kB).
face_recognition/__pycache__/match.cpython-310.pyc
ADDED
Binary file (763 Bytes).
face_recognition/app.py
ADDED
@@ -0,0 +1,94 @@
import os
import cv2
import numpy as np
import base64
import face_manage.manage as db_manage
from flask import Flask, render_template, request, jsonify
from extract import GetImageInfo

app = Flask(__name__)

UPLOAD_FOLDER = os.path.basename('uploads')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


@app.route("/")
def start_page():
    print("Start")
    response = jsonify({"status": "Start"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/enroll")
def enroll():
    file = request.files['image']
    image = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    db_manage.open_database(0)
    count, boxes, scores, landmarks, alignimgs, features = GetImageInfo(image, 5)

    for idx in range(0, count):
        db_manage.register_face('sample name', idx, boxes[idx], landmarks[idx], alignimgs[idx], features[idx])

    # db_manage.clear_database()

    response = jsonify({"status": "True"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/delete/all")
def delete_all():
    db_manage.open_database(0)
    db_manage.clear_database()

    response = jsonify({"status": "True"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/match11")
def match_1_1():
    file1 = request.files['image1']
    file2 = request.files['image2']

    image1 = cv2.imdecode(np.fromstring(file1.read(), np.uint8), cv2.IMREAD_UNCHANGED)
    image2 = cv2.imdecode(np.fromstring(file2.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    count1, boxes1, scores1, landmarks1, alignimgs1, features1 = GetImageInfo(image1, 1)
    count2, boxes2, scores2, landmarks2, alignimgs2, features2 = GetImageInfo(image2, 1)

    if count1 != 0 and count2 != 0:
        sim = db_manage.get_similarity(features1[0], features2[0])
        if sim > db_manage.threshold:
            result = True
        else:
            result = False

    response = jsonify({"status": result})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/match1n")
def match_1_n():
    file = request.files['image']
    image = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    result, filename, sub_index = False, None, -1
    count, boxes, scores, landmarks, alignimgs, features = GetImageInfo(image, 1)

    for idx in range(count):
        id, fn, sub_id = db_manage.verify_face(features[idx])
        if id != -1:
            result, filename, sub_index = True, fn, id

    response = jsonify({"status": result, "filename": filename, "subIndex": sub_index})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response
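As a rough illustration of how these endpoints would be consumed (not part of the commit), the sketch below assumes the Flask service is running on localhost:5000 and that the routes are registered with methods=["POST"] so that request.files is populated; as written above, the decorators only accept GET.

    import requests

    # Hypothetical client for the /match11 route defined above.
    # Assumptions: local server, POST uploads enabled, and two face images on disk.
    with open("person_a.jpg", "rb") as f1, open("person_b.jpg", "rb") as f2:
        resp = requests.post("http://127.0.0.1:5000/match11",
                             files={"image1": f1, "image2": f2})
    print(resp.json())  # e.g. {"status": true} when the similarity exceeds db_manage.threshold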
face_recognition/extract.py
ADDED
@@ -0,0 +1,88 @@

import argparse
import cv2
import torch
import numpy as np
import ctypes
import os.path
import time

from face_detect.detect_imgs import get_face_boundingbox
from face_landmark.GetLandmark import get_face_landmark
from face_feature.GetFeature import get_face_feature
from face_pose.GetPose import get_face_pose
import face_manage.manage as db_manage

def GetImageInfo(image, faceMaxCount):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    ### Detection
    start_time = time.time() * 1000
    boxes, scores = get_face_boundingbox(image)
    boxes = boxes[:faceMaxCount]
    scores = scores[:faceMaxCount]
    count = len(boxes)
    bboxes = []
    bscores = []
    for idx in range(count):
        bboxes.append(boxes[idx].data.numpy())
        bscores.append(scores[idx].data.numpy())
    # print("Detection time = %s ms" % (time.time() * 1000 - start_time))

    ### Landmark
    start_time = time.time() * 1000
    landmarks = []  ### np.zeros((count, 136), dtype=np.float32)
    for idx in range(count):
        landmarks.append(get_face_landmark(gray_image, boxes[idx]).data.numpy())
    # print("Landmark time = %s ms" % (time.time() * 1000 - start_time))

    ### Pose
    poses = []
    for idx in range(count):
        poses.append(get_face_pose(boxes[idx], landmarks[idx]))

    ### Feature
    start_time = time.time() * 1000
    features = []
    alignimgs = []
    for idx in range(count):
        alignimg, feature = get_face_feature(image, landmarks[idx])
        features.append(feature)
        alignimgs.append(alignimg)
    print("Feature extraction time = %s ms" % (time.time() * 1000 - start_time))

    ####
    if 0:
        for idx in range(count):
            print_image = image.copy()
            box = boxes[idx].numpy()
            print(">>>>>>>>: ", box)
            landmark = landmarks[idx]
            cv2.rectangle(print_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
            for p in range(68):
                cv2.circle(print_image, (int(landmark[p * 2]), int(landmark[p * 2 + 1])), 1, (255, 255, 255))
            cv2.imshow("face recognition", print_image)
            cv2.waitKey()

    return count, bboxes, bscores, landmarks, alignimgs, features

def get_similarity(feat1, feat2):
    return (np.sum(feat1 * feat2) + 1) * 50

if __name__ == '__main__':
    threshold = 75
    test_directory = 'test'

    efn = os.getcwd() + "/test/1.png"
    img = cv2.imread(efn, cv2.IMREAD_COLOR)
    count, boxes, scores, landmarks, alignimgs, features1 = GetImageInfo(img, 5)

    vfn = os.getcwd() + "/test/3.png"
    img = cv2.imread(vfn, cv2.IMREAD_COLOR)
    count, boxes, scores, landmarks, alignimgs, features2 = GetImageInfo(img, 5)

    score = get_similarity(features1[0], features2[0])
    print('score = ', score)
    if score > threshold:
        print('same person')
    else:
        print('different person')
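For reference, the score produced by get_similarity above is an affine rescaling of a dot product: assuming the embeddings returned by get_face_feature are L2-normalized (not shown in this commit), (cos + 1) * 50 maps a cosine similarity in [-1, 1] to a score in [0, 100], so the threshold of 75 corresponds to a cosine of 0.5. A minimal check of that mapping:

    import numpy as np

    # Assumption: 512-dimensional, L2-normalized embeddings (the real dimension is not visible here).
    feat1 = np.random.randn(512); feat1 /= np.linalg.norm(feat1)
    feat2 = np.random.randn(512); feat2 /= np.linalg.norm(feat2)
    score = (np.sum(feat1 * feat2) + 1) * 50
    assert 0.0 <= score <= 100.0  # cosine -1 maps to 0, cosine +1 maps to 100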
face_recognition/face_detect/__pycache__/detect_imgs.cpython-310.pyc
ADDED
Binary file (2.05 kB).
face_recognition/face_detect/check_gt_box.py
ADDED
@@ -0,0 +1,59 @@
"""
This code is used to check the data size distribution in the dataset.
"""
import xml.etree.ElementTree as ET
from math import sqrt as sqrt

import cv2
import matplotlib.pyplot as plt

# sets = [("./data/wider_face_add_lm_10_10", "trainval")]
sets = [("./data/wider_face_add_lm_10_10", "test")]

classes = ['face']

if __name__ == '__main__':
    width = []
    height = []

    for image_set, set in sets:
        image_ids = open('{}/ImageSets/Main/{}.txt'.format(image_set, set)).read().strip().split()
        for image_id in image_ids:
            img_path = '{}/JPEGImages/{}.jpg'.format(image_set, image_id)
            label_file = open('{}/Annotations/{}.xml'.format(image_set, image_id))
            tree = ET.parse(label_file)
            root = tree.getroot()
            size = root.find('size')
            img_w = int(size.find('width').text)
            img_h = int(size.find('height').text)
            img = cv2.imread(img_path)
            for obj in root.iter('object'):
                difficult = obj.find('difficult').text
                cls = obj.find('name').text
                if cls not in classes or int(difficult) == 2:
                    continue
                cls_id = classes.index(cls)

                xmlbox = obj.find('bndbox')
                xmin = int(xmlbox.find('xmin').text)
                ymin = int(xmlbox.find('ymin').text)
                xmax = int(xmlbox.find('xmax').text)
                ymax = int(xmlbox.find('ymax').text)
                w = xmax - xmin
                h = ymax - ymin

                # img = cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 8)
                w_change = (w / img_w) * 320
                h_change = (h / img_h) * 240
                s = w_change * h_change
                if w_change / h_change > 6:
                    print("{}/{}/{}/{}".format(xmin, xmax, ymin, ymax))
                width.append(sqrt(s))
                height.append(w_change / h_change)
            print(img_path)
        # img = cv2.resize(img, (608, 608))
        # cv2.imwrite('{}_{}'.format(image_set.split('/')[-1], set), img)
        # cv2.waitKey()

    plt.plot(width, height, 'ro')
    plt.show()
face_recognition/face_detect/create_fd_result.py
ADDED
@@ -0,0 +1,99 @@
"""
This code is used to batch detect images in a folder.
"""
import argparse
import os
import sys

import cv2

from vision.ssd.config.fd_config import define_img_size

parser = argparse.ArgumentParser(description='detect_imgs')

parser.add_argument('--net_type', default="RFB", type=str,
                    help='The network architecture ,optional: RFB (higher precision) or slim (faster)')
parser.add_argument('--input_size', default=320, type=int,
                    help='define network input size,default optional value 128/160/320/480/640/1280')
parser.add_argument('--threshold', default=0.65, type=float,
                    help='score threshold')
parser.add_argument('--candidate_size', default=1500, type=int,
                    help='nms candidate size')
parser.add_argument('--path', default="D:/Database/face_detect/test/originalPics", type=str,
                    help='imgs dir')
parser.add_argument('--test_device', default="cpu", type=str,
                    help='cuda:0 or cpu')
args = parser.parse_args()
define_img_size(args.input_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

result_path = "./detect_imgs_results"
label_path = "./models/voc-model-labels.txt"
fd_result_path = 'D:/Database/face_detect/test/rfb_fd_result.txt'
fddb_txt_path = 'D:/Database/face_detect/test/FDDB-folds/FDDB-fold-01-10_2845.txt'

test_device = args.test_device

class_names = [name.strip() for name in open(label_path).readlines()]
if args.net_type == 'slim':
    model_path = "models/pretrained/version-slim-320.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=args.candidate_size, device=test_device)
elif args.net_type == 'RFB':
    model_path = "models/pretrained/version-RFB-320.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

def get_file_names(dir_path):
    file_list = os.listdir(dir_path)
    total_file_list = list()

    for entry in file_list:
        full_path = os.path.join(dir_path, entry)
        if (os.path.isdir(full_path)):
            total_file_list = total_file_list + get_file_names(full_path)
        else:
            total_file_list.append(full_path)

    return total_file_list

def get_file_paths(txt_path):
    path_list = list()
    with open(txt_path, "r") as txt_file:
        for line in txt_file:
            path_list.append(line.strip())

    return path_list

if __name__ == '__main__':
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    listdir = get_file_paths(fddb_txt_path)

    total_count = 0
    correct_count = 0
    for file_path in listdir:
        filename = file_path
        img_path = os.path.join(args.path, filename)
        orig_image = cv2.imread(img_path + ".jpg")
        if orig_image is None:
            continue

        print("filename: ", filename)
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
        boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold)

        with open(fd_result_path, "a") as fd_result_file:
            print(filename, file=fd_result_file)
            print(boxes.size(0), file=fd_result_file)
            for i in range(boxes.size(0)):
                box = boxes[i, :]
                score = f"{probs[i]:.3f}"
                print(f"{box[0]:.3f}", f"{box[1]:.3f}", f"{box[2] - box[0]:.3f}", f"{box[3] - box[1]:.3f}", score, file=fd_result_file)
face_recognition/face_detect/detect_imgs.py
ADDED
@@ -0,0 +1,65 @@
"""
This code is used to batch detect images in a folder.
"""

import os
import sys
import cv2
import numpy as np
import torch

from face_detect.vision.ssd.config.fd_config import define_img_size

input_size = 320
test_device = 'cpu'
net_type = 'slim'
threshold = 0.6
candidate_size = 1500

define_img_size(input_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from face_detect.vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from face_detect.vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

label_path = "./face_recognition/face_detect/models/voc-model-labels.txt"
test_device = test_device

class_names = [name.strip() for name in open(label_path).readlines()]
if net_type == 'slim':
    model_path = "./face_recognition/face_detect/models/pretrained/version-slim-320.pth"
    # model_path = "./face_detect/models/pretrained/version-slim-640.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device)
elif net_type == 'RFB':
    model_path = "./face_recognition/face_detect/models/pretrained/version-RFB-320.pth"
    # model_path = "./face_detect/models/pretrained/version-RFB-640.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

def get_face_boundingbox(orig_image):
    """
    Description:
        In input image, detect face

    Args:
        orig_image: input BGR image.
    """
    boxes, labels, probs = predictor.predict(cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB), candidate_size / 2, threshold)

    if len(boxes) == 0:
        return torch.tensor([]), torch.tensor([])

    height, width, _ = orig_image.shape
    valid_face = np.logical_and(
        np.logical_and(boxes[:, 0] >= 0, boxes[:, 1] >= 0),
        np.logical_and(boxes[:, 2] < width, boxes[:, 3] < height)
    )

    boxes = boxes[valid_face]
    probs = probs[valid_face]

    return boxes, probs
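A minimal usage sketch for get_face_boundingbox, assuming the script is run from the repository root (the model and label paths above are relative to it) and that one of the example images from this commit is available:

    import sys
    sys.path.append('./face_recognition')   # mirrors the path setup in the top-level app.py
    import cv2
    from face_detect.detect_imgs import get_face_boundingbox

    img = cv2.imread("images/rec_1.jpg")     # example image referenced by the Gradio demo
    boxes, probs = get_face_boundingbox(img)  # pixel-coordinate boxes and their face scores
    for box in boxes:
        x1, y1, x2, y2 = [int(v) for v in box]
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imwrite("detections.jpg", img)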
face_recognition/face_detect/models/pretrained/version-RFB-320.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c722b4427cc71642768baef6e15c659931b56f07425e5d2b0ec033ad41b145b3
size 1168374
face_recognition/face_detect/models/pretrained/version-RFB-640.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf34512b1a93dc234178e8a701ecf25c6afddf335a3226accf62982536e160b5
size 1168354
face_recognition/face_detect/models/pretrained/version-slim-320.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd24abce45da5dbc7cfd8167cd3d5f955382dfc9d9ae9459f0026abd3c2e38a4
size 1091283
face_recognition/face_detect/models/pretrained/version-slim-640.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02ca778098127c46d2b2680f1c398c7b993c12a424e94c34e6d608beb73481e4
size 1091287
face_recognition/face_detect/models/voc-model-labels.txt
ADDED
@@ -0,0 +1,2 @@
BACKGROUND
face
face_recognition/face_detect/requirements.txt
ADDED
@@ -0,0 +1,11 @@
numpy
torch
opencv_python
torchvision
typing
torchstat
torchsummary
ptflops
matplotlib
onnx
onnxruntime
face_recognition/face_detect/vision/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (173 Bytes).
face_recognition/face_detect/vision/datasets/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/datasets/caffe_pb2.py
ADDED
The diff for this file is too large to render.
See raw diff
face_recognition/face_detect/vision/datasets/voc_dataset.py
ADDED
@@ -0,0 +1,146 @@
import logging
import os
import pathlib
import xml.etree.ElementTree as ET
import h5py
import cv2
import numpy as np
import lmdb
from .caffe_pb2 import *

class VOCDataset:

    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
        """Dataset for VOC data.
        Args:
            root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
                Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
        """
        self.root = "D:/test"
        self.transform = transform
        self.target_transform = target_transform
        if is_test:
            image_sets_file = self.root + '/test.txt'
        else:
            image_sets_file = self.root + '/test.txt'
        self.ids = ['1.hdf5']  # VOCDataset._read_image_ids(image_sets_file)
        self.keep_difficult = keep_difficult

        # if the labels file exists, read in the class names
        label_file_name = self.root + "labels.txt"

        if os.path.isfile(label_file_name):
            class_string = ""
            with open(label_file_name, 'r') as infile:
                for line in infile:
                    class_string += line.rstrip()

            # classes should be a comma separated list

            classes = class_string.split(',')
            # prepend BACKGROUND as first class
            classes.insert(0, 'BACKGROUND')
            classes = [elem.replace(" ", "") for elem in classes]
            self.class_names = tuple(classes)
            logging.info("VOC Labels read from file: " + str(self.class_names))

        else:
            logging.info("No labels file, using default VOC classes.")
            self.class_names = ('BACKGROUND',
                                'face')

        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

    # def __getitem__(self, index):
    #     image_id = self.ids[index]
    #     boxes, labels, is_difficult = self._get_annotation(image_id)
    #     if not self.keep_difficult:
    #         boxes = boxes[is_difficult == 0]
    #         labels = labels[is_difficult == 0]
    #     image = self._read_image(image_id)
    #     if self.transform:
    #         image, boxes, labels = self.transform(image, boxes, labels)
    #     if self.target_transform:
    #         boxes, labels = self.target_transform(boxes, labels)
    #     return image, boxes, labels

    def __getitem__(self, index):
        num_per_shared = 3
        file_idx = index // num_per_shared
        idx_in_file = index % num_per_shared
        hdf_path = os.path.join(self.root, self.ids[file_idx])
        with h5py.File(hdf_path, 'r') as f:
            boxes = f[str(idx_in_file) + '_boxes']
            is_difficult = f[str(idx_in_file) + '_difficult']
            image = f[str(idx_in_file) + '_image']
            labels = f[str(idx_in_file) + 'labels']

        if not self.keep_difficult:
            boxes = boxes[is_difficult == 0]
            labels = labels[is_difficult == 0]
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)

        return image, boxes, labels

    def get_image(self, index):
        image_id = self.ids[index]
        image = self._read_image(image_id)
        if self.transform:
            image, _ = self.transform(image)
        return image

    def get_annotation(self, index):
        image_id = self.ids[index]
        return image_id, self._get_annotation(image_id)

    def __len__(self):
        total = 0
        # for file in self.ids:
        #     hdf_path = os.path.join(self.root, file)
        #     f = h5py.File(hdf_path, 'r')
        #     total += len(f.keys())
        return total // 4

    @staticmethod
    def _read_image_ids(image_sets_file):
        ids = []
        with open(image_sets_file) as f:
            for line in f:
                ids.append(line.rstrip())
        return ids

    def _get_annotation(self, image_id):
        annotation_file = self.root / f"Annotations/{image_id}.xml"
        objects = ET.parse(annotation_file).findall("object")
        boxes = []
        labels = []
        is_difficult = []
        for object in objects:
            class_name = object.find('name').text.lower().strip()
            # we're only concerned with clases in our list
            if class_name in self.class_dict:
                bbox = object.find('bndbox')

                # VOC dataset format follows Matlab, in which indexes start from 0
                x1 = float(bbox.find('xmin').text) - 1
                y1 = float(bbox.find('ymin').text) - 1
                x2 = float(bbox.find('xmax').text) - 1
                y2 = float(bbox.find('ymax').text) - 1
                boxes.append([x1, y1, x2, y2])

                labels.append(self.class_dict[class_name])
                is_difficult_str = object.find('difficult').text
                is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)

        return (np.array(boxes, dtype=np.float32),
                np.array(labels, dtype=np.int64),
                np.array(is_difficult, dtype=np.uint8))

    def _read_image(self, image_id):
        image_file = self.root / f"JPEGImages/{image_id}.jpg"
        image = cv2.imread(str(image_file))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
face_recognition/face_detect/vision/nn/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/nn/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (176 Bytes).
face_recognition/face_detect/vision/nn/__pycache__/mb_tiny.cpython-310.pyc
ADDED
Binary file (1.8 kB).
face_recognition/face_detect/vision/nn/__pycache__/mb_tiny_RFB.cpython-310.pyc
ADDED
Binary file (4.01 kB).
face_recognition/face_detect/vision/nn/mb_tiny.py
ADDED
@@ -0,0 +1,51 @@
import torch.nn as nn
import torch.nn.functional as F


class Mb_Tiny(nn.Module):

    def __init__(self, num_classes=2):
        super(Mb_Tiny, self).__init__()
        self.base_channel = 8 * 2

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, self.base_channel, 2),  # 160*120
            conv_dw(self.base_channel, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 2, 2),  # 80*60
            conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 4, 2),  # 40*30
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 8, 2),  # 20*15
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 16, 2),  # 10*8
            conv_dw(self.base_channel * 16, self.base_channel * 16, 1)
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x
face_recognition/face_detect/vision/nn/mb_tiny_RFB.py
ADDED
@@ -0,0 +1,118 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicConv(nn.Module):

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        if bn:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False)
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
            self.relu = nn.ReLU(inplace=True) if relu else None
        else:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True)
            self.bn = None
            self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class BasicRFB(nn.Module):

    def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8, vision=1, groups=1):
        super(BasicRFB, self).__init__()
        self.scale = scale
        self.out_channels = out_planes
        inter_planes = in_planes // map_reduce

        self.branch0 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 1, dilation=vision + 1, relu=False, groups=groups)
        )
        self.branch1 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups)
        )
        self.branch2 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=3, stride=1, padding=1, groups=groups),
            BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups)
        )

        self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False)
        self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)

        out = torch.cat((x0, x1, x2), 1)
        out = self.ConvLinear(out)
        short = self.shortcut(x)
        out = out * self.scale + short
        out = self.relu(out)

        return out


class Mb_Tiny_RFB(nn.Module):

    def __init__(self, num_classes=2):
        super(Mb_Tiny_RFB, self).__init__()
        self.base_channel = 8 * 2

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, self.base_channel, 2),  # 160*120
            conv_dw(self.base_channel, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 2, 2),  # 80*60
            conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 4, 2),  # 40*30
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            BasicRFB(self.base_channel * 4, self.base_channel * 4, stride=1, scale=1.0),
            conv_dw(self.base_channel * 4, self.base_channel * 8, 2),  # 20*15
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 16, 2),  # 10*8
            conv_dw(self.base_channel * 16, self.base_channel * 16, 1)
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x
face_recognition/face_detect/vision/nn/multibox_loss.py
ADDED
@@ -0,0 +1,46 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..utils import box_utils


class MultiboxLoss(nn.Module):
    def __init__(self, priors, neg_pos_ratio,
                 center_variance, size_variance, device):
        """Implement SSD Multibox Loss.

        Basically, Multibox loss combines classification loss
        and Smooth L1 regression loss.
        """
        super(MultiboxLoss, self).__init__()
        self.neg_pos_ratio = neg_pos_ratio
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.priors = priors
        self.priors.to(device)

    def forward(self, confidence, predicted_locations, labels, gt_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors.
            boxes (batch_size, num_priors, 4): real boxes corresponding all the priors.
        """
        num_classes = confidence.size(2)
        with torch.no_grad():
            # derived from cross_entropy=sum(log(p))
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], reduction='sum')
        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum')  # smooth_l1_loss
        # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum')  # l2 loss
        num_pos = gt_locations.size(0)
        return smooth_l1_loss / num_pos, classification_loss / num_pos
face_recognition/face_detect/vision/ssd/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/ssd/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (177 Bytes).
face_recognition/face_detect/vision/ssd/__pycache__/data_preprocessing.cpython-310.pyc
ADDED
Binary file (2.59 kB).
face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_RFB_fd.cpython-310.pyc
ADDED
Binary file (2.04 kB).
face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_fd.cpython-310.pyc
ADDED
Binary file (2.02 kB).
face_recognition/face_detect/vision/ssd/__pycache__/predictor.cpython-310.pyc
ADDED
Binary file (2.21 kB).
face_recognition/face_detect/vision/ssd/__pycache__/ssd.cpython-310.pyc
ADDED
Binary file (5.79 kB).
face_recognition/face_detect/vision/ssd/config/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/ssd/config/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (184 Bytes).
face_recognition/face_detect/vision/ssd/config/__pycache__/fd_config.cpython-310.pyc
ADDED
Binary file (1.53 kB).
face_recognition/face_detect/vision/ssd/config/fd_config.py
ADDED
@@ -0,0 +1,41 @@
import numpy as np

from face_detect.vision.utils.box_utils import generate_priors

image_mean_test = image_mean = np.array([127, 127, 127])
image_std = 128.0
iou_threshold = 0.3
center_variance = 0.1
size_variance = 0.2

min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
shrinkage_list = []
image_size = [320, 240]  # default input size 320*240
feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]]  # default feature map size
priors = []


def define_img_size(size):
    global image_size, feature_map_w_h_list, priors
    img_size_dict = {128: [128, 96],
                     160: [160, 120],
                     320: [320, 240],
                     480: [480, 360],
                     640: [640, 480],
                     1280: [1280, 960]}
    image_size = img_size_dict[size]

    feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]],
                                 160: [[20, 10, 5, 3], [15, 8, 4, 2]],
                                 320: [[40, 20, 10, 5], [30, 15, 8, 4]],
                                 480: [[60, 30, 15, 8], [45, 23, 12, 6]],
                                 640: [[80, 40, 20, 10], [60, 30, 15, 8]],
                                 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]}
    feature_map_w_h_list = feature_map_w_h_list_dict[size]

    for i in range(0, len(image_size)):
        item_list = []
        for k in range(0, len(feature_map_w_h_list[i])):
            item_list.append(image_size[i] / feature_map_w_h_list[i][k])
        shrinkage_list.append(item_list)
    priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes)
face_recognition/face_detect/vision/ssd/data_preprocessing.py
ADDED
@@ -0,0 +1,61 @@
from ..transforms.transforms import *


class TrainAugmentation:
    def __init__(self, size, mean=0, std=1.0):
        """
        Args:
            size: the size the of final image.
            mean: mean pixel value per channel.
        """
        self.mean = mean
        self.size = size
        self.augment = Compose([
            ConvertFromInts(),
            PhotometricDistort(),
            RandomSampleCrop_v2(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            SubtractMeans(self.mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, img, boxes, labels):
        """

        Args:
            img: the output of cv.imread in RGB layout.
            boxes: boundding boxes in the form of (x1, y1, x2, y2).
            labels: labels of boxes.
        """
        return self.augment(img, boxes, labels)


class TestTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            ToPercentCoords(),
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, image, boxes, labels):
        return self.transform(image, boxes, labels)


class PredictionTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor()
        ])

    def __call__(self, image):
        image, _, _ = self.transform(image)
        return image
face_recognition/face_detect/vision/ssd/mb_tiny_RFB_fd.py
ADDED
@@ -0,0 +1,64 @@
from torch.nn import Conv2d, Sequential, ModuleList, ReLU

from face_detect.vision.nn.mb_tiny_RFB import Mb_Tiny_RFB
from face_detect.vision.ssd.config import fd_config as config
from face_detect.vision.ssd.predictor import Predictor
from face_detect.vision.ssd.ssd import SSD


def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
    """
    return Sequential(
        Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
               groups=in_channels, stride=stride, padding=padding),
        ReLU(),
        Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
    )


def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"):
    base_net = Mb_Tiny_RFB(2)
    base_net_model = base_net.model  # disable dropout layer

    source_layer_indexes = [
        8,
        11,
        13
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1),
            ReLU(),
            SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1)
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1)
    ])

    return SSD(num_classes, base_net_model, source_layer_indexes,
               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)


def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
    predictor = Predictor(net, config.image_size, config.image_mean_test,
                          config.image_std,
                          nms_method=nms_method,
                          iou_threshold=config.iou_threshold,
                          candidate_size=candidate_size,
                          sigma=sigma,
                          device=device)
    return predictor
face_recognition/face_detect/vision/ssd/mb_tiny_fd.py
ADDED
@@ -0,0 +1,64 @@
from torch.nn import Conv2d, Sequential, ModuleList, ReLU

from face_detect.vision.nn.mb_tiny import Mb_Tiny
from face_detect.vision.ssd.config import fd_config as config
from face_detect.vision.ssd.predictor import Predictor
from face_detect.vision.ssd.ssd import SSD


def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
    """
    return Sequential(
        Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
               groups=in_channels, stride=stride, padding=padding),
        ReLU(),
        Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
    )


def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"):
    base_net = Mb_Tiny(2)
    base_net_model = base_net.model  # disable dropout layer

    source_layer_indexes = [
        8,
        11,
        13
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1),
            ReLU(),
            SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1)
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1)
    ])

    return SSD(num_classes, base_net_model, source_layer_indexes,
               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)


def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
    predictor = Predictor(net, config.image_size, config.image_mean_test,
                          config.image_std,
                          nms_method=nms_method,
                          iou_threshold=config.iou_threshold,
                          candidate_size=candidate_size,
                          sigma=sigma,
                          device=device)
    return predictor
face_recognition/face_detect/vision/ssd/predictor.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch

from ..utils import box_utils
from .data_preprocessing import PredictionTransform
from ..utils.misc import Timer


class Predictor:
    def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None,
                 iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None):
        self.net = net
        self.transform = PredictionTransform(size, mean, std)
        self.iou_threshold = iou_threshold
        self.filter_threshold = filter_threshold
        self.candidate_size = candidate_size
        self.nms_method = nms_method

        self.sigma = sigma
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.net.to(self.device)
        self.net.eval()

        self.timer = Timer()

    def predict(self, image, top_k=-1, prob_threshold=None):
        cpu_device = torch.device("cpu")
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            for i in range(1):
                scores, boxes = self.net.forward(images)
        boxes = boxes[0]
        scores = scores[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = box_utils.nms(box_probs, self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]
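For orientation (not part of the commit): a minimal sketch of calling Predictor.predict on one image, given the predictor built by create_mb_tiny_fd_predictor above. The BGR-to-RGB conversion mirrors the upstream demo and is an assumption here; the file name is illustrative.

    import cv2
    orig = cv2.imread("test.jpg")                    # BGR uint8, shape (H, W, 3)
    image = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)    # assumed preprocessing, as in the upstream demo
    boxes, labels, probs = predictor.predict(image, top_k=500, prob_threshold=0.6)
    # boxes:  (N, 4) corner-form boxes already rescaled to pixel coordinates
    # labels: (N,) class indices (1 = face for the two-class model)
    # probs:  (N,) post-softmax confidences that survived NMS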
face_recognition/face_detect/vision/ssd/ssd.py
ADDED
@@ -0,0 +1,166 @@
from collections import namedtuple
from typing import List, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from face_detect.vision.utils import box_utils

GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1'])


class SSD(nn.Module):
    def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_indexes: List[int],
                 extras: nn.ModuleList, classification_headers: nn.ModuleList,
                 regression_headers: nn.ModuleList, is_test=False, config=None, device=None):
        """Compose a SSD model using the given components.
        """
        super(SSD, self).__init__()

        self.num_classes = num_classes
        self.base_net = base_net
        self.source_layer_indexes = source_layer_indexes
        self.extras = extras
        self.classification_headers = classification_headers
        self.regression_headers = regression_headers
        self.is_test = is_test
        self.config = config

        # register layers in source_layer_indexes by adding them to a module list
        self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes
                                                   if isinstance(t, tuple) and not isinstance(t, GraphPath)])
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        if is_test:
            self.config = config
            self.priors = config.priors.to(self.device)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        confidences = []
        locations = []
        start_layer_index = 0
        header_index = 0
        end_layer_index = 0
        for end_layer_index in self.source_layer_indexes:
            if isinstance(end_layer_index, GraphPath):
                path = end_layer_index
                end_layer_index = end_layer_index.s0
                added_layer = None
            elif isinstance(end_layer_index, tuple):
                added_layer = end_layer_index[1]
                end_layer_index = end_layer_index[0]
                path = None
            else:
                added_layer = None
                path = None
            for layer in self.base_net[start_layer_index: end_layer_index]:
                x = layer(x)
            if added_layer:
                y = added_layer(x)
            else:
                y = x
            if path:
                sub = getattr(self.base_net[end_layer_index], path.name)
                for layer in sub[:path.s1]:
                    x = layer(x)
                y = x
                for layer in sub[path.s1:]:
                    x = layer(x)
                end_layer_index += 1
            start_layer_index = end_layer_index
            confidence, location = self.compute_header(header_index, y)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)

        for layer in self.base_net[end_layer_index:]:
            x = layer(x)

        for layer in self.extras:
            x = layer(x)
            confidence, location = self.compute_header(header_index, x)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)

        confidences = torch.cat(confidences, 1)
        locations = torch.cat(locations, 1)

        if self.is_test:
            confidences = F.softmax(confidences, dim=2)
            boxes = box_utils.convert_locations_to_boxes(
                locations, self.priors, self.config.center_variance, self.config.size_variance
            )
            boxes = box_utils.center_form_to_corner_form(boxes)
            return confidences, boxes
        else:
            return confidences, locations

    def compute_header(self, i, x):
        confidence = self.classification_headers[i](x)
        confidence = confidence.permute(0, 2, 3, 1).contiguous()
        confidence = confidence.view(confidence.size(0), -1, self.num_classes)

        location = self.regression_headers[i](x)
        location = location.permute(0, 2, 3, 1).contiguous()
        location = location.view(location.size(0), -1, 4)

        return confidence, location

    def init_from_base_net(self, model):
        self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True)
        self.source_layer_add_ons.apply(_xavier_init_)
        self.extras.apply(_xavier_init_)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def init_from_pretrained_ssd(self, model):
        state_dict = torch.load(model, map_location=lambda storage, loc: storage)
        state_dict = {k: v for k, v in state_dict.items() if not (k.startswith("classification_headers") or k.startswith("regression_headers"))}
        model_dict = self.state_dict()
        model_dict.update(state_dict)
        self.load_state_dict(model_dict)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def init(self):
        self.base_net.apply(_xavier_init_)
        self.source_layer_add_ons.apply(_xavier_init_)
        self.extras.apply(_xavier_init_)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def load(self, model):
        self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))

    def save(self, model_path):
        torch.save(self.state_dict(), model_path)


class MatchPrior(object):
    def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
        self.center_form_priors = center_form_priors
        self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.iou_threshold = iou_threshold

    def __call__(self, gt_boxes, gt_labels):
        if type(gt_boxes) is np.ndarray:
            gt_boxes = torch.from_numpy(gt_boxes)
        if type(gt_labels) is np.ndarray:
            gt_labels = torch.from_numpy(gt_labels)
        boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
                                                self.corner_form_priors, self.iou_threshold)
        boxes = box_utils.corner_form_to_center_form(boxes)
        locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
        return locations, labels


def _xavier_init_(m: nn.Module):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)
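For orientation (not part of the commit): what SSD.forward returns in its two modes, read directly from the code above; the variable names in the sketch are illustrative.

    # x: (batch, 3, H, W) float tensor
    # is_test=False -> raw head outputs for training:
    #   confidences: (batch, num_priors, num_classes)
    #   locations:   (batch, num_priors, 4)  offsets relative to the priors
    # is_test=True  -> scores are softmaxed and offsets are decoded against config.priors:
    #   scores: (batch, num_priors, num_classes)
    #   boxes:  (batch, num_priors, 4) corner-form boxes in [0, 1], which is what Predictor consumes
    scores, boxes = net(x)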
face_recognition/face_detect/vision/transforms/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/transforms/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (184 Bytes)

face_recognition/face_detect/vision/transforms/__pycache__/transforms.cpython-310.pyc
ADDED
Binary file (15.2 kB)
face_recognition/face_detect/vision/transforms/transforms.py
ADDED
@@ -0,0 +1,541 @@
# from https://github.com/amdegroot/ssd.pytorch


import types

import cv2
import numpy as np
import torch
from numpy import random
from torchvision import transforms


def intersect(box_a, box_b):
    max_xy = np.minimum(box_a[:, 2:], box_b[2:])
    min_xy = np.maximum(box_a[:, :2], box_b[:2])
    inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
    return inter[:, 0] * inter[:, 1]


def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A,B]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # [A,B]
    union = area_a + area_b - inter
    return inter / union  # [A,B]


def object_converage_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A,B]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # [A,B]
    return inter / area_a  # [A,B]


class Compose(object):
    """Composes several augmentations together.
    Args:
        transforms (List[Transform]): list of transforms to compose.
    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        for t in self.transforms:
            img, boxes, labels = t(img, boxes, labels)
        return img, boxes, labels


class Lambda(object):
    """Applies a lambda as a transform."""

    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        return self.lambd(img, boxes, labels)


class ConvertFromInts(object):
    def __call__(self, image, boxes=None, labels=None):
        return image.astype(np.float32), boxes, labels


class SubtractMeans(object):
    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image -= self.mean
        return image.astype(np.float32), boxes, labels


class imgprocess(object):
    def __init__(self, std):
        self.std = np.array(std, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image /= self.std
        return image.astype(np.float32), boxes, labels


class ToAbsoluteCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] *= width
        boxes[:, 2] *= width
        boxes[:, 1] *= height
        boxes[:, 3] *= height

        return image, boxes, labels


class ToPercentCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] /= width
        boxes[:, 2] /= width
        boxes[:, 1] /= height
        boxes[:, 3] /= height

        return image, boxes, labels


class Resize(object):
    def __init__(self, size=(300, 300)):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        image = cv2.resize(image, (self.size[0],
                                   self.size[1]))
        return image, boxes, labels


class RandomSaturation(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)

        return image, boxes, labels


class RandomHue(object):
    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 0] += random.uniform(-self.delta, self.delta)
            image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
            image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
        return image, boxes, labels


class RandomLightingNoise(object):
    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            swap = self.perms[random.randint(len(self.perms))]
            shuffle = SwapChannels(swap)  # shuffle channels
            image = shuffle(image)
        return image, boxes, labels


class ConvertColor(object):
    def __init__(self, current, transform):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'RGB' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        elif self.current == 'BGR' and self.transform == 'RGB':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        elif self.current == 'HSV' and self.transform == "RGB":
            image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
        else:
            raise NotImplementedError
        return image, boxes, labels


class RandomContrast(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    # expects float image
    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            alpha = random.uniform(self.lower, self.upper)
            image *= alpha
        return image, boxes, labels


class RandomBrightness(object):
    def __init__(self, delta=32):
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            delta = random.uniform(-self.delta, self.delta)
            image += delta
        return image, boxes, labels


class ToCV2Image(object):
    def __call__(self, tensor, boxes=None, labels=None):
        return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels


class ToTensor(object):
    def __call__(self, cvimage, boxes=None, labels=None):
        return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels


class RandomSampleCrop(object):
    """Crop
    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trails (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes that above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in all gt boxes that under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # mask in that both m1 and m2 are true
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class RandomSampleCrop_v2(object):
    """Crop
    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9

            # randomly sample a patch
            (1, None),
            (1, None),
            (1, None),
            (1, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trails (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w != 1:
                    continue
                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = object_converage_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes that above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in all gt boxes that under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # mask in that both m1 and m2 are true
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class Expand(object):
    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        expand_image = np.zeros(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        expand_image[:, :, :] = self.mean
        expand_image[int(top):int(top + height),
                     int(left):int(left + width)] = image
        image = expand_image

        boxes = boxes.copy()
        boxes[:, :2] += (int(left), int(top))
        boxes[:, 2:] += (int(left), int(top))

        return image, boxes, labels


class RandomMirror(object):
    def __call__(self, image, boxes, classes):
        _, width, _ = image.shape
        if random.randint(2):
            image = image[:, ::-1]
            boxes = boxes.copy()
            boxes[:, 0::2] = width - boxes[:, 2::-2]
        return image, boxes, classes


class SwapChannels(object):
    """Transforms a tensorized image by swapping the channels in the order
     specified in the swap tuple.
    Args:
        swaps (int triple): final order of channels
            eg: (2, 1, 0)
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """
        Args:
            image (Tensor): image tensor to be transformed
        Return:
            a tensor with channels swapped according to swap
        """
        # if torch.is_tensor(image):
        #     image = image.data.cpu().numpy()
        # else:
        #     image = np.array(image)
        image = image[:, :, self.swaps]
        return image


class PhotometricDistort(object):
    def __init__(self):
        self.pd = [
            RandomContrast(),  # RGB
            ConvertColor(current="RGB", transform='HSV'),  # HSV
            RandomSaturation(),  # HSV
            RandomHue(),  # HSV
            ConvertColor(current='HSV', transform='RGB'),  # RGB
            RandomContrast()  # RGB
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        if random.randint(2):
            distort = Compose(self.pd[:-1])
        else:
            distort = Compose(self.pd[1:])
        im, boxes, labels = distort(im, boxes, labels)
        return self.rand_light_noise(im, boxes, labels)
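For orientation (not part of the commit): a sketch of how these transforms compose into a train-time augmentation pipeline. The composition actually used by this repo lives in data_preprocessing.py (added earlier in this commit); the ordering and the mean/std/size values below are assumptions in the style of the upstream TrainAugmentation helper, and boxes are assumed to arrive in absolute pixel coordinates (VOC-style).

    train_transform = Compose([
        ConvertFromInts(),       # uint8 -> float32
        PhotometricDistort(),    # brightness/contrast/saturation/hue jitter
        Expand(mean=127),        # random zoom-out onto a mean-filled canvas
        RandomSampleCrop(),      # IoU-constrained random crop
        RandomMirror(),          # horizontal flip with box remapping
        ToPercentCoords(),       # boxes back to [0, 1]
        Resize((320, 240)),      # network input size (assumed)
        SubtractMeans(127),
        imgprocess(128.0),       # divide by std
        ToTensor(),              # HWC ndarray -> CHW torch tensor
    ])
    image, boxes, labels = train_transform(image, boxes, labels)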
face_recognition/face_detect/vision/utils/__init__.py
ADDED
@@ -0,0 +1 @@
from .misc import *
face_recognition/face_detect/vision/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (200 Bytes)