initial commit
faceplugin committed · Commit 901e379 · 0 parent(s)

This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
- .gitattributes +5 -0
- app.py +53 -0
- face_recognition/__pycache__/extract.cpython-310.pyc +0 -0
- face_recognition/__pycache__/match.cpython-310.pyc +0 -0
- face_recognition/app.py +94 -0
- face_recognition/extract.py +88 -0
- face_recognition/face_detect/__pycache__/detect_imgs.cpython-310.pyc +0 -0
- face_recognition/face_detect/check_gt_box.py +59 -0
- face_recognition/face_detect/create_fd_result.py +99 -0
- face_recognition/face_detect/detect_imgs.py +65 -0
- face_recognition/face_detect/models/pretrained/version-RFB-320.pth +3 -0
- face_recognition/face_detect/models/pretrained/version-RFB-640.pth +3 -0
- face_recognition/face_detect/models/pretrained/version-slim-320.pth +3 -0
- face_recognition/face_detect/models/pretrained/version-slim-640.pth +3 -0
- face_recognition/face_detect/models/voc-model-labels.txt +2 -0
- face_recognition/face_detect/requirements.txt +11 -0
- face_recognition/face_detect/vision/__init__.py +0 -0
- face_recognition/face_detect/vision/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/datasets/__init__.py +0 -0
- face_recognition/face_detect/vision/datasets/caffe_pb2.py +0 -0
- face_recognition/face_detect/vision/datasets/voc_dataset.py +146 -0
- face_recognition/face_detect/vision/nn/__init__.py +0 -0
- face_recognition/face_detect/vision/nn/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/nn/__pycache__/mb_tiny.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/nn/__pycache__/mb_tiny_RFB.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/nn/mb_tiny.py +51 -0
- face_recognition/face_detect/vision/nn/mb_tiny_RFB.py +118 -0
- face_recognition/face_detect/vision/nn/multibox_loss.py +46 -0
- face_recognition/face_detect/vision/ssd/__init__.py +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/data_preprocessing.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_RFB_fd.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_fd.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/predictor.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/__pycache__/ssd.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/config/__init__.py +0 -0
- face_recognition/face_detect/vision/ssd/config/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/config/__pycache__/fd_config.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/ssd/config/fd_config.py +41 -0
- face_recognition/face_detect/vision/ssd/data_preprocessing.py +61 -0
- face_recognition/face_detect/vision/ssd/mb_tiny_RFB_fd.py +64 -0
- face_recognition/face_detect/vision/ssd/mb_tiny_fd.py +64 -0
- face_recognition/face_detect/vision/ssd/predictor.py +70 -0
- face_recognition/face_detect/vision/ssd/ssd.py +166 -0
- face_recognition/face_detect/vision/transforms/__init__.py +0 -0
- face_recognition/face_detect/vision/transforms/__pycache__/__init__.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/transforms/__pycache__/transforms.cpython-310.pyc +0 -0
- face_recognition/face_detect/vision/transforms/transforms.py +541 -0
- face_recognition/face_detect/vision/utils/__init__.py +1 -0
- face_recognition/face_detect/vision/utils/__pycache__/__init__.cpython-310.pyc +0 -0
.gitattributes
ADDED
@@ -0,0 +1,5 @@
*.npy filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
app.py
ADDED
@@ -0,0 +1,53 @@
import sys
sys.path.append('.')
sys.path.append('./face_recognition')
import os
import io
import cv2
import base64
import json
import gradio as gr
import requests
import numpy as np
from io import BytesIO
import configparser
import numpy as np
from PIL import Image

from face_recognition.match import match_1_1

def face_recognition_on_file(file1, file2):
    img1 = cv2.imread(file1)
    img2 = cv2.imread(file2)

    response = match_1_1(img1, img2)

    return response


with gr.Blocks() as demo:
    gr.Markdown(
        """
        # FacePlugin Online Demo

        """
    )

    with gr.TabItem("Face Recognition"):
        with gr.Row():
            with gr.Column():
                first_input = gr.Image(type='filepath')
                gr.Examples(['images/rec_5.jpg', 'images/rec_1.jpg', 'images/9.png', 'images/rec_3.jpg'],
                            inputs=first_input)
                start_button = gr.Button("Run")
            with gr.Column():
                second_input = gr.Image(type='filepath')
                gr.Examples(['images/rec_6.jpg', 'images/rec_2.jpg', 'images/10.jpg', 'images/rec_4.jpg'],
                            inputs=second_input)

            with gr.Column():
                app_output = [gr.JSON()]

    start_button.click(face_recognition_on_file, inputs=[first_input, second_input], outputs=app_output)

demo.queue().launch(share=True)
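For a quick local sanity check of the comparison function above, outside the Gradio UI, something like the following would work; the two image paths come from the example lists in the demo and are assumed to exist in the repository's images/ directory:

    # Hedged sketch: calls face_recognition_on_file() directly, bypassing the Gradio UI.
    # Assumes the example images shipped with this commit are present on disk.
    result = face_recognition_on_file('images/rec_1.jpg', 'images/rec_2.jpg')
    print(result)  # match_1_1 returns the JSON-serializable result rendered by gr.JSON in the UI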
face_recognition/__pycache__/extract.cpython-310.pyc
ADDED
Binary file (2.03 kB).
face_recognition/__pycache__/match.cpython-310.pyc
ADDED
Binary file (763 Bytes).
face_recognition/app.py
ADDED
@@ -0,0 +1,94 @@
import os
import cv2
import numpy as np
import base64
import face_manage.manage as db_manage
from flask import Flask, render_template, request, jsonify
from extract import GetImageInfo

app = Flask(__name__)

UPLOAD_FOLDER = os.path.basename('uploads')
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


@app.route("/")
def start_page():
    print("Start")
    response = jsonify({"status": "Start"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/enroll")
def enroll():
    file = request.files['image']
    image = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    db_manage.open_database(0)
    count, boxes, scores, landmarks, alignimgs, features = GetImageInfo(image, 5)

    for idx in range(0, count):
        db_manage.register_face('sample name', idx, boxes[idx], landmarks[idx], alignimgs[idx], features[idx])

    # db_manage.clear_database()

    response = jsonify({"status": "True"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/delete/all")
def delete_all():
    db_manage.open_database(0)
    db_manage.clear_database()

    response = jsonify({"status": "True"})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/match11")
def match_1_1():
    file1 = request.files['image1']
    file2 = request.files['image2']

    image1 = cv2.imdecode(np.fromstring(file1.read(), np.uint8), cv2.IMREAD_UNCHANGED)
    image2 = cv2.imdecode(np.fromstring(file2.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    count1, boxes1, scores1, landmarks1, alignimgs1, features1 = GetImageInfo(image1, 1)
    count2, boxes2, scores2, landmarks2, alignimgs2, features2 = GetImageInfo(image2, 1)

    if count1 != 0 and count2 != 0:
        sim = db_manage.get_similarity(features1[0], features2[0])
        if sim > db_manage.threshold:
            result = True
        else:
            result = False

    response = jsonify({"status": result})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response


@app.route("/match1n")
def match_1_n():
    file = request.files['image']
    image = cv2.imdecode(np.fromstring(file.read(), np.uint8), cv2.IMREAD_UNCHANGED)

    result, filename, sub_index = False, None, -1
    count, boxes, scores, landmarks, alignimgs, features = GetImageInfo(image, 1)

    for idx in range(count):
        id, fn, sub_id = db_manage.verify_face(features[idx])
        if id != -1:
            result, filename, sub_index = True, fn, id

    response = jsonify({"status": result, "filename": filename, "subIndex": sub_index})
    response.status_code = 200
    response.headers["Content-Type"] = "application/json; charset=utf-8"
    return response
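As a rough illustration of how these endpoints would be consumed (not part of the commit), the sketch below assumes the Flask service is running on localhost:5000 and that the routes are registered with methods=["POST"] so that request.files is populated; as written above, the decorators only accept GET.

    import requests

    # Hypothetical client for the /match11 route defined above.
    # Assumptions: local server, POST uploads enabled, and two face images on disk.
    with open("person_a.jpg", "rb") as f1, open("person_b.jpg", "rb") as f2:
        resp = requests.post("http://127.0.0.1:5000/match11",
                             files={"image1": f1, "image2": f2})
    print(resp.json())  # e.g. {"status": true} when the similarity exceeds db_manage.threshold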
face_recognition/extract.py
ADDED
@@ -0,0 +1,88 @@

import argparse
import cv2
import torch
import numpy as np
import ctypes
import os.path
import time

from face_detect.detect_imgs import get_face_boundingbox
from face_landmark.GetLandmark import get_face_landmark
from face_feature.GetFeature import get_face_feature
from face_pose.GetPose import get_face_pose
import face_manage.manage as db_manage

def GetImageInfo(image, faceMaxCount):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    ### Detection
    start_time = time.time() * 1000
    boxes, scores = get_face_boundingbox(image)
    boxes = boxes[:faceMaxCount]
    scores = scores[:faceMaxCount]
    count = len(boxes)
    bboxes = []
    bscores = []
    for idx in range(count):
        bboxes.append(boxes[idx].data.numpy())
        bscores.append(scores[idx].data.numpy())
    # print("Detection time = %s ms" % (time.time() * 1000 - start_time))

    ### Landmark
    start_time = time.time() * 1000
    landmarks = []  ### np.zeros((count, 136), dtype=np.float32)
    for idx in range(count):
        landmarks.append(get_face_landmark(gray_image, boxes[idx]).data.numpy())
    # print("Landmark time = %s ms" % (time.time() * 1000 - start_time))

    ### Pose
    poses = []
    for idx in range(count):
        poses.append(get_face_pose(boxes[idx], landmarks[idx]))

    ### Feature
    start_time = time.time() * 1000
    features = []
    alignimgs = []
    for idx in range(count):
        alignimg, feature = get_face_feature(image, landmarks[idx])
        features.append(feature)
        alignimgs.append(alignimg)
    print("Feature extraction time = %s ms" % (time.time() * 1000 - start_time))

    ####
    if 0:
        for idx in range(count):
            print_image = image.copy()
            box = boxes[idx].numpy()
            print(">>>>>>>>: ", box)
            landmark = landmarks[idx]
            cv2.rectangle(print_image, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), (0, 0, 255), 2)
            for p in range(68):
                cv2.circle(print_image, (int(landmark[p * 2]), int(landmark[p * 2 + 1])), 1, (255, 255, 255))
            cv2.imshow("face recognition", print_image)
            cv2.waitKey()

    return count, bboxes, bscores, landmarks, alignimgs, features

def get_similarity(feat1, feat2):
    return (np.sum(feat1 * feat2) + 1) * 50

if __name__ == '__main__':
    threshold = 75
    test_directory = 'test'

    efn = os.getcwd() + "/test/1.png"
    img = cv2.imread(efn, cv2.IMREAD_COLOR)
    count, boxes, scores, landmarks, alignimgs, features1 = GetImageInfo(img, 5)

    vfn = os.getcwd() + "/test/3.png"
    img = cv2.imread(vfn, cv2.IMREAD_COLOR)
    count, boxes, scores, landmarks, alignimgs, features2 = GetImageInfo(img, 5)

    score = get_similarity(features1[0], features2[0])
    print('score = ', score)
    if score > threshold:
        print('same person')
    else:
        print('different person')
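For reference, the score produced by get_similarity above is an affine rescaling of a dot product: assuming the embeddings returned by get_face_feature are L2-normalized (not shown in this commit), (cos + 1) * 50 maps a cosine similarity in [-1, 1] to a score in [0, 100], so the threshold of 75 corresponds to a cosine of 0.5. A minimal check of that mapping:

    import numpy as np

    # Assumption: 512-dimensional, L2-normalized embeddings (the real dimension is not visible here).
    feat1 = np.random.randn(512); feat1 /= np.linalg.norm(feat1)
    feat2 = np.random.randn(512); feat2 /= np.linalg.norm(feat2)
    score = (np.sum(feat1 * feat2) + 1) * 50
    assert 0.0 <= score <= 100.0  # cosine -1 maps to 0, cosine +1 maps to 100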
face_recognition/face_detect/__pycache__/detect_imgs.cpython-310.pyc
ADDED
Binary file (2.05 kB).
face_recognition/face_detect/check_gt_box.py
ADDED
@@ -0,0 +1,59 @@
"""
This code is used to check the data size distribution in the dataset.
"""
import xml.etree.ElementTree as ET
from math import sqrt as sqrt

import cv2
import matplotlib.pyplot as plt

# sets = [("./data/wider_face_add_lm_10_10", "trainval")]
sets = [("./data/wider_face_add_lm_10_10", "test")]

classes = ['face']

if __name__ == '__main__':
    width = []
    height = []

    for image_set, set in sets:
        image_ids = open('{}/ImageSets/Main/{}.txt'.format(image_set, set)).read().strip().split()
        for image_id in image_ids:
            img_path = '{}/JPEGImages/{}.jpg'.format(image_set, image_id)
            label_file = open('{}/Annotations/{}.xml'.format(image_set, image_id))
            tree = ET.parse(label_file)
            root = tree.getroot()
            size = root.find('size')
            img_w = int(size.find('width').text)
            img_h = int(size.find('height').text)
            img = cv2.imread(img_path)
            for obj in root.iter('object'):
                difficult = obj.find('difficult').text
                cls = obj.find('name').text
                if cls not in classes or int(difficult) == 2:
                    continue
                cls_id = classes.index(cls)

                xmlbox = obj.find('bndbox')
                xmin = int(xmlbox.find('xmin').text)
                ymin = int(xmlbox.find('ymin').text)
                xmax = int(xmlbox.find('xmax').text)
                ymax = int(xmlbox.find('ymax').text)
                w = xmax - xmin
                h = ymax - ymin

                # img = cv2.rectangle(img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 8)
                w_change = (w / img_w) * 320
                h_change = (h / img_h) * 240
                s = w_change * h_change
                if w_change / h_change > 6:
                    print("{}/{}/{}/{}".format(xmin, xmax, ymin, ymax))
                width.append(sqrt(s))
                height.append(w_change / h_change)
            print(img_path)
        # img = cv2.resize(img, (608, 608))
        # cv2.imwrite('{}_{}'.format(image_set.split('/')[-1], set), img)
        # cv2.waitKey()

    plt.plot(width, height, 'ro')
    plt.show()
face_recognition/face_detect/create_fd_result.py
ADDED
@@ -0,0 +1,99 @@
"""
This code is used to batch detect images in a folder.
"""
import argparse
import os
import sys

import cv2

from vision.ssd.config.fd_config import define_img_size

parser = argparse.ArgumentParser(description='detect_imgs')

parser.add_argument('--net_type', default="RFB", type=str,
                    help='The network architecture ,optional: RFB (higher precision) or slim (faster)')
parser.add_argument('--input_size', default=320, type=int,
                    help='define network input size,default optional value 128/160/320/480/640/1280')
parser.add_argument('--threshold', default=0.65, type=float,
                    help='score threshold')
parser.add_argument('--candidate_size', default=1500, type=int,
                    help='nms candidate size')
parser.add_argument('--path', default="D:/Database/face_detect/test/originalPics", type=str,
                    help='imgs dir')
parser.add_argument('--test_device', default="cpu", type=str,
                    help='cuda:0 or cpu')
args = parser.parse_args()
define_img_size(args.input_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

result_path = "./detect_imgs_results"
label_path = "./models/voc-model-labels.txt"
fd_result_path = 'D:/Database/face_detect/test/rfb_fd_result.txt'
fddb_txt_path = 'D:/Database/face_detect/test/FDDB-folds/FDDB-fold-01-10_2845.txt'

test_device = args.test_device

class_names = [name.strip() for name in open(label_path).readlines()]
if args.net_type == 'slim':
    model_path = "models/pretrained/version-slim-320.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=args.candidate_size, device=test_device)
elif args.net_type == 'RFB':
    model_path = "models/pretrained/version-RFB-320.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=args.candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

def get_file_names(dir_path):
    file_list = os.listdir(dir_path)
    total_file_list = list()

    for entry in file_list:
        full_path = os.path.join(dir_path, entry)
        if (os.path.isdir(full_path)):
            total_file_list = total_file_list + get_file_names(full_path)
        else:
            total_file_list.append(full_path)

    return total_file_list

def get_file_paths(txt_path):
    path_list = list()
    with open(txt_path, "r") as txt_file:
        for line in txt_file:
            path_list.append(line.strip())

    return path_list

if __name__ == '__main__':
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    listdir = get_file_paths(fddb_txt_path)

    total_count = 0
    correct_count = 0
    for file_path in listdir:
        filename = file_path
        img_path = os.path.join(args.path, filename)
        orig_image = cv2.imread(img_path + ".jpg")
        if orig_image is None:
            continue

        print("filename: ", filename)
        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
        boxes, labels, probs = predictor.predict(image, args.candidate_size / 2, args.threshold)

        with open(fd_result_path, "a") as fd_result_file:
            print(filename, file=fd_result_file)
            print(boxes.size(0), file=fd_result_file)
            for i in range(boxes.size(0)):
                box = boxes[i, :]
                score = f"{probs[i]:.3f}"
                print(f"{box[0]:.3f}", f"{box[1]:.3f}", f"{box[2] - box[0]:.3f}", f"{box[3] - box[1]:.3f}", score, file=fd_result_file)
face_recognition/face_detect/detect_imgs.py
ADDED
@@ -0,0 +1,65 @@
"""
This code is used to batch detect images in a folder.
"""

import os
import sys
import cv2
import numpy as np
import torch

from face_detect.vision.ssd.config.fd_config import define_img_size

input_size = 320
test_device = 'cpu'
net_type = 'slim'
threshold = 0.6
candidate_size = 1500

define_img_size(input_size)  # must put define_img_size() before 'import create_mb_tiny_fd, create_mb_tiny_fd_predictor'

from face_detect.vision.ssd.mb_tiny_fd import create_mb_tiny_fd, create_mb_tiny_fd_predictor
from face_detect.vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor

label_path = "./face_recognition/face_detect/models/voc-model-labels.txt"
test_device = test_device

class_names = [name.strip() for name in open(label_path).readlines()]
if net_type == 'slim':
    model_path = "./face_recognition/face_detect/models/pretrained/version-slim-320.pth"
    # model_path = "./face_detect/models/pretrained/version-slim-640.pth"
    net = create_mb_tiny_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_mb_tiny_fd_predictor(net, candidate_size=candidate_size, device=test_device)
elif net_type == 'RFB':
    model_path = "./face_recognition/face_detect/models/pretrained/version-RFB-320.pth"
    # model_path = "./face_detect/models/pretrained/version-RFB-640.pth"
    net = create_Mb_Tiny_RFB_fd(len(class_names), is_test=True, device=test_device)
    predictor = create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=candidate_size, device=test_device)
else:
    print("The net type is wrong!")
    sys.exit(1)
net.load(model_path)

def get_face_boundingbox(orig_image):
    """
    Description:
        In input image, detect face

    Args:
        orig_image: input BGR image.
    """
    boxes, labels, probs = predictor.predict(cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB), candidate_size / 2, threshold)

    if len(boxes) == 0:
        return torch.tensor([]), torch.tensor([])

    height, width, _ = orig_image.shape
    valid_face = np.logical_and(
        np.logical_and(boxes[:, 0] >= 0, boxes[:, 1] >= 0),
        np.logical_and(boxes[:, 2] < width, boxes[:, 3] < height)
    )

    boxes = boxes[valid_face]
    probs = probs[valid_face]

    return boxes, probs
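A minimal usage sketch for get_face_boundingbox, assuming the script is run from the repository root (the model and label paths above are relative to it) and that one of the example images from this commit is available:

    import sys
    sys.path.append('./face_recognition')   # mirrors the path setup in the top-level app.py
    import cv2
    from face_detect.detect_imgs import get_face_boundingbox

    img = cv2.imread("images/rec_1.jpg")     # example image referenced by the Gradio demo
    boxes, probs = get_face_boundingbox(img)  # pixel-coordinate boxes and their face scores
    for box in boxes:
        x1, y1, x2, y2 = [int(v) for v in box]
        cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
    cv2.imwrite("detections.jpg", img)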
face_recognition/face_detect/models/pretrained/version-RFB-320.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c722b4427cc71642768baef6e15c659931b56f07425e5d2b0ec033ad41b145b3
size 1168374
face_recognition/face_detect/models/pretrained/version-RFB-640.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bf34512b1a93dc234178e8a701ecf25c6afddf335a3226accf62982536e160b5
size 1168354
face_recognition/face_detect/models/pretrained/version-slim-320.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cd24abce45da5dbc7cfd8167cd3d5f955382dfc9d9ae9459f0026abd3c2e38a4
size 1091283
face_recognition/face_detect/models/pretrained/version-slim-640.pth
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02ca778098127c46d2b2680f1c398c7b993c12a424e94c34e6d608beb73481e4
size 1091287
face_recognition/face_detect/models/voc-model-labels.txt
ADDED
@@ -0,0 +1,2 @@
BACKGROUND
face
face_recognition/face_detect/requirements.txt
ADDED
@@ -0,0 +1,11 @@
numpy
torch
opencv_python
torchvision
typing
torchstat
torchsummary
ptflops
matplotlib
onnx
onnxruntime
face_recognition/face_detect/vision/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (173 Bytes).
face_recognition/face_detect/vision/datasets/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/datasets/caffe_pb2.py
ADDED
The diff for this file is too large to render.
See raw diff
face_recognition/face_detect/vision/datasets/voc_dataset.py
ADDED
@@ -0,0 +1,146 @@
import logging
import os
import pathlib
import xml.etree.ElementTree as ET
import h5py
import cv2
import numpy as np
import lmdb
from .caffe_pb2 import *

class VOCDataset:

    def __init__(self, root, transform=None, target_transform=None, is_test=False, keep_difficult=False, label_file=None):
        """Dataset for VOC data.
        Args:
            root: the root of the VOC2007 or VOC2012 dataset, the directory contains the following sub-directories:
                Annotations, ImageSets, JPEGImages, SegmentationClass, SegmentationObject.
        """
        self.root = "D:/test"
        self.transform = transform
        self.target_transform = target_transform
        if is_test:
            image_sets_file = self.root + '/test.txt'
        else:
            image_sets_file = self.root + '/test.txt'
        self.ids = ['1.hdf5']  # VOCDataset._read_image_ids(image_sets_file)
        self.keep_difficult = keep_difficult

        # if the labels file exists, read in the class names
        label_file_name = self.root + "labels.txt"

        if os.path.isfile(label_file_name):
            class_string = ""
            with open(label_file_name, 'r') as infile:
                for line in infile:
                    class_string += line.rstrip()

            # classes should be a comma separated list

            classes = class_string.split(',')
            # prepend BACKGROUND as first class
            classes.insert(0, 'BACKGROUND')
            classes = [elem.replace(" ", "") for elem in classes]
            self.class_names = tuple(classes)
            logging.info("VOC Labels read from file: " + str(self.class_names))

        else:
            logging.info("No labels file, using default VOC classes.")
            self.class_names = ('BACKGROUND',
                                'face')

        self.class_dict = {class_name: i for i, class_name in enumerate(self.class_names)}

    # def __getitem__(self, index):
    #     image_id = self.ids[index]
    #     boxes, labels, is_difficult = self._get_annotation(image_id)
    #     if not self.keep_difficult:
    #         boxes = boxes[is_difficult == 0]
    #         labels = labels[is_difficult == 0]
    #     image = self._read_image(image_id)
    #     if self.transform:
    #         image, boxes, labels = self.transform(image, boxes, labels)
    #     if self.target_transform:
    #         boxes, labels = self.target_transform(boxes, labels)
    #     return image, boxes, labels

    def __getitem__(self, index):
        num_per_shared = 3
        file_idx = index // num_per_shared
        idx_in_file = index % num_per_shared
        hdf_path = os.path.join(self.root, self.ids[file_idx])
        with h5py.File(hdf_path, 'r') as f:
            boxes = f[str(idx_in_file) + '_boxes']
            is_difficult = f[str(idx_in_file) + '_difficult']
            image = f[str(idx_in_file) + '_image']
            labels = f[str(idx_in_file) + 'labels']

        if not self.keep_difficult:
            boxes = boxes[is_difficult == 0]
            labels = labels[is_difficult == 0]
        if self.transform:
            image, boxes, labels = self.transform(image, boxes, labels)
        if self.target_transform:
            boxes, labels = self.target_transform(boxes, labels)

        return image, boxes, labels

    def get_image(self, index):
        image_id = self.ids[index]
        image = self._read_image(image_id)
        if self.transform:
            image, _ = self.transform(image)
        return image

    def get_annotation(self, index):
        image_id = self.ids[index]
        return image_id, self._get_annotation(image_id)

    def __len__(self):
        total = 0
        # for file in self.ids:
        #     hdf_path = os.path.join(self.root, file)
        #     f = h5py.File(hdf_path, 'r')
        #     total += len(f.keys())
        return total // 4

    @staticmethod
    def _read_image_ids(image_sets_file):
        ids = []
        with open(image_sets_file) as f:
            for line in f:
                ids.append(line.rstrip())
        return ids

    def _get_annotation(self, image_id):
        annotation_file = self.root / f"Annotations/{image_id}.xml"
        objects = ET.parse(annotation_file).findall("object")
        boxes = []
        labels = []
        is_difficult = []
        for object in objects:
            class_name = object.find('name').text.lower().strip()
            # we're only concerned with clases in our list
            if class_name in self.class_dict:
                bbox = object.find('bndbox')

                # VOC dataset format follows Matlab, in which indexes start from 0
                x1 = float(bbox.find('xmin').text) - 1
                y1 = float(bbox.find('ymin').text) - 1
                x2 = float(bbox.find('xmax').text) - 1
                y2 = float(bbox.find('ymax').text) - 1
                boxes.append([x1, y1, x2, y2])

                labels.append(self.class_dict[class_name])
                is_difficult_str = object.find('difficult').text
                is_difficult.append(int(is_difficult_str) if is_difficult_str else 0)

        return (np.array(boxes, dtype=np.float32),
                np.array(labels, dtype=np.int64),
                np.array(is_difficult, dtype=np.uint8))

    def _read_image(self, image_id):
        image_file = self.root / f"JPEGImages/{image_id}.jpg"
        image = cv2.imread(str(image_file))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return image
face_recognition/face_detect/vision/nn/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/nn/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (176 Bytes).
face_recognition/face_detect/vision/nn/__pycache__/mb_tiny.cpython-310.pyc
ADDED
Binary file (1.8 kB).
face_recognition/face_detect/vision/nn/__pycache__/mb_tiny_RFB.cpython-310.pyc
ADDED
Binary file (4.01 kB).
face_recognition/face_detect/vision/nn/mb_tiny.py
ADDED
@@ -0,0 +1,51 @@
import torch.nn as nn
import torch.nn.functional as F


class Mb_Tiny(nn.Module):

    def __init__(self, num_classes=2):
        super(Mb_Tiny, self).__init__()
        self.base_channel = 8 * 2

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, self.base_channel, 2),  # 160*120
            conv_dw(self.base_channel, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 2, 2),  # 80*60
            conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 4, 2),  # 40*30
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 8, 2),  # 20*15
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 16, 2),  # 10*8
            conv_dw(self.base_channel * 16, self.base_channel * 16, 1)
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x
face_recognition/face_detect/vision/nn/mb_tiny_RFB.py
ADDED
@@ -0,0 +1,118 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class BasicConv(nn.Module):

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        if bn:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=False)
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
            self.relu = nn.ReLU(inplace=True) if relu else None
        else:
            self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=True)
            self.bn = None
            self.relu = nn.ReLU(inplace=True) if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class BasicRFB(nn.Module):

    def __init__(self, in_planes, out_planes, stride=1, scale=0.1, map_reduce=8, vision=1, groups=1):
        super(BasicRFB, self).__init__()
        self.scale = scale
        self.out_channels = out_planes
        inter_planes = in_planes // map_reduce

        self.branch0 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 1, dilation=vision + 1, relu=False, groups=groups)
        )
        self.branch1 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, 2 * inter_planes, kernel_size=(3, 3), stride=stride, padding=(1, 1), groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 2, dilation=vision + 2, relu=False, groups=groups)
        )
        self.branch2 = nn.Sequential(
            BasicConv(in_planes, inter_planes, kernel_size=1, stride=1, groups=groups, relu=False),
            BasicConv(inter_planes, (inter_planes // 2) * 3, kernel_size=3, stride=1, padding=1, groups=groups),
            BasicConv((inter_planes // 2) * 3, 2 * inter_planes, kernel_size=3, stride=stride, padding=1, groups=groups),
            BasicConv(2 * inter_planes, 2 * inter_planes, kernel_size=3, stride=1, padding=vision + 4, dilation=vision + 4, relu=False, groups=groups)
        )

        self.ConvLinear = BasicConv(6 * inter_planes, out_planes, kernel_size=1, stride=1, relu=False)
        self.shortcut = BasicConv(in_planes, out_planes, kernel_size=1, stride=stride, relu=False)
        self.relu = nn.ReLU(inplace=False)

    def forward(self, x):
        x0 = self.branch0(x)
        x1 = self.branch1(x)
        x2 = self.branch2(x)

        out = torch.cat((x0, x1, x2), 1)
        out = self.ConvLinear(out)
        short = self.shortcut(x)
        out = out * self.scale + short
        out = self.relu(out)

        return out


class Mb_Tiny_RFB(nn.Module):

    def __init__(self, num_classes=2):
        super(Mb_Tiny_RFB, self).__init__()
        self.base_channel = 8 * 2

        def conv_bn(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True)
            )

        def conv_dw(inp, oup, stride):
            return nn.Sequential(
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                nn.ReLU(inplace=True),

                nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup),
                nn.ReLU(inplace=True),
            )

        self.model = nn.Sequential(
            conv_bn(3, self.base_channel, 2),  # 160*120
            conv_dw(self.base_channel, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 2, 2),  # 80*60
            conv_dw(self.base_channel * 2, self.base_channel * 2, 1),
            conv_dw(self.base_channel * 2, self.base_channel * 4, 2),  # 40*30
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            conv_dw(self.base_channel * 4, self.base_channel * 4, 1),
            BasicRFB(self.base_channel * 4, self.base_channel * 4, stride=1, scale=1.0),
            conv_dw(self.base_channel * 4, self.base_channel * 8, 2),  # 20*15
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 8, 1),
            conv_dw(self.base_channel * 8, self.base_channel * 16, 2),  # 10*8
            conv_dw(self.base_channel * 16, self.base_channel * 16, 1)
        )
        self.fc = nn.Linear(1024, num_classes)

    def forward(self, x):
        x = self.model(x)
        x = F.avg_pool2d(x, 7)
        x = x.view(-1, 1024)
        x = self.fc(x)
        return x
face_recognition/face_detect/vision/nn/multibox_loss.py
ADDED
@@ -0,0 +1,46 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..utils import box_utils


class MultiboxLoss(nn.Module):
    def __init__(self, priors, neg_pos_ratio,
                 center_variance, size_variance, device):
        """Implement SSD Multibox Loss.

        Basically, Multibox loss combines classification loss
        and Smooth L1 regression loss.
        """
        super(MultiboxLoss, self).__init__()
        self.neg_pos_ratio = neg_pos_ratio
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.priors = priors
        self.priors.to(device)

    def forward(self, confidence, predicted_locations, labels, gt_locations):
        """Compute classification loss and smooth l1 loss.

        Args:
            confidence (batch_size, num_priors, num_classes): class predictions.
            locations (batch_size, num_priors, 4): predicted locations.
            labels (batch_size, num_priors): real labels of all the priors.
            boxes (batch_size, num_priors, 4): real boxes corresponding all the priors.
        """
        num_classes = confidence.size(2)
        with torch.no_grad():
            # derived from cross_entropy=sum(log(p))
            loss = -F.log_softmax(confidence, dim=2)[:, :, 0]
            mask = box_utils.hard_negative_mining(loss, labels, self.neg_pos_ratio)

        confidence = confidence[mask, :]
        classification_loss = F.cross_entropy(confidence.reshape(-1, num_classes), labels[mask], reduction='sum')
        pos_mask = labels > 0
        predicted_locations = predicted_locations[pos_mask, :].reshape(-1, 4)
        gt_locations = gt_locations[pos_mask, :].reshape(-1, 4)
        smooth_l1_loss = F.smooth_l1_loss(predicted_locations, gt_locations, reduction='sum')  # smooth_l1_loss
        # smooth_l1_loss = F.mse_loss(predicted_locations, gt_locations, reduction='sum')  # l2 loss
        num_pos = gt_locations.size(0)
        return smooth_l1_loss / num_pos, classification_loss / num_pos
face_recognition/face_detect/vision/ssd/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/ssd/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (177 Bytes).
face_recognition/face_detect/vision/ssd/__pycache__/data_preprocessing.cpython-310.pyc
ADDED
Binary file (2.59 kB).
face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_RFB_fd.cpython-310.pyc
ADDED
Binary file (2.04 kB).
face_recognition/face_detect/vision/ssd/__pycache__/mb_tiny_fd.cpython-310.pyc
ADDED
Binary file (2.02 kB).
face_recognition/face_detect/vision/ssd/__pycache__/predictor.cpython-310.pyc
ADDED
Binary file (2.21 kB).
face_recognition/face_detect/vision/ssd/__pycache__/ssd.cpython-310.pyc
ADDED
Binary file (5.79 kB).
face_recognition/face_detect/vision/ssd/config/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/ssd/config/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (184 Bytes).
face_recognition/face_detect/vision/ssd/config/__pycache__/fd_config.cpython-310.pyc
ADDED
Binary file (1.53 kB).
face_recognition/face_detect/vision/ssd/config/fd_config.py
ADDED
@@ -0,0 +1,41 @@
import numpy as np

from face_detect.vision.utils.box_utils import generate_priors

image_mean_test = image_mean = np.array([127, 127, 127])
image_std = 128.0
iou_threshold = 0.3
center_variance = 0.1
size_variance = 0.2

min_boxes = [[10, 16, 24], [32, 48], [64, 96], [128, 192, 256]]
shrinkage_list = []
image_size = [320, 240]  # default input size 320*240
feature_map_w_h_list = [[40, 20, 10, 5], [30, 15, 8, 4]]  # default feature map size
priors = []


def define_img_size(size):
    global image_size, feature_map_w_h_list, priors
    img_size_dict = {128: [128, 96],
                     160: [160, 120],
                     320: [320, 240],
                     480: [480, 360],
                     640: [640, 480],
                     1280: [1280, 960]}
    image_size = img_size_dict[size]

    feature_map_w_h_list_dict = {128: [[16, 8, 4, 2], [12, 6, 3, 2]],
                                 160: [[20, 10, 5, 3], [15, 8, 4, 2]],
                                 320: [[40, 20, 10, 5], [30, 15, 8, 4]],
                                 480: [[60, 30, 15, 8], [45, 23, 12, 6]],
                                 640: [[80, 40, 20, 10], [60, 30, 15, 8]],
                                 1280: [[160, 80, 40, 20], [120, 60, 30, 15]]}
    feature_map_w_h_list = feature_map_w_h_list_dict[size]

    for i in range(0, len(image_size)):
        item_list = []
        for k in range(0, len(feature_map_w_h_list[i])):
            item_list.append(image_size[i] / feature_map_w_h_list[i][k])
        shrinkage_list.append(item_list)
    priors = generate_priors(feature_map_w_h_list, shrinkage_list, image_size, min_boxes)
face_recognition/face_detect/vision/ssd/data_preprocessing.py
ADDED
@@ -0,0 +1,61 @@
from ..transforms.transforms import *


class TrainAugmentation:
    def __init__(self, size, mean=0, std=1.0):
        """
        Args:
            size: the size the of final image.
            mean: mean pixel value per channel.
        """
        self.mean = mean
        self.size = size
        self.augment = Compose([
            ConvertFromInts(),
            PhotometricDistort(),
            RandomSampleCrop_v2(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            SubtractMeans(self.mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, img, boxes, labels):
        """

        Args:
            img: the output of cv.imread in RGB layout.
            boxes: boundding boxes in the form of (x1, y1, x2, y2).
            labels: labels of boxes.
        """
        return self.augment(img, boxes, labels)


class TestTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            ToPercentCoords(),
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor(),
        ])

    def __call__(self, image, boxes, labels):
        return self.transform(image, boxes, labels)


class PredictionTransform:
    def __init__(self, size, mean=0.0, std=1.0):
        self.transform = Compose([
            Resize(size),
            SubtractMeans(mean),
            lambda img, boxes=None, labels=None: (img / std, boxes, labels),
            ToTensor()
        ])

    def __call__(self, image):
        image, _, _ = self.transform(image)
        return image
face_recognition/face_detect/vision/ssd/mb_tiny_RFB_fd.py
ADDED
@@ -0,0 +1,64 @@
from torch.nn import Conv2d, Sequential, ModuleList, ReLU

from face_detect.vision.nn.mb_tiny_RFB import Mb_Tiny_RFB
from face_detect.vision.ssd.config import fd_config as config
from face_detect.vision.ssd.predictor import Predictor
from face_detect.vision.ssd.ssd import SSD


def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
    """
    return Sequential(
        Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
               groups=in_channels, stride=stride, padding=padding),
        ReLU(),
        Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
    )


def create_Mb_Tiny_RFB_fd(num_classes, is_test=False, device="cuda"):
    base_net = Mb_Tiny_RFB(2)
    base_net_model = base_net.model  # disable dropout layer

    source_layer_indexes = [
        8,
        11,
        13
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1),
            ReLU(),
            SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1)
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1)
    ])

    return SSD(num_classes, base_net_model, source_layer_indexes,
               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)


def create_Mb_Tiny_RFB_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
    predictor = Predictor(net, config.image_size, config.image_mean_test,
                          config.image_std,
                          nms_method=nms_method,
                          iou_threshold=config.iou_threshold,
                          candidate_size=candidate_size,
                          sigma=sigma,
                          device=device)
    return predictor
face_recognition/face_detect/vision/ssd/mb_tiny_fd.py
ADDED
@@ -0,0 +1,64 @@
from torch.nn import Conv2d, Sequential, ModuleList, ReLU

from face_detect.vision.nn.mb_tiny import Mb_Tiny
from face_detect.vision.ssd.config import fd_config as config
from face_detect.vision.ssd.predictor import Predictor
from face_detect.vision.ssd.ssd import SSD


def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0):
    """Replace Conv2d with a depthwise Conv2d and Pointwise Conv2d.
    """
    return Sequential(
        Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size,
               groups=in_channels, stride=stride, padding=padding),
        ReLU(),
        Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1),
    )


def create_mb_tiny_fd(num_classes, is_test=False, device="cuda"):
    base_net = Mb_Tiny(2)
    base_net_model = base_net.model  # disable dropout layer

    source_layer_indexes = [
        8,
        11,
        13
    ]
    extras = ModuleList([
        Sequential(
            Conv2d(in_channels=base_net.base_channel * 16, out_channels=base_net.base_channel * 4, kernel_size=1),
            ReLU(),
            SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=base_net.base_channel * 16, kernel_size=3, stride=2, padding=1),
            ReLU()
        )
    ])

    regression_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * 4, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * 4, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * 4, kernel_size=3, padding=1)
    ])

    classification_headers = ModuleList([
        SeperableConv2d(in_channels=base_net.base_channel * 4, out_channels=3 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 8, out_channels=2 * num_classes, kernel_size=3, padding=1),
        SeperableConv2d(in_channels=base_net.base_channel * 16, out_channels=2 * num_classes, kernel_size=3, padding=1),
        Conv2d(in_channels=base_net.base_channel * 16, out_channels=3 * num_classes, kernel_size=3, padding=1)
    ])

    return SSD(num_classes, base_net_model, source_layer_indexes,
               extras, classification_headers, regression_headers, is_test=is_test, config=config, device=device)


def create_mb_tiny_fd_predictor(net, candidate_size=200, nms_method=None, sigma=0.5, device=None):
    predictor = Predictor(net, config.image_size, config.image_mean_test,
                          config.image_std,
                          nms_method=nms_method,
                          iou_threshold=config.iou_threshold,
                          candidate_size=candidate_size,
                          sigma=sigma,
                          device=device)
    return predictor
face_recognition/face_detect/vision/ssd/predictor.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch

from ..utils import box_utils
from .data_preprocessing import PredictionTransform
from ..utils.misc import Timer


class Predictor:
    def __init__(self, net, size, mean=0.0, std=1.0, nms_method=None,
                 iou_threshold=0.3, filter_threshold=0.01, candidate_size=200, sigma=0.5, device=None):
        self.net = net
        self.transform = PredictionTransform(size, mean, std)
        self.iou_threshold = iou_threshold
        self.filter_threshold = filter_threshold
        self.candidate_size = candidate_size
        self.nms_method = nms_method

        self.sigma = sigma
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        self.net.to(self.device)
        self.net.eval()

        self.timer = Timer()

    def predict(self, image, top_k=-1, prob_threshold=None):
        cpu_device = torch.device("cpu")
        height, width, _ = image.shape
        image = self.transform(image)
        images = image.unsqueeze(0)
        images = images.to(self.device)
        with torch.no_grad():
            for i in range(1):
                scores, boxes = self.net.forward(images)
        boxes = boxes[0]
        scores = scores[0]
        if not prob_threshold:
            prob_threshold = self.filter_threshold
        # this version of nms is slower on GPU, so we move data to CPU.
        boxes = boxes.to(cpu_device)
        scores = scores.to(cpu_device)
        picked_box_probs = []
        picked_labels = []
        for class_index in range(1, scores.size(1)):
            probs = scores[:, class_index]
            mask = probs > prob_threshold
            probs = probs[mask]
            if probs.size(0) == 0:
                continue
            subset_boxes = boxes[mask, :]
            box_probs = torch.cat([subset_boxes, probs.reshape(-1, 1)], dim=1)
            box_probs = box_utils.nms(box_probs, self.nms_method,
                                      score_threshold=prob_threshold,
                                      iou_threshold=self.iou_threshold,
                                      sigma=self.sigma,
                                      top_k=top_k,
                                      candidate_size=self.candidate_size)
            picked_box_probs.append(box_probs)
            picked_labels.extend([class_index] * box_probs.size(0))
        if not picked_box_probs:
            return torch.tensor([]), torch.tensor([]), torch.tensor([])
        picked_box_probs = torch.cat(picked_box_probs)
        picked_box_probs[:, 0] *= width
        picked_box_probs[:, 1] *= height
        picked_box_probs[:, 2] *= width
        picked_box_probs[:, 3] *= height
        return picked_box_probs[:, :4], torch.tensor(picked_labels), picked_box_probs[:, 4]
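For orientation (not part of the commit): a minimal sketch of calling Predictor.predict on one image, given the predictor built by create_mb_tiny_fd_predictor above. The BGR-to-RGB conversion mirrors the upstream demo and is an assumption here; the file name is illustrative.

    import cv2
    orig = cv2.imread("test.jpg")                    # BGR uint8, shape (H, W, 3)
    image = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)    # assumed preprocessing, as in the upstream demo
    boxes, labels, probs = predictor.predict(image, top_k=500, prob_threshold=0.6)
    # boxes:  (N, 4) corner-form boxes already rescaled to pixel coordinates
    # labels: (N,) class indices (1 = face for the two-class model)
    # probs:  (N,) post-softmax confidences that survived NMS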
face_recognition/face_detect/vision/ssd/ssd.py
ADDED
@@ -0,0 +1,166 @@
from collections import namedtuple
from typing import List, Tuple

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from face_detect.vision.utils import box_utils

GraphPath = namedtuple("GraphPath", ['s0', 'name', 's1'])


class SSD(nn.Module):
    def __init__(self, num_classes: int, base_net: nn.ModuleList, source_layer_indexes: List[int],
                 extras: nn.ModuleList, classification_headers: nn.ModuleList,
                 regression_headers: nn.ModuleList, is_test=False, config=None, device=None):
        """Compose a SSD model using the given components.
        """
        super(SSD, self).__init__()

        self.num_classes = num_classes
        self.base_net = base_net
        self.source_layer_indexes = source_layer_indexes
        self.extras = extras
        self.classification_headers = classification_headers
        self.regression_headers = regression_headers
        self.is_test = is_test
        self.config = config

        # register layers in source_layer_indexes by adding them to a module list
        self.source_layer_add_ons = nn.ModuleList([t[1] for t in source_layer_indexes
                                                   if isinstance(t, tuple) and not isinstance(t, GraphPath)])
        if device:
            self.device = device
        else:
            self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        if is_test:
            self.config = config
            self.priors = config.priors.to(self.device)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        confidences = []
        locations = []
        start_layer_index = 0
        header_index = 0
        end_layer_index = 0
        for end_layer_index in self.source_layer_indexes:
            if isinstance(end_layer_index, GraphPath):
                path = end_layer_index
                end_layer_index = end_layer_index.s0
                added_layer = None
            elif isinstance(end_layer_index, tuple):
                added_layer = end_layer_index[1]
                end_layer_index = end_layer_index[0]
                path = None
            else:
                added_layer = None
                path = None
            for layer in self.base_net[start_layer_index: end_layer_index]:
                x = layer(x)
            if added_layer:
                y = added_layer(x)
            else:
                y = x
            if path:
                sub = getattr(self.base_net[end_layer_index], path.name)
                for layer in sub[:path.s1]:
                    x = layer(x)
                y = x
                for layer in sub[path.s1:]:
                    x = layer(x)
                end_layer_index += 1
            start_layer_index = end_layer_index
            confidence, location = self.compute_header(header_index, y)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)

        for layer in self.base_net[end_layer_index:]:
            x = layer(x)

        for layer in self.extras:
            x = layer(x)
            confidence, location = self.compute_header(header_index, x)
            header_index += 1
            confidences.append(confidence)
            locations.append(location)

        confidences = torch.cat(confidences, 1)
        locations = torch.cat(locations, 1)

        if self.is_test:
            confidences = F.softmax(confidences, dim=2)
            boxes = box_utils.convert_locations_to_boxes(
                locations, self.priors, self.config.center_variance, self.config.size_variance
            )
            boxes = box_utils.center_form_to_corner_form(boxes)
            return confidences, boxes
        else:
            return confidences, locations

    def compute_header(self, i, x):
        confidence = self.classification_headers[i](x)
        confidence = confidence.permute(0, 2, 3, 1).contiguous()
        confidence = confidence.view(confidence.size(0), -1, self.num_classes)

        location = self.regression_headers[i](x)
        location = location.permute(0, 2, 3, 1).contiguous()
        location = location.view(location.size(0), -1, 4)

        return confidence, location

    def init_from_base_net(self, model):
        self.base_net.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage), strict=True)
        self.source_layer_add_ons.apply(_xavier_init_)
        self.extras.apply(_xavier_init_)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def init_from_pretrained_ssd(self, model):
        state_dict = torch.load(model, map_location=lambda storage, loc: storage)
        state_dict = {k: v for k, v in state_dict.items() if not (k.startswith("classification_headers") or k.startswith("regression_headers"))}
        model_dict = self.state_dict()
        model_dict.update(state_dict)
        self.load_state_dict(model_dict)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def init(self):
        self.base_net.apply(_xavier_init_)
        self.source_layer_add_ons.apply(_xavier_init_)
        self.extras.apply(_xavier_init_)
        self.classification_headers.apply(_xavier_init_)
        self.regression_headers.apply(_xavier_init_)

    def load(self, model):
        self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage))

    def save(self, model_path):
        torch.save(self.state_dict(), model_path)


class MatchPrior(object):
    def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold):
        self.center_form_priors = center_form_priors
        self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors)
        self.center_variance = center_variance
        self.size_variance = size_variance
        self.iou_threshold = iou_threshold

    def __call__(self, gt_boxes, gt_labels):
        if type(gt_boxes) is np.ndarray:
            gt_boxes = torch.from_numpy(gt_boxes)
        if type(gt_labels) is np.ndarray:
            gt_labels = torch.from_numpy(gt_labels)
        boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels,
                                                self.corner_form_priors, self.iou_threshold)
        boxes = box_utils.corner_form_to_center_form(boxes)
        locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance)
        return locations, labels


def _xavier_init_(m: nn.Module):
    if isinstance(m, nn.Conv2d):
        nn.init.xavier_uniform_(m.weight)
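For orientation (not part of the commit): what SSD.forward returns in its two modes, read directly from the code above; the variable names in the sketch are illustrative.

    # x: (batch, 3, H, W) float tensor
    # is_test=False -> raw head outputs for training:
    #   confidences: (batch, num_priors, num_classes)
    #   locations:   (batch, num_priors, 4)  offsets relative to the priors
    # is_test=True  -> scores are softmaxed and offsets are decoded against config.priors:
    #   scores: (batch, num_priors, num_classes)
    #   boxes:  (batch, num_priors, 4) corner-form boxes in [0, 1], which is what Predictor consumes
    scores, boxes = net(x)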
face_recognition/face_detect/vision/transforms/__init__.py
ADDED
File without changes
face_recognition/face_detect/vision/transforms/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (184 Bytes)

face_recognition/face_detect/vision/transforms/__pycache__/transforms.cpython-310.pyc
ADDED
Binary file (15.2 kB)
face_recognition/face_detect/vision/transforms/transforms.py
ADDED
@@ -0,0 +1,541 @@
# from https://github.com/amdegroot/ssd.pytorch


import types

import cv2
import numpy as np
import torch
from numpy import random
from torchvision import transforms


def intersect(box_a, box_b):
    max_xy = np.minimum(box_a[:, 2:], box_b[2:])
    min_xy = np.maximum(box_a[:, :2], box_b[:2])
    inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
    return inter[:, 0] * inter[:, 1]


def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A,B]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # [A,B]
    union = area_a + area_b - inter
    return inter / union  # [A,B]


def object_converage_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes,4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0], box_a.shape[1]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))  # [A,B]
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))  # [A,B]
    return inter / area_a  # [A,B]


class Compose(object):
    """Composes several augmentations together.
    Args:
        transforms (List[Transform]): list of transforms to compose.
    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        for t in self.transforms:
            img, boxes, labels = t(img, boxes, labels)
        return img, boxes, labels


class Lambda(object):
    """Applies a lambda as a transform."""

    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        return self.lambd(img, boxes, labels)


class ConvertFromInts(object):
    def __call__(self, image, boxes=None, labels=None):
        return image.astype(np.float32), boxes, labels


class SubtractMeans(object):
    def __init__(self, mean):
        self.mean = np.array(mean, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image -= self.mean
        return image.astype(np.float32), boxes, labels


class imgprocess(object):
    def __init__(self, std):
        self.std = np.array(std, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image /= self.std
        return image.astype(np.float32), boxes, labels


class ToAbsoluteCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] *= width
        boxes[:, 2] *= width
        boxes[:, 1] *= height
        boxes[:, 3] *= height

        return image, boxes, labels


class ToPercentCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] /= width
        boxes[:, 2] /= width
        boxes[:, 1] /= height
        boxes[:, 3] /= height

        return image, boxes, labels


class Resize(object):
    def __init__(self, size=(300, 300)):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        image = cv2.resize(image, (self.size[0],
                                   self.size[1]))
        return image, boxes, labels


class RandomSaturation(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)

        return image, boxes, labels


class RandomHue(object):
    def __init__(self, delta=18.0):
        assert delta >= 0.0 and delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 0] += random.uniform(-self.delta, self.delta)
            image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
            image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
        return image, boxes, labels


class RandomLightingNoise(object):
    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            swap = self.perms[random.randint(len(self.perms))]
            shuffle = SwapChannels(swap)  # shuffle channels
            image = shuffle(image)
        return image, boxes, labels


class ConvertColor(object):
    def __init__(self, current, transform):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'RGB' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
        elif self.current == 'BGR' and self.transform == 'RGB':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        elif self.current == 'HSV' and self.transform == "RGB":
            image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
        else:
            raise NotImplementedError
        return image, boxes, labels


class RandomContrast(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    # expects float image
    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            alpha = random.uniform(self.lower, self.upper)
            image *= alpha
        return image, boxes, labels


class RandomBrightness(object):
    def __init__(self, delta=32):
        assert delta >= 0.0
        assert delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            delta = random.uniform(-self.delta, self.delta)
            image += delta
        return image, boxes, labels


class ToCV2Image(object):
    def __call__(self, tensor, boxes=None, labels=None):
        return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels


class ToTensor(object):
    def __call__(self, cvimage, boxes=None, labels=None):
        return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels


class RandomSampleCrop(object):
    """Crop
    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trails (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes that above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in all gt boxes that under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # mask in that both m1 and m2 are true
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class RandomSampleCrop_v2(object):
    """Crop
    Arguments:
        img (Image): the image being input during training
        boxes (Tensor): the original bounding boxes in pt form
        labels (Tensor): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (Image): the cropped image
            boxes (Tensor): the adjusted bounding boxes in pt form
            labels (Tensor): the class labels for each bbox
    """

    def __init__(self):
        self.sample_options = (
            # using entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj in .1,.3,.4,.7,.9

            # randomly sample a patch
            (1, None),
            (1, None),
            (1, None),
            (1, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode
            mode = random.choice(self.sample_options)
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trails (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w != 1:
                    continue
                left = random.uniform(width - w)
                top = random.uniform(height - h)

                # convert to integer rect x1,y1,x2,y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the cropped and gt boxes
                overlap = object_converage_numpy(boxes, rect)

                # is min and max overlap constraint satisfied? if not try again
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2],
                                              :]

                # keep overlap with gt box IF center in sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes that above and to the left of centers
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])

                # mask in all gt boxes that under and to the right of centers
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # mask in that both m1 and m2 are true
                mask = m1 * m2

                # have any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()

                # take only matching gt labels
                current_labels = labels[mask]

                # should we use the box left and top corner or the crop's
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2],
                                                  rect[:2])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, :2] -= rect[:2]

                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:],
                                                  rect[2:])
                # adjust to crop (by substracting crop's left,top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class Expand(object):
    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        expand_image = np.zeros(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        expand_image[:, :, :] = self.mean
        expand_image[int(top):int(top + height),
                     int(left):int(left + width)] = image
        image = expand_image

        boxes = boxes.copy()
        boxes[:, :2] += (int(left), int(top))
        boxes[:, 2:] += (int(left), int(top))

        return image, boxes, labels


class RandomMirror(object):
    def __call__(self, image, boxes, classes):
        _, width, _ = image.shape
        if random.randint(2):
            image = image[:, ::-1]
            boxes = boxes.copy()
            boxes[:, 0::2] = width - boxes[:, 2::-2]
        return image, boxes, classes


class SwapChannels(object):
    """Transforms a tensorized image by swapping the channels in the order
     specified in the swap tuple.
    Args:
        swaps (int triple): final order of channels
            eg: (2, 1, 0)
    """

    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """
        Args:
            image (Tensor): image tensor to be transformed
        Return:
            a tensor with channels swapped according to swap
        """
        # if torch.is_tensor(image):
        #     image = image.data.cpu().numpy()
        # else:
        #     image = np.array(image)
        image = image[:, :, self.swaps]
        return image


class PhotometricDistort(object):
    def __init__(self):
        self.pd = [
            RandomContrast(),  # RGB
            ConvertColor(current="RGB", transform='HSV'),  # HSV
            RandomSaturation(),  # HSV
            RandomHue(),  # HSV
            ConvertColor(current='HSV', transform='RGB'),  # RGB
            RandomContrast()  # RGB
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        if random.randint(2):
            distort = Compose(self.pd[:-1])
        else:
            distort = Compose(self.pd[1:])
        im, boxes, labels = distort(im, boxes, labels)
        return self.rand_light_noise(im, boxes, labels)
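For orientation (not part of the commit): a sketch of how these transforms compose into a train-time augmentation pipeline. The composition actually used by this repo lives in data_preprocessing.py (added earlier in this commit); the ordering and the mean/std/size values below are assumptions in the style of the upstream TrainAugmentation helper, and boxes are assumed to arrive in absolute pixel coordinates (VOC-style).

    train_transform = Compose([
        ConvertFromInts(),       # uint8 -> float32
        PhotometricDistort(),    # brightness/contrast/saturation/hue jitter
        Expand(mean=127),        # random zoom-out onto a mean-filled canvas
        RandomSampleCrop(),      # IoU-constrained random crop
        RandomMirror(),          # horizontal flip with box remapping
        ToPercentCoords(),       # boxes back to [0, 1]
        Resize((320, 240)),      # network input size (assumed)
        SubtractMeans(127),
        imgprocess(128.0),       # divide by std
        ToTensor(),              # HWC ndarray -> CHW torch tensor
    ])
    image, boxes, labels = train_transform(image, boxes, labels)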
face_recognition/face_detect/vision/utils/__init__.py
ADDED
@@ -0,0 +1 @@
from .misc import *
face_recognition/face_detect/vision/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (200 Bytes)