Monday, 31 May 2021
Saturday, 29 May 2021
opencv 54 mobileNetSSD
SSD300 achieves 74.3% mAP at 59 FPS while SSD500 achieves 76.9% mAP at 22 FPS, which outperforms Faster R-CNN (73.2% mAP at 7 FPS) and YOLOv1 (63.4% mAP at 45 FPS).
project directory
import os
import cv2
import numpy as np
import math
import time
# Load the serialized MobileNet-SSD Caffe model (network definition + weights)
# from disk.
print("Load MobileNetSSD model")
prototxt_path = "mobileNet/MobileNetSSD_deploy.prototxt"
model_path = "mobileNet/MobileNetSSD_deploy.caffemodel"
# Class labels MobileNet SSD was trained to detect. The detector reports a
# class index, which is used as a position in this list.
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)
# Set CUDA as the preferable backend and target. The message below was printed
# before, but the two calls that actually select CUDA were missing.
# NOTE(review): requires an OpenCV build with CUDA support.
print("[INFO] setting preferable backend and target to CUDA...")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
def process_frame_MobileNetSSD(next_frame, min_confidence=0.7):
    """Run MobileNet-SSD on one BGR frame and draw the detections on it.

    The frame is modified in place (boxes and "<label> <confidence>" text are
    drawn onto it) and also returned for convenience.

    Args:
        next_frame: image array as returned by ``cv2.VideoCapture.read``.
        min_confidence: detections with a confidence not greater than this
            value are ignored (default 0.7, the previously hard-coded value).

    Returns:
        The same ``next_frame`` object with the annotations drawn.

    Relies on the module-level ``net`` (loaded detector) and ``CLASSES``.
    """
    (H, W) = next_frame.shape[:2]
    # Build an 8-bit 300x300 blob; mean subtraction and scaling are applied
    # by the network when the input is set.
    blob = cv2.dnn.blobFromImage(next_frame, size=(300, 300), ddepth=cv2.CV_8U)
    net.setInput(blob, scalefactor=1.0 / 127.5, mean=[127.5, 127.5, 127.5])
    detections = net.forward()
    # Box corners are reported as fractions of the frame size.
    frame_scale = np.array([W, H, W, H])
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # filter out weak detections
        if confidence <= min_confidence:
            continue
        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * frame_scale
        # plain Python ints are accepted by every OpenCV drawing binding
        (startX, startY, endX, endY) = box.astype("int").tolist()
        cv2.rectangle(next_frame, (startX, startY), (endX, endY), (0, 255, 0), 3)
        # put the label above the box unless that would leave the frame
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.putText(next_frame, CLASSES[idx] + " " + str(round(confidence, 2)), (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
    return next_frame
cap = cv2.VideoCapture("assets/zoo.mp4")
# Define the codec and create VideoWriter object
fps = 25.175
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
size = (frame_width, frame_height)
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
out = cv2.VideoWriter()
# NOTE(review): the original output file name was lost from this line;
# "zoo_mobilenet_ssd.mp4" is a placeholder - confirm the intended name.
success = out.open(os.path.join(path, "zoo_mobilenet_ssd.mp4"), fourcc, fps, size, True)
if not success:
    print("[WARN] could not open the output video for writing")
frame_count = 0
t1 = time.time()
while True:
    ret, frame = cap.read()
    # stop at the end of the video (or on a read error)
    if not ret:
        break
    frame = process_frame_MobileNetSSD(frame)
    if success:
        out.write(frame)
    cv2.imshow("frame", frame)
    frame_count += 1
    # Poll the keyboard once per frame: two separate waitKey calls would each
    # consume a key press, so 'p' could be swallowed by the 'q' check.
    pressed = cv2.waitKey(1) & 0xFF
    if pressed == ord('q'):
        break
    if pressed == ord('p'):
        cv2.waitKey(-1)  # wait until any key is pressed
t2 = time.time()
cap.release()
out.release()
cv2.destroyAllWindows()
# calculate FPS (kept separate from `fps`, the writer's frame rate)
elapsed = float(t2 - t1)
measured_fps = str(float(frame_count / elapsed)) + ' FPS' if elapsed > 0 else 'n/a'
print("Frames processed: {}".format(frame_count))
print("Elapsed time: {:.2f}".format(elapsed))
print("FPS: {}".format(measured_fps))
PS C:\Users\zchen\PycharmProjects\opencv> python
Load MobileNetSSD model
[INFO] setting preferable backend and target to CUDA...
Frames processed: 6772
Elapsed time: 216.37
FPS: 31.297719286548247 FPS
Friday, 28 May 2021
opencv 53 convert tensorflow model to opencv
OpenCV DNN prediction:
* shape: (1, 1000)
* class ID: 292, label: tiger, Panthera tigris
* confidence: 0.9874
import os
import tensorflow as tf
from tensorflow.keras.applications import MobileNet
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
import cv2
import numpy as np
# Keras MobileNet classifier with its ImageNet classification head; this is
# the model that gets frozen and handed to OpenCV below.
# NOTE(review): the call arguments were truncated in this file; these are the
# usual ones for an ImageNet classifier - confirm.
original_tf_model = MobileNet(
    include_top=True,
    weights="imagenet"
)
def get_tf_model_proto(tf_model):
    """Freeze a Keras model and save it as a binary TensorFlow graph (.pb).

    Args:
        tf_model: Keras model; its first input's shape and dtype define the
            signature of the exported function.

    Returns:
        Path of the written file (``models/mobilenet.pb``).
    """
    # define the directory for .pb model
    pb_model_path = "models"
    # define the name of .pb model
    pb_model_name = "mobilenet.pb"
    # create directory for further converted model
    os.makedirs(pb_model_path, exist_ok=True)
    # get model TF graph
    tf_model_graph = tf.function(lambda x: tf_model(x))
    # get concrete function
    tf_model_graph = tf_model_graph.get_concrete_function(
        tf.TensorSpec(tf_model.inputs[0].shape, tf_model.inputs[0].dtype))
    # obtain frozen concrete function (variables turned into constants)
    frozen_tf_func = convert_variables_to_constants_v2(tf_model_graph)
    # save the frozen graph; without this step the returned path points to a
    # file that was never written
    tf.io.write_graph(graph_or_graph_def=frozen_tf_func.graph,
                      logdir=pb_model_path,
                      name=pb_model_name,
                      as_text=False)
    return os.path.join(pb_model_path, pb_model_name)
def get_preprocessed_img(img_path):
    """Read an image and convert it into a 224x224 network input blob.

    Args:
        img_path: path of the image file to load.

    Returns:
        The blob produced by ``cv2.dnn.blobFromImage``.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    # read the image
    input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if input_img is None:
        # cv2.imread signals failure by returning None instead of raising
        raise FileNotFoundError("Could not read image: {}".format(img_path))
    input_img = input_img.astype(np.float32)
    # define preprocess parameters
    mean = np.array([1.0, 1.0, 1.0]) * 127.5
    scale = 1 / 127.5
    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale, which maps pixel values from 0..255 to -1..1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=True  # center crop
    )
    print("Input blob shape: {}\n".format(input_blob.shape))
    return input_blob
def get_imagenet_labels(labels_path):
    """Read a label file and return one stripped string per line.

    Blank lines are kept (as empty strings) so that list positions stay
    aligned with line numbers; the caller indexes this list by class ID.

    Args:
        labels_path: path of the text file to read.

    Returns:
        List of the file's lines with surrounding whitespace removed.
    """
    with open(labels_path) as f:
        # iterate the file directly; no need to materialize readlines() first
        return [line.strip() for line in f]
def get_opencv_dnn_prediction(opencv_net, preproc_img, imagenet_labels):
    """Run one forward pass and print the top-scoring class.

    Args:
        opencv_net: network object exposing ``setInput`` and ``forward``.
        preproc_img: preprocessed input blob for the network.
        imagenet_labels: sequence of label strings indexed by class ID.

    Returns:
        None. The output shape, class ID, label and confidence are printed.
    """
    # set OpenCV DNN input (this call was missing, so the blob was never used)
    opencv_net.setInput(preproc_img)
    # OpenCV DNN inference
    out = opencv_net.forward()
    print("OpenCV DNN prediction: \n")
    print("* shape: ", out.shape)
    # get the predicted class ID
    imagenet_class_id = np.argmax(out)
    # get confidence
    confidence = out[0][imagenet_class_id]
    print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id]))
    print("* confidence: {:.4f}\n".format(confidence))
# Freeze the Keras model into a .pb graph and get the path of the saved file
full_pb_path = get_tf_model_proto(original_tf_model)
# Read the frozen graph with OpenCV's TensorFlow importer
opencv_net = cv2.dnn.readNetFromTensorflow(full_pb_path)
print("OpenCV model was successfully read. Model layers: \n", opencv_net.getLayerNames())
# Build the network input blob from the sample image
input_img = get_preprocessed_img("assets/tiger.jpg")
# Read the label list (one label per line; indexed by predicted class ID)
imagenet_labels = get_imagenet_labels("mobileNet/label.txt")
# Run inference and print the predicted class ID, label and confidence
get_opencv_dnn_prediction(opencv_net, input_img, imagenet_labels)
run tensorflow on gpu
mobileNet labels
Thursday, 27 May 2021
disable gpu on windows shut down
batch file
@echo off
REM Disable the GPU with DevManView, then shut down (1) or reboot (2).
REM "NVIDIA GeForce xxx" is a placeholder for the device name shown in DevManView.
SET "DEVMANVIEW=C:\Users\zchen\Downloads\devmanview-x64\devmanview.exe"
ECHO "Choose an option .."
ECHO "1 = Shutdown"
ECHO "2 = Reboot"
SET /p option=Choose one option-
REM Both sides are quoted so that empty input does not cause a syntax error.
IF "%option%"=="1" (
    echo *** Disabling GPU ***
    "%DEVMANVIEW%" /disable "NVIDIA GeForce xxx"
    echo *** Done ***
    SHUTDOWN /s /f /t 0
)
IF "%option%"=="2" (
    echo *** Disabling GPU ***
    "%DEVMANVIEW%" /disable "NVIDIA GeForce xxx"
    echo *** Done ***
    SHUTDOWN /r /t 0
)
auto enable gpu on windows startup
download 64 bit devmanview
create batch file with devmanview.exe to enable graphic driver
@echo off
REM Re-enable the GPU device with DevManView (intended to run at Windows startup).
echo *** Enabling GPU ***
REM "NVIDIA xxx" is a placeholder for the device name; devmanview.exe must be
REM reachable from the current directory or PATH.
devmanview.exe /enable "NVIDIA xxx"
echo *** Done ***
Tuesday, 25 May 2021
Monday, 24 May 2021
opencv 52 faster rcnn coco
import numpy as np
import argparse
import imutils
import cv2
import os
def _str_to_bool(value):
    """Parse a command-line boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True,
    so any non-empty value would enable the flag.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--mask-rcnn", default="mask-rcnn-coco",
                help="base path to mask-rcnn directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
                help="minimum threshold for pixel-wise mask segmentation")
ap.add_argument("-u", "--use-gpu", type=_str_to_bool, default=True,
                help="boolean indicating if CUDA GPU should be used")
ap.add_argument("-e", "--iter", type=int, default=10,
                help="# of GrabCut iterations (larger value => slower runtime)")
args = vars(ap.parse_args())
# load the COCO class labels our Mask R-CNN was trained on
# NOTE(review): the file names in the three path joins below were truncated in
# this file; they are the names commonly shipped with the Mask R-CNN
# Inception v2 COCO model - confirm against the model directory.
labelsPath = os.path.sep.join([args["mask_rcnn"],
                               "object_detection_classes_coco.txt"])
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")
# initialize a list of colors to represent each possible class label
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
                           dtype="uint8")
# derive the paths to the Mask R-CNN weights and model configuration
weightsPath = os.path.sep.join([args["mask_rcnn"],
                                "frozen_inference_graph.pb"])
configPath = os.path.sep.join([args["mask_rcnn"],
                               "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"])
# load our Mask R-CNN trained on the COCO dataset (90 classes)
# from disk
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)
# check if we are going to use GPU
if args["use_gpu"]:
    # set CUDA as the preferable backend and target
    print("[INFO] setting preferable backend and target to CUDA...")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
cap = cv2.VideoCapture("assets/zoo.mp4")
# directory that receives one annotated JPEG per processed frame
path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
j = 0
while True:
    ret, image = cap.read()
    # stop at the end of the video (or on a read error)
    if not ret:
        break
    # construct a blob from the frame and run a forward pass, giving us
    # (1) the bounding boxes of the detected objects and (2) the pixel-wise
    # masks (the masks are not used by this script)
    blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
    net.setInput(blob)
    (boxes, _masks) = net.forward(["detection_out_final",
                                   "detection_masks"])
    (H, W) = image.shape[:2]
    frame_scale = np.array([W, H, W, H])
    # loop over the number of detected objects
    for i in range(0, boxes.shape[2]):
        # extract the class ID of the detection along with the
        # confidence (i.e., probability) associated with the prediction
        classID = int(boxes[0, 0, i, 1])
        confidence = boxes[0, 0, i, 2]
        # filter out weak predictions
        if confidence <= args["confidence"]:
            continue
        # show the class label
        print("[INFO] showing output for '{}'...".format(
            LABELS[classID]))
        # scale the bounding box coordinates back relative to the
        # size of the image
        box = boxes[0, 0, i, 3:7] * frame_scale
        (startX, startY, endX, endY) = box.astype("int").tolist()
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(image, (startX, startY), (endX, endY),
                      (0, 0, 255), 3)
        cv2.putText(image, LABELS[classID] + " " + str(round(confidence, 2)), (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
    cv2.imshow("faster rcnn", image)
    # save the annotated frame as <frame number>.jpg
    j += 1
    name = str(j) + ".jpg"
    cv2.imwrite(os.path.join(path, name), image)
    # poll the keyboard once per frame: 'q' quits, 'p' pauses
    pressed = cv2.waitKey(1) & 0xFF
    if pressed == ord('q'):
        break
    if pressed == ord('p'):
        cv2.waitKey(-1)  # wait until any key is pressed
cap.release()
cv2.destroyAllWindows()
import os
import cv2
import glob
# Join the numbered frames <i>.jpg from record/2 into one video clip.
# Frames are read one at a time and written immediately, so memory use does
# not grow with the number of frames (loading every frame into a dict first
# could run out of memory).
frames_dir = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record/2'
path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
out = None
# set the start and stop frame numbers to make a short video clip;
# clips can then be joined in a video editor to produce a long video
for i in range(3676, 6773):
    key = str(i) + ".jpg"
    img = cv2.imread(os.path.join(frames_dir, key))
    if img is None:
        # cv2.imread returns None for a missing or unreadable file
        print("skipping missing image " + key)
        continue
    if out is None:
        # the first readable frame defines the video size
        height, width = img.shape[:2]
        size = (width, height)
        # frame rate = total frames / video length
        out = cv2.VideoWriter(os.path.join(path, "zoo_googlenet_faster_rcnn_2.avi"),
                              cv2.VideoWriter_fourcc(*'DIVX'), 25.175, size)
    print("processing image " + str(i))
    out.write(img)
if out is not None:
    out.release()
run opencv on gpu
install CUDA and cuDNN
generate sln with cmake
- note 1: match opencv contrib version with opencv version
- note 2: very important
if not, reinstall Python with all checkboxes checked -> reboot the computer -> in a cmd window run pip install numpy -> open cmake -> click File -> Delete Cache -> click the Configure button
cmake search python3 -> check parameters have value
- note3: very important
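find the correct arch # (CUDA compute capability) for your NVIDIA GPU on NVIDIA's "CUDA GPUs" page and use it for CUDA_ARCH_BIN,
otherwise you get the error: "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration"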
- note 4:
OPENCV_EXTRA_MODULES_PATH — give the path to the "opencv-contrib-version#" directory by pointing at its "modules" directory (in my case: C:\Users\Administrator\Downloads\opncv-contrib-4.4.0\opencv-contrib-4.4.0\modules)
cmake off screen
add sound track to video
windows movie maker -> select custom audio
add sound track (.mp3)
#object_detection_classes_coco - simple limited classes
traffic light
fire hydrant
street sign
stop sign
parking meter
eye glasses
sports ball
baseball bat
baseball glove
tennis racket
wine glass
hot dog
potted plant
dining table
cell phone
teddy bear
hair drier
Sunday, 23 May 2021
Saturday, 22 May 2021
transport fever 2 chapter 1 final
OBS video recorder:
add video and audio in red rectangle
match video resolution with screen resolution
record mp4
use NVIDIA audio
video trimming
trim with windows movie maker
Thursday, 20 May 2021
opencv 51 eigenface
Eigenface provides an easy and cheap way to realize face recognition in that:
- Its training process is completely automatic and easy to code.
- Eigenface adequately reduces statistical complexity in face image representation.
- Once eigenfaces of a database are calculated, face recognition can be achieved in real time.
- Eigenface can handle large databases.
However, the deficiencies of the eigenface method are also obvious:
- It is very sensitive to lighting, scale and translation, and requires a highly controlled environment.
- Eigenface has difficulty capturing expression changes.
- The most significant eigenfaces are mainly about illumination encoding and do not provide useful information regarding the actual face.
eigenface transform
AI predicts accurately
#project directory
from imutils import paths
import numpy as np
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import argparse
import imutils
import time
from os.path import dirname, abspath
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from skimage.exposure import rescale_intensity
from imutils import build_montages
def detect_faces(net, image, minConfidence=0.5):
    """Detect faces in an image with an OpenCV DNN face detector.

    Args:
        net: loaded detector network (``setInput`` / ``forward``).
        image: BGR image array.
        minConfidence: detections with a confidence not greater than this
            value are discarded.

    Returns:
        List of ``(startX, startY, endX, endY)`` integer boxes in pixel
        coordinates of ``image``.
    """
    # grab the dimensions of the image and then construct a blob from it
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
    # pass the blob through the network to obtain the face detections
    # (setInput was missing, so the blob was never given to the network)
    net.setInput(blob)
    detections = net.forward()
    # box corners are reported as fractions of the image size
    image_scale = np.array([w, h, w, h])
    boxes = []
    # loop over the detections
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # filter out weak detections
        if confidence > minConfidence:
            box = detections[0, 0, i, 3:7] * image_scale
            (startX, startY, endX, endY) = box.astype("int")
            boxes.append((startX, startY, endX, endY))
    # return the face detection bounding boxes
    return boxes
def load_face_dataset(inputPath, net, minConfidence=0.5, minSamples=15):
    """Build a dataset of grayscale face crops from a directory tree.

    The person's name (class label) is the name of the directory that
    directly contains each image. People with fewer than ``minSamples``
    images are skipped.

    Args:
        inputPath: root directory of the images.
        net: face detector network passed on to ``detect_faces``.
        minConfidence: minimum detection confidence.
        minSamples: minimum number of images a person must have.

    Returns:
        2-tuple ``(faces, labels)`` of NumPy arrays; each face is a 47x62
        (width x height) grayscale crop, each label the person's name.
    """
    # grab the image paths and count the example images per person
    imagePaths = list(paths.list_images(inputPath))
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    # name -> image count, so the per-image check is a constant-time lookup
    samplesPerName = dict(zip(names.tolist(), counts.tolist()))
    faces = []
    labels = []
    # loop over the image paths
    for imagePath in imagePaths:
        name = imagePath.split(os.path.sep)[-2]
        # only process images of people with enough examples
        # (the `continue` was missing, which left this `if` without a body)
        if samplesPerName[name] < minSamples:
            continue
        image = cv2.imread(imagePath)
        if image is None:
            # unreadable file: cv2.imread returns None instead of raising
            continue
        # perform face detection
        boxes = detect_faces(net, image, minConfidence)
        for (startX, startY, endX, endY) in boxes:
            # a box may start slightly outside the image; negative indices
            # would otherwise slice from the opposite edge
            startX, startY = max(0, startX), max(0, startY)
            faceROI = image[startY:endY, startX:endX]
            if faceROI.size == 0:
                continue
            # resize the face ROI and convert it to grayscale
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)
            # update our faces and labels lists (these appends were missing,
            # so the function always returned empty arrays)
            faces.append(faceROI)
            labels.append(name)
    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)
    return (faces, labels)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str,
                default=dirname(dirname(abspath(__file__))) + "\\assets\\faces",
                help="path to input directory of images")
ap.add_argument("-f", "--face", type=str,
                help="path to face detector model directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-n", "--num-components", type=int, default=150,
                help="# of principal components")
ap.add_argument("-v", "--visualize", type=int, default=-1,
                help="whether or not PCA components should be visualized")
args = vars(ap.parse_args())
# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = "deploy.prototxt"
weightsPath = "res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the CALTECH faces dataset; --confidence is now honoured instead of a
# hard-coded 0.5 (the default value is the same)
print("[INFO] loading dataset...")
(faces, labels) = load_face_dataset(args["input"], net,
                                    minConfidence=args["confidence"], minSamples=20)
print("[INFO] {} images in dataset".format(len(faces)))
# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)
# flatten all 2D faces into a 1D list of pixel intensities
pcaFaces = np.array([f.flatten() for f in faces])
# construct our training and testing split
split = train_test_split(faces, pcaFaces, labels, test_size=0.25,
                         stratify=labels, random_state=42)
(origTrain, origTest, trainX, testX, trainY, testY) = split
# compute the PCA (eigenfaces) representation of the data, then
# project the training data onto the eigenfaces subspace
print("[INFO] creating eigenfaces...")
# NOTE(review): the PCA arguments were truncated in this file; n_components
# comes from --num-components, svd_solver/whiten are the usual eigenfaces
# settings - confirm.
pca = PCA(
    svd_solver="randomized",
    n_components=args["num_components"],
    whiten=True)
start = time.time()
trainX = pca.fit_transform(trainX)
end = time.time()
print("[INFO] computing eigenfaces took {:.4f} seconds".format(
    end - start))
# check to see if the PCA components should be visualized
if args["visualize"] > 0:
    # initialize the list of images in the montage
    images = []
    # loop over the first 16 individual components
    for (i, component) in enumerate(pca.components_[:16]):
        # reshape the component to a 2D matrix, then convert the data
        # type to an unsigned 8-bit integer so it can be displayed
        # with OpenCV
        component = component.reshape((62, 47))
        component = rescale_intensity(component, out_range=(0, 255))
        component = np.dstack([component.astype("uint8")] * 3)
        images.append(component)
    # construct the montage for the images
    montage = build_montages(images, (188, 256), (4, 4))[0]
    # show the mean image and the principal component montage
    mean = pca.mean_.reshape((62, 47))
    mean = rescale_intensity(mean, out_range=(0, 255)).astype("uint8")
    cv2.imshow("Mean", mean)
    cv2.imshow("Components", montage)
    cv2.waitKey(0)
# train a classifier on the eigenfaces representation
print("[INFO] training classifier...")
model = SVC(kernel="rbf", C=10.0, gamma=0.001, random_state=42)
model.fit(trainX, trainY)
# evaluate the model
print("[INFO] evaluating model...")
predictions = model.predict(pca.transform(testX))
print(classification_report(testY, predictions,
                            target_names=le.classes_))
# generate a sample of testing data (at most 10, fewer for a tiny test set)
idxs = np.random.choice(range(0, len(testY)), size=min(10, len(testY)),
                        replace=False)
# loop over a sample of the testing data
for i in idxs:
    # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]
    # grab the face image and resize it such that we can easily see
    # it on our screen
    face = np.dstack([origTest[i]] * 3)
    face = imutils.resize(face, width=250)
    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    # display the predicted name and actual name
    print("[INFO] prediction: {}, actual: {}".format(
        predName, actualName))
    # display the current face to our screen
    cv2.imshow("Face " + str(i), face)
# keep the windows open until a key is pressed
cv2.waitKey(0)
(venv) C:\Users\zchen\PycharmProjects\opencv\googleNet>python --visualize 2
[INFO] loading face detector model...
[INFO] loading dataset...
[INFO] 401 images in dataset
[INFO] creating eigenfaces...
[INFO] computing eigenfaces took 0.2666 seconds
[INFO] training classifier...
[INFO] evaluating model...
precision recall f1-score support
abraham 0.83 1.00 0.91 5
alberta 1.00 1.00 1.00 5
carmen 0.75 1.00 0.86 6
conrad 1.00 1.00 1.00 5
cynthia 1.00 1.00 1.00 6
darrell 1.00 1.00 1.00 5
flyod 1.00 0.86 0.92 7
jacques 1.00 1.00 1.00 5
judy 1.00 0.83 0.91 6
julie 1.00 1.00 1.00 6
kathleen 1.00 1.00 1.00 6
mae 1.00 1.00 1.00 5
phil 1.00 0.86 0.92 7
raymond 1.00 1.00 1.00 5
rick 0.80 0.80 0.80 5
ronald 1.00 1.00 1.00 6
tiffany 1.00 1.00 1.00 5
willie 1.00 1.00 1.00 6
accuracy 0.96 101
macro avg 0.97 0.96 0.96 101
weighted avg 0.97 0.96 0.96 101
Wednesday, 19 May 2021
Tuesday, 18 May 2021
opencv 50 LBPs face recognition
LBPs for face recognition algorithm has the added benefit of being updatable as new faces are introduced to the dataset.
LBPs for face recognition algorithm can simply insert new face samples without having to be re-trained at all — an obvious benefit when working with face datasets where people are being added or removed from the dataset with routine frequency.
after training with LBPs algorithm, AI recognizes all faces
sample image set of a person
AI is trained with image sets from many people
#project directory
from imutils import paths
import numpy as np
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import argparse
import imutils
import time
from os.path import dirname, abspath
def detect_faces(net, image, minConfidence=0.5):
    """Detect faces in an image with an OpenCV DNN face detector.

    Args:
        net: loaded detector network (``setInput`` / ``forward``).
        image: BGR image array.
        minConfidence: detections with a confidence not greater than this
            value are discarded.

    Returns:
        List of ``(startX, startY, endX, endY)`` integer boxes in pixel
        coordinates of ``image``.
    """
    # grab the dimensions of the image and then construct a blob from it
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))
    # pass the blob through the network to obtain the face detections
    # (setInput was missing, so the blob was never given to the network)
    net.setInput(blob)
    detections = net.forward()
    # box corners are reported as fractions of the image size
    image_scale = np.array([w, h, w, h])
    boxes = []
    # loop over the detections
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        # filter out weak detections
        if confidence > minConfidence:
            box = detections[0, 0, i, 3:7] * image_scale
            (startX, startY, endX, endY) = box.astype("int")
            boxes.append((startX, startY, endX, endY))
    # return the face detection bounding boxes
    return boxes
def load_face_dataset(inputPath, net, minConfidence=0.5, minSamples=15):
    """Build a dataset of grayscale face crops from a directory tree.

    The person's name (class label) is the name of the directory that
    directly contains each image. People with fewer than ``minSamples``
    images are skipped.

    Args:
        inputPath: root directory of the images.
        net: face detector network passed on to ``detect_faces``.
        minConfidence: minimum detection confidence.
        minSamples: minimum number of images a person must have.

    Returns:
        2-tuple ``(faces, labels)`` of NumPy arrays; each face is a 47x62
        (width x height) grayscale crop, each label the person's name.
    """
    # grab the image paths and count the example images per person
    imagePaths = list(paths.list_images(inputPath))
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    # name -> image count, so the per-image check is a constant-time lookup
    samplesPerName = dict(zip(names.tolist(), counts.tolist()))
    faces = []
    labels = []
    # loop over the image paths
    for imagePath in imagePaths:
        name = imagePath.split(os.path.sep)[-2]
        # only process images of people with enough examples
        # (the `continue` was missing, which left this `if` without a body)
        if samplesPerName[name] < minSamples:
            continue
        image = cv2.imread(imagePath)
        if image is None:
            # unreadable file: cv2.imread returns None instead of raising
            continue
        # perform face detection
        boxes = detect_faces(net, image, minConfidence)
        for (startX, startY, endX, endY) in boxes:
            # a box may start slightly outside the image; negative indices
            # would otherwise slice from the opposite edge
            startX, startY = max(0, startX), max(0, startY)
            faceROI = image[startY:endY, startX:endX]
            if faceROI.size == 0:
                continue
            # resize the face ROI and convert it to grayscale
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)
            # update our faces and labels lists (these appends were missing,
            # so the function always returned empty arrays)
            faces.append(faceROI)
            labels.append(name)
    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)
    return (faces, labels)
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str,
                default=dirname(dirname(abspath(__file__))) + "\\assets\\faces",
                help="path to input directory of images")
ap.add_argument("-f", "--face", type=str,
                help="path to face detector model directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = "deploy.prototxt"
weightsPath = "res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNet(prototxtPath, weightsPath)
# load the CALTECH faces dataset; --confidence is now honoured instead of a
# hard-coded 0.5 (the default value is the same)
print("[INFO] loading dataset...")
(faces, labels) = load_face_dataset(args["input"], net,
                                    minConfidence=args["confidence"], minSamples=20)
print("[INFO] {} images in dataset".format(len(faces)))
# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)
# construct our training and testing split
(trainX, testX, trainY, testY) = \
    train_test_split(faces, labels, test_size=0.25, stratify=labels, random_state=42)
# train our LBP face recognizer
print("[INFO] training face recognizer...")
# The radius=2 and neighbors=16 parameters control the number of
# pixels included in the computation of the histogram,
# along with the radius these pixels lie on.
# An 8x8 grid allows for more granularity, resulting in higher accuracy.
recognizer = cv2.face.LBPHFaceRecognizer_create(
    radius=2, neighbors=16, grid_x=8, grid_y=8)
start = time.time()
recognizer.train(trainX, trainY)
end = time.time()
print("[INFO] training took {:.4f} seconds".format(end - start))
# initialize the list of predictions and confidence scores
print("[INFO] gathering predictions...")
predictions = []
confidence = []
start = time.time()
# loop over the test data
for i, testFace in enumerate(testX):
    # classify the face and update the list of predictions and
    # confidence scores (the appends were missing, leaving both lists empty)
    (prediction, conf) = recognizer.predict(testFace)
    predictions.append(prediction)
    confidence.append(conf)
    print("predicting test image " + str(i) + " of " + str(len(testX)) + ". confidence: " + str(conf))
# measure how long making predictions took
end = time.time()
print("[INFO] inference took {:.4f} seconds".format(end - start))
# show the classification report
print(classification_report(testY, predictions,
                            target_names=le.classes_))
# generate a sample of testing data (at most 10, fewer for a tiny test set)
idxs = np.random.choice(range(0, len(testY)), size=min(10, len(testY)),
                        replace=False)
# loop over a sample of the testing data
for i in idxs:
    # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]
    # grab the face image and resize it such that we can easily see
    # it on our screen
    face = np.dstack([testX[i]] * 3)
    face = imutils.resize(face, width=250)
    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
    # display the predicted name, actual name, and confidence of the
    # prediction (i.e., chi-squared distance; the *lower* the distance
    # is the *more confident* the prediction is)
    print("[INFO] prediction: {}, actual: {}, confidence: {:.2f}".format(
        predName, actualName, confidence[i]))
    # display the current face to our screen
    cv2.imshow("Face " + str(i), face)
# keep the windows open until a key is pressed
cv2.waitKey(0)
(venv) C:\Users\zchen\PycharmProjects\opencv\googleNet>python
[INFO] loading face detector model...
[INFO] loading dataset...
[INFO] 401 images in dataset
[INFO] training face recognizer...
[INFO] training took 1.2571 seconds
[INFO] gathering predictions...
predicting test image 0 of 101. confidence: 156.05066784335756
predicting test image 1 of 101. confidence: 172.55934680514434
predicting test image 2 of 101. confidence: 167.60806582007518
predicting test image 3 of 101. confidence: 168.3895046348667
predicting test image 4 of 101. confidence: 161.68402620047547
predicting test image 5 of 101. confidence: 175.46755127545347
predicting test image 6 of 101. confidence: 176.313718416004
predicting test image 7 of 101. confidence: 195.20015753599705
predicting test image 95 of 101. confidence: 182.50191107376187
predicting test image 96 of 101. confidence: 173.7080780628902
predicting test image 97 of 101. confidence: 177.55615542852405
predicting test image 98 of 101. confidence: 168.53302297795474
predicting test image 99 of 101. confidence: 182.47141327232762
predicting test image 100 of 101. confidence: 163.67195972658968
[INFO] inference took 129.4641 seconds
precision recall f1-score support
abraham 0.83 1.00 0.91 5
alberta 1.00 1.00 1.00 5
carmen 1.00 1.00 1.00 6
conrad 1.00 1.00 1.00 5
cynthia 1.00 1.00 1.00 6
darrell 1.00 1.00 1.00 5
flyod 1.00 0.71 0.83 7
jacques 1.00 1.00 1.00 5
judy 0.86 1.00 0.92 6
julie 1.00 1.00 1.00 6
kathleen 0.75 1.00 0.86 6
mae 1.00 1.00 1.00 5
phil 1.00 0.86 0.92 7
raymond 1.00 1.00 1.00 5
rick 1.00 0.80 0.89 5
ronald 1.00 1.00 1.00 6
tiffany 1.00 1.00 1.00 5
willie 1.00 1.00 1.00 6
accuracy 0.96 101
macro avg 0.97 0.97 0.96 101
weighted avg 0.97 0.96 0.96 101
[INFO] prediction: willie, actual: willie, confidence: 159.70
[INFO] prediction: alberta, actual: alberta, confidence: 170.80
[INFO] prediction: conrad, actual: conrad, confidence: 187.61
[INFO] prediction: jacques, actual: jacques, confidence: 191.64
[INFO] prediction: judy, actual: judy, confidence: 169.71
[INFO] prediction: phil, actual: phil, confidence: 193.52
[INFO] prediction: raymond, actual: raymond, confidence: 166.94
[INFO] prediction: cynthia, actual: cynthia, confidence: 170.20
[INFO] prediction: ronald, actual: ronald, confidence: 161.68
[INFO] prediction: darrell, actual: darrell, confidence: 127.59
face dataset
googleNet face detection
Monday, 17 May 2021
opencv 49 age gender detection
import cv2
from imutils import paths
from os.path import dirname, abspath
import argparse
import numpy as np
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", default="deploy.prototxt",
                help="path to Caffe 'deploy' prototxt file")
# NOTE(review): the default was missing here, so running without --model
# passed None to readNetFromCaffe. The value below is the face-detector
# weights file name used by the other face scripts in this file - confirm.
ap.add_argument("-m", "--model",
                default="res10_300x300_ssd_iter_140000.caffemodel",
                help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
args = vars(ap.parse_args())
print("[INFO] loading face model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])
ageProto = "age_deploy.prototxt"
ageModel = "age_net.caffemodel"
genderProto = "gender_deploy.prototxt"
genderModel = "gender_net.caffemodel"
# per-channel mean subtracted from the face crop before age/gender inference
MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
# output index -> label for the age and gender networks
ageList = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
genderList = ['Male', 'Female']
print("[INFO] loading age model...")
ageNet = cv2.dnn.readNet(ageModel, ageProto)
print("[INFO] loading gender model...")
genderNet = cv2.dnn.readNet(genderModel, genderProto)
path = dirname(dirname(abspath(__file__))) + "\\assets\\age_gender"
imagePaths = sorted(list(paths.list_images(path)))
for image_path in imagePaths:
    image = cv2.imread(image_path)
    if image is None:
        # unreadable file: cv2.imread returns None instead of raising
        continue
    h, w, channels = image.shape
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))
    # feed the blob to the face detector (this call was missing)
    net.setInput(blob)
    detections = net.forward()
    # loop over the detected faces
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the
        # prediction
        confidence = detections[0, 0, i, 2]
        # filter out weak detections
        if confidence <= args["confidence"]:
            continue
        # compute the (x, y)-coordinates of the bounding box for the object
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int").tolist()
        # a box may start slightly outside the image; negative indices
        # would otherwise slice from the opposite edge
        face = image[max(0, startY):endY, max(0, startX):endX]
        if face.size == 0:
            continue
        face_blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)
        # each classifier needs the face blob as its input (both setInput
        # calls were missing)
        genderNet.setInput(face_blob)
        genderPreds = genderNet.forward()
        gender = genderList[genderPreds[0].argmax()]
        ageNet.setInput(face_blob)
        agePreds = ageNet.forward()
        age = ageList[agePreds[0].argmax()]
        text = "{},{}".format(gender, age)
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(image, (startX, startY), (endX, endY),
                      (0, 0, 255), 2)
        cv2.putText(image, text, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)
    # one window per image, titled with the image's file name
    name = image_path.split("\\")[-1]
    cv2.imshow(name, image)
# keep the windows open until a key is pressed
cv2.waitKey(0)
face detection
face model
age model
gender model
