Saturday, 29 May 2021

opencv 54 mobileNetSSD

SSD300 achieves 74.3% mAP at 59 FPS while SSD500 achieves 76.9% mAP at 22 FPS, which outperforms Faster R-CNN (73.2% mAP at 7 FPS) and YOLOv1 (63.4% mAP at 45 FPS).


project directory
    mobileNetSSD.py
    mobileNet
        MobileNetSSD_deploy.caffemodel
        MobileNetSSD_deploy.prototxt
    asset
        zoo.mp4

#mobileNetSSD.py
import os
import cv2
import numpy as np
import math
import time

# load our serialized model from disk
print("Load MobileNetSSD model")

prototxt_path = "mobileNet/MobileNetSSD_deploy.prototxt"
model_path = "mobileNet/MobileNetSSD_deploy.caffemodel"

# initialize the list of class labels MobileNet SSD was trained to detect
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]

net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)

# set CUDA as the preferable backend and target
print("[INFO] setting preferable backend and target to CUDA...")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)


def process_frame_MobileNetSSD(next_frame):
    rgb = cv2.cvtColor(next_frame, cv2.COLOR_BGR2RGB)
    (H, W) = next_frame.shape[:2]

    # convert the frame to a blob and pass the blob through the
    # network and obtain the detections
    blob = cv2.dnn.blobFromImage(next_frame, size=(300, 300), ddepth=cv2.CV_8U)
    net.setInput(blob, scalefactor=1.0 / 127.5, mean=[127.5, 127.5, 127.5])
    detections = net.forward()

    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated
        # with the prediction
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the `confidence`
        # is greater than the minimum confidence
        if confidence > 0.7:
            # extract the index of the class label from the
            # detections list
            idx = int(detections[0, 0, i, 1])

            # compute the (x, y)-coordinates of the bounding box
            # for the object
            box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
            (startX, startY, endX, endY) = box.astype("int")

            cv2.rectangle(next_frame, (startX, startY), (endX, endY), (0, 255, 0), 3)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.putText(next_frame, CLASSES[idx] + " " + str(round(confidence, 2)), (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

    return next_frame


cap = cv2.VideoCapture("assets/zoo.mp4")

# Define the codec and create VideoWriter object
fps = 25.175
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
size = (int(frame_width), int(frame_height))
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
out = cv2.VideoWriter()
success = out.open(os.path.join(path, "output_mobilenetssd.mov"), fourcc, fps, size, True)

frame_count = 0
t1 = time.time()

while True:
    ret, frame = cap.read()

    if ret == False: break

    frame = process_frame_MobileNetSSD(frame)
    cv2.imshow("frame", frame)
    out.write(frame)
    frame_count += 1

    if cv2.waitKey(1) == ord('q'):
        break

    if cv2.waitKey(1) == ord('p'):
        cv2.waitKey(-1)  # wait until any key is pressed

t2 = time.time()
# calculate FPS
fps = str(float(frame_count / float(t2 - t1))) + ' FPS'
print("Frames processed: {}".format(frame_count))
print("Elapsed time: {:.2f}".format(float(t2 - t1)))
print("FPS: {}".format(fps))

cap.release()
cv2.destroyAllWindows()

------------------------
#logs
PS C:\Users\zchen\PycharmProjects\opencv> python mobileNetSSD.py
Load MobileNetSSD model
[INFO] setting preferable backend and target to CUDA...
Frames processed: 6772
Elapsed time: 216.37
FPS: 31.297719286548247 FPS

reference:

mobileNet/MobileNetSSD_deploy.prototxt

mobileNet/MobileNetSSD_deploy.caffemodel

Friday, 28 May 2021

opencv 53 convert tensorflow model to opencv


OpenCV DNN prediction:
* shape:  (1, 1000)
* class ID: 292, label: tiger, Panthera tigris
* confidence: 0.9874

#modelConversion.py
import os
import tensorflow as tf
from tensorflow.keras.applications import MobileNet
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
import cv2
import numpy as np

original_tf_model = MobileNet(
    include_top=True,
    weights="imagenet"
)


def get_tf_model_proto(tf_model):
    # define the directory for .pb model
    pb_model_path = "models"

    # define the name of .pb model
    pb_model_name = "mobilenet.pb"

    # create directory for further converted model
    os.makedirs(pb_model_path, exist_ok=True)

    # get model TF graph
    tf_model_graph = tf.function(lambda x: tf_model(x))

    # get concrete function
    tf_model_graph = tf_model_graph.get_concrete_function(
        tf.TensorSpec(tf_model.inputs[0].shape, tf_model.inputs[0].dtype))

    # obtain frozen concrete function
    frozen_tf_func = convert_variables_to_constants_v2(tf_model_graph)
    # get frozen graph
    frozen_tf_func.graph.as_graph_def()

    # save full tf model
    tf.io.write_graph(graph_or_graph_def=frozen_tf_func.graph,
                      logdir=pb_model_path,
                      name=pb_model_name,
                      as_text=False)

    return os.path.join(pb_model_path, pb_model_name)


def get_preprocessed_img(img_path):
    # read the image
    input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    input_img = input_img.astype(np.float32)

    # define preprocess parameters
    mean = np.array([1.0, 1.0, 1.0]) * 127.5
    scale = 1 / 127.5

    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale to set pixel values from 0 to 1
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=True  # center crop
    )
    print("Input blob shape: {}\n".format(input_blob.shape))

    return input_blob


def get_imagenet_labels(labels_path):
    with open(labels_path) as f:
        imagenet_labels = [line.strip() for line in f.readlines()]
    return imagenet_labels


def get_opencv_dnn_prediction(opencv_net, preproc_img, imagenet_labels):
    # set OpenCV DNN input
    opencv_net.setInput(preproc_img)

    # OpenCV DNN inference
    out = opencv_net.forward()
    print("OpenCV DNN prediction: \n")
    print("* shape: ", out.shape)

    # get the predicted class ID
    imagenet_class_id = np.argmax(out)

    # get confidence
    confidence = out[0][imagenet_class_id]
    print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id]))
    print("* confidence: {:.4f}\n".format(confidence))


# get TF frozen graph path
full_pb_path = get_tf_model_proto(original_tf_model)

# read frozen graph with OpenCV API
opencv_net = cv2.dnn.readNetFromTensorflow(full_pb_path)
print("OpenCV model was successfully read. Model layers: \n", opencv_net.getLayerNames())

# get preprocessed image
input_img = get_preprocessed_img("assets/tiger.jpg")

# get ImageNet labels
imagenet_labels = get_imagenet_labels("mobileNet/label.txt")

# obtain OpenCV DNN predictions
get_opencv_dnn_prediction(opencv_net, input_img, imagenet_labels)

reference:

run tensorflow on gpu

mobileNet labels

Thursday, 27 May 2021

disable gpu on windows shut down

batch file

@echo off
ECHO "Choose an option .."
ECHO "1 = Shutdown"
ECHO "2 = Reboot"


SET /p option=Choose one option-

IF %option%==1 (
echo *** Disabling GPU ***  
C:\Users\zchen\Downloads\devmanview-x64\devmanview.exe /disable "NVIDIA GeForce xxx"
echo *** Done *** 
SHUTDOWN /s /f /t 0
)

IF %option%==2 (
echo *** Disabling GPU ***  
devmanview.exe /disable "NVIDIA GeForce xxx"
echo *** Done *** 
SHUTDOWN -r -t 0
)

auto enable gpu on windows startup

download 64 bit devmanview

create batch file with devmanview.exe to enable graphic driver
@echo off
echo *** Enabling GPU ***
devmanview.exe /enable "NVIDIA xxx"
echo *** Done ***
pause

windows task scheduler -> create basic task -> trigger when I log on -> start a program -> select batch file

WGET windows

https://builtvisible.com/download-your-website-with-wget/

Monday, 24 May 2021

opencv 52 faster rcnn coco


#fasterRcnn.py
import numpy as np
import argparse
import imutils
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--mask-rcnn", default="mask-rcnn-coco",
                help="base path to mask-rcnn directory")
#ap.add_argument("-i", "--image", default="assets\\mask_rcnn_image.jpg",
#                help="path to input image")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
                help="minimum threshold for pixel-wise mask segmentation")
ap.add_argument("-u", "--use-gpu", type=bool, default=1,
                help="boolean indicating if CUDA GPU should be used")
ap.add_argument("-e", "--iter", type=int, default=10,
                help="# of GrabCut iterations (larger value => slower runtime)")
args = vars(ap.parse_args())

# load the COCO class labels our Mask R-CNN was trained on
labelsPath = os.path.sep.join([args["mask_rcnn"],
                               "object_detection_classes_coco.txt"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
                           dtype="uint8")

# derive the paths to the Mask R-CNN weights and model configuration
weightsPath = os.path.sep.join([args["mask_rcnn"],
                                "frozen_inference_graph.pb"])
configPath = os.path.sep.join([args["mask_rcnn"],
                               "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"])

# load our Mask R-CNN trained on the COCO dataset (90 classes)
# from disk
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)

# check if we are going to use GPU
if args["use_gpu"]:
    # set CUDA as the preferable backend and target
    print("[INFO] setting preferable backend and target to CUDA...")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

cap = cv2.VideoCapture("assets/zoo.mp4")

j = 0
while True:
    ret, image = cap.read()
    # load our input image from disk and display it to our screen
    #image = cv2.imread(args["image"])
    #image = imutils.resize(image, width=600)
    #cv2.imshow("Input", image)

    # construct a blob from the input image and then perform a
    # forward pass of the Mask R-CNN, giving us (1) the bounding box
    # coordinates of the objects in the image along with (2) the
    # pixel-wise segmentation for each specific object
    blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
    net.setInput(blob)
    (boxes, masks) = net.forward(["detection_out_final",
                                  "detection_masks"])

    # loop over the number of detected objects
    for i in range(0, boxes.shape[2]):
        # extract the class ID of the detection along with the
        # confidence (i.e., probability) associated with the
        # prediction
        classID = int(boxes[0, 0, i, 1])
        confidence = boxes[0, 0, i, 2]

        # filter out weak predictions by ensuring the detected
        # probability is greater than the minimum probability
        if confidence > args["confidence"]:
            # show the class label
            print("[INFO] showing output for '{}'...".format(
                LABELS[classID]))

            # scale the bounding box coordinates back relative to the
            # size of the image and then compute the width and the
            # height of the bounding box
            (H, W) = image.shape[:2]
            box = boxes[0, 0, i, 3:7] * np.array([W, H, W, H])
            (startX, startY, endX, endY) = box.astype("int")
            y = startY - 10 if startY - 10 > 10 else startY + 10

            cv2.rectangle(image, (startX, startY), (endX, endY),
                          (0, 0, 255), 3)
            cv2.putText(image, LABELS[classID] + " " + str(round(confidence, 2)), (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

    if cv2.waitKey(1) == ord('q'):
        break

    if cv2.waitKey(1) == ord('p'):
        cv2.waitKey(-1)  # wait until any key is pressed

    cv2.imshow("faster rcnn", image)
    j += 1
    path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
    name = str(j) + ".jpg"
    cv2.imwrite(os.path.join(path, name), image)

cap.release()
cv2.destroyAllWindows()

-------------------------
#video_writer.py
import os
import cv2
import glob

img_dict = {}
for filename in glob.glob('C:/Users/zchen/PycharmProjects/opencv/googleNet/record/2/*.jpg'):
    img = cv2.imread(filename)
    height, width, layers = img.shape
    size = (width, height)
    img_dict[filename.split("\\")[1]] = img
    #print(img_dict)
    print("loading image " + str(len(img_dict)))

path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'

#frame rate = total fames / video length
out = cv2.VideoWriter(os.path.join(path , "zoo_googlenet_faster_rcnn_2.avi"),
                      cv2.VideoWriter_fourcc(*'DIVX'), 25.175, size)

#may run out of memory loading too many frames
#set start and stop frame # to make a short video clip
#then join them in movie maker to produce a long video
for i in range(3676, 6773):
    key = str(i) + ".jpg"
    out.write(img_dict[key])
    print("processing image " + str(i))
out.release()

reference:

run opencv on gpu

install CUDA and cuDNN

generate sln with cmake
  • note 1: match opencv contrib version with opencv version
https://github.com/opencv/opencv_contrib/tree/version#

  • note2: very import 
python 3 is in the opencv module to be built row
if not, reinstall python with all checkbox checked -> reboot computer -> cmd window pip install numpy -> open cmake -> click file -> delete cache -> click configure button

cmake search python3 -> check parameters have value

  • note3: very important 

find correct arch # for nvidia gpu @ https://en.wikipedia.org/wiki/CUDA#GPUs_supported
or error OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration

  • note 4: 
OPENCV_EXTRA_MODULES_PATH — “Give path to “opencv-contrib-version#” directory by pointng at “modules” directory(in my case: C:\Users\Administrator\Downloads\opncv-contrib-4.4.0\opencv-contrib-4.4.0\modules) “

cmake off screen

add sound track to video
windows movie maker -> select custom audio

add sound track (.mp3)
#object_detection_classes_coco - simple limited classes
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
street sign
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
hat
backpack
umbrella
shoe
eye glasses
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
plate
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
mirror
dining table
window
desk
toilet
door
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
blender
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

Ways Rich People AVOID Paying Taxes

Saturday, 22 May 2021

transport fever 2 chapter 1 final


OBS video recorder:

add video and audio in red rectangle

match video resolution with screen resolution

record mp4

use NVIDIA audio
video trimming

trim with windows movie maker



Thursday, 20 May 2021

opencv 51 eigenface

Eigenface provides an easy and cheap way to realize face recognition in that:
  • Its training process is completely automatic and easy to code.
  • Eigenface adequately reduces statistical complexity in face image representation.
  • Once eigenfaces of a database are calculated, face recognition can be achieved in real time.
  • Eigenface can handle large databases.

However, the deficiencies of the eigenface method are also obvious:
  • It is very sensitive to lighting, scale and translation, and requires a highly controlled environment.
  • Eigenface has difficulty capturing expression changes.
  • The most significant eigenfaces are mainly about illumination encoding and do not provide useful information regarding the actual face.
eigenface transform

AI predicts accurately
#project directory
assets
faces
googleNet
deploy.prototxt
res10_300x300_ssd_iter_140000.caffemodel
eigenfaces.py

---------------------
#eigenfaces.py
from imutils import paths
import numpy as np
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import argparse
import imutils
import time
from os.path import dirname, abspath
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from skimage.exposure import rescale_intensity
from imutils import build_montages


def detect_faces(net, image, minConfidence=0.5):
    # grab the dimensions of the image and then construct a blob
    # from it
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))

    # pass the blob through the network to obtain the face detections,
    # then initialize a list to store the predicted bounding boxes
    net.setInput(blob)
    detections = net.forward()
    boxes = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > minConfidence:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # update our bounding box results list
            boxes.append((startX, startY, endX, endY))
    # return the face detection bounding boxes
    return boxes


def load_face_dataset(inputPath, net, minConfidence=0.5, minSamples=15):
    # grab the paths to all images in our input directory, extract
    # the name of the person (i.e., class label) from the directory
    # structure, and count the number of example images we have per
    # face
    imagePaths = list(paths.list_images(inputPath))
    # print(imagePaths)
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    names = names.tolist()
    # print(names)

    # initialize lists to store our extracted faces and associated
    # labels
    faces = []
    labels = []
    # loop over the image paths
    for imagePath in imagePaths:
        # load the image from disk and extract the name of the person
        # from the subdirectory structure
        image = cv2.imread(imagePath)
        name = imagePath.split(os.path.sep)[-2]
        # print(name, minSamples)

        # only process images that have a sufficient number of
        # examples belonging to the class
        if counts[names.index(name)] < minSamples:
            continue

        # perform face detection
        boxes = detect_faces(net, image, minConfidence)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # extract the face ROI, resize it, and convert it to
            # grayscale
            faceROI = image[startY:endY, startX:endX]
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)

            # update our faces and labels lists
            faces.append(faceROI)
            labels.append(name)

    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)

    # return a 2-tuple of the faces and labels
    return (faces, labels)


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str,
                default=dirname(dirname(abspath(__file__))) + "\\assets\\faces",
                help="path to input directory of images")
ap.add_argument("-f", "--face", type=str,
                default="",
                help="path to face detector model directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-n", "--num-components", type=int, default=150,
                help="# of principal components")
ap.add_argument("-v", "--visualize", type=int, default=-1,
                help="whether or not PCA components should be visualized")
args = vars(ap.parse_args())

# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = "deploy.prototxt"
weightsPath = "res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the CALTECH faces dataset
print("[INFO] loading dataset...")
print(args["input"])
(faces, labels) = load_face_dataset(args["input"], net,
                                    minConfidence=0.5, minSamples=20)

print("[INFO] {} images in dataset".format(len(faces)))
# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# flatten all 2D faces into a 1D list of pixel intensities
pcaFaces = np.array([f.flatten() for f in faces])

# construct our training and testing split
split = train_test_split(faces, pcaFaces, labels, test_size=0.25,
                         stratify=labels, random_state=42)
(origTrain, origTest, trainX, testX, trainY, testY) = split

# compute the PCA (eigenfaces) representation of the data, then
# project the training data onto the eigenfaces subspace
print("[INFO] creating eigenfaces...")

pca = PCA(
    svd_solver="randomized",
    n_components=args["num_components"],
    whiten=True)

start = time.time()
trainX = pca.fit_transform(trainX)
end = time.time()
print("[INFO] computing eigenfaces took {:.4f} seconds".format(
    end - start))

# check to see if the PCA components should be visualized
if args["visualize"] > 0:
    # initialize the list of images in the montage
    images = []

    # loop over the first 16 individual components
    for (i, component) in enumerate(pca.components_[:16]):
        # reshape the component to a 2D matrix, then convert the data
        # type to an unsigned 8-bit integer so it can be displayed
        # with OpenCV
        component = component.reshape((62, 47))
        component = rescale_intensity(component, out_range=(0, 255))
        component = np.dstack([component.astype("uint8")] * 3)
        images.append(component)

    # construct the montage for the images
    montage = build_montages(images, (188, 256), (4, 4))[0]

    # show the mean and principal component visualizations
    # show the mean image
    mean = pca.mean_.reshape((62, 47))
    mean = rescale_intensity(mean, out_range=(0, 255)).astype("uint8")
    cv2.imshow("Mean", mean)
    cv2.imshow("Components", montage)
    #cv2.waitKey(0)

# train a classifier on the eigenfaces representation
print("[INFO] training classifier...")
model = SVC(kernel="rbf", C=10.0, gamma=0.001, random_state=42)
model.fit(trainX, trainY)

# evaluate the model
print("[INFO] evaluating model...")
predictions = model.predict(pca.transform(testX))
print(classification_report(testY, predictions,
                            target_names=le.classes_))

# generate a sample of testing data
idxs = np.random.choice(range(0, len(testY)), size=10, replace=False)

# loop over a sample of the testing data
for i in idxs:
    # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]

    # grab the face image and resize it such that we can easily see
    # it on our screen
    face = np.dstack([origTest[i]] * 3)
    face = imutils.resize(face, width=250)

    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # display the predicted name, actual name, and confidence of the
    # prediction (i.e., chi-squared distance; the *lower* the distance
    # is the *more confident* the prediction is)
    print("[INFO] prediction: {}, actual: {}".format(
        predName, actualName))

    # display the current face to our screen
    cv2.imshow("Face " + str(i), face)

cv2.waitKey(0)

-----------------------
#logs
(venv) C:\Users\zchen\PycharmProjects\opencv\googleNet>python eigenfaces.py --visualize 2
[INFO] loading face detector model...
[INFO] loading dataset...
C:\Users\zchen\PycharmProjects\opencv\assets\faces
[INFO] 401 images in dataset
[INFO] creating eigenfaces...
[INFO] computing eigenfaces took 0.2666 seconds
[INFO] training classifier...
[INFO] evaluating model...
              precision    recall  f1-score   support

     abraham       0.83      1.00      0.91         5
     alberta       1.00      1.00      1.00         5
      carmen       0.75      1.00      0.86         6
      conrad       1.00      1.00      1.00         5
     cynthia       1.00      1.00      1.00         6
     darrell       1.00      1.00      1.00         5
       flyod       1.00      0.86      0.92         7
     jacques       1.00      1.00      1.00         5
        judy       1.00      0.83      0.91         6
       julie       1.00      1.00      1.00         6
    kathleen       1.00      1.00      1.00         6
         mae       1.00      1.00      1.00         5
        phil       1.00      0.86      0.92         7
     raymond       1.00      1.00      1.00         5
        rick       0.80      0.80      0.80         5
      ronald       1.00      1.00      1.00         6
     tiffany       1.00      1.00      1.00         5
      willie       1.00      1.00      1.00         6

    accuracy                           0.96       101
   macro avg       0.97      0.96      0.96       101
weighted avg       0.97      0.96      0.96       101

reference:

Tuesday, 18 May 2021

future life on Mars

opencv 50 LBPs face recognition

LBPs for face recognition algorithm has the added benefit of being updatable as new faces are introduced to the dataset.

LBPs for face recognition algorithm can simply insert new face samples without having to be re-trained at all — an obvious benefit when working with face datasets where people are being added or removed from the dataset with routine frequency.
after training with LBPs algorithm, AI recognizes all faces   

sample image set of a person
AI is trained with images sets from many persons
#project directory
assets
faces
googleNet
deploy.prototxt
res10_300x300_ssd_iter_140000.caffemodel
LBP_face.py

--------------------------
#LBP_face.py
from imutils import paths
import numpy as np
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import argparse
import imutils
import time
from os.path import dirname, abspath


def detect_faces(net, image, minConfidence=0.5):
    # grab the dimensions of the image and then construct a blob
    # from it
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))

    # pass the blob through the network to obtain the face detections,
    # then initialize a list to store the predicted bounding boxes
    net.setInput(blob)
    detections = net.forward()
    boxes = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > minConfidence:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # update our bounding box results list
            boxes.append((startX, startY, endX, endY))
    # return the face detection bounding boxes
    return boxes


def load_face_dataset(inputPath, net, minConfidence=0.5, minSamples=15):
    # grab the paths to all images in our input directory, extract
    # the name of the person (i.e., class label) from the directory
    # structure, and count the number of example images we have per
    # face
    imagePaths = list(paths.list_images(inputPath))
    #print(imagePaths)
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    names = names.tolist()
    #print(names)

    # initialize lists to store our extracted faces and associated
    # labels
    faces = []
    labels = []
    # loop over the image paths
    for imagePath in imagePaths:
        # load the image from disk and extract the name of the person
        # from the subdirectory structure
        image = cv2.imread(imagePath)
        name = imagePath.split(os.path.sep)[-2]
        #print(name, minSamples)

        # only process images that have a sufficient number of
        # examples belonging to the class
        if counts[names.index(name)] < minSamples:
            continue

        # perform face detection
        boxes = detect_faces(net, image, minConfidence)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # extract the face ROI, resize it, and convert it to
            # grayscale
            faceROI = image[startY:endY, startX:endX]
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)

            # update our faces and labels lists
            faces.append(faceROI)
            labels.append(name)

    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)

    # return a 2-tuple of the faces and labels
    return (faces, labels)


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str,
                default=dirname(dirname(abspath(__file__))) + "\\assets\\faces",
                help="path to input directory of images")
ap.add_argument("-f", "--face", type=str,
                default="",
                help="path to face detector model directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = "deploy.prototxt"
weightsPath = "res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the CALTECH faces dataset
print("[INFO] loading dataset...")
print(args["input"])
(faces, labels) = load_face_dataset(args["input"], net,
                                    minConfidence=0.5, minSamples=20)

print("[INFO] {} images in dataset".format(len(faces)))
# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# construct our training and testing split
(trainX, testX, trainY, testY) = \
    train_test_split(faces, labels, test_size=0.25, stratify=labels, random_state=42)

# train our LBP face recognizer
print("[INFO] training face recognizer...")

# The radius=2 and neighbors=16 parameters control the number of
# pixels included in the computation of the histogram,
# along with the radius these pixels lie on.
# using an 8×8 grid which allows for more granularity, resulting in higher accuracy.
recognizer = cv2.face.LBPHFaceRecognizer_create(
    radius=2, neighbors=16, grid_x=8, grid_y=8)

start = time.time()
recognizer.train(trainX, trainY)
end = time.time()
print("[INFO] training took {:.4f} seconds".format(end - start))

# initialize the list of predictions and confidence scores
print("[INFO] gathering predictions...")
predictions = []
confidence = []
start = time.time()

# loop over the test data
for i in range(0, len(testX)):
    # classify the face and update the list of predictions and
    # confidence scores
    (prediction, conf) = recognizer.predict(testX[i])
    predictions.append(prediction)
    confidence.append(conf)
    print("predicting test image " + str(i) + " of " + str(len(testX)) + ". confidence: " + str(conf))

# measure how long making predictions took
end = time.time()
print("[INFO] inference took {:.4f} seconds".format(end - start))

# show the classification report
print(classification_report(testY, predictions,
                            target_names=le.classes_))

# generate a sample of testing data
idxs = np.random.choice(range(0, len(testY)), size=10, replace=False)

# loop over a sample of the testing data
for i in idxs:
    # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]

    # grab the face image and resize it such that we can easily see
    # it on our screen
    face = np.dstack([testX[i]] * 3)
    face = imutils.resize(face, width=250)

    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # display the predicted name, actual name, and confidence of the
    # prediction (i.e., chi-squared distance; the *lower* the distance
    # is the *more confident* the prediction is)
    print("[INFO] prediction: {}, actual: {}, confidence: {:.2f}".format(
        predName, actualName, confidence[i]))

    # display the current face to our screen
    cv2.imshow("Face " + str(i), face)

cv2.waitKey(0)

----------------------------
#logs
(venv) C:\Users\zchen\PycharmProjects\opencv\googleNet>python LBP_face.py
[INFO] loading face detector model...
[INFO] loading dataset...
C:\Users\zchen\PycharmProjects\opencv\assets\faces
[INFO] 401 images in dataset
[INFO] training face recognizer...
[INFO] training took 1.2571 seconds
[INFO] gathering predictions...
predicting test image 0 of 101. confidence: 156.05066784335756
predicting test image 1 of 101. confidence: 172.55934680514434
predicting test image 2 of 101. confidence: 167.60806582007518
predicting test image 3 of 101. confidence: 168.3895046348667
predicting test image 4 of 101. confidence: 161.68402620047547
predicting test image 5 of 101. confidence: 175.46755127545347
predicting test image 6 of 101. confidence: 176.313718416004
predicting test image 7 of 101. confidence: 195.20015753599705
...
predicting test image 95 of 101. confidence: 182.50191107376187
predicting test image 96 of 101. confidence: 173.7080780628902
predicting test image 97 of 101. confidence: 177.55615542852405
predicting test image 98 of 101. confidence: 168.53302297795474
predicting test image 99 of 101. confidence: 182.47141327232762
predicting test image 100 of 101. confidence: 163.67195972658968
[INFO] inference took 129.4641 seconds
              precision    recall  f1-score   support

     abraham       0.83      1.00      0.91         5
     alberta       1.00      1.00      1.00         5
      carmen       1.00      1.00      1.00         6
      conrad       1.00      1.00      1.00         5
     cynthia       1.00      1.00      1.00         6
     darrell       1.00      1.00      1.00         5
       flyod       1.00      0.71      0.83         7
     jacques       1.00      1.00      1.00         5
        judy       0.86      1.00      0.92         6
       julie       1.00      1.00      1.00         6
    kathleen       0.75      1.00      0.86         6
         mae       1.00      1.00      1.00         5
        phil       1.00      0.86      0.92         7
     raymond       1.00      1.00      1.00         5
        rick       1.00      0.80      0.89         5
      ronald       1.00      1.00      1.00         6
     tiffany       1.00      1.00      1.00         5
      willie       1.00      1.00      1.00         6

    accuracy                           0.96       101
   macro avg       0.97      0.97      0.96       101
weighted avg       0.97      0.96      0.96       101

[INFO] prediction: willie, actual: willie, confidence: 159.70
[INFO] prediction: alberta, actual: alberta, confidence: 170.80
[INFO] prediction: conrad, actual: conrad, confidence: 187.61
[INFO] prediction: jacques, actual: jacques, confidence: 191.64
[INFO] prediction: judy, actual: judy, confidence: 169.71
[INFO] prediction: phil, actual: phil, confidence: 193.52
[INFO] prediction: raymond, actual: raymond, confidence: 166.94
[INFO] prediction: cynthia, actual: cynthia, confidence: 170.20
[INFO] prediction: ronald, actual: ronald, confidence: 161.68
[INFO] prediction: darrell, actual: darrell, confidence: 127.59

reference:

face dataset

googleNet face detection

Monday, 17 May 2021

Things Billionaires Are Into

opencv 49 age gender detection





#age_gender.py
import cv2
from imutils import paths
from os.path import dirname, abspath
import argparse
import numpy as np

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--prototxt", default="deploy.prototxt",
                help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model",
                default="res10_300x300_ssd_iter_140000.caffemodel",
                help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

print("[INFO] loading face model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

ageProto = "age_deploy.prototxt"
ageModel = "age_net.caffemodel"

genderProto = "gender_deploy.prototxt"
genderModel = "gender_net.caffemodel"

MODEL_MEAN_VALUES = (78.4263377603, 87.7689143744, 114.895847746)
ageList = ['(0-2)', '(4-6)', '(8-12)', '(15-20)', '(25-32)', '(38-43)', '(48-53)', '(60-100)']
genderList = ['Male', 'Female']

print("[INFO] loading age model...")
ageNet = cv2.dnn.readNet(ageModel, ageProto)
print("[INFO] loading gender model...")
genderNet = cv2.dnn.readNet(genderModel, genderProto)

ageNet.setPreferableBackend(cv2.dnn.DNN_TARGET_CPU)
genderNet.setPreferableBackend(cv2.dnn.DNN_TARGET_CPU)

path = dirname(dirname(abspath(__file__))) + "\\assets\\age_gender"
imagePaths = sorted(list(paths.list_images(path)))
#print(imagePaths)

for j, image_path in enumerate(imagePaths):
    image = cv2.imread(imagePaths[j])
    h, w, channels = image.shape

    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                 (300, 300), (104.0, 177.0, 123.0))

    net.setInput(blob)
    detections = net.forward()

    # loop over the detected faces
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the
        # prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the `confidence` is
        # greater than the minimum confidence
        if confidence > args["confidence"]:
            # compute the (x, y)-coordinates of the bounding box for the
            # object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            face = image[startY:endY, startX:endX]
            #cv2.imshow("face"+str(j)+str(i), face)

            face_blob = cv2.dnn.blobFromImage(face, 1.0, (227, 227), MODEL_MEAN_VALUES, swapRB=False)

            genderNet.setInput(face_blob)
            genderPreds = genderNet.forward()
            gender = genderList[genderPreds[0].argmax()]

            ageNet.setInput(face_blob)
            agePreds = ageNet.forward()
            age = ageList[agePreds[0].argmax()]

            text = "{},{}".format(gender, age)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(image, (startX, startY), (endX, endY),
                          (0, 0, 255), 2)
            cv2.putText(image, text, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

    name = image_path.split("\\")[-1]
    cv2.imshow(name, image)

cv2.waitKey(0)

#reference:

face detection

face model

age model

gender model