Monday, 24 May 2021

opencv 52 faster rcnn coco


#fasterRcnn.py
import numpy as np
import argparse
import imutils
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--mask-rcnn", default="mask-rcnn-coco",
                help="base path to mask-rcnn directory")
#ap.add_argument("-i", "--image", default="assets\\mask_rcnn_image.jpg",
#                help="path to input image")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
                help="minimum threshold for pixel-wise mask segmentation")
ap.add_argument("-u", "--use-gpu", type=bool, default=1,
                help="boolean indicating if CUDA GPU should be used")
ap.add_argument("-e", "--iter", type=int, default=10,
                help="# of GrabCut iterations (larger value => slower runtime)")
args = vars(ap.parse_args())

# load the COCO class labels our Mask R-CNN was trained on
labelsPath = os.path.sep.join([args["mask_rcnn"],
                               "object_detection_classes_coco.txt"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
                           dtype="uint8")

# derive the paths to the Mask R-CNN weights and model configuration
weightsPath = os.path.sep.join([args["mask_rcnn"],
                                "frozen_inference_graph.pb"])
configPath = os.path.sep.join([args["mask_rcnn"],
                               "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"])

# load our Mask R-CNN trained on the COCO dataset (90 classes)
# from disk
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)

# check if we are going to use GPU
if args["use_gpu"]:
    # set CUDA as the preferable backend and target
    print("[INFO] setting preferable backend and target to CUDA...")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

cap = cv2.VideoCapture("assets/zoo.mp4")

j = 0
while True:
    ret, image = cap.read()

    # stop when the video ends or a frame cannot be read
    if not ret:
        break
    # load our input image from disk and display it to our screen
    #image = cv2.imread(args["image"])
    #image = imutils.resize(image, width=600)
    #cv2.imshow("Input", image)

    # construct a blob from the input image and then perform a
    # forward pass of the Mask R-CNN, giving us (1) the bounding box
    # coordinates of the objects in the image along with (2) the
    # pixel-wise segmentation for each specific object
    blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
    net.setInput(blob)
    (boxes, masks) = net.forward(["detection_out_final",
                                  "detection_masks"])

    # loop over the number of detected objects
    for i in range(0, boxes.shape[2]):
        # extract the class ID of the detection along with the
        # confidence (i.e., probability) associated with the
        # prediction
        classID = int(boxes[0, 0, i, 1])
        confidence = boxes[0, 0, i, 2]

        # filter out weak predictions by ensuring the detected
        # probability is greater than the minimum probability
        if confidence > args["confidence"]:
            # show the class label
            print("[INFO] showing output for '{}'...".format(
                LABELS[classID]))

            # scale the bounding box coordinates back relative to the
            # size of the image and then compute the width and the
            # height of the bounding box
            (H, W) = image.shape[:2]
            box = boxes[0, 0, i, 3:7] * np.array([W, H, W, H])
            (startX, startY, endX, endY) = box.astype("int")
            y = startY - 10 if startY - 10 > 10 else startY + 10

            cv2.rectangle(image, (startX, startY), (endX, endY),
                          (0, 0, 255), 3)
            cv2.putText(image, LABELS[classID] + " " + str(round(confidence, 2)), (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

    # show the annotated frame, then poll the keyboard once per frame
    cv2.imshow("faster rcnn", image)
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
    if key == ord('p'):
        cv2.waitKey(-1)  # pause until any key is pressed

    # save the annotated frame so it can be stitched into a video later
    j += 1
    path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
    name = str(j) + ".jpg"
    cv2.imwrite(os.path.join(path, name), image)

cap.release()
cv2.destroyAllWindows()
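The forward pass above also returns the pixel-wise masks ("detection_masks"), which this script requests but never uses. Below is a rough sketch (my addition, not part of fasterRcnn.py) of how one instance mask could be blended into the frame, assuming the usual OpenCV Mask R-CNN layout where masks[i, classID] is a small float mask for detection i.

#mask_overlay_sketch.py
import numpy as np
import cv2

def overlay_mask(image, small_mask, startX, startY, endX, endY,
                 color=(0, 0, 255), threshold=0.3, alpha=0.5):
    # resize the low-resolution mask to the bounding-box size and binarize it
    mask = cv2.resize(small_mask, (endX - startX, endY - startY),
                      interpolation=cv2.INTER_CUBIC)
    mask = mask > threshold

    # blend the color into the masked pixels of the box region (in place)
    roi = image[startY:endY, startX:endX]
    roi[mask] = ((alpha * np.array(color)) +
                 ((1 - alpha) * roi[mask])).astype("uint8")
    return image

# inside the detection loop above it would be called roughly like:
#   overlay_mask(image, masks[i, classID], startX, startY, endX, endY)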

-------------------------
#video_writer.py
import os
import cv2
import glob

img_dict = {}
for filename in glob.glob('C:/Users/zchen/PycharmProjects/opencv/googleNet/record/2/*.jpg'):
    img = cv2.imread(filename)
    height, width, layers = img.shape
    size = (width, height)
    img_dict[filename.split("\\")[1]] = img
    #print(img_dict)
    print("loading image " + str(len(img_dict)))

path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'

#frame rate = total frames / video length (in seconds)
out = cv2.VideoWriter(os.path.join(path, "zoo_googlenet_faster_rcnn_2.avi"),
                      cv2.VideoWriter_fourcc(*'DIVX'), 25.175, size)

#may run out of memory if too many frames are loaded at once;
#set start and stop frame #'s to make a short video clip,
#then join the clips in movie maker to produce a long video
#(a lower-memory streaming alternative is sketched after this script)
for i in range(3676, 6773):
    key = str(i) + ".jpg"
    out.write(img_dict[key])
    print("processing image " + str(i))
out.release()
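A lower-memory alternative (my own sketch, not the script I actually used): read the saved frames one at a time in numeric order and write each immediately, pulling the frame rate from the original assets/zoo.mp4 instead of hard-coding it. Paths and the frame range are placeholders to adjust.

#video_writer_streaming_sketch.py
import os
import cv2

record_dir = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'   # where fasterRcnn.py saved the jpgs
source_video = 'assets/zoo.mp4'                                          # only used to read its frame rate

cap = cv2.VideoCapture(source_video)
fps = cap.get(cv2.CAP_PROP_FPS) or 25.0      # fall back if the fps cannot be read
cap.release()

out = None
for i in range(1, 6774):                     # frame numbers written by fasterRcnn.py
    frame = cv2.imread(os.path.join(record_dir, str(i) + ".jpg"))
    if frame is None:                        # skip missing frames instead of crashing
        continue
    if out is None:                          # open the writer once the frame size is known
        h, w = frame.shape[:2]
        out = cv2.VideoWriter(os.path.join(record_dir, "zoo_streamed.avi"),
                              cv2.VideoWriter_fourcc(*'DIVX'), fps, (w, h))
    out.write(frame)                         # nothing is kept in memory between frames

if out is not None:
    out.release()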

reference:

run opencv on gpu

install CUDA and cuDNN

generate sln with cmake
  • note 1: match opencv contrib version with opencv version
https://github.com/opencv/opencv_contrib/tree/version#

  • note 2: very important
make sure python3 appears in the CMake "OpenCV modules: To be built" row.
If it does not: reinstall Python with all checkboxes checked -> reboot the computer -> in a cmd window run pip install numpy -> open CMake -> File -> Delete Cache -> click the Configure button.

In CMake, search for "python3" and check that its parameters all have values.

  • note 3: very important

find the correct arch # (compute capability) for your NVIDIA GPU @ https://en.wikipedia.org/wiki/CUDA#GPUs_supported and set it in the build configuration;
otherwise you will hit the error "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration"

  • note 4:
OPENCV_EXTRA_MODULES_PATH — give the path to the "opencv-contrib-version#" directory by pointing at its "modules" directory (in my case: C:\Users\Administrator\Downloads\opncv-contrib-4.4.0\opencv-contrib-4.4.0\modules)
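One quick sanity check after the build (a small sketch; it assumes the cv2 that Python imports is the freshly built one): the build information should list CUDA as enabled, and the DNN CUDA backend needs at least one visible device.

#check_cuda_build.py
import cv2

# the build info should contain "NVIDIA CUDA: YES" if CMake picked up CUDA
print(cv2.getBuildInformation())

# should print 1 or more when OpenCV was built with CUDA and a GPU is present
print("CUDA devices:", cv2.cuda.getCudaEnabledDeviceCount())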

cmake off screen

add sound track to video
windows movie maker -> select custom audio

add sound track (.mp3)
#object_detection_classes_coco - simple limited classes
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
street sign
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
hat
backpack
umbrella
shoe
eye glasses
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
plate
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
mirror
dining table
window
desk
toilet
door
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
blender
book
clock
vase
scissors
teddy bear
hair drier
toothbrush

Ways Rich People AVOID Paying Taxes

Saturday, 22 May 2021

transport fever 2 chapter 1 final


OBS video recorder:

add video and audio in red rectangle

match video resolution with screen resolution

record mp4

use NVIDIA audio
video trimming

trim with windows movie maker



Thursday, 20 May 2021

opencv 51 eigenface

Eigenface provides an easy and cheap way to realize face recognition in that:
  • Its training process is completely automatic and easy to code.
  • Eigenface adequately reduces statistical complexity in face image representation.
  • Once eigenfaces of a database are calculated, face recognition can be achieved in real time.
  • Eigenface can handle large databases.

However, the deficiencies of the eigenface method are also obvious:
  • It is very sensitive to lighting, scale and translation, and requires a highly controlled environment.
  • Eigenface has difficulty capturing expression changes.
  • The most significant eigenfaces are mainly about illumination encoding and do not provide useful information regarding the actual face.
eigenface transform
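A minimal sketch of that transform (my own illustration, not part of eigenfaces.py below): flatten each face, project it onto the eigenfaces with scikit-learn's PCA, then reconstruct one face from its 150 coefficients. Random data stands in for the 62x47 grayscale face ROIs that load_face_dataset() returns.

#eigenface_transform_sketch.py
import numpy as np
from sklearn.decomposition import PCA

# placeholder data standing in for the 62x47 grayscale face ROIs
faces = np.random.randint(0, 256, size=(200, 62, 47)).astype("float32")

# flatten: one row of 62*47 = 2914 pixel intensities per face
flat = faces.reshape(len(faces), -1)

# learn the eigenfaces and project every face onto them
pca = PCA(n_components=150, svd_solver="randomized", whiten=True)
weights = pca.fit_transform(flat)              # each face -> 150 coefficients

# rebuild the first face from its coefficients (a blurred look-alike on real data)
reconstruction = pca.inverse_transform(weights[:1]).reshape(62, 47)
print(weights.shape, reconstruction.shape)     # (200, 150) (62, 47)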

AI predicts accurately
#project directory
assets
    faces
googleNet
    deploy.prototxt
    res10_300x300_ssd_iter_140000.caffemodel
    eigenfaces.py

---------------------
#eigenfaces.py
from imutils import paths
import numpy as np
import cv2
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import argparse
import imutils
import time
from os.path import dirname, abspath
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from skimage.exposure import rescale_intensity
from imutils import build_montages


def detect_faces(net, image, minConfidence=0.5):
    # grab the dimensions of the image and then construct a blob
    # from it
    (h, w) = image.shape[:2]
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), (104.0, 177.0, 123.0))

    # pass the blob through the network to obtain the face detections,
    # then initialize a list to store the predicted bounding boxes
    net.setInput(blob)
    detections = net.forward()
    boxes = []

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the detection
        confidence = detections[0, 0, i, 2]
        # filter out weak detections by ensuring the confidence is
        # greater than the minimum confidence
        if confidence > minConfidence:
            # compute the (x, y)-coordinates of the bounding box for
            # the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")
            # update our bounding box results list
            boxes.append((startX, startY, endX, endY))
    # return the face detection bounding boxes
    return boxes


def load_face_dataset(inputPath, net, minConfidence=0.5, minSamples=15):
    # grab the paths to all images in our input directory, extract
    # the name of the person (i.e., class label) from the directory
    # structure, and count the number of example images we have per
    # face
    imagePaths = list(paths.list_images(inputPath))
    # print(imagePaths)
    names = [p.split(os.path.sep)[-2] for p in imagePaths]
    (names, counts) = np.unique(names, return_counts=True)
    names = names.tolist()
    # print(names)

    # initialize lists to store our extracted faces and associated
    # labels
    faces = []
    labels = []
    # loop over the image paths
    for imagePath in imagePaths:
        # load the image from disk and extract the name of the person
        # from the subdirectory structure
        image = cv2.imread(imagePath)
        name = imagePath.split(os.path.sep)[-2]
        # print(name, minSamples)

        # only process images that have a sufficient number of
        # examples belonging to the class
        if counts[names.index(name)] < minSamples:
            continue

        # perform face detection
        boxes = detect_faces(net, image, minConfidence)

        # loop over the bounding boxes
        for (startX, startY, endX, endY) in boxes:
            # extract the face ROI, resize it, and convert it to
            # grayscale
            faceROI = image[startY:endY, startX:endX]
            faceROI = cv2.resize(faceROI, (47, 62))
            faceROI = cv2.cvtColor(faceROI, cv2.COLOR_BGR2GRAY)

            # update our faces and labels lists
            faces.append(faceROI)
            labels.append(name)

    # convert our faces and labels lists to NumPy arrays
    faces = np.array(faces)
    labels = np.array(labels)

    # return a 2-tuple of the faces and labels
    return (faces, labels)


# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--input", type=str,
                default=dirname(dirname(abspath(__file__))) + "\\assets\\faces",
                help="path to input directory of images")
ap.add_argument("-f", "--face", type=str,
                default="",
                help="path to face detector model directory")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-n", "--num-components", type=int, default=150,
                help="# of principal components")
ap.add_argument("-v", "--visualize", type=int, default=-1,
                help="whether or not PCA components should be visualized")
args = vars(ap.parse_args())

# load our serialized face detector model from disk
print("[INFO] loading face detector model...")
prototxtPath = "deploy.prototxt"
weightsPath = "res10_300x300_ssd_iter_140000.caffemodel"
net = cv2.dnn.readNet(prototxtPath, weightsPath)

# load the CALTECH faces dataset
print("[INFO] loading dataset...")
print(args["input"])
(faces, labels) = load_face_dataset(args["input"], net,
                                    minConfidence=0.5, minSamples=20)

print("[INFO] {} images in dataset".format(len(faces)))
# encode the string labels as integers
le = LabelEncoder()
labels = le.fit_transform(labels)

# flatten all 2D faces into a 1D list of pixel intensities
pcaFaces = np.array([f.flatten() for f in faces])

# construct our training and testing split
split = train_test_split(faces, pcaFaces, labels, test_size=0.25,
                         stratify=labels, random_state=42)
(origTrain, origTest, trainX, testX, trainY, testY) = split

# compute the PCA (eigenfaces) representation of the data, then
# project the training data onto the eigenfaces subspace
print("[INFO] creating eigenfaces...")

pca = PCA(
    svd_solver="randomized",
    n_components=args["num_components"],
    whiten=True)

start = time.time()
trainX = pca.fit_transform(trainX)
end = time.time()
print("[INFO] computing eigenfaces took {:.4f} seconds".format(
    end - start))

# check to see if the PCA components should be visualized
if args["visualize"] > 0:
    # initialize the list of images in the montage
    images = []

    # loop over the first 16 individual components
    for (i, component) in enumerate(pca.components_[:16]):
        # reshape the component to a 2D matrix, then convert the data
        # type to an unsigned 8-bit integer so it can be displayed
        # with OpenCV
        component = component.reshape((62, 47))
        component = rescale_intensity(component, out_range=(0, 255))
        component = np.dstack([component.astype("uint8")] * 3)
        images.append(component)

    # construct the montage for the images
    montage = build_montages(images, (188, 256), (4, 4))[0]

    # show the mean and principal component visualizations
    # show the mean image
    mean = pca.mean_.reshape((62, 47))
    mean = rescale_intensity(mean, out_range=(0, 255)).astype("uint8")
    cv2.imshow("Mean", mean)
    cv2.imshow("Components", montage)
    #cv2.waitKey(0)

# train a classifier on the eigenfaces representation
print("[INFO] training classifier...")
model = SVC(kernel="rbf", C=10.0, gamma=0.001, random_state=42)
model.fit(trainX, trainY)

# evaluate the model
print("[INFO] evaluating model...")
predictions = model.predict(pca.transform(testX))
print(classification_report(testY, predictions,
                            target_names=le.classes_))

# generate a sample of testing data
idxs = np.random.choice(range(0, len(testY)), size=10, replace=False)

# loop over a sample of the testing data
for i in idxs:
    # grab the predicted name and actual name
    predName = le.inverse_transform([predictions[i]])[0]
    actualName = le.classes_[testY[i]]

    # grab the face image and resize it such that we can easily see
    # it on our screen
    face = np.dstack([origTest[i]] * 3)
    face = imutils.resize(face, width=250)

    # draw the predicted name and actual name on the image
    cv2.putText(face, "pred: {}".format(predName), (5, 25),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
    cv2.putText(face, "actual: {}".format(actualName), (5, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

    # print the predicted name and actual name to the terminal
    print("[INFO] prediction: {}, actual: {}".format(
        predName, actualName))

    # display the current face to our screen
    cv2.imshow("Face " + str(i), face)

cv2.waitKey(0)

-----------------------
#logs
(venv) C:\Users\zchen\PycharmProjects\opencv\googleNet>python eigenfaces.py --visualize 2
[INFO] loading face detector model...
[INFO] loading dataset...
C:\Users\zchen\PycharmProjects\opencv\assets\faces
[INFO] 401 images in dataset
[INFO] creating eigenfaces...
[INFO] computing eigenfaces took 0.2666 seconds
[INFO] training classifier...
[INFO] evaluating model...
              precision    recall  f1-score   support

     abraham       0.83      1.00      0.91         5
     alberta       1.00      1.00      1.00         5
      carmen       0.75      1.00      0.86         6
      conrad       1.00      1.00      1.00         5
     cynthia       1.00      1.00      1.00         6
     darrell       1.00      1.00      1.00         5
       flyod       1.00      0.86      0.92         7
     jacques       1.00      1.00      1.00         5
        judy       1.00      0.83      0.91         6
       julie       1.00      1.00      1.00         6
    kathleen       1.00      1.00      1.00         6
         mae       1.00      1.00      1.00         5
        phil       1.00      0.86      0.92         7
     raymond       1.00      1.00      1.00         5
        rick       0.80      0.80      0.80         5
      ronald       1.00      1.00      1.00         6
     tiffany       1.00      1.00      1.00         5
      willie       1.00      1.00      1.00         6

    accuracy                           0.96       101
   macro avg       0.97      0.96      0.96       101
weighted avg       0.97      0.96      0.96       101

reference: