#fasterRcnn.py
import numpy as np
import argparse
import imutils
import cv2
import os
# Run a Mask R-CNN (COCO) detector over every frame of a video, draw the
# detections on the frame, show it, and save each annotated frame as
# "<n>.jpg" so the frames can later be joined into a video.
def str2bool(value):
    """Convert a command-line value such as "0", "false" or "yes" to a bool.

    argparse's ``type=bool`` calls ``bool()`` on the raw text, and every
    non-empty string (including "0" and "False") is truthy, so the GPU could
    never be switched off from the command line. This converter parses the
    text instead.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))


def build_arg_parser():
    """Return the argument parser for this script.

    All original options keep their names and defaults; ``--video`` and
    ``--output-dir`` are new and default to the previously hard-coded paths.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("-m", "--mask-rcnn", default="mask-rcnn-coco",
                    help="base path to mask-rcnn directory")
    ap.add_argument("-v", "--video", default="assets/zoo.mp4",
                    help="path to input video")
    ap.add_argument("-o", "--output-dir",
                    default='C:/Users/zchen/PycharmProjects/opencv/googleNet/record',
                    help="directory the annotated frames are written to")
    ap.add_argument("-c", "--confidence", type=float, default=0.5,
                    help="minimum probability to filter weak detections")
    ap.add_argument("-t", "--threshold", type=float, default=0.3,
                    help="minimum threshold for pixel-wise mask segmentation")
    ap.add_argument("-u", "--use-gpu", type=str2bool, default=True,
                    help="boolean indicating if CUDA GPU should be used")
    ap.add_argument("-e", "--iter", type=int, default=10,
                    help="# of GrabCut iterations (larger value => slower runtime)")
    return ap


def load_labels(labels_path):
    """Read the class labels file (one label per line) into a list."""
    # the context manager closes the file handle once the labels are read
    with open(labels_path) as handle:
        return handle.read().strip().split("\n")


def load_network(model_dir, use_gpu):
    """Load the Mask R-CNN from ``model_dir`` and optionally target CUDA."""
    # derive the paths to the Mask R-CNN weights and model configuration
    weights_path = os.path.join(model_dir, "frozen_inference_graph.pb")
    config_path = os.path.join(
        model_dir, "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt")
    print("[INFO] loading Mask R-CNN from disk...")
    net = cv2.dnn.readNetFromTensorflow(weights_path, config_path)
    if use_gpu:
        # set CUDA as the preferable backend and target
        print("[INFO] setting preferable backend and target to CUDA...")
        net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
        net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
    return net


def scale_box(raw_box, width, height):
    """Scale a (startX, startY, endX, endY) box by the frame size.

    The x values are multiplied by ``width`` and the y values by ``height``;
    the results are truncated to plain Python ints and returned as a tuple.
    """
    scaled = np.asarray(raw_box) * np.array([width, height, width, height])
    return tuple(int(v) for v in scaled.astype("int"))


def annotate_frame(image, boxes, labels, min_confidence):
    """Draw a box and a "<label> <confidence>" caption for each detection.

    ``boxes`` is the "detection_out_final" output; for detection ``i`` the
    code reads the class ID at index 1, the confidence at index 2 and the
    box at indices 3..6 of ``boxes[0, 0, i]``. ``image`` is modified in place.
    """
    # the frame size does not change between detections, so read it once
    (H, W) = image.shape[:2]
    for i in range(boxes.shape[2]):
        class_id = int(boxes[0, 0, i, 1])
        confidence = boxes[0, 0, i, 2]
        # filter out weak predictions
        if not confidence > min_confidence:
            continue
        print("[INFO] showing output for '{}'...".format(labels[class_id]))
        (start_x, start_y, end_x, end_y) = scale_box(
            boxes[0, 0, i, 3:7], W, H)
        # put the caption above the box unless that would leave the frame
        y = start_y - 10 if start_y - 10 > 10 else start_y + 10
        cv2.rectangle(image, (start_x, start_y), (end_x, end_y),
                      (0, 0, 255), 3)
        cv2.putText(image,
                    labels[class_id] + " " + str(round(confidence, 2)),
                    (start_x, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)


def main():
    """Parse the arguments and process the video frame by frame."""
    args = vars(build_arg_parser().parse_args())
    # load the COCO class labels our Mask R-CNN was trained on
    labels = load_labels(os.path.join(
        args["mask_rcnn"], "object_detection_classes_coco.txt"))
    net = load_network(args["mask_rcnn"], args["use_gpu"])

    output_dir = args["output_dir"]
    try:
        # cv2.imwrite does not create missing directories
        os.makedirs(output_dir, exist_ok=True)
    except OSError as err:
        print("[WARN] could not create '{}': {}".format(output_dir, err))

    cap = cv2.VideoCapture(args["video"])
    frame_index = 0
    try:
        if not cap.isOpened():
            raise SystemExit(
                "[ERROR] could not open video '{}'".format(args["video"]))
        while True:
            ret, image = cap.read()
            # ret is False once the video is exhausted (image is then None)
            if not ret:
                break
            # construct a blob from the frame and perform a forward pass of
            # the Mask R-CNN, giving us the bounding boxes and the masks
            blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
            net.setInput(blob)
            (boxes, _masks) = net.forward(["detection_out_final",
                                           "detection_masks"])
            annotate_frame(image, boxes, labels, args["confidence"])

            cv2.imshow("faster rcnn", image)
            frame_index += 1
            cv2.imwrite(
                os.path.join(output_dir, str(frame_index) + ".jpg"), image)

            # poll the keyboard once per frame: every waitKey call fetches a
            # new key event, so two calls would each miss the other's key
            key = cv2.waitKey(1) & 0xFF
            if key == ord('q'):
                break
            if key == ord('p'):
                cv2.waitKey(-1)  # wait until any key is pressed
    finally:
        # release the capture and close the windows even if a frame fails
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()
-------------------------
#video_writer.py
import os
import cv2
import glob
# Join the numbered frame images "<i>.jpg" into one AVI video clip.
FRAME_DIR = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record/2'
OUTPUT_DIR = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
OUTPUT_NAME = "zoo_googlenet_faster_rcnn_2.avi"
# frame rate = total frames / video length
FPS = 25.175
# set start and stop frame # to make a short video clip,
# then join the clips in movie maker to produce a long video
START_FRAME = 3676
STOP_FRAME = 6773  # exclusive, as in range(START_FRAME, STOP_FRAME)


def frame_paths(frame_dir, start, stop):
    """Return the paths "<frame_dir>/<i>.jpg" for i in range(start, stop)."""
    return [os.path.join(frame_dir, str(i) + ".jpg")
            for i in range(start, stop)]


def write_video(frame_dir, output_path, fps, start, stop):
    """Write frames ``start`` .. ``stop - 1`` from ``frame_dir`` to a video.

    Frames are read one at a time instead of being loaded into a dict first,
    so memory use no longer grows with the number of images in the folder.
    A frame that is missing or unreadable is reported and skipped. The video
    size is taken from the first frame that can be read.

    Returns:
        The number of frames written.
    """
    # report how many frame files are available; they are read lazily below
    print("loading image " + str(len(glob.glob(os.path.join(frame_dir, "*.jpg")))))
    out = None
    written = 0
    try:
        for i, frame_file in zip(range(start, stop),
                                 frame_paths(frame_dir, start, stop)):
            img = cv2.imread(frame_file)
            # cv2.imread returns None when the file is missing or unreadable
            if img is None:
                print("skipping missing or unreadable image " + str(i))
                continue
            if out is None:
                # the writer is created lazily because it needs a frame size
                height, width = img.shape[:2]
                out = cv2.VideoWriter(output_path,
                                      cv2.VideoWriter_fourcc(*'DIVX'),
                                      fps, (width, height))
            out.write(img)
            written += 1
            print("processing image " + str(i))
    finally:
        # finalize the file even if reading or writing a frame fails
        if out is not None:
            out.release()
    return written


if __name__ == "__main__":
    write_video(FRAME_DIR, os.path.join(OUTPUT_DIR, OUTPUT_NAME), FPS,
                START_FRAME, STOP_FRAME)
reference:
run opencv on gpu
install CUDA and cuDNN
generate sln with cmake
- note 1: match opencv contrib version with opencv version
https://github.com/opencv/opencv_contrib/tree/version#
- note 2: very important
if not, reinstall Python with all checkboxes checked -> reboot the computer -> in a cmd window, run pip install numpy -> open CMake -> click File -> Delete Cache -> click the Configure button
in CMake, search for python3 -> check that the parameters have values
- note3: very important
find correct arch # for nvidia gpu @ https://en.wikipedia.org/wiki/CUDA#GPUs_supported
otherwise you get the error: "OpenCV was not built to work with the selected device. Please check CUDA_ARCH_PTX or CUDA_ARCH_BIN in your build configuration"
- note 4:
OPENCV_EXTRA_MODULES_PATH — give the path to the "opencv-contrib-version#" directory by pointing at its "modules" directory (in my case: C:\Users\Administrator\Downloads\opncv-contrib-4.4.0\opencv-contrib-4.4.0\modules)
cmake off screen
add sound track to video
windows movie maker -> select custom audio
add sound track (.mp3)
#object_detection_classes_coco - simple limited classes
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
street sign
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
hat
backpack
umbrella
shoe
eye glasses
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
plate
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
mirror
dining table
window
desk
toilet
door
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
blender
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
No comments:
Post a Comment