Chuanshuoge
Saturday 29 May 2021
opencv 54 mobileNetSSD
SSD300 achieves 74.3% mAP at 59 FPS and SSD500 achieves 76.9% mAP at 22 FPS, outperforming both Faster R-CNN (73.2% mAP at 7 FPS) and YOLOv1 (63.4% mAP at 45 FPS).
project directory
    mobileNetSSD.py
    mobileNet
        MobileNetSSD_deploy.caffemodel
        MobileNetSSD_deploy.prototxt
    assets
        zoo.mp4
#mobileNetSSD.py
import os
import cv2
import numpy as np
import math
import time
# load our serialized model from disk
print("Load MobileNetSSD model")
prototxt_path = "mobileNet/MobileNetSSD_deploy.prototxt"
model_path = "mobileNet/MobileNetSSD_deploy.caffemodel"
# initialize the list of class labels MobileNet SSD was trained to detect
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
"bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
"dog", "horse", "motorbike", "person", "pottedplant", "sheep",
"sofa", "train", "tvmonitor"]
net = cv2.dnn.readNetFromCaffe(prototxt_path, model_path)
# set CUDA as the preferable backend and target
print("[INFO] setting preferable backend and target to CUDA...")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
def process_frame_MobileNetSSD(next_frame):
    rgb = cv2.cvtColor(next_frame, cv2.COLOR_BGR2RGB)
    (H, W) = next_frame.shape[:2]

    # convert the frame to a blob and pass the blob through the
    # network and obtain the detections
    blob = cv2.dnn.blobFromImage(next_frame, size=(300, 300), ddepth=cv2.CV_8U)
    net.setInput(blob, scalefactor=1.0 / 127.5, mean=[127.5, 127.5, 127.5])
    detections = net.forward()

    # loop over the detections
    for i in np.arange(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated
        # with the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections by ensuring the `confidence`
        # is greater than the minimum confidence
        if confidence > 0.7:
            # extract the index of the class label from the
            # detections list
            idx = int(detections[0, 0, i, 1])

            # compute the (x, y)-coordinates of the bounding box
            # for the object
            box = detections[0, 0, i, 3:7] * np.array([W, H, W, H])
            (startX, startY, endX, endY) = box.astype("int")

            # draw the bounding box and the class label with its confidence
            cv2.rectangle(next_frame, (startX, startY), (endX, endY), (0, 255, 0), 3)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.putText(next_frame, CLASSES[idx] + " " + str(round(confidence, 2)), (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 3)

    return next_frame
cap = cv2.VideoCapture("assets/zoo.mp4")
# Define the codec and create VideoWriter object
fps = 25.175
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
size = (int(frame_width), int(frame_height))
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
path = 'C:/Users/zchen/PycharmProjects/opencv/googleNet/record'
out = cv2.VideoWriter()
success = out.open(os.path.join(path, "output_mobilenetssd.mov"), fourcc, fps, size, True)
frame_count = 0
t1 = time.time()
while True:
    ret, frame = cap.read()
    if not ret:
        break

    frame = process_frame_MobileNetSSD(frame)

    cv2.imshow("frame", frame)
    out.write(frame)
    frame_count += 1

    # read the keyboard once per frame: 'q' quits, 'p' pauses
    key = cv2.waitKey(1)
    if key == ord('q'):
        break
    if key == ord('p'):
        cv2.waitKey(-1)  # wait until any key is pressed
t2 = time.time()
# calculate FPS
fps = str(float(frame_count / float(t2 - t1))) + ' FPS'
print("Frames processed: {}".format(frame_count))
print("Elapsed time: {:.2f}".format(float(t2 - t1)))
print("FPS: {}".format(fps))
cap.release()
out.release()
cv2.destroyAllWindows()
------------------------
#logs
PS C:\Users\zchen\PycharmProjects\opencv> python mobileNetSSD.py
Load MobileNetSSD model
[INFO] setting preferable backend and target to CUDA...
Frames processed: 6772
Elapsed time: 216.37
FPS: 31.297719286548247 FPS
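Note: the CUDA backend/target set above requires an OpenCV build with CUDA support in the dnn module. A minimal sketch of a guarded setup that falls back to the default CPU path when no CUDA device is visible to OpenCV (uses only the cv2 and net objects from the script above):

# minimal sketch: prefer CUDA when OpenCV can see a CUDA device, otherwise use CPU
# (a non-CUDA OpenCV build simply reports a device count of 0 here)
if cv2.cuda.getCudaEnabledDeviceCount() > 0:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)
else:
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)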
reference:
mobileNet/MobileNetSSD_deploy.prototxt
mobileNet/MobileNetSSD_deploy.caffemodel
Friday 28 May 2021
opencv 53 convert tensorflow model to opencv
OpenCV DNN prediction:
* shape: (1, 1000)
* class ID: 292, label: tiger, Panthera tigris
* confidence: 0.9874
#modelConversion.py
import os
import tensorflow as tf
from tensorflow.keras.applications import MobileNet
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
import cv2
import numpy as np
original_tf_model = MobileNet(
    include_top=True,
    weights="imagenet"
)
def get_tf_model_proto(tf_model):
    # define the directory for .pb model
    pb_model_path = "models"

    # define the name of .pb model
    pb_model_name = "mobilenet.pb"

    # create directory for further converted model
    os.makedirs(pb_model_path, exist_ok=True)

    # get model TF graph
    tf_model_graph = tf.function(lambda x: tf_model(x))

    # get concrete function
    tf_model_graph = tf_model_graph.get_concrete_function(
        tf.TensorSpec(tf_model.inputs[0].shape, tf_model.inputs[0].dtype))

    # obtain frozen concrete function
    frozen_tf_func = convert_variables_to_constants_v2(tf_model_graph)

    # get frozen graph
    frozen_tf_func.graph.as_graph_def()

    # save full tf model
    tf.io.write_graph(graph_or_graph_def=frozen_tf_func.graph,
                      logdir=pb_model_path,
                      name=pb_model_name,
                      as_text=False)

    return os.path.join(pb_model_path, pb_model_name)
def get_preprocessed_img(img_path):
    # read the image
    input_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    input_img = input_img.astype(np.float32)

    # define preprocess parameters
    mean = np.array([1.0, 1.0, 1.0]) * 127.5
    scale = 1 / 127.5

    # prepare input blob to fit the model input:
    # 1. subtract mean
    # 2. scale pixel values to the [-1, 1] range
    input_blob = cv2.dnn.blobFromImage(
        image=input_img,
        scalefactor=scale,
        size=(224, 224),  # img target size
        mean=mean,
        swapRB=True,  # BGR -> RGB
        crop=True  # center crop
    )
    print("Input blob shape: {}\n".format(input_blob.shape))

    return input_blob
def get_imagenet_labels(labels_path):
    with open(labels_path) as f:
        imagenet_labels = [line.strip() for line in f.readlines()]
    return imagenet_labels
def get_opencv_dnn_prediction(opencv_net, preproc_img, imagenet_labels):
    # set OpenCV DNN input
    opencv_net.setInput(preproc_img)

    # OpenCV DNN inference
    out = opencv_net.forward()
    print("OpenCV DNN prediction: \n")
    print("* shape: ", out.shape)

    # get the predicted class ID
    imagenet_class_id = np.argmax(out)

    # get confidence
    confidence = out[0][imagenet_class_id]
    print("* class ID: {}, label: {}".format(imagenet_class_id, imagenet_labels[imagenet_class_id]))
    print("* confidence: {:.4f}\n".format(confidence))
# get TF frozen graph path
full_pb_path = get_tf_model_proto(original_tf_model)
# read frozen graph with OpenCV API
opencv_net = cv2.dnn.readNetFromTensorflow(full_pb_path)
print("OpenCV model was successfully read. Model layers: \n", opencv_net.getLayerNames())
# get preprocessed image
input_img = get_preprocessed_img("assets/tiger.jpg")
# get ImageNet labels
imagenet_labels = get_imagenet_labels("mobileNet/label.txt")
# obtain OpenCV DNN predictions
get_opencv_dnn_prediction(opencv_net, input_img, imagenet_labels)
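As a quick sanity check on the conversion, the same preprocessed blob can also be pushed through the original Keras model and compared against the OpenCV result. A minimal sketch, reusing the objects defined above (original_tf_model, input_img, imagenet_labels); get_tf_dnn_prediction is a hypothetical helper, not part of the original script:

# minimal sketch: run the original Keras MobileNet on the same blob for comparison
def get_tf_dnn_prediction(tf_model, preproc_img, imagenet_labels):
    # OpenCV blobs are NCHW; the Keras MobileNet expects NHWC
    tf_input = preproc_img.transpose(0, 2, 3, 1)
    out = tf_model.predict(tf_input)
    class_id = int(np.argmax(out))
    print("TF prediction: class ID: {}, label: {}, confidence: {:.4f}".format(
        class_id, imagenet_labels[class_id], out[0][class_id]))

# should report the same class as the OpenCV DNN prediction (tiger, Panthera tigris)
get_tf_dnn_prediction(original_tf_model, input_img, imagenet_labels)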
reference:
run tensorflow on gpu
mobileNet labels
Thursday 27 May 2021
disable gpu on windows shut down
batch file
@echo off
ECHO "Choose an option .."
ECHO "1 = Shutdown"
ECHO "2 = Reboot"
SET /p option=Choose one option-
IF %option%==1 (
    echo *** Disabling GPU ***
    C:\Users\zchen\Downloads\devmanview-x64\devmanview.exe /disable "NVIDIA GeForce xxx"
    echo *** Done ***
    SHUTDOWN /s /f /t 0
)
IF %option%==2 (
    echo *** Disabling GPU ***
    devmanview.exe /disable "NVIDIA GeForce xxx"
    echo *** Done ***
    SHUTDOWN -r -t 0
)
auto enable gpu on windows startup
download the 64-bit devmanview
create a batch file that calls devmanview.exe to enable the graphics driver
@echo off
echo *** Enabling GPU ***
devmanview.exe /enable "NVIDIA xxx"
echo *** Done ***
pause