Friday 30 April 2021

Xiao Wu vs. Qian Renxue

opencv 38 mask rcnn

Mask R-CNN can automatically predict both the bounding box and the pixel-wise segmentation mask of each object in an input image. The downside is that the masks it produces aren't always “clean”: there is typically a bit of background that “bleeds” into the foreground segmentation.


a horse is recognized, with a mask drawn around it





#logs
(venv) C:\Users\zchen\PycharmProjects\opencv>python mask_rcnn.py --mask-rcnn mask-rcnn-coco --image assets/mask_rcnn_image.jpg

[INFO] loading Mask R-CNN from disk...
[INFO] showing output for 'horse'...
[INFO] showing output for 'person'...
[INFO] showing output for 'dog'...
[INFO] showing output for 'person'...
[INFO] showing output for 'truck'...

#mask_rcnn.py
import numpy as np
import argparse
import imutils
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-m", "--mask-rcnn", required=True,
                help="base path to mask-rcnn directory")
ap.add_argument("-i", "--image", required=True,
                help="path to input image")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
                help="minimum probability to filter weak detections")
ap.add_argument("-t", "--threshold", type=float, default=0.3,
                help="minimum threshold for pixel-wise mask segmentation")
ap.add_argument("-u", "--use-gpu", type=bool, default=0,
                help="boolean indicating if CUDA GPU should be used")
ap.add_argument("-e", "--iter", type=int, default=10,
                help="# of GrabCut iterations (larger value => slower runtime)")
args = vars(ap.parse_args())

# load the COCO class labels our Mask R-CNN was trained on
labelsPath = os.path.sep.join([args["mask_rcnn"],
                               "object_detection_classes_coco.txt"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3),
                           dtype="uint8")

# derive the paths to the Mask R-CNN weights and model configuration
weightsPath = os.path.sep.join([args["mask_rcnn"],
                                "frozen_inference_graph.pb"])
configPath = os.path.sep.join([args["mask_rcnn"],
                               "mask_rcnn_inception_v2_coco_2018_01_28.pbtxt"])

# load our Mask R-CNN trained on the COCO dataset (90 classes)
# from disk
print("[INFO] loading Mask R-CNN from disk...")
net = cv2.dnn.readNetFromTensorflow(weightsPath, configPath)

# check if we are going to use GPU
if args["use_gpu"]:
    # set CUDA as the preferable backend and target
    print("[INFO] setting preferable backend and target to CUDA...")
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

# load our input image from disk and display it to our screen
image = cv2.imread(args["image"])
image = imutils.resize(image, width=600)
cv2.imshow("Input", image)

# construct a blob from the input image and then perform a
# forward pass of the Mask R-CNN, giving us (1) the bounding box
# coordinates of the objects in the image along with (2) the
# pixel-wise segmentation for each specific object
blob = cv2.dnn.blobFromImage(image, swapRB=True, crop=False)
net.setInput(blob)
(boxes, masks) = net.forward(["detection_out_final",
                              "detection_masks"])

# loop over the number of detected objects
for i in range(0, boxes.shape[2]):
    # extract the class ID of the detection along with the
    # confidence (i.e., probability) associated with the
    # prediction
    classID = int(boxes[0, 0, i, 1])
    confidence = boxes[0, 0, i, 2]

    # filter out weak predictions by ensuring the detected
    # probability is greater than the minimum probability
    if confidence > args["confidence"]:
        # show the class label
        print("[INFO] showing output for '{}'...".format(
            LABELS[classID]))

        # scale the bounding box coordinates back relative to the
        # size of the image and then compute the width and the
        # height of the bounding box
        (H, W) = image.shape[:2]
        box = boxes[0, 0, i, 3:7] * np.array([W, H, W, H])
        (startX, startY, endX, endY) = box.astype("int")
        boxW = endX - startX
        boxH = endY - startY

        # extract the pixel-wise segmentation for the object, resize
        # the mask such that it's the same dimensions as the bounding
        # box, and then finally threshold to create a *binary* mask
        mask = masks[i, classID]
        mask = cv2.resize(mask, (boxW, boxH),
                          interpolation=cv2.INTER_CUBIC)
        mask = (mask > args["threshold"]).astype("uint8") * 255

        # allocate memory for a full-size output mask and store the predicted
        # Mask R-CNN mask inside the bounding box region
        rcnnMask = np.zeros(image.shape[:2], dtype="uint8")
        rcnnMask[startY:endY, startX:endX] = mask

        # apply a bitwise AND to the input image to show the output
        # of applying the Mask R-CNN mask to the image
        rcnnOutput = cv2.bitwise_and(image, image, mask=rcnnMask)

        # show the output of the Mask R-CNN and bitwise AND operation
        cv2.imshow("R-CNN Mask", rcnnMask)
        cv2.imshow("R-CNN Output", rcnnOutput)
        cv2.waitKey(0)
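
Since the Mask R-CNN mask usually bleeds a bit of background, one option is to treat it as a prior and let GrabCut refine the boundary (this is what the otherwise unused --iter argument above hints at). The snippet below is a minimal sketch, not part of the logged run; it assumes it sits inside the detection loop so that image, rcnnMask, and args are in scope:

        # minimal GrabCut refinement sketch (assumes image, rcnnMask, args are in scope)
        # seed GrabCut: mask pixels become "probable foreground", the rest "background"
        gcMask = np.where(rcnnMask > 0, cv2.GC_PR_FGD, cv2.GC_BGD).astype("uint8")

        # allocate the two model arrays GrabCut uses internally, then run in mask mode
        fgModel = np.zeros((1, 65), dtype="float")
        bgModel = np.zeros((1, 65), dtype="float")
        (gcMask, bgModel, fgModel) = cv2.grabCut(image, gcMask, None, bgModel, fgModel,
                                                 iterCount=args["iter"],
                                                 mode=cv2.GC_INIT_WITH_MASK)

        # collapse the four GrabCut labels into a binary mask and visualize the result
        refinedMask = np.where((gcMask == cv2.GC_BGD) | (gcMask == cv2.GC_PR_BGD), 0, 255)
        refinedMask = refinedMask.astype("uint8")
        cv2.imshow("GrabCut Refined Output", cv2.bitwise_and(image, image, mask=refinedMask))
        cv2.waitKey(0)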

reference:

mask-rcnn-coco directory

frozen_inference_graph.pb (trained model)

Wednesday 28 April 2021

Billionaire Lifestyle


opencv 37 grabcut


try to extract tiger from the scene

grabcut mask is generated

tiger is extracted
#main.py
import numpy as np
import argparse
import time
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", type=str,
                default=os.path.sep.join(["images", "adrian.jpg"]),
                help="path to input image that we'll apply GrabCut to")
ap.add_argument("-c", "--iter", type=int, default=10,
                help="# of GrabCut iterations (larger value => slower runtime)")
args = vars(ap.parse_args())

# load the input image from disk and then allocate memory for the
# output mask generated by GrabCut -- this mask should have the same
# spatial dimensions as the input image
image = cv2.imread(args["image"])

xl, yl, xr, yr = (0, 0, 0, 0)
btn_down = False
img = image.copy()


def drag_event(event, x, y, flags, param):
    global btn_down, xl, yl, xr, yr, img

    if event == cv2.EVENT_LBUTTONUP and btn_down:
        btn_down = False

        cv2.rectangle(img, (xl - 2, yl - 2), (xr + 2, yr + 2), (0, 0, 255), 2)
        cv2.imshow('original', img)

        # cv2.grabCut expects the ROI as (x, y, width, height), not two corners
        grab_cut((xl, yl, xr - xl, yr - yl))


    elif event == cv2.EVENT_MOUSEMOVE and btn_down:
        xr, yr = (x, y)
        cv2.rectangle(img, (xl - 2, yl - 2), (xr + 2, yr + 2), (0, 0, 255), 2)
        cv2.imshow('original', img)
        img = image.copy()

    elif event == cv2.EVENT_LBUTTONDOWN:
        btn_down = True
        xl, yl = (x, y)


def grab_cut(rect):
    mask = np.zeros(image.shape[:2], dtype="uint8")

    # allocate memory for two arrays that the GrabCut algorithm internally
    # uses when segmenting the foreground from the background
    fgModel = np.zeros((1, 65), dtype="float")
    bgModel = np.zeros((1, 65), dtype="float")

    # apply GrabCut using the bounding box segmentation method
    start = time.time()
    (mask, bgModel, fgModel) = cv2.grabCut(image, mask, rect, bgModel,
                                           fgModel, iterCount=args["iter"], mode=cv2.GC_INIT_WITH_RECT)
    end = time.time()
    print("[INFO] applying GrabCut took {:.2f} seconds".format(end - start))

    # the output mask has four possible output values, marking each pixel
    # in the mask as (1) definite background, (2) probable background,
    # (3) definite foreground, and (4) probable foreground
    values = (
        ("Definite Background", cv2.GC_BGD),
        ("Probable Background", cv2.GC_PR_BGD),
        ("Definite Foreground", cv2.GC_FGD),
        ("Probable Foreground", cv2.GC_PR_FGD),
    )

    # loop over the possible GrabCut mask values
    for (name, value) in values:
        # construct a mask for the current value
        print("[INFO] showing mask for '{}'".format(name))
        valueMask = (mask == value).astype("uint8") * 255

        # display the mask so we can visualize it
        cv2.imshow(name, valueMask)

    # we'll set all definite background and probable background pixels
    # to 0 while definite foreground and probable foreground pixels are
    # set to 1
    outputMask = np.where((mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD),
                          0, 1)

    # scale the mask from the range [0, 1] to [0, 255]
    outputMask = (outputMask * 255).astype("uint8")

    # apply a bitwise AND to the image using our mask generated by
    # GrabCut to generate our final output image
    output = cv2.bitwise_and(image, image, mask=outputMask)

    # show the input image followed by the mask and output generated by
    # GrabCut and bitwise masking
    cv2.imshow("Input", image)
    cv2.imshow("GrabCut Mask", outputMask)
    cv2.imshow("GrabCut Output", output)


cv2.imshow("original image", image)
cv2.setMouseCallback('original image', drag_event)

cv2.waitKey(0)
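
To test the helper without dragging with the mouse, grab_cut() can also be called directly with a hand-picked (x, y, width, height) rectangle; the numbers below are illustrative only, not taken from the tiger image:

# hedged usage example: run GrabCut on a fixed, hand-picked ROI instead of a drag
# (the coordinates are placeholders; pick a box that surrounds your object)
grab_cut((50, 80, 470, 320))
cv2.waitKey(0)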

--------------------------
#logs
(venv) C:\Users\zchen\PycharmProjects\opencv>python grabcut.py -i assets/tiger.jpg
[INFO] applying GrabCut took 2.90 seconds
[INFO] showing mask for 'Definite Background'
[INFO] showing mask for 'Probable Background'
[INFO] showing mask for 'Definite Foreground'
[INFO] showing mask for 'Probable Foreground'

reference:

mouse drag event


rice field art

Monday 26 April 2021

opencv 36 measure distance by contour

 Let’s say we have a marker or object with a known width W. We then place this marker some distance D from our camera. We take a picture of our object using our camera and then measure the apparent width in pixels P. This allows us to derive the perceived focal length F of our camera:

F = (P x D) / W

As I continue to move my camera both closer and farther away from the object/marker, I can apply the triangle similarity to determine the distance of the object to the camera:

D’ = (W x F) / P
the card has a known width of 0.28 ft, and the camera starts 0.28 ft above it
the apparent width of the card in the image is 397 px
so the focal length is also 397, from formula 1

canny edge detection, then find the largest contour

the apparent width is 208 px once the camera is lifted
the distance is then calculated from formula 2
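
Plugging the numbers above into the two formulas gives a quick sanity check (a standalone sketch, separate from main.py below):

# formula 1: perceived focal length from the calibration shot
KNOWN_WIDTH = 0.28      # ft, width of the card
KNOWN_DISTANCE = 0.28   # ft, camera-to-card distance in the calibration shot
P_CALIB = 397           # px, apparent width of the card in the calibration image
F = (P_CALIB * KNOWN_DISTANCE) / KNOWN_WIDTH   # = 397.0

# formula 2: distance once the camera is lifted and the card appears 208 px wide
P_NEW = 208
D = (KNOWN_WIDTH * F) / P_NEW                  # ~0.53 ft
print(F, D)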


#main.py
from imutils import paths
import numpy as np
import imutils
import cv2


def find_marker(image):
    # convert the image to grayscale, blur it, and detect edges
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 30, 100)

    cv2.imshow("canny edge", edged)

    # find the contours in the edged image and keep the largest one;
    # we'll assume that this is our piece of paper in the image
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    c = max(cnts, key=cv2.contourArea)

    # compute the bounding box of the paper region and return it
    return cv2.minAreaRect(c)


def distance_to_camera(knownWidth, focalLength, perWidth):
    # compute and return the distance from the marker to the camera
    return (knownWidth * focalLength) / perWidth


# initialize the known distance from the camera to the object, which
# in this case is 3.4 inches
KNOWN_DISTANCE = 3.4

# initialize the known object width, which in this case, the piece of
# card is 3.4 inches wide
KNOWN_WIDTH = 3.4

# load the first image that contains an object that is a KNOWN DISTANCE
# (3.4 inches) from our camera, then find the paper marker in the image,
# and initialize the focal length
image = cv2.imread("assets/distance/initial.jpg")
marker = find_marker(image)
print(marker)
focalLength = (marker[1][0] * KNOWN_DISTANCE) / KNOWN_WIDTH
print("focal length is " + str(focalLength))

# loop over the images
for imagePath in sorted(paths.list_images("assets/distance")):
    # load the image, find the marker in the image, then compute the
    # distance to the marker from the camera
    image = cv2.imread(imagePath)
    marker = find_marker(image)
    print(marker)
    inches = distance_to_camera(KNOWN_WIDTH, focalLength, marker[1][0])

    # draw a bounding box around the image and display it
    box = cv2.cv.BoxPoints(marker) if imutils.is_cv2() else cv2.boxPoints(marker)
    box = np.int0(box)
    cv2.drawContours(image, [box], -1, (0, 255, 0), 2)
    cv2.putText(image, "%.2fft" % (inches / 12),
                (image.shape[1] - 200, image.shape[0] - 20), cv2.FONT_HERSHEY_SIMPLEX,
                2.0, (0, 255, 0), 3)
    cv2.imshow("image", image)
    cv2.waitKey(0)

reference:

On North American roads: white plates play for money, yellow plates play with their lives, green plates play for time

Megacities Aerial Drone Adventure 2021

Saturday 24 April 2021

opencv 35 augmented reality


input image pantone card with aruco markers in the corners

source image

aruco markers on card define region of interest

perspective transform source image
multiply transformed image with mask

multiply input image with inverted mask

add previous 2 images
butterfly appears on the card
#main.py
import numpy as np
import argparse
import imutils
import sys
import cv2

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
help="path to input image containing ArUCo tag")
ap.add_argument("-s", "--source", required=True,
help="path to input source image that will be put on input")
args = vars(ap.parse_args())

# load the input image from disk, resize it, and grab its spatial
# dimensions
print("[INFO] loading input image and source image...")
image = cv2.imread(args["image"])
image = imutils.resize(image, width=600)
(imgH, imgW) = image.shape[:2]
# load the source image from disk
source = cv2.imread(args["source"])

# load the ArUCo dictionary, grab the ArUCo parameters, and detect
# the markers
print("[INFO] detecting markers...")
arucoDict = cv2.aruco.Dictionary_get(cv2.aruco.DICT_ARUCO_ORIGINAL)
arucoParams = cv2.aruco.DetectorParameters_create()
(corners, ids, rejected) = cv2.aruco.detectMarkers(image, arucoDict,
                                                   parameters=arucoParams)
# if we have not found four markers in the input image then we cannot
# apply our augmented reality technique
if len(corners) != 4:
    print("[INFO] could not find 4 corners...exiting")
    sys.exit(0)

# otherwise, we've found the four ArUco markers, so we can continue
# by flattening the ArUco IDs list and initializing our list of
# reference points
print("[INFO] constructing augmented reality visualization...")
ids = ids.flatten()
refPts = []

# loop over the IDs of the ArUco markers in top-left, top-right,
# bottom-right, and bottom-left order
for i in (923, 1001, 241, 1007):
    # grab the index of the corner with the current ID and append the
    # corner (x, y)-coordinates to our list of reference points
    j = np.squeeze(np.where(ids == i))
    corner = np.squeeze(corners[j])
    refPts.append(corner)

# unpack our ArUco reference points and use the reference points to
# define the *destination* transform matrix, making sure the points
# are specified in top-left, top-right, bottom-right, and bottom-left
# order
(refPtTL, refPtTR, refPtBR, refPtBL) = refPts
dstMat = [refPtTL[0], refPtTR[1], refPtBR[2], refPtBL[3]]
dstMat = np.array(dstMat)

# grab the spatial dimensions of the source image and define the
# transform matrix for the *source* image in top-left, top-right,
# bottom-right, and bottom-left order
(srcH, srcW) = source.shape[:2]
srcMat = np.array([[0, 0], [srcW, 0], [srcW, srcH], [0, srcH]])

# compute the homography matrix and then warp the source image to the
# destination based on the homography
(H, _) = cv2.findHomography(srcMat, dstMat)
warped = cv2.warpPerspective(source, H, (imgW, imgH))

# construct a mask for the source image now that the perspective warp
# has taken place (we'll need this mask to copy the source image into
# the destination)
mask = np.zeros((imgH, imgW), dtype="uint8")
cv2.fillConvexPoly(mask, dstMat.astype("int32"), (255, 255, 255),
                   cv2.LINE_AA)

# this step is optional, but to give the warped source image a black
# border when it is applied to the input image, you can apply a
# dilation operation to the mask
rect = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
mask = cv2.dilate(mask, rect, iterations=2)

# create a three channel version of the mask by stacking it depth-wise,
# such that we can copy the warped source image into the input image
maskScaled = mask.copy() / 255.0
maskScaled = np.dstack([maskScaled] * 3)

# copy the warped source image into the input image by (1) multiplying
# the warped image and masked together, (2) multiplying the original
# input image with the mask (giving more weight to the input where
# there *ARE NOT* masked pixels), and (3) adding the resulting
# multiplications together
warpedMultiplied = cv2.multiply(warped.astype("float"), maskScaled)
imageMultiplied = cv2.multiply(image.astype(float), 1.0 - maskScaled)
output = cv2.add(warpedMultiplied, imageMultiplied)
output = output.astype("uint8")

# show the input image, source image, output of our augmented reality
cv2.imshow("Input", image)
cv2.imshow("Source", source)
cv2.imshow("OpenCV AR Output", output)
cv2.imshow("mask", mask)
cv2.imshow("warped source", warpedMultiplied.astype("uint8"))
cv2.imshow("masked Input", imageMultiplied.astype("uint8"))
cv2.waitKey(0)

------------------
#terminal
python main.py -i assets/pantone.jpg -s assets/butterfly.jpg
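
The script above leans on np.squeeze (each detected marker's corners come back as a (1, 4, 2) array whose singleton dimension needs dropping) and np.dstack (to turn the single-channel mask into a 3-channel weight map); a tiny standalone illustration:

import numpy as np

# np.squeeze drops size-1 dimensions: a (1, 4, 2) corner array becomes (4, 2)
corner = np.zeros((1, 4, 2))
print(np.squeeze(corner).shape)     # (4, 2)

# np.dstack stacks along a new depth axis: an (H, W) mask becomes (H, W, 3),
# so it can weight each BGR channel during the multiply step
mask = np.ones((4, 6))
print(np.dstack([mask] * 3).shape)  # (4, 6, 3)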

reference:

detect aruco marker

np.squeeze

np.dstack

beautiful rare horse breeds

Friday 23 April 2021

Tang San, Yang Wudi, and Poison Douluo vs. Xue Qinghe, Blood Stab Douluo, and Snake Spear Douluo

opencv 34 detect ArUco markers


all markers are detected correctly

image at angle, majority of markers are detected

markers are detected far away
#main.py
import argparse
import imutils
import cv2
import sys

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image containing ArUCo tag")
ap.add_argument("-t", "--type", type=str,
                default="DICT_ARUCO_ORIGINAL",
                help="type of ArUCo tag to detect")
args = vars(ap.parse_args())

# define names of each possible ArUco tag OpenCV supports
ARUCO_DICT = {
    "DICT_4X4_50": cv2.aruco.DICT_4X4_50,
    "DICT_4X4_100": cv2.aruco.DICT_4X4_100,
    "DICT_4X4_250": cv2.aruco.DICT_4X4_250,
    "DICT_4X4_1000": cv2.aruco.DICT_4X4_1000,
    "DICT_5X5_50": cv2.aruco.DICT_5X5_50,
    "DICT_5X5_100": cv2.aruco.DICT_5X5_100,
    "DICT_5X5_250": cv2.aruco.DICT_5X5_250,
    "DICT_5X5_1000": cv2.aruco.DICT_5X5_1000,
    "DICT_6X6_50": cv2.aruco.DICT_6X6_50,
    "DICT_6X6_100": cv2.aruco.DICT_6X6_100,
    "DICT_6X6_250": cv2.aruco.DICT_6X6_250,
    "DICT_6X6_1000": cv2.aruco.DICT_6X6_1000,
    "DICT_7X7_50": cv2.aruco.DICT_7X7_50,
    "DICT_7X7_100": cv2.aruco.DICT_7X7_100,
    "DICT_7X7_250": cv2.aruco.DICT_7X7_250,
    "DICT_7X7_1000": cv2.aruco.DICT_7X7_1000,
    "DICT_ARUCO_ORIGINAL": cv2.aruco.DICT_ARUCO_ORIGINAL,
    "DICT_APRILTAG_16h5": cv2.aruco.DICT_APRILTAG_16h5,
    "DICT_APRILTAG_25h9": cv2.aruco.DICT_APRILTAG_25h9,
    "DICT_APRILTAG_36h10": cv2.aruco.DICT_APRILTAG_36h10,
    "DICT_APRILTAG_36h11": cv2.aruco.DICT_APRILTAG_36h11
}

# load the input image from disk and resize it
print("[INFO] loading image...")
image = cv2.imread(args["image"])
image = imutils.resize(image, width=600)

# verify that the supplied ArUCo tag exists and is supported by
# OpenCV
if ARUCO_DICT.get(args["type"], None) is None:
    print("[INFO] ArUCo tag of '{}' is not supported".format(
        args["type"]))
    sys.exit(0)

# load the ArUCo dictionary, grab the ArUCo parameters, and detect
# the markers
print("[INFO] detecting '{}' tags...".format(args["type"]))
arucoDict = cv2.aruco.Dictionary_get(ARUCO_DICT[args["type"]])
arucoParams = cv2.aruco.DetectorParameters_create()
(corners, ids, rejected) = cv2.aruco.detectMarkers(image, arucoDict,
                                                   parameters=arucoParams)

# verify *at least* one ArUco marker was detected
if len(corners) > 0:
    # flatten the ArUco IDs list
    ids = ids.flatten()

    # loop over the detected ArUCo corners
    for (markerCorner, markerID) in zip(corners, ids):
        # extract the marker corners (which are always returned in
        # top-left, top-right, bottom-right, and bottom-left order)
        corners = markerCorner.reshape((4, 2))
        (topLeft, topRight, bottomRight, bottomLeft) = corners

        # convert each of the (x, y)-coordinate pairs to integers
        topRight = (int(topRight[0]), int(topRight[1]))
        bottomRight = (int(bottomRight[0]), int(bottomRight[1]))
        bottomLeft = (int(bottomLeft[0]), int(bottomLeft[1]))
        topLeft = (int(topLeft[0]), int(topLeft[1]))

        # draw the bounding box of the ArUCo detection
        cv2.line(image, topLeft, topRight, (0, 255, 0), 2)
        cv2.line(image, topRight, bottomRight, (0, 255, 0), 2)
        cv2.line(image, bottomRight, bottomLeft, (0, 255, 0), 2)
        cv2.line(image, bottomLeft, topLeft, (0, 255, 0), 2)

        # compute and draw the center (x, y)-coordinates of the ArUco
        # marker
        cX = int((topLeft[0] + bottomRight[0]) / 2.0)
        cY = int((topLeft[1] + bottomRight[1]) / 2.0)
        cv2.circle(image, (cX, cY), 4, (0, 0, 255), -1)

        # draw the ArUco marker ID on the image
        cv2.putText(image, str(markerID),
            (topLeft[0], topLeft[1] + 30), cv2.FONT_HERSHEY_SIMPLEX,
            1, (0, 0, 255), 2)
        print("[INFO] ArUco marker ID: {}".format(markerID))

        # show the output image
        cv2.imshow("Image", image)
        cv2.waitKey(0)

-----------------------
#logs
(venv) C:\Users\zchen\PycharmProjects\opencv>python aruco_detection.py -i assets/markers2.png -t DICT_5X5_100
[INFO] loading image...
[INFO] detecting 'DICT_5X5_100' tags...
[INFO] ArUco marker ID: 9
[INFO] ArUco marker ID: 6
[INFO] ArUco marker ID: 7
[INFO] ArUco marker ID: 8
[INFO] ArUco marker ID: 3
[INFO] ArUco marker ID: 5
[INFO] ArUco marker ID: 0
[INFO] ArUco marker ID: 1
[INFO] ArUco marker ID: 2
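
Note: the script uses the pre-4.7 cv2.aruco API (Dictionary_get, DetectorParameters_create). Newer OpenCV releases renamed these helpers; a hedged sketch of the equivalent calls, assuming an OpenCV 4.7+ build:

# hedged sketch for OpenCV >= 4.7, where the ArUco helpers were renamed
arucoDict = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_5X5_100)
arucoParams = cv2.aruco.DetectorParameters()
detector = cv2.aruco.ArucoDetector(arucoDict, arucoParams)
(corners, ids, rejected) = detector.detectMarkers(image)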

reference:

Monday 19 April 2021

opencv 33 generate ArUco marker

 ArUco markers are used for:
  • Camera calibration
  • Object size estimation
  • Measuring the distance between camera and object
  • 3D position
  • Object orientation
  • Robotics and autonomous navigation
ArUco Tags ID 0 - 9
#main.py
import numpy as np
import argparse
import cv2
import sys

ARUCO_DICT = {
    "DICT_4X4_50": cv2.aruco.DICT_4X4_50,
    "DICT_4X4_100": cv2.aruco.DICT_4X4_100,
    "DICT_4X4_250": cv2.aruco.DICT_4X4_250,
    "DICT_4X4_1000": cv2.aruco.DICT_4X4_1000,
    "DICT_5X5_50": cv2.aruco.DICT_5X5_50,
    "DICT_5X5_100": cv2.aruco.DICT_5X5_100,
    "DICT_5X5_250": cv2.aruco.DICT_5X5_250,
    "DICT_5X5_1000": cv2.aruco.DICT_5X5_1000,
    "DICT_6X6_50": cv2.aruco.DICT_6X6_50,
    "DICT_6X6_100": cv2.aruco.DICT_6X6_100,
    "DICT_6X6_250": cv2.aruco.DICT_6X6_250,
    "DICT_6X6_1000": cv2.aruco.DICT_6X6_1000,
    "DICT_7X7_50": cv2.aruco.DICT_7X7_50,
    "DICT_7X7_100": cv2.aruco.DICT_7X7_100,
    "DICT_7X7_250": cv2.aruco.DICT_7X7_250,
    "DICT_7X7_1000": cv2.aruco.DICT_7X7_1000,
    "DICT_ARUCO_ORIGINAL": cv2.aruco.DICT_ARUCO_ORIGINAL,
    "DICT_APRILTAG_16h5": cv2.aruco.DICT_APRILTAG_16h5,
    "DICT_APRILTAG_25h9": cv2.aruco.DICT_APRILTAG_25h9,
    "DICT_APRILTAG_36h10": cv2.aruco.DICT_APRILTAG_36h10,
    "DICT_APRILTAG_36h11": cv2.aruco.DICT_APRILTAG_36h11
}

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--id", type=int, required=True,
help="ID of ArUCo tag to generate")
ap.add_argument("-t", "--type", type=str,
default="DICT_ARUCO_ORIGINAL",
help="type of ArUCo tag to generate")
args = vars(ap.parse_args())

# verify that the supplied ArUCo tag exists and is supported by OpenCV
if ARUCO_DICT.get(args["type"], None) is None:
print("[INFO] ArUCo tag of '{}' is not supported".format(
args["type"]))
sys.exit(0)

# load the ArUCo dictionary
arucoDict = cv2.aruco.Dictionary_get(ARUCO_DICT[args["type"]])

# allocate memory for the output ArUCo tag and then draw the ArUCo
# tag on the output image
print("[INFO] generating ArUCo tag type '{}' with ID '{}'".format(
args["type"], args["id"]))
tag = np.zeros((300, 300, 1), dtype="uint8")

# drawMarker(dictionary, id, sidePixels, img, borderBits)
cv2.aruco.drawMarker(arucoDict, args["id"], 300, tag, 1)

# write the generated ArUCo tag to disk and then display it to our
# screen
cv2.imwrite("assets/aurco.png", tag)
name = "ArUCo ID " + str(args["id"])
cv2.imshow(name, tag)
cv2.waitKey(0)
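
The "ArUco Tags ID 0 - 9" figure above suggests generating a whole batch; a small hedged variation of the script, reusing the imports from main.py and looping over IDs 0-9 (the output file names are illustrative):

# hedged sketch: write tags with IDs 0-9 from one dictionary to separate files
arucoDict = cv2.aruco.Dictionary_get(cv2.aruco.DICT_5X5_100)
for markerID in range(10):
    tag = np.zeros((300, 300, 1), dtype="uint8")
    cv2.aruco.drawMarker(arucoDict, markerID, 300, tag, 1)
    cv2.imwrite("assets/aruco_{}.png".format(markerID), tag)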

--------------------------
#logs
(venv) C:\Users\zchen\PycharmProjects\opencv>python aruco.py --id 9 --type DICT_5X5_100
[INFO] generating ArUCo tag type 'DICT_5X5_100' with ID '9'

reference:

C-17, F-22, MQ8

Sunday 18 April 2021

West Virginia tour

Electric heavy trucks

 

Fuel prices have climbed shockingly fast this year. During last year's pandemic, 92-octane gasoline was barely over 5 yuan, but in most cities it has now broken 6.6 yuan/L, and many people avoid driving whenever they can. Owners of large-displacement engines, a Luxgen for example, would probably rather take the bus or ride an e-bike than drive. But precisely because fuel prices rose, sales of new-energy electric vehicles have grown steadily since last year: the BYD Han EV, Tesla Model 3, and Li ONE all keep selling strongly, the first two have repeatedly ranked in the top five of their segments, and the Li ONE has taken the mid-to-large SUV sales crown several times. New-energy vehicles really could end up displacing combustion cars.

Compared with passenger cars, heavy trucks are genuine "gas guzzlers", which has also bred plenty of "fuel rats" (fuel thieves). Trucks running the highways almost all have modified tanks, with huge tanks on both sides; a single fill-up costs at least five or six thousand yuan, often eight or nine thousand, and over ten thousand is perfectly normal, so many drivers have someone watch the tanks while they rest, or even keep a dog to guard them. Truck drivers also avoid braking because stopping and restarting a loaded truck burns more fuel, so many coast toward traffic lights to avoid a standing start. Passenger cars and buses are already going electric, and now Musk is shaking things up again with an electric heavy truck: 0-100 km/h in 6 seconds, more than 800 km of range, priced from 1 million yuan.

First, why would Tesla's electric truck put people out of work? Musk has said it will carry the same "self-driving technology" as Tesla's passenger cars, taking over the wheel from the driver with lane-keeping assist, active braking, and so on. That alone is not enough to cost anyone a job, but the next feature Musk described really could help companies and logistics operators cut payroll.

The new truck will offer a "convoy mode". A train moves because the locomotive at the front pulls it; likewise, a single driver in the lead Tesla truck can have the trucks behind link up electronically and run fully driverless. The system watches the surrounding road to change lanes, and when the lead vehicle slows or brakes, the following trucks do the same. Is that enough to put people out of work? Share your view in the comments.

The styling is also very futuristic. The seating position is a little lower than in Jiefang or Dongfeng trucks, so the blind spots shrink accordingly. At first glance the whole vehicle looks remarkably like a Hexie bullet train, and the drag coefficient is as low as 0.36, almost supercar territory. Inside, the cabin is clean and high-tech, with the driving screens in the center console; because the side mirrors are replaced by cameras, everything is shown on displays. If it is ever sold in China it will clearly have to fit conventional mirrors, since Chinese law does not yet allow camera-only mirrors.

There is only one seat inside, but the cabin is an astonishing 2 meters tall; with a little modification you could fit two levels of bunks, and even sitting on a bunk the headroom would feel far less cramped than a hard-sleeper train berth, closer to a modern sleeper car. What a truck cares about most is power, all the more so for an electric one. The two front wheels of the tractor steer, while each of the four rear wheels carries its own motor, with a reported output of around 1000 hp. In overseas tests it accelerated 0-100 km/h in 20 seconds with a 36-ton load and in under 6 seconds unloaded, far stronger than our diesel trucks.

Moreover, the four motors are independent: if one fails, the truck can still drive normally, and they are said to run "fault-free" for up to 1.6 million kilometers, so motor life is very long. According to Musk, the truck will come in two versions, an entry model with 482 km of range and a higher one reaching 804 km. That is only the initial plan; Musk says the battery pack will also be redesigned to push the estimated range further, quite possibly allowing 643 km of driving from a half-hour charge. An ordinary diesel truck also needs about half an hour from queuing at the pump to pulling away, so this electric truck should not hurt transport efficiency.

Saturday 17 April 2021

opencv 32 reading book

perspective transform to straighten the book

adaptive threshold to highlight text

detect texts with tesseract
reorder sentences into paragraph

read paragraph with text to speech
#book_reader.py
import cv2
import numpy as np
import imutils
import pytesseract
#import pyttsx3
from gtts import gTTS
from playsound import playsound
import subprocess
import os

pytesseract.pytesseract.tesseract_cmd = "C:\\Program Files\\Tesseract-OCR\\tesseract.exe"

def speak(audioString):
    print(audioString)
    tts = gTTS(text=audioString, lang='en')
    tts.save("assets/audio.mp3")


    wmp = r"C:\Program Files (x86)\Windows Media Player\wmplayer.exe"
    media_file = os.path.abspath(os.path.relpath("assets/audio.mp3"))
    p = subprocess.call([wmp, media_file])


    playsound("audio.mp3")
    #engine = pyttsx3.init()
    #engine.say(audioString)
    #engine.runAndWait()

img = cv2.imread("assets/ikea.jpg")
h, w, c = img.shape
relative_w = 1500
relative_h = int(relative_w / w * h)
img = cv2.resize(img, (relative_w, relative_h))
img_copy = img.copy()

gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (7, 7), 0)

# perform edge detection, then perform a dilation + erosion to
# close gaps in between object edges
edged = cv2.Canny(gray, 100, 200)
edged = cv2.dilate(edged, None, iterations=1)
edged = cv2.erode(edged, None, iterations=1)

# find contours in the edge map
cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL,
                        cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)

# find the contour with the biggest area
largest = 0
largest_contour = None
for c in cnts:
    area = cv2.contourArea(c)

    if area > largest:
        largest = area
        largest_contour = c

box = cv2.minAreaRect(largest_contour)
box = cv2.cv.BoxPoints(box) if imutils.is_cv2() else cv2.boxPoints(box)
box = np.array(box, dtype="int")

cv2.drawContours(img_copy, [box.astype("int")], -1, (0, 255, 0), 2)

box_frame = np.float32(box)
img_frame = np.float32([[0, relative_h], [0, 0], [relative_w, 0], [relative_w, relative_h]])

matrix = cv2.getPerspectiveTransform(box_frame, img_frame)
straight_img = cv2.warpPerspective(img, matrix, (relative_w, relative_h))


gray2 = cv2.cvtColor(straight_img, cv2.COLOR_BGR2GRAY)
adaptive_threshold = cv2.adaptiveThreshold(gray2, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 11, 2)
adaptive_threshold_inv = cv2.bitwise_not(adaptive_threshold)
median_blur = cv2.medianBlur(adaptive_threshold, 3)

img_rgb = cv2.cvtColor(median_blur, cv2.COLOR_GRAY2RGB)

hImg, wImg, _ = img_rgb.shape

boxes = pytesseract.image_to_data(img_rgb)
print(boxes)

#record sentences from detected texts
blocks = []
previous_x = 0
block = []
for i, b in enumerate(boxes.splitlines()):
    if i != 0:
        b = b.split()

        if len(b) == 12:
            x, y, w, h = (int(b[6]), int(b[7]), int(b[8]), int(b[9]))
            cv2.rectangle(straight_img, (x, y), (w + x, h + y), (0, 0, 255), 1)
            cv2.putText(straight_img, b[11], (x, y), cv2.FONT_HERSHEY_COMPLEX, 0.5, (50, 50, 255), 2)

            if abs(x - previous_x) > 300:
                blocks.append({'block': block, 'x': previous_x})
                block = []

            block.append(b[11])
            previous_x = x

blocks.append({'block': block, 'x': previous_x})
print(blocks)

block_length = len(blocks)

for i in range(0, block_length):

    for j in range(i+1, block_length):

        if abs(blocks[i]['x'] - blocks[j]['x']) < 300:
            removed = blocks.pop(j)
            blocks.insert(i + 1, removed)
            break

print(blocks)

#reorder sentences into a paragraph
paragraph = ""
previous_x = 0

for i, b in enumerate(blocks):

    if abs(b['x'] - previous_x) < 300:
        paragraph = paragraph + " ".join(b['block']) + "\n"

    else:
        paragraph = paragraph + " ".join(b['block']) + " "

    previous_x = b['x']

print(paragraph)

cv2.imshow("picture", img_copy)
#cv2.imshow("canny edge", edged)
cv2.imshow("straight image", straight_img)
cv2.imshow("median blur", median_blur)

cv2.waitKey(0)

speak(paragraph)

------------------------------------
#logs
#detected words table
level   page_num        block_num       par_num line_num        word_num        left    top     width   height  conf    text
1       1       0       0       0       0       0       0       1500    1125    -1
2       1       1       0       0       0       180     997     576     24      -1
3       1       1       1       0       0       180     997     576     24      -1
4       1       1       1       1       0       180     997     576     24      -1
5       1       1       1       1       1       180     1009    28      12      85      SEE
5       1       1       1       1       2       215     1008    34      12      85      OUR
5       1       1       1       1       3       257     993     77      35      95      BIRTHDAY
5       1       1       1       1       4       345     993     59      35      91      OFFERS
5       1       1       1       1       5       424     1013    2       3       7       "
5       1       1       1       1       6       730     1000    26      11      5       ot
2       1       2       0       0       0       499     1006    433     52      -1
3       1       2       1       0       0       499     1006    433     52      -1
4       1       2       1       1       0       499     1006    433     29      -1
5       1       2       1       1       1       499     1012    40      23      30      THE
5       1       2       1       1       2       546     1012    62      20      96      PRICES
5       1       2       1       1       3       614     1019    21      12      91      IN
5       1       2       1       1       4       641     1018    42      12      91      THIS
5       1       2       1       1       5       690     1017    104     13      92      CATALOGUE
5       1       2       1       1       6       801     1016    36      12      92      CAN
5       1       2       1       1       7       843     1006    49      22      96      ONLY
5       1       2       1       1       8       898     1006    34      22      96      GET
4       1       2       1       2       0       506     1036    395     22      -1
5       1       2       1       2       1       506     1040    61      12      95      LOWER
5       1       2       1       2       2       574     1027    51      35      73      UNTIL
5       1       2       1       2       3       633     1027    39      35      73      JULY
5       1       2       1       2       4       681     1037    20      13      79      31
5       1       2       1       2       5       709     1037    49      15      79      2019,
5       1       2       1       2       6       765     1037    59      12      96      NEVER
5       1       2       1       2       7       830     1036    71      12      95      HIGHER
2       1       3       0       0       0       170     1022    204     47      -1
3       1       3       1       0       0       174     1022    200     43      -1
4       1       3       1       1       0       176     1022    190     20      -1
5       1       3       1       1       1       176     1028    19      12      71      AT
5       1       3       1       1       2       206     1015    28      34      71      THE
5       1       3       1       1       3       240     1025    69      15      93      BACK
5       1       3       1       1       4       291     1015    17      34      93      OF
5       1       3       1       1       5       316     1022    50      20      15      THE.
4       1       3       1       2       0       170     1041    204     28      -1
5       1       3       1       2       1       170     1041    100     28      94      CATALOGUE
5       1       3       1       2       2       349     1042    25      3       17      mo
2       1       4       0       0       0       0       0       1500    1125    -1
3       1       4       1       0       0       0       0       1500    1125    -1
4       1       4       1       1       0       0       0       1500    1125    -1
5       1       4       1       1       1       0       0       1500    1125    95

#sentence
[{'block': ['SEE', 'OUR', 'BIRTHDAY', 'OFFERS', '"'], 'x': 424}, {'block': ['ot', 'THE', 'PRICES', 'IN', 'THIS', 'CATALOGUE', 'CAN', 'ONLY', 'GET'], 'x': 898}, {'bloc
k': ['LOWER', 'UNTIL', 'JULY', '31', '2019,', 'NEVER', 'HIGHER'], 'x': 830}, {'block': ['AT', 'THE', 'BACK', 'OF', 'THE.', 'CATALOGUE', 'mo'], 'x': 349}]

#ordered sentence
[{'block': ['SEE', 'OUR', 'BIRTHDAY', 'OFFERS', '"'], 'x': 424}, {'block': ['AT', 'THE', 'BACK', 'OF', 'THE.', 'CATALOGUE', 'mo'], 'x': 349}, {'block': ['ot', 'THE',
'PRICES', 'IN', 'THIS', 'CATALOGUE', 'CAN', 'ONLY', 'GET'], 'x': 898}, {'block': ['LOWER', 'UNTIL', 'JULY', '31', '2019,', 'NEVER', 'HIGHER'], 'x': 830}]

#paragraph
SEE OUR BIRTHDAY OFFERS " AT THE BACK OF THE. CATALOGUE mo
ot THE PRICES IN THIS CATALOGUE CAN ONLY GET LOWER UNTIL JULY 31 2019, NEVER HIGHER

reference:
perspective transform

adaptive threshold

text to speech
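
The speak() helper above routes the gTTS mp3 through Windows Media Player; the commented-out pyttsx3 lines hint at an offline alternative that needs no audio file or external player. A minimal sketch of that route, assuming pyttsx3 is installed:

import pyttsx3

def speak_offline(audio_string):
    # offline text-to-speech straight from the string, no mp3 and no player window
    engine = pyttsx3.init()
    engine.say(audio_string)
    engine.runAndWait()

speak_offline("SEE OUR BIRTHDAY OFFERS AT THE BACK OF THE CATALOGUE")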