I am trying to detect the pixel coordinates (in the image frame) of a mouse pointer in images taken from a screen recording using python and opencv. I have tried a few of the examples posted for shape detection using cv2.approxPolyDP and unfortunately none seem to work. The image:
Some approaches that I have tried:
import cv2
import numpy as np
from matplotlib import pyplot as plt
# reading image
img = cv2.imread('test.png')
# converting image into grayscale image
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# setting threshold of gray image
_, threshold = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)
# using a findContours() function
contours, _ = cv2.findContours(
threshold, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
i = 0
# list for storing names of shapes
for contour in contours:
# here we are ignoring first counter because
# findcontour function detects whole image as shape
if i == 0:
i = 1
continue
# cv2.approxPloyDP() function to approximate the shape
approx = cv2.approxPolyDP(
contour, 0.01 * cv2.arcLength(contour, True), True)
# using drawContours() function
cv2.drawContours(img, [contour], 0, (0, 0, 255), 5)
# finding center point of shape
M = cv2.moments(contour)
if M['m00'] != 0.0:
x = int(M['m10']/M['m00'])
y = int(M['m01']/M['m00'])
# putting shape name at center of each shape
if len(approx) == 3:
cv2.putText(img, 'Triangle', (x, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
elif len(approx) == 4:
cv2.putText(img, 'Quadrilateral', (x, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
elif len(approx) == 5:
cv2.putText(img, 'Pentagon', (x, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
elif len(approx) == 6:
cv2.putText(img, 'Hexagon', (x, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
else:
cv2.putText(img, 'circle', (x, y),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
# displaying the image after drawing contours
cv2.imshow('shapes', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
import cv2
import numpy as np
image_obj = cv2.imread('test.jpg')
gray = cv2.cvtColor(image_obj, cv2.COLOR_BGR2GRAY)
kernel = np.ones((4, 4), np.uint8)
dilation = cv2.dilate(gray, kernel, iterations=1)
blur = cv2.GaussianBlur(dilation, (5, 5), 0)
thresh = cv2.adaptiveThreshold(blur, 255, 1, 1, 11, 2)
# Now finding Contours ###################
_, contours, _ = cv2.findContours(
thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
coordinates = []
for cnt in contours:
# [point_x, point_y, width, height] = cv2.boundingRect(cnt)
approx = cv2.approxPolyDP(
cnt, 0.07 * cv2.arcLength(cnt, True), True)
if len(approx) == 3:
coordinates.append([cnt])
cv2.drawContours(image_obj, [cnt], 0, (0, 0, 255), 3)
cv2.imwrite("result.png", image_obj)
-- shape detection 2 from a learnopencv tutorial
# Standard imports
import cv2
import numpy as np;
# Read image
im = cv2.imread("blob.jpg", cv2.IMREAD_GRAYSCALE)
# Set up the detector with default parameters.
detector = cv2.SimpleBlobDetector()
# Detect blobs.
keypoints = detector.detect(im)
# Draw detected blobs as red circles.
# cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS ensures the size of the circle corresponds to the size of blob
im_with_keypoints = cv2.drawKeypoints(im, keypoints, np.array([]), (0,0,255), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# Show keypoints
cv2.imshow("Keypoints", im_with_keypoints)
cv2.waitKey(0)
-- color thresholding approaches because mouse pointer is white:
import cv2
import numpy as np
cap = cv2.VideoCapture(0)
while(1):
_, frame = cap.read()
hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
# define range of white color in HSV
# change it according to your need !
lower_white = np.array([0,0,0], dtype=np.uint8)
upper_white = np.array([0,0,255], dtype=np.uint8)
# Threshold the HSV image to get only white colors
mask = cv2.inRange(hsv, lower_white, upper_white)
# Bitwise-AND mask and original image
res = cv2.bitwise_and(frame,frame, mask= mask)
cv2.imshow('frame',frame)
cv2.imshow('mask',mask)
cv2.imshow('res',res)
k = cv2.waitKey(5) & 0xFF
if k == 27:
break
cv2.destroyAllWindows()
I would have thought that some combination of blob detection using tuned circularity, convexity, and inertia parameters along with a color thresholding would be pretty simple (mouse cursors seem pretty distinctive white triangle shapes), but has not proven to be easy.
A pre-trained CNN seemed like overkill for something like this, but perhaps not?
Thanks

cv2.matchTemplate. These should get you started one and two. Note: this method will work for images that have the same size as the template, otherwise search up dynamic template matching