1
# Load the frozen detection graph from MODEL_PATH and open a session on it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(MODEL_PATH, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

# Input placeholder and the four output tensors fetched on every frame.
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

# Screen region to capture, passed to ImageGrab as its bbox.
gameWindow = [0, 0, 200, 300]

try:
    while True:
        # A PIL screen grab converted with np.array() is already in RGB channel
        # order, so it is fed to the model as-is. Running it through a BGR<->RGB
        # swap first (as for cv2.VideoCapture frames) would hand the network
        # channel-swapped pixels.
        # NOTE(review): assumes the grab is 3-channel RGB - confirm on platforms
        # where ImageGrab may return RGBA.
        image = np.array(ImageGrab.grab(bbox=tuple(gameWindow)))
        image_expanded = np.expand_dims(image, axis=0)  # add a batch axis

        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})

        # Draws the boxes and labels directly onto `image`.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            category_index,
            use_normalized_coordinates=True,
            line_thickness=8,
            min_score_thresh=0.60)

        # cv2.imshow expects BGR, so convert only for display.
        frame = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        cv2.imshow('Detect the dumb trees', frame)

        # Esc (key code 27) leaves the loop.
        if cv2.waitKey(1) == 27:
            break
finally:
    # Release the window and the session even if a frame raised.
    cv2.destroyAllWindows()
    sess.close()

I'm trying to get the x1, y1, x2, y2 coordinates of the boxes that the API draws using vis_util.visualize_boxes_and_labels_on_image_array().

I've tried looking into detection_boxes, but I get a bunch of values and I have no idea what they mean.

Could someone provide me a solution please? Thanks

2 Answers 2

0

This question seems similar to yours: How to find bounding boxes coordinates in Tensorflow Object Detection API

And someone has posted a simple code solution.

There is another way: you can modify the visualize_boxes_and_labels_on_image_array() function so that it returns the coordinates. Something like:

coordinates_list = []
for box, color in box_to_color_map.items():
  ymin, xmin, ymax, xmax = box
  height, width, channels = image.shape
  ymin = int(ymin*height)
  ymax = int(ymax*height)
  xmin = int(xmin*width)
  xmax = int(xmax*width)
  coordinates_list.append([xmin, ymin, xmax, ymax])

return coordinates_list
Sign up to request clarification or add additional context in comments.

Comments

0

The numbers in detection_boxes are [ymin, xmin, ymax, xmax], normalised to the size of your image (values between 0 and 1), which is why your script passes "use_normalized_coordinates=True" to the visualiser; multiply the y values by the image height and the x values by the image width to get pixel coordinates. Each index in detection_boxes corresponds to the same index in detection_scores and detection_classes, so you have to decide which object class you want, and at what score threshold, in order to find the indices of the matching boxes. Example:

# Keep only the boxes whose class is 3 and whose score is above 0.9.
# NOTE(review): assumes detection_boxes, detection_classes and
# detection_scores are the per-detection result arrays (one entry per
# detection, index-aligned), not the graph tensors - confirm before use.
boxes = [
    box
    for box, detected_class, score in zip(
        detection_boxes, detection_classes, detection_scores)
    if detected_class == 3 and score > 0.9
]

The score threshold set here is 0.9 and the class I am looking for is 3. The boxes that match are stored in a list called boxes.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.