diff --git a/caffe/TinyYolo/Makefile b/caffe/TinyYolo/Makefile
new file mode 100644
index 00000000..81b7ccf8
--- /dev/null
+++ b/caffe/TinyYolo/Makefile
@@ -0,0 +1,129 @@
+
+ifneq ($(findstring movidius, $(PYTHONPATH)), movidius)
+	export PYTHONPATH:=/opt/movidius/caffe/python:/opt/movidius/mvnc/python:$(PYTHONPATH)
+endif
+
+NCCOMPILE = mvNCCompile
+NCPROFILE = mvNCProfile
+NCCHECK = mvNCCheck
+
+GRAPH_FILENAME = yolo_tiny.graph
+GET_GRAPH = wget --no-cache -P . http://ncs-forum-uploads.s3.amazonaws.com/ncappzoo/tiny_yolo/${GRAPH_FILENAME}
+
+PROTOTXT_FILENAME = tiny-yolo-v1.prototxt
+GET_PROTOTXT = wget --no-cache -P . http://ncs-forum-uploads.s3.amazonaws.com/ncappzoo/tiny_yolo/${PROTOTXT_FILENAME}
+
+CAFFEMODEL_FILENAME = tiny-yolo-v1_53000.caffemodel
+GET_CAFFEMODEL = wget --no-cache -P . -N http://ncs-forum-uploads.s3.amazonaws.com/ncappzoo/tiny_yolo/${CAFFEMODEL_FILENAME}
+
+
+.PHONY: all
+all: profile compile
+
+.PHONY: prototxt
+prototxt:
+	@echo "\nmaking prototxt"
+	@if [ -e ${PROTOTXT_FILENAME} ] ; \
+	then \
+		echo "Prototxt file already exists"; \
+	else \
+		echo "Downloading Prototxt file"; \
+		${GET_PROTOTXT}; \
+		if [ -e ${PROTOTXT_FILENAME} ] ; \
+		then \
+			echo "got prototxt file." ; \
+		else \
+			echo "***\nError - Could not download prototxt file. Check network and proxy settings \n***\n"; \
+			exit 1; \
+		fi ; \
+	fi
+
+.PHONY: caffemodel
+caffemodel:
+	@echo "\nmaking caffemodel"
+	@if [ -e ${CAFFEMODEL_FILENAME} ] ; \
+	then \
+		echo "caffemodel file already exists"; \
+	else \
+		echo "Downloading caffemodel file"; \
+		${GET_CAFFEMODEL}; \
+		if ! [ -e ${CAFFEMODEL_FILENAME} ] ; \
+		then \
+			echo "***\nError - Could not download caffemodel file. Check network and proxy settings \n***\n"; \
+			exit 1; \
+		fi ; \
+	fi
+
+.PHONY: profile
+profile: prototxt
+	@echo "\nmaking profile"
+	${NCPROFILE} ${PROTOTXT_FILENAME} -s 12
+
+.PHONY: browse_profile
+browse_profile: profile
+	@echo "\nmaking browse_profile"
+	@if [ -e output_report.html ] ; \
+	then \
+		firefox output_report.html & \
+	else \
+		echo "***\nError - output_report.html not found" ; \
+	fi ;
+
+.PHONY: compile
+compile: prototxt caffemodel
+	@echo "\nmaking compile"
+	${NCCOMPILE} -o ${GRAPH_FILENAME} -w ${CAFFEMODEL_FILENAME} -s 12 ${PROTOTXT_FILENAME}
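+
+# With the variable defaults above, the compile target expands to a command
+# line like: mvNCCompile -o yolo_tiny.graph -w tiny-yolo-v1_53000.caffemodel -s 12 tiny-yolo-v1.prototxt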
+
+.PHONY: graph
+graph:
+	@echo "\nmaking (downloading) graph"
+	@if [ -e ${GRAPH_FILENAME} ] ; \
+	then \
+		echo "graph file already exists"; \
+	else \
+		${GET_GRAPH}; \
+		if ! [ -e ${GRAPH_FILENAME} ] ; \
+		then \
+			echo "***\nError - Could not download graph file. Check network and proxy settings \n***\n"; \
+			exit 1; \
+		fi ; \
+	fi ;
+
+.PHONY: run_py
+run_py: compile
+	@echo "\nmaking run_py"
+	python3 ./run.py
+
+.PHONY: help
+help:
+	@echo "possible make targets: ";
+	@echo "  make help - shows this message";
+	@echo "  make all - makes the following: prototxt, caffemodel, profile, compile";
+	@echo "  make prototxt - downloads the Caffe prototxt file for the network";
+	@echo "  make caffemodel - downloads the caffemodel for the network";
+	@echo "  make compile - runs SDK compiler tool to compile the NCS graph file for the network";
+	@echo "  make profile - runs the SDK profiler tool to profile the network creating output_report.html";
+	@echo "  make browse_profile - runs the SDK profiler tool and brings up the report in a browser.";
+	@echo "  make run_py - runs the run.py python example program";
+	@echo "  make clean - removes all created content";
+
+clean_caffe_model:
+	@echo "\nmaking clean_caffe_model"
+	rm -f ${PROTOTXT_FILENAME}
+	rm -f ${CAFFEMODEL_FILENAME}
+
+clean: clean_caffe_model
+	@echo "\nmaking clean"
+	rm -f ${GRAPH_FILENAME}
+	rm -f graph
+	rm -f output.gv
+	rm -f output.gv.svg
+	rm -f output_report.html
+	rm -f output_expected.npy
+	rm -f zero_weights.caffemodel
+	rm -f output_result.npy
+	rm -f output_val.csv
+
diff --git a/caffe/TinyYolo/README.md b/caffe/TinyYolo/README.md
new file mode 100644
index 00000000..39bb8945
--- /dev/null
+++ b/caffe/TinyYolo/README.md
@@ -0,0 +1,28 @@
+# Introduction
+The TinyYolo network can be used for object detection and classification. See [https://pjreddie.com/darknet/yolov1/](https://pjreddie.com/darknet/yolov1/) for more information on this network.
+The provided Makefile does the following:
+1. Downloads the Caffe prototxt file.
+2. Downloads the trained .caffemodel file.
+3. Profiles and compiles the network using the Neural Compute SDK.
+4. Runs the provided run.py program, which does a single inference on a provided image as an example of how to use the network with the Neural Compute API.
+
+# Makefile
+The provided Makefile has various targets that help with the above-mentioned tasks.
+
+## make help
+Shows available targets.
+
+## make all
+Runs profile and compile.
+
+## make profile
+Runs the provided network on the NCS and generates per-layer statistics that are helpful for understanding the performance of the network on the Neural Compute Stick.
+
+## make compile
+Uses the network description and the trained weights files to generate a Movidius internal 'graph' format file. This file is later used for loading the network onto the Neural Compute Stick and executing the network.
+
+## make run_py
+Runs the provided run.py python program, which sends a single image to the Neural Compute Stick, then receives and displays the inference results in a GUI window showing the identified objects in the image.
+
+## make clean
+Removes all the temporary files that are created by the Makefile.
diff --git a/caffe/TinyYolo/run.py b/caffe/TinyYolo/run.py
new file mode 100755
index 00000000..90a95fd9
--- /dev/null
+++ b/caffe/TinyYolo/run.py
@@ -0,0 +1,307 @@
+#! /usr/bin/env python3
+
+# Copyright(c) 2017 Intel Corporation.
+# License: MIT See LICENSE file in root directory.
+
+from mvnc import mvncapi as mvnc
+import sys
+import numpy as np
+import cv2
+import time
+
+# Assume running in examples/caffe/TinyYolo and graph file is in current directory.
+input_image_file = '../../data/images/nps_chair.png'
+tiny_yolo_graph_file = './yolo_tiny.graph'
+
+# Tiny Yolo assumes input images are these dimensions.
+NETWORK_IMAGE_WIDTH = 448
+NETWORK_IMAGE_HEIGHT = 448
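+# (a 448 x 448 input is what gives the 7 x 7 grid of 64 x 64 pixel cells that
+# filter_objects() below assumes when decoding the network output)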
+
+
+# Interpret the output from a single inference of TinyYolo (GetResult)
+# and filter out objects/boxes with low probabilities.
+# inference_result is the array of floats returned from the API GetResult() but
+# converted to float32 format.
+# input_image_width is the width of the input image
+# input_image_height is the height of the input image
+# Returns a list of lists; each of the inner lists represents one found object
+# and contains the following 6 values:
+#    string that is the network classification, e.g. 'cat' or 'chair'
+#    float value for box center X pixel location within source image
+#    float value for box center Y pixel location within source image
+#    float value for box width in pixels within source image
+#    float value for box height in pixels within source image
+#    float value that is the probability for the network classification
+def filter_objects(inference_result, input_image_width, input_image_height):
+
+    # the raw number of floats returned from the inference (GetResult())
+    num_inference_results = len(inference_result)
+
+    # the 20 classes this network was trained on
+    network_classifications = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car",
+                               "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
+                               "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+
+    # only keep boxes with probabilities greater than this
+    probability_threshold = 0.05
+
+    num_classifications = len(network_classifications)  # should be 20
+    grid_size = 7  # the image is a 7x7 grid. Each box in the grid is 64x64 pixels
+    boxes_per_grid_cell = 2  # the number of boxes returned for each grid cell
+
+    # grid_size is 7 (grid is 7x7)
+    # num_classifications is 20
+    # boxes_per_grid_cell is 2
+    all_probabilities = np.zeros((grid_size, grid_size, boxes_per_grid_cell, num_classifications))
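+
+    # For reference, the 1470 floats in inference_result are assumed to follow
+    # the standard YOLO v1 output layout, which the slicing below relies on:
+    #   inference_result[0:980]     7 x 7 x 20 class probabilities per grid cell
+    #   inference_result[980:1078]  7 x 7 x 2 box confidence scale factors
+    #   inference_result[1078:1470] 7 x 7 x 2 x 4 box coordinates (x, y, w, h)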
+
+    # classification_probabilities contains a probability for each classification for
+    # each 64x64 pixel square of the grid.  The source image contains
+    # 7x7 of these 64x64 pixel squares and there are 20 possible classifications
+    classification_probabilities = \
+        np.reshape(inference_result[0:980], (grid_size, grid_size, num_classifications))
+    num_of_class_probs = len(classification_probabilities)
+
+    # The probability scale factor for each box
+    box_prob_scale_factor = np.reshape(inference_result[980:1078], (grid_size, grid_size, boxes_per_grid_cell))
+
+    # get the boxes from the results and adjust to be pixel units
+    all_boxes = np.reshape(inference_result[1078:], (grid_size, grid_size, boxes_per_grid_cell, 4))
+    boxes_to_pixel_units(all_boxes, input_image_width, input_image_height, grid_size)
+
+    # adjust the probabilities with the scaling factor
+    for box_index in range(boxes_per_grid_cell):  # loop over boxes
+        for class_index in range(num_classifications):  # loop over classifications
+            all_probabilities[:, :, box_index, class_index] = np.multiply(classification_probabilities[:, :, class_index], box_prob_scale_factor[:, :, box_index])
+
+
+    probability_threshold_mask = np.array(all_probabilities >= probability_threshold, dtype='bool')
+    box_threshold_mask = np.nonzero(probability_threshold_mask)
+    boxes_above_threshold = all_boxes[box_threshold_mask[0], box_threshold_mask[1], box_threshold_mask[2]]
+    classifications_for_boxes_above = np.argmax(all_probabilities, axis=3)[box_threshold_mask[0], box_threshold_mask[1], box_threshold_mask[2]]
+    probabilities_above_threshold = all_probabilities[probability_threshold_mask]
+
+    # sort the boxes from highest probability to lowest and then
+    # sort the probabilities and classifications to match
+    argsort = np.array(np.argsort(probabilities_above_threshold))[::-1]
+    boxes_above_threshold = boxes_above_threshold[argsort]
+    classifications_for_boxes_above = classifications_for_boxes_above[argsort]
+    probabilities_above_threshold = probabilities_above_threshold[argsort]
+
+
+    # get mask for boxes that seem to be the same object
+    duplicate_box_mask = get_duplicate_box_mask(boxes_above_threshold)
+
+    # update the boxes, probabilities and classifications removing duplicates.
+    boxes_above_threshold = boxes_above_threshold[duplicate_box_mask]
+    classifications_for_boxes_above = classifications_for_boxes_above[duplicate_box_mask]
+    probabilities_above_threshold = probabilities_above_threshold[duplicate_box_mask]
+
+    classes_boxes_and_probs = []
+    for i in range(len(boxes_above_threshold)):
+        classes_boxes_and_probs.append([network_classifications[classifications_for_boxes_above[i]], boxes_above_threshold[i][0], boxes_above_threshold[i][1], boxes_above_threshold[i][2], boxes_above_threshold[i][3], probabilities_above_threshold[i]])
+
+    return classes_boxes_and_probs
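+
+# The list returned by filter_objects() looks like this for a single detection
+# (illustrative values only, not real output):
+#     [['chair', 228.5, 240.2, 166.1, 302.7, 0.63]]
+# i.e. class name, box center x, center y, width and height in source image
+# pixels, and the scaled classification probability.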
+
+# creates a mask to remove duplicate objects (boxes) and their related probabilities and classifications
+# that should be considered the same object. This is determined by how similar the boxes are
+# based on the intersection-over-union metric.
+# box_list is a list of boxes (4 floats for center x, center y, width and height)
+def get_duplicate_box_mask(box_list):
+    # The intersection-over-union threshold to use when determining duplicates.
+    # objects/boxes found that are over this threshold will be
+    # considered the same object
+    max_iou = 0.30
+
+    box_mask = np.ones(len(box_list))
+
+    for i in range(len(box_list)):
+        if box_mask[i] == 0: continue
+        for j in range(i + 1, len(box_list)):
+            if get_intersection_over_union(box_list[i], box_list[j]) > max_iou:
+                box_mask[j] = 0.0
+
+    filter_iou_mask = np.array(box_mask > 0.0, dtype='bool')
+    return filter_iou_mask
+
+# Converts the boxes in box_list to pixel units
+# assumes box_list is the box output from
+# the tiny yolo network and is [grid_size x grid_size x 2 x 4].
+def boxes_to_pixel_units(box_list, image_width, image_height, grid_size):
+
+    # number of boxes per grid cell
+    boxes_per_cell = 2
+
+    # setup some offset values to map boxes to pixels
+    # box_offset will be [[[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6]] ... repeated 7 times]
+    box_offset = np.transpose(np.reshape(np.array([np.arange(grid_size)] * (grid_size * 2)), (boxes_per_cell, grid_size, grid_size)), (1, 2, 0))
+
+    # adjust the box centers to be relative to the whole image rather than one grid cell
+    box_list[:, :, :, 0] += box_offset
+    box_list[:, :, :, 1] += np.transpose(box_offset, (1, 0, 2))
+    box_list[:, :, :, 0:2] = box_list[:, :, :, 0:2] / (grid_size * 1.0)
+
+    # adjust the widths and heights (the network returns their square roots)
+    box_list[:, :, :, 2] = np.multiply(box_list[:, :, :, 2], box_list[:, :, :, 2])
+    box_list[:, :, :, 3] = np.multiply(box_list[:, :, :, 3], box_list[:, :, :, 3])
+
+    # scale the boxes to the image size in pixels
+    box_list[:, :, :, 0] *= image_width
+    box_list[:, :, :, 1] *= image_height
+    box_list[:, :, :, 2] *= image_width
+    box_list[:, :, :, 3] *= image_height
+
+
+# Evaluate the intersection-over-union for two boxes
+# The intersection-over-union metric determines how close
+# two boxes are to being the same box. The closer the boxes
+# are to being the same, the closer the metric will be to 1.0.
+# box_1 and box_2 are arrays of 4 numbers: the (x, y)
+# point that defines the center of the box and the width and height of
+# the box.
+# Returns the intersection-over-union (between 0.0 and 1.0)
+# for the two boxes specified.
+def get_intersection_over_union(box_1, box_2):
+
+    # one dimension of the intersecting box
+    intersection_dim_1 = min(box_1[0]+0.5*box_1[2], box_2[0]+0.5*box_2[2]) - \
+                         max(box_1[0]-0.5*box_1[2], box_2[0]-0.5*box_2[2])
+
+    # the other dimension of the intersecting box
+    intersection_dim_2 = min(box_1[1]+0.5*box_1[3], box_2[1]+0.5*box_2[3]) - \
+                         max(box_1[1]-0.5*box_1[3], box_2[1]-0.5*box_2[3])
+
+    if intersection_dim_1 < 0 or intersection_dim_2 < 0:
+        # no intersection area
+        intersection_area = 0
+    else:
+        # intersection area is the product of the intersection dimensions
+        intersection_area = intersection_dim_1 * intersection_dim_2
+
+    # calculate the union area, which is the area of each box added together
+    # and then we need to subtract out the intersection area since
+    # it is counted twice (by definition it is in each box)
+    union_area = box_1[2]*box_1[3] + box_2[2]*box_2[3] - intersection_area
+
+    # now we can return the intersection over union
+    iou = intersection_area / union_area
+
+    return iou
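+
+# A quick sanity check of the IoU math above (hypothetical boxes, not network
+# output): two identical 10x10 boxes centered at (5, 5) overlap completely, so
+#     get_intersection_over_union([5, 5, 10, 10], [5, 5, 10, 10])  -> 1.0
+# while a box shifted right by its full width only shares an edge, so
+#     get_intersection_over_union([5, 5, 10, 10], [15, 5, 10, 10]) -> 0.0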
+
+# Displays a GUI window with an image that contains
+# boxes and labels for found objects. Will not return until
+# the user presses a key.
+# source_image is the image on which the inference was run; it is assumed to have dimensions matching the network input
+# filtered_objects is a list of lists (as returned from filter_objects());
+# each of the inner lists represents one found object and contains
+# the following 6 values:
+#    string that is the network classification, e.g. 'cat' or 'chair'
+#    float value for box center X pixel location within source image
+#    float value for box center Y pixel location within source image
+#    float value for box width in pixels within source image
+#    float value for box height in pixels within source image
+#    float value that is the probability for the network classification
+# (the source image width and height are taken from source_image.shape
+# inside this function rather than passed in as parameters)
def display_objects_in_gui(source_image, filtered_objects):
+    # copy image so we can draw on it.
+    display_image = source_image.copy()
+    source_image_width = source_image.shape[1]
+    source_image_height = source_image.shape[0]
+
+    # loop through each box and draw it on the image along with a classification label
+    for obj_index in range(len(filtered_objects)):
+        center_x = int(filtered_objects[obj_index][1])
+        center_y = int(filtered_objects[obj_index][2])
+        half_width = int(filtered_objects[obj_index][3])//2
+        half_height = int(filtered_objects[obj_index][4])//2
+
+        # calculate box (left, top) and (right, bottom) coordinates
+        box_left = max(center_x - half_width, 0)
+        box_top = max(center_y - half_height, 0)
+        box_right = min(center_x + half_width, source_image_width)
+        box_bottom = min(center_y + half_height, source_image_height)
+
+        # draw the rectangle on the image. This is hopefully around the object
+        box_color = (0, 255, 0)  # green box
+        box_thickness = 2
+        cv2.rectangle(display_image, (box_left, box_top), (box_right, box_bottom), box_color, box_thickness)
+
+        # draw the classification label string just above and to the left of the rectangle
+        label_background_color = (70, 120, 70)  # greyish green background for text
+        label_text_color = (255, 255, 255)  # white text
+        cv2.rectangle(display_image, (box_left, box_top-20), (box_right, box_top), label_background_color, -1)
+        cv2.putText(display_image, filtered_objects[obj_index][0] + ' : %.2f' % filtered_objects[obj_index][5], (box_left+5, box_top-7), cv2.FONT_HERSHEY_SIMPLEX, 0.5, label_text_color, 1)
+
+    cv2.imshow('TinyYolo (hit key to exit)', display_image)
+    cv2.waitKey(0)
+
+# This function is called from the entry point to do
+# all the work.
+def main():
+    print('Running NCS Caffe TinyYolo example')
+
+    # Set logging level and initialize/open the first NCS we find
+    mvnc.SetGlobalOption(mvnc.GlobalOption.LOG_LEVEL, 0)
+    devices = mvnc.EnumerateDevices()
+    if len(devices) == 0:
+        print('No devices found')
+        return 1
+    device = mvnc.Device(devices[0])
+    device.OpenDevice()
+
+    # Load graph from disk and allocate graph via API
+    with open(tiny_yolo_graph_file, mode='rb') as f:
+        graph_from_disk = f.read()
+
+    graph = device.AllocateGraph(graph_from_disk)
+
+    # Read image from file, resize it to network width and height,
+    # save a copy in display_image for display, then convert to float32, normalize (divide by 255),
+    # and finally convert to float16 to pass to LoadTensor as input for an inference
+    input_image = cv2.imread(input_image_file)
+    input_image = cv2.resize(input_image, (NETWORK_IMAGE_WIDTH, NETWORK_IMAGE_HEIGHT), interpolation=cv2.INTER_LINEAR)
+    display_image = input_image
+    input_image = input_image.astype(np.float32)
+    input_image = np.divide(input_image, 255.0)
+
+    # Load tensor and get result. This executes the inference on the NCS
+    graph.LoadTensor(input_image.astype(np.float16), 'user object')
+    output, userobj = graph.GetResult()
+
+    # filter out all the objects/boxes that don't meet thresholds
+    filtered_objs = filter_objects(output.astype(np.float32), input_image.shape[1], input_image.shape[0])
+
+    print('Displaying image with objects detected in GUI')
+    print('Click in the GUI window and hit any key to exit')
+    # display the filtered objects/boxes in a GUI window
+    display_objects_in_gui(display_image, filtered_objs)
+
+    # Clean up
+    graph.DeallocateGraph()
+    device.CloseDevice()
+    print('Finished')
+
+
+# main entry point for the program. We'll call main() to do what needs to be done.
+if __name__ == "__main__":
+    sys.exit(main())