-
Notifications
You must be signed in to change notification settings - Fork 90
/
ImageNetModelRunner.cs
141 lines (125 loc) · 6.43 KB
/
ImageNetModelRunner.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT license.
namespace Microsoft.Psi.Onnx
{
using System;
using System.Collections.Generic;
using Microsoft.Psi;
using Microsoft.Psi.Components;
using Microsoft.Psi.Imaging;
/// <summary>
/// Component that runs an ImageNet image classification model.
/// </summary>
/// <remarks>
/// This class implements a \psi component that runs an ONNX model trained
/// on the ImageNet dataset that operates on 224x224 RGB images and scores
/// the image for each of the 1000 ImageNet classes. It takes an input
/// stream of \psi images, applies a center-crop, then rescales and normalizes
/// the pixel values into the input vector expected by the model. It also
/// parses the model outputs into a list of <see cref="LabeledPrediction"/>
/// values, corresponding to the top N predictions by the model. For
/// convenience, a set of pre-defined model runner configurations is
/// provided for a number of image classification models available in the
/// ONNX Model Zoo (https://github.com/onnx/models/tree/master/vision/classification).
/// The ONNX model file for the corresponding configuration will need to be
/// downloaded locally and the path to the model file will need to be
/// specified when creating the configuration.
/// </remarks>
public class ImageNetModelRunner : ConsumerProducer<Shared<Image>, List<LabeledPrediction>>
{
    // Geometry of the model input: a square image with three color channels,
    // flattened into three consecutive planes (red, then green, then blue).
    private const int ModelImageSize = 224;
    private const int ChannelCount = 3;
    private const int InputVectorSize = ChannelCount * ModelImageSize * ModelImageSize;

    // Per-channel normalization constants: mean = [0.485, 0.456, 0.406]
    // and std = [0.229, 0.224, 0.225], in red, green, blue order.
    private const float RedMean = 0.485f;
    private const float GreenMean = 0.456f;
    private const float BlueMean = 0.406f;
    private const float RedStdDev = 0.229f;
    private const float GreenStdDev = 0.224f;
    private const float BlueStdDev = 0.225f;

    // Input buffer that is overwritten for every received image.
    private readonly float[] onnxInputVector = new float[InputVectorSize];
    private readonly OnnxModel onnxModel;
    private readonly ImageNetModelOutputParser outputParser;

    /// <summary>
    /// Initializes a new instance of the <see cref="ImageNetModelRunner"/> class.
    /// </summary>
    /// <param name="pipeline">The pipeline to add the component to.</param>
    /// <param name="configuration">The configuration for the component.</param>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="configuration"/> is null.</exception>
    /// <remarks>
    /// To run on a GPU, use the Microsoft.Psi.Onnx.ModelRunners.Gpu library instead of Microsoft.Psi.Onnx.ModelRunners.Cpu, and set
    /// the <see cref="ImageNetModelRunnerConfiguration.GpuDeviceId"/> value of the <paramref name="configuration"/> to a valid
    /// non-negative integer. Typical device ID values are 0 or 1.
    /// </remarks>
    public ImageNetModelRunner(Pipeline pipeline, ImageNetModelRunnerConfiguration configuration)
        : base(pipeline)
    {
        // fail with a descriptive exception instead of a NullReferenceException
        // raised from inside the object initializer below
        if (configuration == null)
        {
            throw new ArgumentNullException(nameof(configuration));
        }

        // create an ONNX model based on the supplied ImageNet model runner configuration
        this.onnxModel = new OnnxModel(new OnnxModelConfiguration()
        {
            ModelFileName = configuration.ModelFilePath,
            InputVectorName = configuration.InputVectorName,
            InputVectorSize = InputVectorSize,
            OutputVectorName = configuration.OutputVectorName,
            GpuDeviceId = configuration.GpuDeviceId,
        });

        this.outputParser = new ImageNetModelOutputParser(
            configuration.ImageClassesFilePath,
            configuration.NumberOfPredictions,
            configuration.ApplySoftmaxToModelOutput);
    }

    /// <inheritdoc/>
    protected override void Receive(Shared<Image> data, Envelope envelope)
    {
        // construct the ONNX model input vector (stored in this.onnxInputVector)
        // based on the incoming image
        this.ConstructOnnxInputVector(data);

        // run the model over the input vector
        var outputVector = this.onnxModel.GetPrediction(this.onnxInputVector);

        // parse the model output into an ordered list of the top-N predictions
        var results = this.outputParser.GetPredictions(outputVector);

        // post the results, stamped with the originating time of the input image
        this.Out.Post(results, envelope.OriginatingTime);
    }

    /// <summary>
    /// Writes one normalized color channel into a destination vector.
    /// </summary>
    /// <param name="bytes">The interleaved pixel bytes to read from.</param>
    /// <param name="channelOffset">The index of the first byte of the channel within <paramref name="bytes"/>.</param>
    /// <param name="mean">The mean subtracted from the channel value after scaling it by 1/255.</param>
    /// <param name="stdDev">The standard deviation the centered value is divided by.</param>
    /// <param name="destination">The vector to write the normalized values to.</param>
    /// <param name="destinationIndex">The index in <paramref name="destination"/> to start writing at.</param>
    /// <returns>The index in <paramref name="destination"/> following the last value written.</returns>
    private static int WriteNormalizedChannel(byte[] bytes, int channelOffset, float mean, float stdDev, float[] destination, int destinationIndex)
    {
        for (int i = channelOffset; i < bytes.Length; i += ChannelCount)
        {
            destination[destinationIndex++] = ((bytes[i] / 255.0f) - mean) / stdDev;
        }

        return destinationIndex;
    }

    /// <summary>
    /// Constructs the input vector for the ImageNet model for a specified image.
    /// </summary>
    /// <param name="sharedImage">The image to construct the input vector for.</param>
    /// <remarks>
    /// The result is written into the reusable <see cref="onnxInputVector"/> buffer.
    /// </remarks>
    private void ConstructOnnxInputVector(Shared<Image> sharedImage)
    {
        var inputImage = sharedImage.Resource;
        var inputWidth = inputImage.Width;
        var inputHeight = inputImage.Height;
        var inputFormat = inputImage.PixelFormat;

        // crop a center square; the offset along the shorter dimension is always
        // zero because the square size equals that dimension
        var squareSize = Math.Min(inputWidth, inputHeight);
        var left = (inputWidth - squareSize) / 2;
        var top = (inputHeight - squareSize) / 2;
        using var squareImage = ImagePool.GetOrCreate(squareSize, squareSize, inputFormat);
        inputImage.Crop(squareImage.Resource, left, top, squareSize, squareSize);

        // resize the image to the size expected by the model
        using var resizedImage = ImagePool.GetOrCreate(ModelImageSize, ModelImageSize, inputFormat);
        squareImage.Resource.Resize(resizedImage.Resource, ModelImageSize, ModelImageSize, SamplingMode.Bilinear);

        // NOTE(review): reading exactly InputVectorSize bytes assumes the 224x224
        // BGR image rows are stored without padding - confirm against Image.ReadBytes.
        byte[] bytes;
        if (inputFormat != PixelFormat.BGR_24bpp)
        {
            // the pixel format does not match, so convert before extracting the bytes
            using var reformattedImage = ImagePool.GetOrCreate(ModelImageSize, ModelImageSize, PixelFormat.BGR_24bpp);
            resizedImage.Resource.CopyTo(reformattedImage.Resource);
            bytes = reformattedImage.Resource.ReadBytes(InputVectorSize);
        }
        else
        {
            bytes = resizedImage.Resource.ReadBytes(InputVectorSize);
        }

        // Now populate the onnxInputVector float array / tensor, one color plane
        // after another. The bytes are in blue, green, red order, so red is at
        // offset 2, green at offset 1 and blue at offset 0 of every pixel.
        int next = 0;
        next = WriteNormalizedChannel(bytes, 2, RedMean, RedStdDev, this.onnxInputVector, next);
        next = WriteNormalizedChannel(bytes, 1, GreenMean, GreenStdDev, this.onnxInputVector, next);
        WriteNormalizedChannel(bytes, 0, BlueMean, BlueStdDev, this.onnxInputVector, next);
    }
}
}