1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <assert.h>
#include <math.h>
#include "aom_dsp/aom_dsp_common.h"
#include "aom_dsp/mathutils.h"
#include "av1/encoder/ml.h"
// Reduces the precision of the first `num_output` entries of `output` in
// place so that each becomes a multiple of 1/512 (9 fractional bits).
// The scaled value is offset by 0.5 and then converted to int, which
// truncates toward zero.
void av1_nn_output_prec_reduce(float *const output, int num_output) {
  enum { kPrecBits = 9, kScale = 1 << kPrecBits };
  const float step = (float)(1.0 / kScale);
  for (int idx = 0; idx < num_output; ++idx) {
    // The 0.5 offset is a double constant, so the sum is formed in double
    // before the conversion to int.
    const int quantized = (int)(output[idx] * kScale + 0.5);
    output[idx] = quantized * step;
  }
}
// Runs a forward pass of the fully connected network described by
// `nn_config` over `input_nodes` and writes nn_config->num_outputs values to
// `output`.
// Each hidden layer computes bias + weights * input followed by ReLU; the
// final layer is the same affine map without an activation. Hidden
// activations alternate between two stack buffers, so every hidden layer is
// asserted to have fewer than NN_MAX_NODES_PER_LAYER nodes.
// If `reduce_prec` is non-zero the outputs are passed through
// av1_nn_output_prec_reduce().
void av1_nn_predict_c(const float *input_nodes,
                      const NN_CONFIG *const nn_config, int reduce_prec,
                      float *const output) {
  const int num_layers = nn_config->num_hidden_layers;
  assert(num_layers <= NN_MAX_HIDDEN_LAYERS);

  float scratch[2][NN_MAX_NODES_PER_LAYER];
  const float *src = input_nodes;
  int src_count = nn_config->num_inputs;
  int slot = 0;

  // Hidden layers: the output of one layer becomes the input of the next.
  for (int layer = 0; layer < num_layers; ++layer, slot ^= 1) {
    const int num_output_nodes = nn_config->num_hidden_nodes[layer];
    assert(num_output_nodes < NN_MAX_NODES_PER_LAYER);
    // Weights are stored one row of `src_count` values per output node.
    const float *row = nn_config->weights[layer];
    const float *bias = nn_config->bias[layer];
    float *dst = scratch[slot];
    for (int node = 0; node < num_output_nodes; ++node, row += src_count) {
      float acc = bias[node];
      for (int k = 0; k < src_count; ++k) acc += row[k] * src[k];
      // ReLU activation.
      dst[node] = (acc > 0.0f) ? acc : 0.0f;
    }
    src = dst;
    src_count = num_output_nodes;
  }

  // Output layer: affine map only, no activation.
  const float *row = nn_config->weights[num_layers];
  const float *bias = nn_config->bias[num_layers];
  for (int node = 0; node < nn_config->num_outputs; ++node, row += src_count) {
    float acc = bias[node];
    for (int k = 0; k < src_count; ++k) acc += row[k] * src[k];
    output[node] = acc;
  }

  if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_outputs);
}
#if CONFIG_NN_V2
// ReLU activation for one fully connected layer:
//   layer->output[i] = max(input[i], 0.0f)
// for each of the layer's num_outputs entries. Returns layer->output.
static float *nn_relu(const float *input, FC_LAYER *layer) {
  float *const out = layer->output;
  for (int k = 0; k < layer->num_outputs; ++k) {
    const float v = input[k];
    out[k] = AOMMAX(v, 0.0f);
  }
  return out;
}
// Sigmoid activation for one fully connected layer. Each input is first
// clamped to [-10, 10] and then mapped to
//   layer->output[i] = 1 / (1 + exp(-x))
// where x is the clamped input. Returns layer->output.
static float *nn_sigmoid(const float *input, FC_LAYER *layer) {
  float *const out = layer->output;
  for (int k = 0; k < layer->num_outputs; ++k) {
    float x = AOMMAX(input[k], -10.0f);
    x = AOMMIN(x, 10.0f);
    out[k] = 1.0f / (1.0f + expf(-x));
  }
  return out;
}
// Forward pass through a single fully connected layer, used by
// av1_nn_predict_v2(). Computes
//   layer->output[node] = bias[node] + sum_i weights[node][i] * input[i]
// with the weights stored as one row of num_inputs values per output node,
// then applies the layer's activation in place.
// Returns a pointer to the activated outputs, or NULL for an unsupported or
// unknown activation (which also trips an assert in debug builds).
static float *nn_fc_forward(const float *input, FC_LAYER *layer) {
  assert(layer->num_outputs < NN_MAX_NODES_PER_LAYER);

  // Affine part: one dot product per output node.
  const float *bias = layer->bias;
  for (int node = 0; node < layer->num_outputs; ++node) {
    const float *row = layer->weights + node * layer->num_inputs;
    float acc = bias[node];
    for (int k = 0; k < layer->num_inputs; ++k) acc += row[k] * input[k];
    layer->output[node] = acc;
  }

  // Activation part, applied to layer->output in place.
  float *result = NULL;
  switch (layer->activation) {
    case NONE: result = layer->output; break;
    case RELU: result = nn_relu(layer->output, layer); break;
    case SIGMOID: result = nn_sigmoid(layer->output, layer); break;
    case SOFTSIGN:
      assert(0 && "Softsign has not been supported in NN.");  // TO DO
      break;
    default:
      assert(0 && "Unknown activation");  // Unknown activation
      break;
  }
  return result;
}
// Runs the layer-based (V2) network in `nn_config` on `feature`.
// Layers 0 .. num_hidden_layers - 1 are evaluated in order, each feeding the
// next, followed by the final layer at index num_hidden_layers. The final
// layer's num_logits outputs are copied to `output`, and passed through
// av1_nn_output_prec_reduce() when `reduce_prec` is non-zero.
void av1_nn_predict_v2(const float *feature, NN_CONFIG_V2 *nn_config,
                       int reduce_prec, float *output) {
  const int num_layers = nn_config->num_hidden_layers;
  assert(num_layers <= NN_MAX_HIDDEN_LAYERS);

  // Hidden layers: each layer consumes the previous layer's activations.
  const float *activations = feature;
  for (int i = 0; i < num_layers; ++i) {
    activations = nn_fc_forward(activations, &nn_config->layer[i]);
    assert(nn_config->layer[i + 1].num_inputs ==
           nn_config->layer[i].num_outputs);
  }

  // Final layer produces the logits.
  activations = nn_fc_forward(activations, &nn_config->layer[num_layers]);
  assert(nn_config->layer[num_layers].num_outputs == nn_config->num_logits);

  // Hand the logits back to the caller.
  memcpy(output, activations, sizeof(output[0]) * nn_config->num_logits);
  if (reduce_prec) av1_nn_output_prec_reduce(output, nn_config->num_logits);
}
#endif // CONFIG_NN_V2
void av1_nn_softmax(const float *input, float *output, int n) {
// Softmax function is invariant to adding the same constant
// to all input values, so we subtract the maximum input to avoid
// possible overflow.
float max_input = input[0];
for (int i = 1; i < n; i++) max_input = AOMMAX(max_input, input[i]);
float sum_out = 0.0f;
for (int i = 0; i < n; i++) {
// Clamp to range [-10.0, 0.0] to prevent FE_UNDERFLOW errors.
const float normalized_input = AOMMAX(input[i] - max_input, -10.0f);
output[i] = expf(normalized_input);
sum_out += output[i];
}
for (int i = 0; i < n; i++) output[i] /= sum_out;
}
// Softmax over exactly 16 values, using approx_exp() in place of expf().
// The maximum input is subtracted from every value before exponentiation and
// the results are divided by their sum.
void av1_nn_fast_softmax_16_c(const float *input, float *output) {
  enum { kClassCount = 16 };

  float peak = input[0];
  for (int k = 1; k < kClassCount; ++k) peak = AOMMAX(peak, input[k]);

  float total = 0.0f;
  for (int k = 0; k < kClassCount; ++k) {
    // Clamp to range [-10.0, 0.0] to prevent FE_UNDERFLOW errors.
    const float clamped = AOMMAX(input[k] - peak, -10.0f);
    output[k] = approx_exp(clamped);
    total += output[k];
  }

  for (int k = 0; k < kClassCount; ++k) output[k] /= total;
}
|