Diffstat (limited to 'fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp')
-rw-r--r-- fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp | 417
1 file changed, 417 insertions(+), 0 deletions(-)
diff --git a/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp b/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
new file mode 100644
index 000000000..dfd21787c
--- /dev/null
+++ b/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp
@@ -0,0 +1,417 @@
+/*
+ * Copyright (C) 2019 Intel Corporation. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ */
+
+#include "wasi_nn.h"
+#include "wasi_nn_tensorflowlite.hpp"
+#include "logger.h"
+
+#include "bh_common.h"
+#include "bh_platform.h"
+#include "platform_common.h"
+
+#include <tensorflow/lite/interpreter.h>
+#include <tensorflow/lite/kernels/register.h>
+#include <tensorflow/lite/model.h>
+#include <tensorflow/lite/optional_debug_tools.h>
+#include <tensorflow/lite/error_reporter.h>
+
+#if defined(WASI_NN_ENABLE_GPU)
+#include <tensorflow/lite/delegates/gpu/delegate.h>
+#endif
+
+#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+#include <tensorflow/lite/delegates/external/external_delegate.h>
+#endif
+
+/* Maximum number of graphs per WASM instance */
+#define MAX_GRAPHS_PER_INST 10
+/* Maximum number of graph execution contexts per WASM instance */
+#define MAX_GRAPH_EXEC_CONTEXTS_PER_INST 10
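+/* Graph and execution context handles handed back to the caller are plain
+ indices into the fixed-size arrays below; slots are only reclaimed in
+ tensorflowlite_destroy(). */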
+
+typedef struct {
+ std::unique_ptr<tflite::Interpreter> interpreter;
+} Interpreter;
+
+typedef struct {
+ char *model_pointer;
+ std::unique_ptr<tflite::FlatBufferModel> model;
+ execution_target target;
+} Model;
+
+typedef struct {
+ uint32_t current_models;
+ Model models[MAX_GRAPHS_PER_INST];
+ uint32_t current_interpreters;
+ Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST];
+ korp_mutex g_lock;
+ TfLiteDelegate *delegate;
+} TFLiteContext;
+
+/* Utils */
+
+static error
+initialize_g(TFLiteContext *tfl_ctx, graph *g)
+{
+ os_mutex_lock(&tfl_ctx->g_lock);
+ if (tfl_ctx->current_models == MAX_GRAPHS_PER_INST) {
+ os_mutex_unlock(&tfl_ctx->g_lock);
+ NN_ERR_PRINTF("Excedded max graphs per WASM instance");
+ return runtime_error;
+ }
+ *g = tfl_ctx->current_models++;
+ os_mutex_unlock(&tfl_ctx->g_lock);
+ return success;
+}
+static error
+initialize_graph_ctx(TFLiteContext *tfl_ctx, graph g,
+ graph_execution_context *ctx)
+{
+ os_mutex_lock(&tfl_ctx->g_lock);
+ if (tfl_ctx->current_interpreters == MAX_GRAPH_EXEC_CONTEXTS_PER_INST) {
+ os_mutex_unlock(&tfl_ctx->g_lock);
+ NN_ERR_PRINTF("Excedded max graph execution context per WASM instance");
+ return runtime_error;
+ }
+ *ctx = tfl_ctx->current_interpreters++;
+ os_mutex_unlock(&tfl_ctx->g_lock);
+ return success;
+}
+
+static error
+is_valid_graph(TFLiteContext *tfl_ctx, graph g)
+{
+ if (g >= MAX_GRAPHS_PER_INST) {
+ NN_ERR_PRINTF("Invalid graph: %d >= %d.", g, MAX_GRAPHS_PER_INST);
+ return runtime_error;
+ }
+ if (tfl_ctx->models[g].model_pointer == NULL) {
+ NN_ERR_PRINTF("Context (model) non-initialized.");
+ return runtime_error;
+ }
+ if (tfl_ctx->models[g].model == NULL) {
+ NN_ERR_PRINTF("Context (tflite model) non-initialized.");
+ return runtime_error;
+ }
+ return success;
+}
+
+static error
+is_valid_graph_execution_context(TFLiteContext *tfl_ctx,
+ graph_execution_context ctx)
+{
+ if (ctx >= MAX_GRAPH_EXEC_CONTEXTS_PER_INST) {
+ NN_ERR_PRINTF("Invalid graph execution context: %d >= %d", ctx,
+ MAX_GRAPH_EXEC_CONTEXTS_PER_INST);
+ return runtime_error;
+ }
+ if (tfl_ctx->interpreters[ctx].interpreter == NULL) {
+ NN_ERR_PRINTF("Context (interpreter) non-initialized.");
+ return runtime_error;
+ }
+ return success;
+}
+
+/* WASI-NN (tensorflow) implementation */
+
+error
+tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder,
+ graph_encoding encoding, execution_target target, graph *g)
+{
+ TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+ if (builder->size != 1) {
+ NN_ERR_PRINTF("Unexpected builder format.");
+ return invalid_argument;
+ }
+
+ if (encoding != tensorflowlite) {
+ NN_ERR_PRINTF("Encoding is not tensorflowlite.");
+ return invalid_argument;
+ }
+
+ if (target != cpu && target != gpu) {
+ NN_ERR_PRINTF("Only CPU and GPU target is supported.");
+ return invalid_argument;
+ }
+
+ error res;
+ if (success != (res = initialize_g(tfl_ctx, g)))
+ return res;
+
+ uint32_t size = builder->buf[0].size;
+
+ // Save model
+ tfl_ctx->models[*g].model_pointer = (char *)wasm_runtime_malloc(size);
+ if (tfl_ctx->models[*g].model_pointer == NULL) {
+ NN_ERR_PRINTF("Error when allocating memory for model.");
+ return missing_memory;
+ }
+
+ bh_memcpy_s(tfl_ctx->models[*g].model_pointer, size, builder->buf[0].buf,
+ size);
+
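+ // Note: FlatBufferModel::BuildFromBuffer does not copy the buffer, so the
+ // runtime-owned copy above must stay alive for the model's lifetime.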
+ // Save model flatbuffer
+ tfl_ctx->models[*g].model =
+ std::move(tflite::FlatBufferModel::BuildFromBuffer(
+ tfl_ctx->models[*g].model_pointer, size, NULL));
+
+ if (tfl_ctx->models[*g].model == NULL) {
+ NN_ERR_PRINTF("Loading model error.");
+ wasm_runtime_free(tfl_ctx->models[*g].model_pointer);
+ tfl_ctx->models[*g].model_pointer = NULL;
+ return missing_memory;
+ }
+
+ // Save target
+ tfl_ctx->models[*g].target = target;
+ return success;
+}
+
+error
+tensorflowlite_init_execution_context(void *tflite_ctx, graph g,
+ graph_execution_context *ctx)
+{
+ TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+ error res;
+ if (success != (res = is_valid_graph(tfl_ctx, g)))
+ return res;
+
+ if (success != (res = initialize_graph_ctx(tfl_ctx, g, ctx)))
+ return res;
+
+ // Build the interpreter with the InterpreterBuilder.
+ tflite::ops::builtin::BuiltinOpResolver resolver;
+ tflite::InterpreterBuilder tflite_builder(*tfl_ctx->models[g].model,
+ resolver);
+ tflite_builder(&tfl_ctx->interpreters[*ctx].interpreter);
+ if (tfl_ctx->interpreters[*ctx].interpreter == NULL) {
+ NN_ERR_PRINTF("Error when generating the interpreter.");
+ return missing_memory;
+ }
+
+ bool use_default = false;
+ switch (tfl_ctx->models[g].target) {
+ case gpu:
+ {
+#if defined(WASI_NN_ENABLE_GPU)
+ NN_WARN_PRINTF("GPU enabled.");
+ // https://www.tensorflow.org/lite/performance/gpu
+ TfLiteGpuDelegateOptionsV2 options =
+ TfLiteGpuDelegateOptionsV2Default();
+ options.inference_preference =
+ TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED;
+ options.inference_priority1 =
+ TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY;
+ tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options);
+ if (tfl_ctx->delegate == NULL) {
+ NN_ERR_PRINTF("Error when generating GPU delegate.");
+ use_default = true;
+ return missing_memory;
+ }
+ if (tfl_ctx->interpreters[*ctx]
+ .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
+ != kTfLiteOk) {
+ NN_ERR_PRINTF("Error when enabling GPU delegate.");
+ use_default = true;
+ }
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+ NN_WARN_PRINTF("external delegation enabled.");
+ TfLiteExternalDelegateOptions options =
+ TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH);
+ tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options);
+ if (tfl_ctx->delegate == NULL) {
+ NN_ERR_PRINTF("Error when generating External delegate.");
+ use_default = true;
+ return missing_memory;
+ }
+ if (tfl_ctx->interpreters[*ctx]
+ .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate)
+ != kTfLiteOk) {
+ NN_ERR_PRINTF("Error when enabling External delegate.");
+ use_default = true;
+ }
+#else
+ NN_WARN_PRINTF("GPU not enabled.");
+ use_default = true;
+#endif
+ break;
+ }
+ default:
+ use_default = true;
+ }
+ if (use_default)
+ NN_WARN_PRINTF("Default encoding is CPU.");
+
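+ // Tensor buffers must be allocated before set_input/get_output can touch them.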
+ tfl_ctx->interpreters[*ctx].interpreter->AllocateTensors();
+ return success;
+}
+
+error
+tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx,
+ uint32_t index, tensor *input_tensor)
+{
+ TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+ error res;
+ if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx)))
+ return res;
+
+ uint32_t num_tensors =
+ tfl_ctx->interpreters[ctx].interpreter->inputs().size();
+ NN_DBG_PRINTF("Number of tensors (%d)", num_tensors);
+ if (index + 1 > num_tensors) {
+ return runtime_error;
+ }
+
+ auto tensor = tfl_ctx->interpreters[ctx].interpreter->input_tensor(index);
+ if (tensor == NULL) {
+ NN_ERR_PRINTF("Missing memory");
+ return missing_memory;
+ }
+
+ uint32_t model_tensor_size = 1;
+ for (int i = 0; i < tensor->dims->size; ++i)
+ model_tensor_size *= (uint32_t)tensor->dims->data[i];
+
+ uint32_t input_tensor_size = 1;
+ for (uint32_t i = 0; i < input_tensor->dimensions->size; i++)
+ input_tensor_size *= (uint32_t)input_tensor->dimensions->buf[i];
+
+ if (model_tensor_size != input_tensor_size) {
+ NN_ERR_PRINTF("Input tensor shape from the model is different than the "
+ "one provided");
+ return invalid_argument;
+ }
+
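+ // This backend assumes fp32 tensors: data is copied via typed_input_tensor<float>.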
+ auto *input =
+ tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>(
+ index);
+ if (input == NULL)
+ return missing_memory;
+
+ bh_memcpy_s(input, model_tensor_size * sizeof(float), input_tensor->data,
+ model_tensor_size * sizeof(float));
+ return success;
+}
+
+error
+tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx)
+{
+ TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+ error res;
+ if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx)))
+ return res;
+
+ if (tfl_ctx->interpreters[ctx].interpreter->Invoke() != kTfLiteOk) {
+ NN_ERR_PRINTF("Error when running inference.");
+ return runtime_error;
+ }
+ return success;
+}
+
+error
+tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx,
+ uint32_t index, tensor_data output_tensor,
+ uint32_t *output_tensor_size)
+{
+ TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+ error res;
+ if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx)))
+ return res;
+
+ uint32_t num_output_tensors =
+ tfl_ctx->interpreters[ctx].interpreter->outputs().size();
+ NN_DBG_PRINTF("Number of tensors (%d)", num_output_tensors);
+
+ if (index + 1 > num_output_tensors) {
+ return runtime_error;
+ }
+
+ auto tensor = tfl_ctx->interpreters[ctx].interpreter->output_tensor(index);
+ if (tensor == NULL) {
+ NN_ERR_PRINTF("Missing memory");
+ return missing_memory;
+ }
+
+ uint32_t model_tensor_size = 1;
+ for (int i = 0; i < (int)tensor->dims->size; ++i)
+ model_tensor_size *= (uint32_t)tensor->dims->data[i];
+
+ if (*output_tensor_size < model_tensor_size) {
+ NN_ERR_PRINTF("Insufficient memory to copy tensor %d", index);
+ return missing_memory;
+ }
+
+ float *tensor_f =
+ tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>(
+ index);
+ for (uint32_t i = 0; i < model_tensor_size; ++i)
+ NN_DBG_PRINTF("output: %f", tensor_f[i]);
+
+ *output_tensor_size = model_tensor_size;
+ bh_memcpy_s(output_tensor, model_tensor_size * sizeof(float), tensor_f,
+ model_tensor_size * sizeof(float));
+ return success;
+}
+
+void
+tensorflowlite_initialize(void **tflite_ctx)
+{
+ TFLiteContext *tfl_ctx = new TFLiteContext();
+ if (tfl_ctx == NULL) {
+ NN_ERR_PRINTF("Error when allocating memory for tensorflowlite.");
+ return;
+ }
+
+ NN_DBG_PRINTF("Initializing models.");
+ tfl_ctx->current_models = 0;
+ for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
+ tfl_ctx->models[i].model_pointer = NULL;
+ }
+ NN_DBG_PRINTF("Initializing interpreters.");
+ tfl_ctx->current_interpreters = 0;
+
+ if (os_mutex_init(&tfl_ctx->g_lock) != 0) {
+ NN_ERR_PRINTF("Error while initializing the lock");
+ }
+
+ tfl_ctx->delegate = NULL;
+
+ *tflite_ctx = (void *)tfl_ctx;
+}
+
+void
+tensorflowlite_destroy(void *tflite_ctx)
+{
+ /*
+ TensorFlow Lite memory is internally managed by tensorflow
+
+ Related issues:
+ * https://github.com/tensorflow/tensorflow/issues/15880
+ */
+ TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx;
+
+ if (tfl_ctx->delegate != NULL) {
+#if defined(WASI_NN_ENABLE_GPU)
+ TfLiteGpuDelegateV2Delete(tfl_ctx->delegate);
+#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE)
+ TfLiteExternalDelegateDelete(tfl_ctx->delegate);
+#endif
+ }
+
+ NN_DBG_PRINTF("Freeing memory.");
+ for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) {
+ tfl_ctx->models[i].model.reset();
+ if (tfl_ctx->models[i].model_pointer)
+ wasm_runtime_free(tfl_ctx->models[i].model_pointer);
+ tfl_ctx->models[i].model_pointer = NULL;
+ }
+ for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) {
+ tfl_ctx->interpreters[i].interpreter.reset();
+ }
+ os_mutex_destroy(&tfl_ctx->g_lock);
+ delete tfl_ctx;
+ NN_DBG_PRINTF("Memory free'd.");
+}
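
For orientation, the sketch below shows how an embedder might drive the backend entry points introduced by this patch, in the order the wasi-nn host wrappers invoke them. It is an illustration only, not part of the change: the graph_builder / graph_builder_array / tensor layouts, the fp32 tensor_type value, and the run_once helper name are assumptions taken from wasi_nn.h as this file uses it.

/*
 * Minimal sketch (not part of the patch): drive the TFLite backend directly.
 * Struct layouts and enum values are assumed from wasi_nn.h as used above.
 */
#include "wasi_nn.h"
#include "wasi_nn_tensorflowlite.hpp"

static error
run_once(uint8_t *model_bytes, uint32_t model_size,
         float *input, uint32_t input_len,
         float *output, uint32_t *output_len)
{
    void *tfl_ctx = NULL;
    tensorflowlite_initialize(&tfl_ctx);
    if (tfl_ctx == NULL)
        return runtime_error;

    /* One flatbuffer blob -> builder array of size 1, as load() expects */
    graph_builder builder = { model_bytes, model_size };
    graph_builder_array builder_array = { &builder, 1 };

    /* Single 1-D fp32 input tensor; element count must match the model input */
    uint32_t dims[1] = { input_len };
    tensor_dimensions dimensions = { dims, 1 };
    tensor input_tensor = { &dimensions, fp32, (tensor_data)input };

    graph g;
    graph_execution_context exec_ctx;
    error res = tensorflowlite_load(tfl_ctx, &builder_array, tensorflowlite,
                                    cpu, &g);
    if (res == success)
        res = tensorflowlite_init_execution_context(tfl_ctx, g, &exec_ctx);
    if (res == success)
        res = tensorflowlite_set_input(tfl_ctx, exec_ctx, 0, &input_tensor);
    if (res == success)
        res = tensorflowlite_compute(tfl_ctx, exec_ctx);
    if (res == success)
        /* *output_len is the capacity in elements on input and is updated
           to the number of elements copied on success */
        res = tensorflowlite_get_output(tfl_ctx, exec_ctx, 0,
                                        (tensor_data)output, output_len);

    tensorflowlite_destroy(tfl_ctx);
    return res;
}

Passing gpu instead of cpu only takes effect when the backend is built with WASI_NN_ENABLE_GPU or WASI_NN_ENABLE_EXTERNAL_DELEGATE; otherwise init_execution_context falls back to the CPU path, as the warning in the code above indicates.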