diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:22 +0000 |
commit | c21c3b0befeb46a51b6bf3758ffa30813bea0ff0 (patch) | |
tree | 9754ff1ca740f6346cf8483ec915d4054bc5da2d /fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp | |
parent | Adding upstream version 1.43.2. (diff) | |
download | netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.tar.xz netdata-c21c3b0befeb46a51b6bf3758ffa30813bea0ff0.zip |
Adding upstream version 1.44.3.upstream/1.44.3
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp')
-rw-r--r-- | fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp | 417 |
1 files changed, 417 insertions, 0 deletions
diff --git a/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp b/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp new file mode 100644 index 000000000..dfd21787c --- /dev/null +++ b/fluent-bit/lib/wasm-micro-runtime-WAMR-1.2.2/core/iwasm/libraries/wasi-nn/src/wasi_nn_tensorflowlite.cpp @@ -0,0 +1,417 @@ +/* + * Copyright (C) 2019 Intel Corporation. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + */ + +#include "wasi_nn.h" +#include "wasi_nn_tensorflowlite.hpp" +#include "logger.h" + +#include "bh_common.h" +#include "bh_platform.h" +#include "platform_common.h" + +#include <tensorflow/lite/interpreter.h> +#include <tensorflow/lite/kernels/register.h> +#include <tensorflow/lite/model.h> +#include <tensorflow/lite/optional_debug_tools.h> +#include <tensorflow/lite/error_reporter.h> + +#if defined(WASI_NN_ENABLE_GPU) +#include <tensorflow/lite/delegates/gpu/delegate.h> +#endif + +#if defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE) +#include <tensorflow/lite/delegates/external/external_delegate.h> +#endif + +/* Maximum number of graphs per WASM instance */ +#define MAX_GRAPHS_PER_INST 10 +/* Maximum number of graph execution context per WASM instance*/ +#define MAX_GRAPH_EXEC_CONTEXTS_PER_INST 10 + +typedef struct { + std::unique_ptr<tflite::Interpreter> interpreter; +} Interpreter; + +typedef struct { + char *model_pointer; + std::unique_ptr<tflite::FlatBufferModel> model; + execution_target target; +} Model; + +typedef struct { + uint32_t current_models; + Model models[MAX_GRAPHS_PER_INST]; + uint32_t current_interpreters; + Interpreter interpreters[MAX_GRAPH_EXEC_CONTEXTS_PER_INST]; + korp_mutex g_lock; + TfLiteDelegate *delegate; +} TFLiteContext; + +/* Utils */ + +static error +initialize_g(TFLiteContext *tfl_ctx, graph *g) +{ + os_mutex_lock(&tfl_ctx->g_lock); + if (tfl_ctx->current_models == MAX_GRAPHS_PER_INST) { + os_mutex_unlock(&tfl_ctx->g_lock); + NN_ERR_PRINTF("Excedded max graphs per WASM instance"); + return runtime_error; + } + *g = tfl_ctx->current_models++; + os_mutex_unlock(&tfl_ctx->g_lock); + return success; +} +static error +initialize_graph_ctx(TFLiteContext *tfl_ctx, graph g, + graph_execution_context *ctx) +{ + os_mutex_lock(&tfl_ctx->g_lock); + if (tfl_ctx->current_interpreters == MAX_GRAPH_EXEC_CONTEXTS_PER_INST) { + os_mutex_unlock(&tfl_ctx->g_lock); + NN_ERR_PRINTF("Excedded max graph execution context per WASM instance"); + return runtime_error; + } + *ctx = tfl_ctx->current_interpreters++; + os_mutex_unlock(&tfl_ctx->g_lock); + return success; +} + +static error +is_valid_graph(TFLiteContext *tfl_ctx, graph g) +{ + if (g >= MAX_GRAPHS_PER_INST) { + NN_ERR_PRINTF("Invalid graph: %d >= %d.", g, MAX_GRAPHS_PER_INST); + return runtime_error; + } + if (tfl_ctx->models[g].model_pointer == NULL) { + NN_ERR_PRINTF("Context (model) non-initialized."); + return runtime_error; + } + if (tfl_ctx->models[g].model == NULL) { + NN_ERR_PRINTF("Context (tflite model) non-initialized."); + return runtime_error; + } + return success; +} + +static error +is_valid_graph_execution_context(TFLiteContext *tfl_ctx, + graph_execution_context ctx) +{ + if (ctx >= MAX_GRAPH_EXEC_CONTEXTS_PER_INST) { + NN_ERR_PRINTF("Invalid graph execution context: %d >= %d", ctx, + MAX_GRAPH_EXEC_CONTEXTS_PER_INST); + return runtime_error; + } + if (tfl_ctx->interpreters[ctx].interpreter == NULL) { + NN_ERR_PRINTF("Context (interpreter) non-initialized."); + return runtime_error; + } + return success; +} + +/* WASI-NN (tensorflow) implementation */ + +error +tensorflowlite_load(void *tflite_ctx, graph_builder_array *builder, + graph_encoding encoding, execution_target target, graph *g) +{ + TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; + + if (builder->size != 1) { + NN_ERR_PRINTF("Unexpected builder format."); + return invalid_argument; + } + + if (encoding != tensorflowlite) { + NN_ERR_PRINTF("Encoding is not tensorflowlite."); + return invalid_argument; + } + + if (target != cpu && target != gpu) { + NN_ERR_PRINTF("Only CPU and GPU target is supported."); + return invalid_argument; + } + + error res; + if (success != (res = initialize_g(tfl_ctx, g))) + return res; + + uint32_t size = builder->buf[0].size; + + // Save model + tfl_ctx->models[*g].model_pointer = (char *)wasm_runtime_malloc(size); + if (tfl_ctx->models[*g].model_pointer == NULL) { + NN_ERR_PRINTF("Error when allocating memory for model."); + return missing_memory; + } + + bh_memcpy_s(tfl_ctx->models[*g].model_pointer, size, builder->buf[0].buf, + size); + + // Save model flatbuffer + tfl_ctx->models[*g].model = + std::move(tflite::FlatBufferModel::BuildFromBuffer( + tfl_ctx->models[*g].model_pointer, size, NULL)); + + if (tfl_ctx->models[*g].model == NULL) { + NN_ERR_PRINTF("Loading model error."); + wasm_runtime_free(tfl_ctx->models[*g].model_pointer); + tfl_ctx->models[*g].model_pointer = NULL; + return missing_memory; + } + + // Save target + tfl_ctx->models[*g].target = target; + return success; +} + +error +tensorflowlite_init_execution_context(void *tflite_ctx, graph g, + graph_execution_context *ctx) +{ + TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; + + error res; + if (success != (res = is_valid_graph(tfl_ctx, g))) + return res; + + if (success != (res = initialize_graph_ctx(tfl_ctx, g, ctx))) + return res; + + // Build the interpreter with the InterpreterBuilder. + tflite::ops::builtin::BuiltinOpResolver resolver; + tflite::InterpreterBuilder tflite_builder(*tfl_ctx->models[g].model, + resolver); + tflite_builder(&tfl_ctx->interpreters[*ctx].interpreter); + if (tfl_ctx->interpreters[*ctx].interpreter == NULL) { + NN_ERR_PRINTF("Error when generating the interpreter."); + return missing_memory; + } + + bool use_default = false; + switch (tfl_ctx->models[g].target) { + case gpu: + { +#if defined(WASI_NN_ENABLE_GPU) + NN_WARN_PRINTF("GPU enabled."); + // https://www.tensorflow.org/lite/performance/gpu + TfLiteGpuDelegateOptionsV2 options = + TfLiteGpuDelegateOptionsV2Default(); + options.inference_preference = + TFLITE_GPU_INFERENCE_PREFERENCE_SUSTAINED_SPEED; + options.inference_priority1 = + TFLITE_GPU_INFERENCE_PRIORITY_MIN_LATENCY; + tfl_ctx->delegate = TfLiteGpuDelegateV2Create(&options); + if (tfl_ctx->delegate == NULL) { + NN_ERR_PRINTF("Error when generating GPU delegate."); + use_default = true; + return missing_memory; + } + if (tfl_ctx->interpreters[*ctx] + .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate) + != kTfLiteOk) { + NN_ERR_PRINTF("Error when enabling GPU delegate."); + use_default = true; + } +#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE) + NN_WARN_PRINTF("external delegation enabled."); + TfLiteExternalDelegateOptions options = + TfLiteExternalDelegateOptionsDefault(WASI_NN_EXT_DELEGATE_PATH); + tfl_ctx->delegate = TfLiteExternalDelegateCreate(&options); + if (tfl_ctx->delegate == NULL) { + NN_ERR_PRINTF("Error when generating External delegate."); + use_default = true; + return missing_memory; + } + if (tfl_ctx->interpreters[*ctx] + .interpreter->ModifyGraphWithDelegate(tfl_ctx->delegate) + != kTfLiteOk) { + NN_ERR_PRINTF("Error when enabling External delegate."); + use_default = true; + } +#else + NN_WARN_PRINTF("GPU not enabled."); + use_default = true; +#endif + break; + } + default: + use_default = true; + } + if (use_default) + NN_WARN_PRINTF("Default encoding is CPU."); + + tfl_ctx->interpreters[*ctx].interpreter->AllocateTensors(); + return success; +} + +error +tensorflowlite_set_input(void *tflite_ctx, graph_execution_context ctx, + uint32_t index, tensor *input_tensor) +{ + TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; + + error res; + if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx))) + return res; + + uint32_t num_tensors = + tfl_ctx->interpreters[ctx].interpreter->inputs().size(); + NN_DBG_PRINTF("Number of tensors (%d)", num_tensors); + if (index + 1 > num_tensors) { + return runtime_error; + } + + auto tensor = tfl_ctx->interpreters[ctx].interpreter->input_tensor(index); + if (tensor == NULL) { + NN_ERR_PRINTF("Missing memory"); + return missing_memory; + } + + uint32_t model_tensor_size = 1; + for (int i = 0; i < tensor->dims->size; ++i) + model_tensor_size *= (uint32_t)tensor->dims->data[i]; + + uint32_t input_tensor_size = 1; + for (uint32_t i = 0; i < input_tensor->dimensions->size; i++) + input_tensor_size *= (uint32_t)input_tensor->dimensions->buf[i]; + + if (model_tensor_size != input_tensor_size) { + NN_ERR_PRINTF("Input tensor shape from the model is different than the " + "one provided"); + return invalid_argument; + } + + auto *input = + tfl_ctx->interpreters[ctx].interpreter->typed_input_tensor<float>( + index); + if (input == NULL) + return missing_memory; + + bh_memcpy_s(input, model_tensor_size * sizeof(float), input_tensor->data, + model_tensor_size * sizeof(float)); + return success; +} + +error +tensorflowlite_compute(void *tflite_ctx, graph_execution_context ctx) +{ + TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; + + error res; + if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx))) + return res; + + tfl_ctx->interpreters[ctx].interpreter->Invoke(); + return success; +} + +error +tensorflowlite_get_output(void *tflite_ctx, graph_execution_context ctx, + uint32_t index, tensor_data output_tensor, + uint32_t *output_tensor_size) +{ + TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; + + error res; + if (success != (res = is_valid_graph_execution_context(tfl_ctx, ctx))) + return res; + + uint32_t num_output_tensors = + tfl_ctx->interpreters[ctx].interpreter->outputs().size(); + NN_DBG_PRINTF("Number of tensors (%d)", num_output_tensors); + + if (index + 1 > num_output_tensors) { + return runtime_error; + } + + auto tensor = tfl_ctx->interpreters[ctx].interpreter->output_tensor(index); + if (tensor == NULL) { + NN_ERR_PRINTF("Missing memory"); + return missing_memory; + } + + uint32_t model_tensor_size = 1; + for (int i = 0; i < (int)tensor->dims->size; ++i) + model_tensor_size *= (uint32_t)tensor->dims->data[i]; + + if (*output_tensor_size < model_tensor_size) { + NN_ERR_PRINTF("Insufficient memory to copy tensor %d", index); + return missing_memory; + } + + float *tensor_f = + tfl_ctx->interpreters[ctx].interpreter->typed_output_tensor<float>( + index); + for (uint32_t i = 0; i < model_tensor_size; ++i) + NN_DBG_PRINTF("output: %f", tensor_f[i]); + + *output_tensor_size = model_tensor_size; + bh_memcpy_s(output_tensor, model_tensor_size * sizeof(float), tensor_f, + model_tensor_size * sizeof(float)); + return success; +} + +void +tensorflowlite_initialize(void **tflite_ctx) +{ + TFLiteContext *tfl_ctx = new TFLiteContext(); + if (tfl_ctx == NULL) { + NN_ERR_PRINTF("Error when allocating memory for tensorflowlite."); + return; + } + + NN_DBG_PRINTF("Initializing models."); + tfl_ctx->current_models = 0; + for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) { + tfl_ctx->models[i].model_pointer = NULL; + } + NN_DBG_PRINTF("Initializing interpreters."); + tfl_ctx->current_interpreters = 0; + + if (os_mutex_init(&tfl_ctx->g_lock) != 0) { + NN_ERR_PRINTF("Error while initializing the lock"); + } + + tfl_ctx->delegate = NULL; + + *tflite_ctx = (void *)tfl_ctx; +} + +void +tensorflowlite_destroy(void *tflite_ctx) +{ + /* + TensorFlow Lite memory is internally managed by tensorflow + + Related issues: + * https://github.com/tensorflow/tensorflow/issues/15880 + */ + TFLiteContext *tfl_ctx = (TFLiteContext *)tflite_ctx; + + if (tfl_ctx->delegate != NULL) { +#if defined(WASI_NN_ENABLE_GPU) + TfLiteGpuDelegateV2Delete(tfl_ctx->delegate); +#elif defined(WASI_NN_ENABLE_EXTERNAL_DELEGATE) + TfLiteExternalDelegateDelete(tfl_ctx->delegate); +#endif + } + + NN_DBG_PRINTF("Freeing memory."); + for (int i = 0; i < MAX_GRAPHS_PER_INST; ++i) { + tfl_ctx->models[i].model.reset(); + if (tfl_ctx->models[i].model_pointer) + wasm_runtime_free(tfl_ctx->models[i].model_pointer); + tfl_ctx->models[i].model_pointer = NULL; + } + for (int i = 0; i < MAX_GRAPH_EXEC_CONTEXTS_PER_INST; ++i) { + tfl_ctx->interpreters[i].interpreter.reset(); + } + os_mutex_destroy(&tfl_ctx->g_lock); + delete tfl_ctx; + NN_DBG_PRINTF("Memory free'd."); +} |