diff options
Diffstat (limited to '')
-rw-r--r-- | js/src/wasm/WasmSerialize.h | 296 |
1 files changed, 296 insertions, 0 deletions
diff --git a/js/src/wasm/WasmSerialize.h b/js/src/wasm/WasmSerialize.h new file mode 100644 index 0000000000..918e50b42d --- /dev/null +++ b/js/src/wasm/WasmSerialize.h @@ -0,0 +1,296 @@ +/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- + * vim: set ts=8 sts=2 et sw=2 tw=80: + * + * Copyright 2022 Mozilla Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef wasm_serialize_h +#define wasm_serialize_h + +#include "mozilla/CheckedInt.h" +#include "mozilla/MacroForEach.h" +#include "mozilla/Maybe.h" +#include "mozilla/Result.h" + +#include <cstdint> +#include <cstring> +#include <type_traits> + +namespace js { +namespace wasm { + +class TypeContext; + +// [SMDOC] "Module serialization" +// +// A wasm::Module may be serialized to a binary format that allows for quick +// reloads of a previous compiled wasm binary. +// +// The binary format is optimized for encoding/decoding speed, not size. There +// is no formal specification, and no backwards/forwards compatibility +// guarantees. The prelude of the encoding contains a 'build ID' which must be +// used when reading from a cache entry to determine if it is valid. +// +// Module serialization and deserialization are performed using templated +// functions that allow for (imperfect) abstraction over whether we are decoding +// or encoding the module. It can be viewed as a specialization of the visitor +// pattern. +// +// Each module data structure is visited by a function parameterized by the +// "mode", which may be either: +// 1. MODE_SIZE - We are computing the final encoding size, before encoding it +// 2. MODE_ENCODE - We are actually encoding the module to bytes +// 3. MODE_DECODE - We are decoding the module from bytes +// +// These functions are called "coding" functions, as they are generic to whether +// we are "encoding" or "decoding". The verb tense "code" is used for the +// prefix. +// +// Each coding function takes the item being visited, along with a "Coder" +// which contains the state needed for each mode. This is either a buffer span +// or an accumulated length. The coding function either manipulates the Coder +// directly or delegates to its field's coding functions. +// +// Leaf data types are usually just copied directly to and from memory using a +// generic "CodePod" function. See the "cacheable POD" documentation in this +// file for more information. +// +// Non-leaf data types need an explicit coding function. This function can +// usually be completely generic to decoding/encoding, and delegate to the +// coding functions for each field. Separate decoding/encoding functions may +// be needed when decoding requires initialization logic, such as constructors. +// In this case, it is critical that both functions agree on the fields to be +// coded, and the order they are coded in. +// +// Coding functions are defined as free functions in "WasmSerialize.cpp". When +// they require access to protected state in a type, they may use the +// WASM_DECLARE_FRIEND_SERIALIZE macro. + +// Signal an out of memory condition +struct OutOfMemory {}; + +// The result of serialization, either OK or OOM +using CoderResult = mozilla::Result<mozilla::Ok, OutOfMemory>; + +// CoderMode parameterizes the coding functions +enum CoderMode { + // We are computing the final size of the encoded buffer. This is a discrete + // pass that runs before encoding. + MODE_SIZE, + // We are encoding the module to bytes. + MODE_ENCODE, + // We are decoding the module from bytes. + MODE_DECODE, +}; + +// Coding functions take a different argument depending on which CoderMode +// they are invoked with: +// * MODE_SIZE - const T* +// * MODE_ENCODE - const T* +// * MODE_DECODE - T* +// +// The CoderArg<mode, T> type alias is used to acquire the proper type for +// coding function arguments. +template <CoderMode mode, typename V> +struct CoderArgT; + +template <typename V> +struct CoderArgT<MODE_SIZE, V> { + using T = const V*; +}; + +template <typename V> +struct CoderArgT<MODE_DECODE, V> { + using T = V*; +}; + +template <typename V> +struct CoderArgT<MODE_ENCODE, V> { + using T = const V*; +}; + +template <CoderMode mode, typename T> +using CoderArg = typename CoderArgT<mode, T>::T; + +// Coder is the state provided to all coding functions during module traversal. +template <CoderMode mode> +struct Coder; + +// A Coder<MODE_SIZE> computes the total encoded size of a module +template <> +struct Coder<MODE_SIZE> { + explicit Coder(const TypeContext* types) : types_(types), size_(0) {} + + // The types of the module that we're going to encode. This is required in + // order to encode the original index of types that we encounter. + const TypeContext* types_; + + // The current size of buffer required to serialize this module. + mozilla::CheckedInt<size_t> size_; + + // This function shares a signature with MODE_ENCODE to allow functions to be + // generic across MODE_SIZE/MODE_ENCODE, even though the src pointer is not + // needed for MODE_SIZE. + CoderResult writeBytes(const void* unusedSrc, size_t length); +}; + +// A Coder<MODE_ENCODE> holds the buffer being written to +template <> +struct Coder<MODE_ENCODE> { + Coder(const TypeContext* types, uint8_t* start, size_t length) + : types_(types), buffer_(start), end_(start + length) {} + + // The types of the module that we're encoding. This is required in + // order to encode the original index of types that we encounter. + const TypeContext* types_; + + // The current position in the buffer we're writing to. + uint8_t* buffer_; + // The end position in the buffer we're writing to. + const uint8_t* end_; + + CoderResult writeBytes(const void* src, size_t length); +}; + +// A Coder<MODE_DECODE> holds the buffer being read from +template <> +struct Coder<MODE_DECODE> { + Coder(const uint8_t* start, size_t length) + : types_(nullptr), buffer_(start), end_(start + length) {} + + // The types of the module that we're decoding. This is null until the types + // of this module are decoded. + const TypeContext* types_; + + // The current position in the buffer we're reading from. + const uint8_t* buffer_; + // The end position in the buffer we're reading from. + const uint8_t* end_; + + CoderResult readBytes(void* dest, size_t length); +}; + +// Macros to help types declare friendship with a coding function + +#define WASM_DECLARE_FRIEND_SERIALIZE(TYPE) \ + template <CoderMode mode> \ + friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>); + +#define WASM_DECLARE_FRIEND_SERIALIZE_ARGS(TYPE, ARGS...) \ + template <CoderMode mode> \ + friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>, ARGS); + +// [SMDOC] "Cacheable POD" +// +// Module serialization relies on copying simple structs to and from the +// cache format. We need a way to ensure that we only do this on types that are +// "safe". We call this "cacheable POD". Note: this is not the same thing as +// "POD" as that may contain pointers, which are not cacheable. +// +// We define cacheable POD (C-POD) recursively upon types: +// 1. any integer type is C-POD +// 2. any floating point type is C-POD +// 3. any enum type is C-POD +// 4. any mozilla::Maybe<T> with T: C-POD is C-POD +// 5. any T[N] with T: C-POD is C-POD +// 6. any union where all fields are C-POD is C-POD +// 7. any struct with the following conditions must is C-POD +// * every field's type must be C-POD +// * the parent type, if it exists, must also be C-POD +// * there must be no virtual methods +// +// There are no combination of C++ type traits at this time that can +// automatically meet these criteria, so we are rolling our own system. +// +// We define a "IsCacheablePod" type trait, with builtin rules for cases (1-5). +// The complex cases (6-7) are handled using manual declaration and checking +// macros that must be used upon structs and unions that are considered +// cacheable POD. +// +// See the following macros for details: +// - WASM_DECLARE_CACHEABLE_POD +// - WASM_CHECK_CACHEABLE_POD[_WITH_PARENT] + +// The IsCacheablePod type trait primary template. Contains the rules for +// (cases 1-3). +template <typename T> +struct IsCacheablePod + : public std::conditional_t<std::is_arithmetic_v<T> || std::is_enum_v<T>, + std::true_type, std::false_type> {}; + +// Partial specialization for (case 4). +template <typename T> +struct IsCacheablePod<mozilla::Maybe<T>> + : public std::conditional_t<IsCacheablePod<T>::value, std::true_type, + std::false_type> {}; + +// Partial specialization for (case 5). +template <typename T, size_t N> +struct IsCacheablePod<T[N]> + : public std::conditional_t<IsCacheablePod<T>::value, std::true_type, + std::false_type> {}; + +template <class T> +inline constexpr bool is_cacheable_pod = IsCacheablePod<T>::value; + +// Checks if derrived class will not use the structure alignment for its +// next field. It used when pod is a base class. +#define WASM_CHECK_CACHEABLE_POD_PADDING(Type) \ + class __CHECK_PADING_##Type : public Type { \ + public: \ + char c; \ + }; \ + static_assert(sizeof(__CHECK_PADING_##Type) > sizeof(Type), \ + #Type " will overlap with next field if inherited"); + +// Declare the type 'Type' to be cacheable POD. The definition of the type must +// contain a WASM_CHECK_CACHEABLE_POD[_WITH_PARENT] to ensure all fields of the +// type are cacheable POD. +#define WASM_DECLARE_CACHEABLE_POD(Type) \ + static_assert(!std::is_polymorphic_v<Type>, \ + #Type "must not have virtual methods"); \ + } /* namespace wasm */ \ + } /* namespace js */ \ + template <> \ + struct js::wasm::IsCacheablePod<js::wasm::Type> : public std::true_type {}; \ + namespace js { \ + namespace wasm { + +// Helper: check each field's type to be cacheable POD +#define WASM_CHECK_CACHEABLE_POD_FIELD_(Field) \ + static_assert(js::wasm::IsCacheablePod<decltype(Field)>::value, \ + #Field " must be cacheable pod"); + +// Check every field in a type definition to ensure they are cacheable POD. +#define WASM_CHECK_CACHEABLE_POD(Fields...) \ + MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (Fields)) + +// Check every field in a type definition to ensure they are cacheable POD, and +// check that the parent class is also cacheable POD. +#define WASM_CHECK_CACHEABLE_POD_WITH_PARENT(Parent, Fields...) \ + static_assert(js::wasm::IsCacheablePod<Parent>::value, \ + #Parent " must be cacheable pod"); \ + MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (Fields)) + +// Allow fields that are not cacheable POD but are believed to be safe for +// serialization due to some justification. +#define WASM_ALLOW_NON_CACHEABLE_POD_FIELD(Field, Reason) \ + static_assert(!js::wasm::IsCacheablePod<decltype(Field)>::value, \ + #Field " is not cacheable due to " Reason); + +} // namespace wasm +} // namespace js + +#endif // wasm_serialize_h |