1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
|
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
* vim: set ts=8 sts=2 et sw=2 tw=80:
*
* Copyright 2022 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef wasm_serialize_h
#define wasm_serialize_h
#include "mozilla/CheckedInt.h"
#include "mozilla/MacroForEach.h"
#include "mozilla/Maybe.h"
#include "mozilla/Result.h"
#include <cstdint>
#include <cstring>
#include <type_traits>
namespace js {
namespace wasm {
class TypeContext;
// [SMDOC] "Module serialization"
//
// A wasm::Module may be serialized to a binary format that allows for quick
// reloads of a previously compiled wasm binary.
//
// The binary format is optimized for encoding/decoding speed, not size. There
// is no formal specification, and no backwards/forwards compatibility
// guarantees. The prelude of the encoding contains a 'build ID' which must be
// used when reading from a cache entry to determine if it is valid.
//
// Module serialization and deserialization are performed using templated
// functions that allow for (imperfect) abstraction over whether we are decoding
// or encoding the module. It can be viewed as a specialization of the visitor
// pattern.
//
// Each module data structure is visited by a function parameterized by the
// "mode", which may be either:
// 1. MODE_SIZE - We are computing the final encoding size, before encoding it
// 2. MODE_ENCODE - We are actually encoding the module to bytes
// 3. MODE_DECODE - We are decoding the module from bytes
//
// These functions are called "coding" functions, as they are generic to whether
// we are "encoding" or "decoding". The verb "code" is used as the prefix of
// their names.
//
// Each coding function takes the item being visited, along with a "Coder"
// which contains the state needed for each mode. This is either a buffer span
// or an accumulated length. The coding function either manipulates the Coder
// directly or delegates to its field's coding functions.
//
// Leaf data types are usually just copied directly to and from memory using a
// generic "CodePod" function. See the "cacheable POD" documentation in this
// file for more information.
//
// Non-leaf data types need an explicit coding function. This function can
// usually be completely generic to decoding/encoding, and delegate to the
// coding functions for each field. Separate decoding/encoding functions may
// be needed when decoding requires initialization logic, such as constructors.
// In this case, it is critical that both functions agree on the fields to be
// coded, and the order they are coded in.
//
// Coding functions are defined as free functions in "WasmSerialize.cpp". When
// they require access to protected state in a type, they may use the
// WASM_DECLARE_FRIEND_SERIALIZE macro.
// Signal an out of memory condition. This is an empty tag type: it carries no
// data and is only used as the error alternative of CoderResult.
struct OutOfMemory {};
// The result of serialization, either OK or OOM. This is the return type of
// the Coder read/write primitives and of the coding functions declared through
// the WASM_DECLARE_FRIEND_SERIALIZE macros in this file.
using CoderResult = mozilla::Result<mozilla::Ok, OutOfMemory>;
// CoderMode selects which of the three passes a coding function is
// instantiated for. It is used as a non-type template parameter by Coder and
// CoderArg.
enum CoderMode {
  // Sizing pass: compute the final size of the encoded buffer. This is a
  // discrete pass that runs before encoding.
  MODE_SIZE = 0,
  // Encoding pass: the module is being written out as bytes.
  MODE_ENCODE = 1,
  // Decoding pass: the module is being read back from bytes.
  MODE_DECODE = 2,
};
// Coding functions take a different argument depending on which CoderMode
// they are invoked with:
// * MODE_SIZE - const T*
// * MODE_ENCODE - const T*
// * MODE_DECODE - T*
//
// The CoderArg<mode, T> type alias is used to acquire the proper type for
// coding function arguments.
template <CoderMode mode, typename V>
struct CoderArgT;
template <typename V>
struct CoderArgT<MODE_SIZE, V> {
using T = const V*;
};
template <typename V>
struct CoderArgT<MODE_DECODE, V> {
using T = V*;
};
template <typename V>
struct CoderArgT<MODE_ENCODE, V> {
using T = const V*;
};
template <CoderMode mode, typename T>
using CoderArg = typename CoderArgT<mode, T>::T;
// Coder is the state provided to all coding functions during module traversal.
// Only the declaration of the primary template appears here; this header
// provides an explicit specialization for each of MODE_SIZE, MODE_ENCODE and
// MODE_DECODE.
template <CoderMode mode>
struct Coder;
// Coder<MODE_SIZE> is the state for the sizing pass: it accumulates the total
// encoded size of a module.
template <>
struct Coder<MODE_SIZE> {
  // The types of the module that is going to be encoded. Required in order to
  // encode the original index of types that are encountered.
  const TypeContext* types_;
  // Running total of the buffer size required to serialize this module, held
  // in a mozilla::CheckedInt.
  mozilla::CheckedInt<size_t> size_;

  // Starts with an accumulated size of zero for the module described by
  // `types`.
  explicit Coder(const TypeContext* types) : types_(types), size_(0) {}

  // Has the same signature as Coder<MODE_ENCODE>::writeBytes so that coding
  // functions can be generic across MODE_SIZE/MODE_ENCODE, even though the
  // source pointer is not needed when sizing. Defined out of line.
  CoderResult writeBytes(const void* unusedSrc, size_t length);
};
// Coder<MODE_ENCODE> is the state for the encoding pass: it holds the buffer
// that is being written to.
template <>
struct Coder<MODE_ENCODE> {
  // The types of the module being encoded. Required in order to encode the
  // original index of types that are encountered.
  const TypeContext* types_;
  // Current position in the output buffer.
  uint8_t* buffer_;
  // End position of the output buffer (start + length, i.e. one past the last
  // byte of the range given to the constructor).
  const uint8_t* end_;

  // `start` and `length` describe the destination buffer; writing begins at
  // `start`.
  Coder(const TypeContext* types, uint8_t* start, size_t length)
      : types_(types), buffer_(start), end_(start + length) {}

  // Write primitive used by coding functions; defined out of line.
  // NOTE(review): presumably copies `length` bytes from `src` to buffer_ and
  // advances it - confirm against the definition.
  CoderResult writeBytes(const void* src, size_t length);
};
// Coder<MODE_DECODE> is the state for the decoding pass: it holds the buffer
// that is being read from.
template <>
struct Coder<MODE_DECODE> {
  // The types of the module being decoded. Null until the types of this module
  // have been decoded.
  const TypeContext* types_ = nullptr;
  // Current position in the input buffer.
  const uint8_t* buffer_;
  // End position of the input buffer (start + length, i.e. one past the last
  // byte of the range given to the constructor).
  const uint8_t* end_;

  // `start` and `length` describe the source buffer; reading begins at
  // `start`.
  Coder(const uint8_t* start, size_t length)
      : buffer_(start), end_(start + length) {}

  // Read primitive used by coding functions; defined out of line.
  // NOTE(review): presumably copies `length` bytes from buffer_ into `dest`
  // and advances buffer_ - confirm against the definition.
  CoderResult readBytes(void* dest, size_t length);
};
// Macros to help types declare friendship with their coding function.
//
// WASM_DECLARE_FRIEND_SERIALIZE(TYPE) is placed inside the definition of TYPE
// and befriends the function template
//   template <CoderMode mode>
//   CoderResult Code<TYPE>(Coder<mode>&, CoderArg<mode, TYPE>);
// where the function name is formed by pasting "Code" and TYPE together, so
// TYPE must be a plain (unqualified) identifier.
#define WASM_DECLARE_FRIEND_SERIALIZE(TYPE) \
  template <CoderMode mode>                 \
  friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>);

// Same as WASM_DECLARE_FRIEND_SERIALIZE, for coding functions that take extra
// trailing parameters. The parameter types follow TYPE; at least one must be
// given. The ISO `...` / __VA_ARGS__ form is used rather than the GNU named
// variadic parameter extension so the macro is valid standard C++.
#define WASM_DECLARE_FRIEND_SERIALIZE_ARGS(TYPE, ...)               \
  template <CoderMode mode>                                         \
  friend CoderResult Code##TYPE(Coder<mode>&, CoderArg<mode, TYPE>, \
                                __VA_ARGS__);
// [SMDOC] "Cacheable POD"
//
// Module serialization relies on copying simple structs to and from the
// cache format. We need a way to ensure that we only do this on types that are
// "safe". We call this "cacheable POD". Note: this is not the same thing as
// "POD" as that may contain pointers, which are not cacheable.
//
// We define cacheable POD (C-POD) recursively upon types:
// 1. any integer type is C-POD
// 2. any floating point type is C-POD
// 3. any enum type is C-POD
// 4. any mozilla::Maybe<T> with T: C-POD is C-POD
// 5. any T[N] with T: C-POD is C-POD
// 6. any union where all fields are C-POD is C-POD
// 7. any struct that meets all of the following conditions is C-POD
// * every field's type must be C-POD
// * the parent type, if it exists, must also be C-POD
// * there must be no virtual methods
//
// There is no combination of C++ type traits at this time that can
// automatically check these criteria, so we are rolling our own system.
//
// We define an "IsCacheablePod" type trait, with builtin rules for cases (1-5).
// The complex cases (6-7) are handled using manual declaration and checking
// macros that must be used upon structs and unions that are considered
// cacheable POD.
//
// See the following macros for details:
// - WASM_DECLARE_CACHEABLE_POD
// - WASM_CHECK_CACHEABLE_POD[_WITH_PARENT]
// Primary template of the IsCacheablePod type trait, implementing cases (1-3):
// arithmetic types (integers and floating point) and enum types are cacheable
// POD. Every other type is not, unless a specialization says otherwise.
template <typename T>
struct IsCacheablePod
    : public std::bool_constant<std::is_arithmetic_v<T> ||
                                std::is_enum_v<T>> {};
// Partial specialization for (case 4): mozilla::Maybe<T> is cacheable POD
// exactly when its payload type T is.
template <typename Payload>
struct IsCacheablePod<mozilla::Maybe<Payload>>
    : public std::bool_constant<IsCacheablePod<Payload>::value> {};
// Partial specialization for (case 5): a fixed-size array is cacheable POD
// exactly when its element type is.
template <typename Elem, size_t Len>
struct IsCacheablePod<Elem[Len]>
    : public std::bool_constant<IsCacheablePod<Elem>::value> {};
// Convenience variable template: is_cacheable_pod<T> has the same value as
// IsCacheablePod<T>::value.
template <class T>
inline constexpr bool is_cacheable_pod = IsCacheablePod<T>::value;
// Checks that a class derived from `Type` will not reuse `Type`'s trailing
// alignment padding for its own first field. Use it for cacheable POD types
// that serve as base classes.
//
// The check declares a helper struct that derives from `Type` and adds a
// single `char`; if the helper is not larger than `Type`, the new field was
// placed inside `Type`'s padding and the static_assert fails.
//
// `Type` must be a plain (unqualified) identifier because it is pasted into
// the helper's name. The helper name avoids a leading double underscore, which
// is reserved for the implementation.
#define WASM_CHECK_CACHEABLE_POD_PADDING(Type)                     \
  struct WasmCheckPodPadding_##Type : public Type {                \
    char c;                                                        \
  };                                                               \
  static_assert(sizeof(WasmCheckPodPadding_##Type) > sizeof(Type), \
                #Type " will overlap with next field if inherited");
// Declare the type 'Type' to be cacheable POD by specializing IsCacheablePod
// for it. The definition of the type must contain a
// WASM_CHECK_CACHEABLE_POD[_WITH_PARENT] to ensure all fields of the type are
// cacheable POD.
//
// Usage notes:
//  * Invoke it inside `namespace js { namespace wasm {`: the macro closes both
//    namespaces, emits the specialization with fully qualified names, and then
//    reopens both namespaces.
//  * `Type` is named relative to js::wasm (it is spelled `js::wasm::Type` in
//    the specialization).
//  * The type is rejected at compile time if it is polymorphic.
#define WASM_DECLARE_CACHEABLE_POD(Type)                                      \
  static_assert(!std::is_polymorphic_v<Type>,                                 \
                #Type " must not have virtual methods");                      \
  } /* namespace wasm */                                                      \
  } /* namespace js */                                                        \
  template <>                                                                 \
  struct js::wasm::IsCacheablePod<js::wasm::Type> : public std::true_type {}; \
  namespace js {                                                              \
  namespace wasm {
// Helper for the WASM_CHECK_CACHEABLE_POD* macros: statically asserts that the
// declared type of a single field is cacheable POD. `Field` is used both as an
// expression (inside decltype) and, stringified, in the diagnostic.
#define WASM_CHECK_CACHEABLE_POD_FIELD_(Field)               \
  static_assert(js::wasm::is_cacheable_pod<decltype(Field)>, \
                #Field " must be cacheable pod");
// Check every field in a type definition to ensure they are cacheable POD.
// Pass the fields as a comma-separated list; WASM_CHECK_CACHEABLE_POD_FIELD_ is
// applied to each of them through MOZ_FOR_EACH. The ISO `...` / __VA_ARGS__
// form is used rather than the GNU named variadic parameter extension.
#define WASM_CHECK_CACHEABLE_POD(...) \
  MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (__VA_ARGS__))
// Check every field in a type definition to ensure they are cacheable POD, and
// check that the parent class is also cacheable POD. `Parent` is the base
// class; the remaining arguments are the fields, handled exactly as in
// WASM_CHECK_CACHEABLE_POD. The ISO `...` / __VA_ARGS__ form is used rather
// than the GNU named variadic parameter extension.
#define WASM_CHECK_CACHEABLE_POD_WITH_PARENT(Parent, ...) \
  static_assert(js::wasm::IsCacheablePod<Parent>::value,  \
                #Parent " must be cacheable pod");        \
  MOZ_FOR_EACH(WASM_CHECK_CACHEABLE_POD_FIELD_, (), (__VA_ARGS__))
// Records that `Field` is not cacheable POD but is believed to be safe for
// serialization, with `Reason` (a string literal) giving the justification.
// The static_assert requires the field's type to NOT be cacheable POD, so it
// fires when the exemption is applied to a field that is in fact cacheable
// POD.
#define WASM_ALLOW_NON_CACHEABLE_POD_FIELD(Field, Reason)     \
  static_assert(!js::wasm::is_cacheable_pod<decltype(Field)>, \
                #Field " is not cacheable due to " Reason);
} // namespace wasm
} // namespace js
#endif // wasm_serialize_h
|