summaryrefslogtreecommitdiffstats
path: root/third_party/wasm2c/include/wabt/decompiler-naming.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/wasm2c/include/wabt/decompiler-naming.h')
-rw-r--r--third_party/wasm2c/include/wabt/decompiler-naming.h205
1 files changed, 205 insertions, 0 deletions
diff --git a/third_party/wasm2c/include/wabt/decompiler-naming.h b/third_party/wasm2c/include/wabt/decompiler-naming.h
new file mode 100644
index 0000000000..12c40e04fc
--- /dev/null
+++ b/third_party/wasm2c/include/wabt/decompiler-naming.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright 2019 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WABT_DECOMPILER_NAMING_H_
+#define WABT_DECOMPILER_NAMING_H_
+
+#include "wabt/decompiler-ast.h"
+
+#include <set>
+
+namespace wabt {
+
+inline void RenameToIdentifier(std::string& name,
+ Index i,
+ BindingHash& bh,
+ const std::set<std::string_view>* filter) {
+ // Filter out non-identifier characters, and try to reduce the size of
+ // gigantic C++ signature names.
+ std::string s;
+ size_t nesting = 0;
+ size_t read = 0;
+ size_t word_start = 0;
+ for (auto c : name) {
+ read++;
+ // We most certainly don't want to parse the entirety of C++ signatures,
+ // but these names are sometimes several lines long, so would be great
+ // to trim down. One quick way to do that is to remove anything between
+ // nested (), which usually means the parameter list.
+ if (c == '(') {
+ nesting++;
+ }
+ if (c == ')') {
+ nesting--;
+ }
+ if (nesting) {
+ continue;
+ }
+ if (!isalnum(static_cast<unsigned char>(c))) {
+ c = '_';
+ }
+ if (c == '_') {
+ if (s.empty()) {
+ continue; // Skip leading.
+ }
+ if (s.back() == '_') {
+ continue; // Consecutive.
+ }
+ }
+ s += c;
+ if (filter && (c == '_' || read == name.size())) {
+ // We found a "word" inside a snake_case identifier.
+ auto word_end = s.size();
+ if (c == '_') {
+ word_end--;
+ }
+ assert(word_end > word_start);
+ auto word =
+ std::string_view(s.c_str() + word_start, word_end - word_start);
+ if (filter->find(word) != filter->end()) {
+ s.resize(word_start);
+ }
+ word_start = s.size();
+ }
+ }
+ if (!s.empty() && s.back() == '_') {
+ s.pop_back(); // Trailing.
+ }
+ // If after all this culling, we're still gigantic (STL identifier can
+ // easily be hundreds of chars in size), just cut the identifier
+ // down, it will be disambiguated below, if needed.
+ const size_t max_identifier_length = 100;
+ if (s.size() > max_identifier_length) {
+ s.resize(max_identifier_length);
+ }
+ if (s.empty()) {
+ s = "__empty";
+ }
+ // Remove original binding first, such that it doesn't match with our
+ // new name.
+ bh.erase(name);
+ // Find a unique name.
+ Index disambiguator = 0;
+ auto base_len = s.size();
+ for (;;) {
+ if (bh.count(s) == 0) {
+ break;
+ }
+ disambiguator++;
+ s.resize(base_len);
+ s += '_';
+ s += std::to_string(disambiguator);
+ }
+ // Replace name in bindings.
+ name = s;
+ bh.emplace(s, Binding(i));
+}
+
+template <typename T>
+void RenameToIdentifiers(std::vector<T*>& things,
+ BindingHash& bh,
+ const std::set<std::string_view>* filter) {
+ Index i = 0;
+ for (auto thing : things) {
+ RenameToIdentifier(thing->name, i++, bh, filter);
+ }
+}
+
+enum {
+ // This a bit arbitrary, change at will.
+ min_content_identifier_size = 7,
+ max_content_identifier_size = 30
+};
+
+void RenameToContents(std::vector<DataSegment*>& segs, BindingHash& bh) {
+ std::string s;
+ for (auto seg : segs) {
+ if (seg->name.substr(0, 2) != "d_") {
+ // This segment was named explicitly by a symbol.
+ // FIXME: this is not a great check, a symbol could start with d_.
+ continue;
+ }
+ s = "d_";
+ for (auto c : seg->data) {
+ if (isalnum(c) || c == '_') {
+ s += static_cast<char>(c);
+ }
+ if (s.size() >= max_content_identifier_size) {
+ // We truncate any very long names, since those make for hard to
+ // format output. They can be somewhat long though, since data segment
+ // references tend to not occur that often.
+ break;
+ }
+ }
+ if (s.size() < min_content_identifier_size) {
+ // It is useful to have a minimum, since if there few printable characters
+ // in a data section, that is probably a sign of binary, and those few
+ // characters are not going to be very significant.
+ continue;
+ }
+ // We could do the same disambiguition as RenameToIdentifier and
+ // GenerateNames do, but if we come up with a clashing name here it is
+ // likely a sign of not very meaningful binary data, so it is easier to
+ // just keep the original generated name in that case.
+ if (bh.count(s) != 0) {
+ continue;
+ }
+ // Remove original entry.
+ bh.erase(seg->name);
+ seg->name = s;
+ bh.emplace(s, Binding(static_cast<Index>(&seg - &segs[0])));
+ }
+}
+
+// Function names may contain arbitrary C++ syntax, so we want to
+// filter those to look like identifiers. A function name may be set
+// by a name section (applied in ReadBinaryIr, called before this function)
+// or by an export (applied by GenerateNames, called before this function),
+// to both the Func and func_bindings.
+// Those names then further perculate down the IR in ApplyNames (called after
+// this function).
+// To not have to add too many decompiler-specific code into those systems
+// (using a callback??) we instead rename everything here.
+// Also do data section renaming here.
+void RenameAll(Module& module) {
+ // We also filter common C++ keywords/STL idents that make for huge
+ // identifiers.
+ // FIXME: this can obviously give bad results if the input is not C++..
+ std::set<std::string_view> filter = {
+ {"const"}, {"std"}, {"allocator"}, {"char"}, {"basic"},
+ {"traits"}, {"wchar"}, {"t"}, {"void"}, {"int"},
+ {"unsigned"}, {"2"}, {"cxxabiv1"}, {"short"}, {"4096ul"},
+ };
+ RenameToIdentifiers(module.funcs, module.func_bindings, &filter);
+ // Also do this for some other kinds of names, but without the keyword
+ // substitution.
+ RenameToIdentifiers(module.globals, module.global_bindings, nullptr);
+ RenameToIdentifiers(module.tables, module.table_bindings, nullptr);
+ RenameToIdentifiers(module.tags, module.tag_bindings, nullptr);
+ RenameToIdentifiers(module.exports, module.export_bindings, nullptr);
+ RenameToIdentifiers(module.types, module.type_bindings, nullptr);
+ RenameToIdentifiers(module.memories, module.memory_bindings, nullptr);
+ RenameToIdentifiers(module.data_segments, module.data_segment_bindings,
+ nullptr);
+ RenameToIdentifiers(module.elem_segments, module.elem_segment_bindings,
+ nullptr);
+ // Special purpose naming for data segments.
+ RenameToContents(module.data_segments, module.data_segment_bindings);
+}
+
+} // namespace wabt
+
+#endif // WABT_DECOMPILER_NAMING_H_