summaryrefslogtreecommitdiffstats
path: root/third_party/wasm2c/include/wabt/decompiler-ls.h
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/wasm2c/include/wabt/decompiler-ls.h')
-rw-r--r-- third_party/wasm2c/include/wabt/decompiler-ls.h 267
1 files changed, 267 insertions, 0 deletions
diff --git a/third_party/wasm2c/include/wabt/decompiler-ls.h b/third_party/wasm2c/include/wabt/decompiler-ls.h
new file mode 100644
index 0000000000..c2612fa32a
--- /dev/null
+++ b/third_party/wasm2c/include/wabt/decompiler-ls.h
@@ -0,0 +1,267 @@
+/*
+ * Copyright 2019 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef WABT_DECOMPILER_LS_H_
+#define WABT_DECOMPILER_LS_H_
+
+#include "wabt/decompiler-ast.h"
+#include "wabt/string-util.h"
+
+#include <map>
+
+namespace wabt {
+
+// Returns the keyword the decompiler prints for type `t`.
+// Names starting with "u" are unsigned, the rest are "signed or doesn't
+// matter". Any type without an entry below yields "ILLEGAL".
+inline const char* GetDecompTypeName(Type t) {
+  const char* keyword = "ILLEGAL";
+  switch (t) {
+    case Type::I8:
+      keyword = "byte";
+      break;
+    case Type::I8U:
+      keyword = "ubyte";
+      break;
+    case Type::I16:
+      keyword = "short";
+      break;
+    case Type::I16U:
+      keyword = "ushort";
+      break;
+    case Type::I32:
+      keyword = "int";
+      break;
+    case Type::I32U:
+      keyword = "uint";
+      break;
+    case Type::I64:
+      keyword = "long";
+      break;
+    case Type::F32:
+      keyword = "float";
+      break;
+    case Type::F64:
+      keyword = "double";
+      break;
+    case Type::V128:
+      keyword = "simd";
+      break;
+    case Type::Func:
+      keyword = "func";
+      break;
+    case Type::FuncRef:
+      keyword = "funcref";
+      break;
+    case Type::ExternRef:
+      keyword = "externref";
+      break;
+    case Type::Void:
+      keyword = "void";
+      break;
+    default:
+      // Keep the "ILLEGAL" placeholder for everything else.
+      break;
+  }
+  return keyword;
+}
+
+// Returns the type a load/store actually reads or writes in memory.
+//
+// For I32/I64 operands the result is narrowed according to the opcode's
+// memory size (1, 2 or 4 bytes), and is the unsigned variant when the opcode
+// name ends in "_u". Every other combination (non-integer operands, or a
+// memory size not listed below) returns `operand_type` unchanged.
+inline Type GetMemoryType(Type operand_type, Opcode opc) {
+  // TODO: something something SIMD.
+  // TODO: this loses information of the type it is read into.
+  // That may well not be the biggest deal since that is usually obvious
+  // from context, if not, we should probably represent that as a cast around
+  // the access, since it should not be part of the field type.
+  if (operand_type == Type::I32 || operand_type == Type::I64) {
+    constexpr std::string_view kUnsignedSuffix = "_u";
+    const std::string_view name(opc.GetName());
+    // FIXME: change into a new column in opcode.def instead?
+    // The length check keeps `name.size() - suffix size` from wrapping around
+    // (which would make substr() throw) should a name ever be shorter than
+    // the suffix.
+    const bool is_unsigned =
+        name.size() >= kUnsignedSuffix.size() &&
+        name.substr(name.size() - kUnsignedSuffix.size()) == kUnsignedSuffix;
+    switch (opc.GetMemorySize()) {
+      case 1: return is_unsigned ? Type::I8U : Type::I8;
+      case 2: return is_unsigned ? Type::I16U : Type::I16;
+      case 4: return is_unsigned ? Type::I32U : Type::I32;
+      default: break;  // Other sizes keep the operand type.
+    }
+  }
+  return operand_type;
+}
+
+// Track all loads and stores inside a single function, to be able to detect
+// struct layouts we can use to annotate variables with, to make code more
+// readable.
+//
+// Accesses are grouped per address-expression name (see AddrExpName) and,
+// within a name, per constant offset. CheckLayouts() has to run after all
+// Track() calls and before GenTypeDecl()/GenAccess(): it assigns the field
+// indices and clears `struct_layout` for variables whose accesses do not form
+// a struct.
+struct LoadStoreTracking {
+  // What has been observed at one offset relative to one variable.
+  struct LSAccess {
+    // Memory size of the most recent access; 0 means nothing recorded yet.
+    Address byte_size = 0;
+    // Memory type (see GetMemoryType) of the most recent access.
+    Type type = Type::Any;
+    // Alignment of the most recent access.
+    Address align = 0;
+    // Position of this offset among the variable's offsets, in ascending
+    // order. Assigned by CheckLayouts().
+    uint32_t idx = 0;
+    // Cleared once two accesses at this offset disagree in size, type or
+    // alignment, or an access is not naturally aligned.
+    bool is_uniform = true;
+  };
+
+  // Everything recorded for one named address expression.
+  struct LSVar {
+    // Accesses keyed by offset; std::map keeps them sorted by offset.
+    std::map<uint64_t, LSAccess> accesses;
+    // Whether the accesses can be shown as a struct. Starts out true and is
+    // cleared by CheckLayouts() when they cannot.
+    bool struct_layout = true;
+    // Type shared by all accesses so far: Type::Any while nothing has been
+    // recorded, Type::Void once accesses disagreed.
+    Type same_type = Type::Any;
+    // Alignment shared by all accesses so far, kInvalidAddress if none.
+    Address same_align = kInvalidAddress;
+    // Opcode of the last access that matched same_type/same_align.
+    Opcode last_opc;
+  };
+
+  // Records every load and store in the tree rooted at `n`. Children are
+  // visited before the node itself.
+  void Track(const Node& n) {
+    for (auto& c : n.children) {
+      Track(c);
+    }
+    switch (n.etype) {
+      case ExprType::Load: {
+        auto& le = *cast<LoadExpr>(n.e);
+        // children[0] is used as the address expression.
+        LoadStore(le.offset, le.opcode, le.opcode.GetResultType(), le.align,
+                  n.children[0]);
+        break;
+      }
+      case ExprType::Store: {
+        auto& se = *cast<StoreExpr>(n.e);
+        // children[0] is used as the address expression.
+        LoadStore(se.offset, se.opcode, se.opcode.GetParamType2(), se.align,
+                  n.children[0]);
+        break;
+      }
+      default:
+        break;
+    }
+  }
+
+  // Returns the name identifying the pointer that `addr_exp` reads, or an
+  // empty string if the expression is not one we can track (currently only
+  // local.get and local.tee are recognized).
+  const std::string AddrExpName(const Node& addr_exp) const {
+    // TODO: expand this to more kinds of address expressions.
+    switch (addr_exp.etype) {
+      case ExprType::LocalGet:
+        return cast<LocalGetExpr>(addr_exp.e)->var.name();
+      case ExprType::LocalTee:
+        return cast<LocalTeeExpr>(addr_exp.e)->var.name();
+      default:
+        return "";
+    }
+  }
+
+  // Records a single memory access.
+  //   offset:   constant offset of the access.
+  //   opc:      the load/store opcode; supplies the memory size.
+  //   type:     operand type, narrowed to the memory type via GetMemoryType.
+  //   align:    alignment of the access.
+  //   addr_exp: address operand; accesses through untrackable expressions
+  //             (empty AddrExpName) are ignored.
+  void LoadStore(uint64_t offset,
+                 Opcode opc,
+                 Type type,
+                 Address align,
+                 const Node& addr_exp) {
+    auto byte_size = opc.GetMemorySize();
+    type = GetMemoryType(type, opc);
+    // We want to associate memory ops of a certain offset & size as being
+    // relative to a uniquely identifiable pointer, such as a local.
+    auto name = AddrExpName(addr_exp);
+    if (name.empty()) {
+      return;
+    }
+    auto& var = vars[name];
+    auto& access = var.accesses[offset];
+    // Check if previous access at this offset (if any) is of same size
+    // and type (see CheckLayouts below). A zero byte_size means this is the
+    // first access at this offset, so there is nothing to compare against.
+    if (access.byte_size && ((access.byte_size != byte_size) ||
+                             (access.type != type) || (access.align != align)))
+      access.is_uniform = false;
+    // Also exclude weird alignment accesses from structs.
+    if (!opc.IsNaturallyAligned(align))
+      access.is_uniform = false;
+    access.byte_size = byte_size;
+    access.type = type;
+    access.align = align;
+    // Additionally, check if all accesses are to the same type, so
+    // if layout check fails, we can at least declare it as pointer to
+    // a type.
+    if ((var.same_type == type || var.same_type == Type::Any) &&
+        (var.same_align == align || var.same_align == kInvalidAddress)) {
+      var.same_type = type;
+      var.same_align = align;
+      var.last_opc = opc;
+    } else {
+      // Type::Void marks "mixed"; it never equals Type::Any, so later
+      // accesses cannot re-enter the branch above by way of the Any check.
+      var.same_type = Type::Void;
+      var.same_align = kInvalidAddress;
+    }
+  }
+
+  // Decides, for every tracked variable, whether its accesses may be shown as
+  // a struct, and numbers the accesses in offset order.
+  void CheckLayouts() {
+    // Here we check if the set of accesses we have collected form a sequence
+    // we could declare as a struct, meaning they are properly aligned,
+    // contiguous, and have no overlaps between different types and sizes.
+    // We do this because an int access of size 2 at offset 0 followed by
+    // a float access of size 4 at offset 4 can be compactly represented as a
+    // struct { short, float }, whereas something that reads from overlapping
+    // or discontinuous offsets would need a more complicated syntax that
+    // involves explicit offsets.
+    // We assume that the bulk of memory accesses are of this very regular kind,
+    // so we choose not to even emit struct layouts for irregular ones,
+    // given that they are rare and confusing, and thus do not benefit from
+    // being represented as if they were structs.
+    for (auto& entry : vars) {
+      auto& var = entry.second;
+      if (var.accesses.size() == 1) {
+        // If we have just one access, this is better represented as a pointer
+        // than a struct.
+        var.struct_layout = false;
+        continue;
+      }
+      uint64_t cur_offset = 0;
+      uint32_t idx = 0;
+      for (auto& offset_and_access : var.accesses) {
+        auto& access = offset_and_access.second;
+        access.idx = idx++;
+        if (!access.is_uniform) {
+          var.struct_layout = false;
+          break;
+        }
+        // Align to next access: all elements are expected to be aligned to
+        // a memory address that's a multiple of their own size.
+        auto mask = static_cast<uint64_t>(access.byte_size - 1);
+        cur_offset = (cur_offset + mask) & ~mask;
+        if (cur_offset != offset_and_access.first) {
+          // Gap or overlap relative to the previous field.
+          var.struct_layout = false;
+          break;
+        }
+        cur_offset += access.byte_size;
+      }
+    }
+  }
+
+  // Field name for the access with index `idx`.
+  std::string IdxToName(uint32_t idx) const {
+    return IndexToAlphaName(idx);  // TODO: more descriptive names?
+  }
+
+  // Returns "@<align>" when `align` is not the natural alignment for `opc`,
+  // and an empty string otherwise.
+  std::string GenAlign(Address align, Opcode opc) const {
+    return opc.IsNaturallyAligned(align) ? "" : cat("@", std::to_string(align));
+  }
+
+  // Returns the type annotation for the variable called `name`:
+  //   - "{ a:int, b:float }" style when it has a struct layout,
+  //   - "<type>_ptr" (plus an alignment suffix from GenAlign) when all
+  //     accesses agree on one type,
+  //   - "" when the variable is untracked or its accesses are mixed.
+  std::string GenTypeDecl(const std::string& name) const {
+    auto it = vars.find(name);
+    if (it == vars.end()) {
+      return "";
+    }
+    if (it->second.struct_layout) {
+      std::string s = "{ ";
+      for (auto& access : it->second.accesses) {
+        // Every field but the first (idx 0) is preceded by a separator.
+        if (access.second.idx) {
+          s += ", ";
+        }
+        s += IdxToName(access.second.idx);
+        s += ':';
+        s += GetDecompTypeName(access.second.type);
+      }
+      s += " }";
+      return s;
+    }
+    // We don't have a struct layout, or the struct has just one field,
+    // so maybe we can just declare it as a pointer to one type?
+    if (it->second.same_type != Type::Void) {
+      return cat(GetDecompTypeName(it->second.same_type), "_ptr",
+                 GenAlign(it->second.same_align, it->second.last_opc));
+    }
+    return "";
+  }
+
+  // Returns how an access at `offset` through `addr_exp` should be written:
+  // the field name for struct layouts, "*" for typed pointers, and "" when
+  // the expression is untracked or no annotation applies.
+  std::string GenAccess(uint64_t offset, const Node& addr_exp) const {
+    auto name = AddrExpName(addr_exp);
+    if (name.empty()) {
+      return "";
+    }
+    auto it = vars.find(name);
+    if (it == vars.end()) {
+      return "";
+    }
+    if (it->second.struct_layout) {
+      auto ait = it->second.accesses.find(offset);
+      assert(ait != it->second.accesses.end());
+      if (ait == it->second.accesses.end()) {
+        // Only reachable when asserts are compiled out: report "no
+        // annotation" rather than dereferencing the end iterator.
+        return "";
+      }
+      return IdxToName(ait->second.idx);
+    }
+    // Not a struct, see if it is a typed pointer.
+    if (it->second.same_type != Type::Void) {
+      return "*";
+    }
+    return "";
+  }
+
+  // Forgets all recorded variables.
+  void Clear() { vars.clear(); }
+
+  // Tracked variables, keyed by the name returned from AddrExpName.
+  std::map<std::string, LSVar> vars;
+};
+
+} // namespace wabt
+
+#endif // WABT_DECOMPILER_LS_H_