From 26a029d407be480d791972afb5975cf62c9360a6 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 19 Apr 2024 02:47:55 +0200 Subject: Adding upstream version 124.0.1. Signed-off-by: Daniel Baumann --- third_party/wasm2c/include/wabt/decompiler-ls.h | 267 ++++++++++++++++++++++++ 1 file changed, 267 insertions(+) create mode 100644 third_party/wasm2c/include/wabt/decompiler-ls.h (limited to 'third_party/wasm2c/include/wabt/decompiler-ls.h') diff --git a/third_party/wasm2c/include/wabt/decompiler-ls.h b/third_party/wasm2c/include/wabt/decompiler-ls.h new file mode 100644 index 0000000000..c2612fa32a --- /dev/null +++ b/third_party/wasm2c/include/wabt/decompiler-ls.h @@ -0,0 +1,267 @@ +/* + * Copyright 2019 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef WABT_DECOMPILER_LS_H_ +#define WABT_DECOMPILER_LS_H_ + +#include "wabt/decompiler-ast.h" +#include "wabt/string-util.h" + +#include + +namespace wabt { + +// Names starting with "u" are unsigned, the rest are "signed or doesn't matter" +inline const char* GetDecompTypeName(Type t) { + switch (t) { + case Type::I8: return "byte"; + case Type::I8U: return "ubyte"; + case Type::I16: return "short"; + case Type::I16U: return "ushort"; + case Type::I32: return "int"; + case Type::I32U: return "uint"; + case Type::I64: return "long"; + case Type::F32: return "float"; + case Type::F64: return "double"; + case Type::V128: return "simd"; + case Type::Func: return "func"; + case Type::FuncRef: return "funcref"; + case Type::ExternRef: return "externref"; + case Type::Void: return "void"; + default: return "ILLEGAL"; + } +} + +inline Type GetMemoryType(Type operand_type, Opcode opc) { + // TODO: something something SIMD. + // TODO: this loses information of the type it is read into. + // That may well not be the biggest deal since that is usually obvious + // from context, if not, we should probably represent that as a cast around + // the access, since it should not be part of the field type. + if (operand_type == Type::I32 || operand_type == Type::I64) { + auto name = std::string_view(opc.GetName()); + // FIXME: change into a new column in opcode.def instead? + auto is_unsigned = name.substr(name.size() - 2) == "_u"; + switch (opc.GetMemorySize()) { + case 1: return is_unsigned ? Type::I8U : Type::I8; + case 2: return is_unsigned ? Type::I16U : Type::I16; + case 4: return is_unsigned ? Type::I32U : Type::I32; + } + } + return operand_type; +} + +// Track all loads and stores inside a single function, to be able to detect +// struct layouts we can use to annotate variables with, to make code more +// readable. +struct LoadStoreTracking { + struct LSAccess { + Address byte_size = 0; + Type type = Type::Any; + Address align = 0; + uint32_t idx = 0; + bool is_uniform = true; + }; + + struct LSVar { + std::map accesses; + bool struct_layout = true; + Type same_type = Type::Any; + Address same_align = kInvalidAddress; + Opcode last_opc; + }; + + void Track(const Node& n) { + for (auto& c : n.children) { + Track(c); + } + switch (n.etype) { + case ExprType::Load: { + auto& le = *cast(n.e); + LoadStore(le.offset, le.opcode, le.opcode.GetResultType(), le.align, + n.children[0]); + break; + } + case ExprType::Store: { + auto& se = *cast(n.e); + LoadStore(se.offset, se.opcode, se.opcode.GetParamType2(), se.align, + n.children[0]); + break; + } + default: + break; + } + } + + const std::string AddrExpName(const Node& addr_exp) const { + // TODO: expand this to more kinds of address expressions. + switch (addr_exp.etype) { + case ExprType::LocalGet: + return cast(addr_exp.e)->var.name(); + break; + case ExprType::LocalTee: + return cast(addr_exp.e)->var.name(); + break; + default: + return ""; + } + } + + void LoadStore(uint64_t offset, + Opcode opc, + Type type, + Address align, + const Node& addr_exp) { + auto byte_size = opc.GetMemorySize(); + type = GetMemoryType(type, opc); + // We want to associate memory ops of a certain offset & size as being + // relative to a uniquely identifiable pointer, such as a local. + auto name = AddrExpName(addr_exp); + if (name.empty()) { + return; + } + auto& var = vars[name]; + auto& access = var.accesses[offset]; + // Check if previous access at this offset (if any) is of same size + // and type (see Checklayouts below). + if (access.byte_size && ((access.byte_size != byte_size) || + (access.type != type) || (access.align != align))) + access.is_uniform = false; + // Also exclude weird alignment accesses from structs. + if (!opc.IsNaturallyAligned(align)) + access.is_uniform = false; + access.byte_size = byte_size; + access.type = type; + access.align = align; + // Additionally, check if all accesses are to the same type, so + // if layout check fails, we can at least declare it as pointer to + // a type. + if ((var.same_type == type || var.same_type == Type::Any) && + (var.same_align == align || var.same_align == kInvalidAddress)) { + var.same_type = type; + var.same_align = align; + var.last_opc = opc; + } else { + var.same_type = Type::Void; + var.same_align = kInvalidAddress; + } + } + + void CheckLayouts() { + // Here we check if the set of accesses we have collected form a sequence + // we could declare as a struct, meaning they are properly aligned, + // contiguous, and have no overlaps between different types and sizes. + // We do this because an int access of size 2 at offset 0 followed by + // a float access of size 4 at offset 4 can compactly represented as a + // struct { short, float }, whereas something that reads from overlapping + // or discontinuous offsets would need a more complicated syntax that + // involves explicit offsets. + // We assume that the bulk of memory accesses are of this very regular kind, + // so we choose not to even emit struct layouts for irregular ones, + // given that they are rare and confusing, and thus do not benefit from + // being represented as if they were structs. + for (auto& var : vars) { + if (var.second.accesses.size() == 1) { + // If we have just one access, this is better represented as a pointer + // than a struct. + var.second.struct_layout = false; + continue; + } + uint64_t cur_offset = 0; + uint32_t idx = 0; + for (auto& access : var.second.accesses) { + access.second.idx = idx++; + if (!access.second.is_uniform) { + var.second.struct_layout = false; + break; + } + // Align to next access: all elements are expected to be aligned to + // a memory address thats a multiple of their own size. + auto mask = static_cast(access.second.byte_size - 1); + cur_offset = (cur_offset + mask) & ~mask; + if (cur_offset != access.first) { + var.second.struct_layout = false; + break; + } + cur_offset += access.second.byte_size; + } + } + } + + std::string IdxToName(uint32_t idx) const { + return IndexToAlphaName(idx); // TODO: more descriptive names? + } + + std::string GenAlign(Address align, Opcode opc) const { + return opc.IsNaturallyAligned(align) ? "" : cat("@", std::to_string(align)); + } + + std::string GenTypeDecl(const std::string& name) const { + auto it = vars.find(name); + if (it == vars.end()) { + return ""; + } + if (it->second.struct_layout) { + std::string s = "{ "; + for (auto& access : it->second.accesses) { + if (access.second.idx) { + s += ", "; + } + s += IdxToName(access.second.idx); + s += ':'; + s += GetDecompTypeName(access.second.type); + } + s += " }"; + return s; + } + // We don't have a struct layout, or the struct has just one field, + // so maybe we can just declare it as a pointer to one type? + if (it->second.same_type != Type::Void) { + return cat(GetDecompTypeName(it->second.same_type), "_ptr", + GenAlign(it->second.same_align, it->second.last_opc)); + } + return ""; + } + + std::string GenAccess(uint64_t offset, const Node& addr_exp) const { + auto name = AddrExpName(addr_exp); + if (name.empty()) { + return ""; + } + auto it = vars.find(name); + if (it == vars.end()) { + return ""; + } + if (it->second.struct_layout) { + auto ait = it->second.accesses.find(offset); + assert(ait != it->second.accesses.end()); + return IdxToName(ait->second.idx); + } + // Not a struct, see if it is a typed pointer. + if (it->second.same_type != Type::Void) { + return "*"; + } + return ""; + } + + void Clear() { vars.clear(); } + + std::map vars; +}; + +} // namespace wabt + +#endif // WABT_DECOMPILER_LS_H_ -- cgit v1.2.3