summaryrefslogtreecommitdiffstats
path: root/third_party/wasm2c/include/wabt/decompiler-ls.h
blob: c2612fa32a199a103d2093d3ecd3d90cb78e6eaf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
/*
 * Copyright 2019 WebAssembly Community Group participants
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef WABT_DECOMPILER_LS_H_
#define WABT_DECOMPILER_LS_H_

#include "wabt/decompiler-ast.h"
#include "wabt/string-util.h"

#include <map>

namespace wabt {

// Names starting with "u" are unsigned, the rest are "signed or doesn't matter"
inline const char* GetDecompTypeName(Type t) {
  switch (t) {
    case Type::I8: return "byte";
    case Type::I8U: return "ubyte";
    case Type::I16: return "short";
    case Type::I16U: return "ushort";
    case Type::I32: return "int";
    case Type::I32U: return "uint";
    case Type::I64: return "long";
    case Type::F32: return "float";
    case Type::F64: return "double";
    case Type::V128: return "simd";
    case Type::Func: return "func";
    case Type::FuncRef: return "funcref";
    case Type::ExternRef: return "externref";
    case Type::Void: return "void";
    default: return "ILLEGAL";
  }
}

inline Type GetMemoryType(Type operand_type, Opcode opc) {
  // TODO: something something SIMD.
  // TODO: this loses information of the type it is read into.
  // That may well not be the biggest deal since that is usually obvious
  // from context, if not, we should probably represent that as a cast around
  // the access, since it should not be part of the field type.
  if (operand_type == Type::I32 || operand_type == Type::I64) {
    auto name = std::string_view(opc.GetName());
    // FIXME: change into a new column in opcode.def instead?
    auto is_unsigned = name.substr(name.size() - 2) == "_u";
    switch (opc.GetMemorySize()) {
      case 1: return is_unsigned ? Type::I8U : Type::I8;
      case 2: return is_unsigned ? Type::I16U : Type::I16;
      case 4: return is_unsigned ? Type::I32U : Type::I32;
    }
  }
  return operand_type;
}

// Track all loads and stores inside a single function, to be able to detect
// struct layouts we can use to annotate variables with, to make code more
// readable.
struct LoadStoreTracking {
  struct LSAccess {
    Address byte_size = 0;
    Type type = Type::Any;
    Address align = 0;
    uint32_t idx = 0;
    bool is_uniform = true;
  };

  struct LSVar {
    std::map<uint64_t, LSAccess> accesses;
    bool struct_layout = true;
    Type same_type = Type::Any;
    Address same_align = kInvalidAddress;
    Opcode last_opc;
  };

  void Track(const Node& n) {
    for (auto& c : n.children) {
      Track(c);
    }
    switch (n.etype) {
      case ExprType::Load: {
        auto& le = *cast<LoadExpr>(n.e);
        LoadStore(le.offset, le.opcode, le.opcode.GetResultType(), le.align,
                  n.children[0]);
        break;
      }
      case ExprType::Store: {
        auto& se = *cast<StoreExpr>(n.e);
        LoadStore(se.offset, se.opcode, se.opcode.GetParamType2(), se.align,
                  n.children[0]);
        break;
      }
      default:
        break;
    }
  }

  const std::string AddrExpName(const Node& addr_exp) const {
    // TODO: expand this to more kinds of address expressions.
    switch (addr_exp.etype) {
      case ExprType::LocalGet:
        return cast<LocalGetExpr>(addr_exp.e)->var.name();
        break;
      case ExprType::LocalTee:
        return cast<LocalTeeExpr>(addr_exp.e)->var.name();
        break;
      default:
        return "";
    }
  }

  void LoadStore(uint64_t offset,
                 Opcode opc,
                 Type type,
                 Address align,
                 const Node& addr_exp) {
    auto byte_size = opc.GetMemorySize();
    type = GetMemoryType(type, opc);
    // We want to associate memory ops of a certain offset & size as being
    // relative to a uniquely identifiable pointer, such as a local.
    auto name = AddrExpName(addr_exp);
    if (name.empty()) {
      return;
    }
    auto& var = vars[name];
    auto& access = var.accesses[offset];
    // Check if previous access at this offset (if any) is of same size
    // and type (see Checklayouts below).
    if (access.byte_size && ((access.byte_size != byte_size) ||
                             (access.type != type) || (access.align != align)))
      access.is_uniform = false;
    // Also exclude weird alignment accesses from structs.
    if (!opc.IsNaturallyAligned(align))
      access.is_uniform = false;
    access.byte_size = byte_size;
    access.type = type;
    access.align = align;
    // Additionally, check if all accesses are to the same type, so
    // if layout check fails, we can at least declare it as pointer to
    // a type.
    if ((var.same_type == type || var.same_type == Type::Any) &&
        (var.same_align == align || var.same_align == kInvalidAddress)) {
      var.same_type = type;
      var.same_align = align;
      var.last_opc = opc;
    } else {
      var.same_type = Type::Void;
      var.same_align = kInvalidAddress;
    }
  }

  void CheckLayouts() {
    // Here we check if the set of accesses we have collected form a sequence
    // we could declare as a struct, meaning they are properly aligned,
    // contiguous, and have no overlaps between different types and sizes.
    // We do this because an int access of size 2 at offset 0 followed by
    // a float access of size 4 at offset 4 can compactly represented as a
    // struct { short, float }, whereas something that reads from overlapping
    // or discontinuous offsets would need a more complicated syntax that
    // involves explicit offsets.
    // We assume that the bulk of memory accesses are of this very regular kind,
    // so we choose not to even emit struct layouts for irregular ones,
    // given that they are rare and confusing, and thus do not benefit from
    // being represented as if they were structs.
    for (auto& var : vars) {
      if (var.second.accesses.size() == 1) {
        // If we have just one access, this is better represented as a pointer
        // than a struct.
        var.second.struct_layout = false;
        continue;
      }
      uint64_t cur_offset = 0;
      uint32_t idx = 0;
      for (auto& access : var.second.accesses) {
        access.second.idx = idx++;
        if (!access.second.is_uniform) {
          var.second.struct_layout = false;
          break;
        }
        // Align to next access: all elements are expected to be aligned to
        // a memory address thats a multiple of their own size.
        auto mask = static_cast<uint64_t>(access.second.byte_size - 1);
        cur_offset = (cur_offset + mask) & ~mask;
        if (cur_offset != access.first) {
          var.second.struct_layout = false;
          break;
        }
        cur_offset += access.second.byte_size;
      }
    }
  }

  std::string IdxToName(uint32_t idx) const {
    return IndexToAlphaName(idx);  // TODO: more descriptive names?
  }

  std::string GenAlign(Address align, Opcode opc) const {
    return opc.IsNaturallyAligned(align) ? "" : cat("@", std::to_string(align));
  }

  std::string GenTypeDecl(const std::string& name) const {
    auto it = vars.find(name);
    if (it == vars.end()) {
      return "";
    }
    if (it->second.struct_layout) {
      std::string s = "{ ";
      for (auto& access : it->second.accesses) {
        if (access.second.idx) {
          s += ", ";
        }
        s += IdxToName(access.second.idx);
        s += ':';
        s += GetDecompTypeName(access.second.type);
      }
      s += " }";
      return s;
    }
    // We don't have a struct layout, or the struct has just one field,
    // so maybe we can just declare it as a pointer to one type?
    if (it->second.same_type != Type::Void) {
      return cat(GetDecompTypeName(it->second.same_type), "_ptr",
                 GenAlign(it->second.same_align, it->second.last_opc));
    }
    return "";
  }

  std::string GenAccess(uint64_t offset, const Node& addr_exp) const {
    auto name = AddrExpName(addr_exp);
    if (name.empty()) {
      return "";
    }
    auto it = vars.find(name);
    if (it == vars.end()) {
      return "";
    }
    if (it->second.struct_layout) {
      auto ait = it->second.accesses.find(offset);
      assert(ait != it->second.accesses.end());
      return IdxToName(ait->second.idx);
    }
    // Not a struct, see if it is a typed pointer.
    if (it->second.same_type != Type::Void) {
      return "*";
    }
    return "";
  }

  void Clear() { vars.clear(); }

  std::map<std::string, LSVar> vars;
};

}  // namespace wabt

#endif  // WABT_DECOMPILER_LS_H_