diff options
Diffstat (limited to '')
28 files changed, 12068 insertions, 0 deletions
diff --git a/third_party/rust/wast/src/ast/alias.rs b/third_party/rust/wast/src/ast/alias.rs new file mode 100644 index 0000000000..9bcb4969fb --- /dev/null +++ b/third_party/rust/wast/src/ast/alias.rs @@ -0,0 +1,76 @@ +use crate::ast::{self, kw}; +use crate::parser::{Parse, Parser, Result}; + +/// An `alias` statement used to juggle indices with nested modules. +#[derive(Debug)] +pub struct Alias<'a> { + /// Where this `alias` was defined. + pub span: ast::Span, + /// An identifier that this alias is resolved with (optionally) for name + /// resolution. + pub id: Option<ast::Id<'a>>, + /// An optional name for this alias stored in the custom `name` section. + pub name: Option<ast::NameAnnotation<'a>>, + /// The item in the parent instance that we're aliasing. + pub kind: AliasKind<'a>, +} + +#[derive(Debug)] +#[allow(missing_docs)] +pub enum AliasKind<'a> { + InstanceExport { + instance: ast::ItemRef<'a, kw::instance>, + export: &'a str, + kind: ast::ExportKind, + }, + Outer { + /// The index of the module that this reference is referring to. + module: ast::Index<'a>, + /// The index of the item within `module` that this alias is referering + /// to. + index: ast::Index<'a>, + /// The kind of item that's being aliased. + kind: ast::ExportKind, + }, +} + +impl Alias<'_> { + /// Returns the kind of item defined by this alias. + pub fn item_kind(&self) -> ast::ExportKind { + match self.kind { + AliasKind::InstanceExport { kind, .. } => kind, + AliasKind::Outer { kind, .. 
} => kind, + } + } +} + +impl<'a> Parse<'a> for Alias<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::alias>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let kind = parser.parens(|p| { + let kind = p.parse()?; + Ok(if parser.parse::<Option<kw::outer>>()?.is_some() { + AliasKind::Outer { + module: parser.parse()?, + index: parser.parse()?, + kind, + } + } else { + AliasKind::InstanceExport { + instance: parser.parse::<ast::IndexOrRef<_>>()?.0, + export: parser.parse()?, + kind, + } + }) + })?; + + Ok(Alias { + span, + id, + name, + kind, + }) + } +} diff --git a/third_party/rust/wast/src/ast/assert_expr.rs b/third_party/rust/wast/src/ast/assert_expr.rs new file mode 100644 index 0000000000..f6fcc9cd12 --- /dev/null +++ b/third_party/rust/wast/src/ast/assert_expr.rs @@ -0,0 +1,154 @@ +use crate::ast::{kw, Float32, Float64, Index, HeapType}; +use crate::parser::{Parse, Parser, Result}; + +/// An expression that is valid inside an `assert_return` directive. +/// +/// As of https://github.com/WebAssembly/spec/pull/1104, spec tests may include `assert_return` +/// directives that allow NaN patterns (`"nan:canonical"`, `"nan:arithmetic"`). Parsing an +/// `AssertExpression` means that: +/// - only constant values (e.g. `i32.const 4`) are used in the `assert_return` directive +/// - the NaN patterns are allowed (they are not allowed in regular `Expression`s). 
+#[derive(Debug)] +#[allow(missing_docs)] +pub enum AssertExpression<'a> { + I32(i32), + I64(i64), + F32(NanPattern<Float32>), + F64(NanPattern<Float64>), + V128(V128Pattern), + + RefNull(Option<HeapType<'a>>), + RefExtern(u32), + RefFunc(Option<Index<'a>>), + + // Either matches an f32 or f64 for an arithmetic nan pattern + LegacyArithmeticNaN, + // Either matches an f32 or f64 for a canonical nan pattern + LegacyCanonicalNaN, +} + +impl<'a> Parse<'a> for AssertExpression<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let keyword = parser.step(|c| match c.keyword() { + Some(pair) => Ok(pair), + None => Err(c.error("expected a keyword")), + })?; + + match keyword { + "i32.const" => Ok(AssertExpression::I32(parser.parse()?)), + "i64.const" => Ok(AssertExpression::I64(parser.parse()?)), + "f32.const" => Ok(AssertExpression::F32(parser.parse()?)), + "f64.const" => Ok(AssertExpression::F64(parser.parse()?)), + "v128.const" => Ok(AssertExpression::V128(parser.parse()?)), + "ref.null" => Ok(AssertExpression::RefNull(parser.parse()?)), + "ref.extern" => Ok(AssertExpression::RefExtern(parser.parse()?)), + "ref.func" => Ok(AssertExpression::RefFunc(parser.parse()?)), + _ => Err(parser.error("expected a [type].const expression")), + } + } +} + +/// Either a NaN pattern (`nan:canonical`, `nan:arithmetic`) or a value of type `T`. +#[derive(Debug, PartialEq)] +#[allow(missing_docs)] +pub enum NanPattern<T> { + CanonicalNan, + ArithmeticNan, + Value(T), +} + +impl<'a, T> Parse<'a> for NanPattern<T> +where + T: Parse<'a>, +{ + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::nan_canonical>() { + parser.parse::<kw::nan_canonical>()?; + Ok(NanPattern::CanonicalNan) + } else if parser.peek::<kw::nan_arithmetic>() { + parser.parse::<kw::nan_arithmetic>()?; + Ok(NanPattern::ArithmeticNan) + } else { + let val = parser.parse()?; + Ok(NanPattern::Value(val)) + } + } +} + +/// A version of `V128Const` that allows `NanPattern`s. 
+/// +/// This implementation is necessary because only float types can include NaN patterns; otherwise +/// it is largely similar to the implementation of `V128Const`. +#[derive(Debug)] +#[allow(missing_docs)] +pub enum V128Pattern { + I8x16([i8; 16]), + I16x8([i16; 8]), + I32x4([i32; 4]), + I64x2([i64; 2]), + F32x4([NanPattern<Float32>; 4]), + F64x2([NanPattern<Float64>; 2]), +} + +impl<'a> Parse<'a> for V128Pattern { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::i8x16>() { + parser.parse::<kw::i8x16>()?; + Ok(V128Pattern::I8x16([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i16x8>() { + parser.parse::<kw::i16x8>()?; + Ok(V128Pattern::I16x8([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i32x4>() { + parser.parse::<kw::i32x4>()?; + Ok(V128Pattern::I32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i64x2>() { + parser.parse::<kw::i64x2>()?; + Ok(V128Pattern::I64x2([parser.parse()?, parser.parse()?])) + } else if l.peek::<kw::f32x4>() { + parser.parse::<kw::f32x4>()?; + Ok(V128Pattern::F32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::f64x2>() { + parser.parse::<kw::f64x2>()?; + Ok(V128Pattern::F64x2([parser.parse()?, parser.parse()?])) + } else { + Err(l.error()) + } + } +} diff --git a/third_party/rust/wast/src/ast/custom.rs b/third_party/rust/wast/src/ast/custom.rs new file mode 100644 index 0000000000..52d8aa60df --- /dev/null +++ 
b/third_party/rust/wast/src/ast/custom.rs @@ -0,0 +1,165 @@ +use crate::ast::{self, annotation, kw}; +use crate::parser::{Parse, Parser, Result}; + +/// A wasm custom section within a module. +#[derive(Debug)] +pub struct Custom<'a> { + /// Where this `@custom` was defined. + pub span: ast::Span, + + /// Name of the custom section. + pub name: &'a str, + + /// Where the custom section is being placed, + pub place: CustomPlace, + + /// Payload of this custom section. + pub data: Vec<&'a [u8]>, +} + +/// Possible locations to place a custom section within a module. +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum CustomPlace { + /// This custom section will appear before the first section in the module. + BeforeFirst, + /// This custom section will be placed just before a known section. + Before(CustomPlaceAnchor), + /// This custom section will be placed just after a known section. + After(CustomPlaceAnchor), + /// This custom section will appear after the last section in the module. + AfterLast, +} + +/// Known sections that custom sections can be placed relative to. +#[derive(Debug, PartialEq, Copy, Clone)] +#[allow(missing_docs)] +pub enum CustomPlaceAnchor { + Type, + Import, + Module, + Instance, + Alias, + Func, + Table, + Memory, + Global, + Export, + Start, + Elem, + Code, + Data, + Event, +} + +impl<'a> Parse<'a> for Custom<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<annotation::custom>()?.0; + let name = parser.parse()?; + let place = if parser.peek::<ast::LParen>() { + parser.parens(|p| p.parse())? 
+ } else { + CustomPlace::AfterLast + }; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + Ok(Custom { + span, + name, + place, + data, + }) + } +} + +impl<'a> Parse<'a> for CustomPlace { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + let ctor = if l.peek::<kw::before>() { + parser.parse::<kw::before>()?; + if l.peek::<kw::first>() { + parser.parse::<kw::first>()?; + return Ok(CustomPlace::BeforeFirst); + } + CustomPlace::Before as fn(CustomPlaceAnchor) -> _ + } else if l.peek::<kw::after>() { + parser.parse::<kw::after>()?; + if l.peek::<kw::last>() { + parser.parse::<kw::last>()?; + return Ok(CustomPlace::AfterLast); + } + CustomPlace::After + } else { + return Err(l.error()); + }; + Ok(ctor(parser.parse()?)) + } +} + +impl<'a> Parse<'a> for CustomPlaceAnchor { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::r#type>() { + parser.parse::<kw::r#type>()?; + return Ok(CustomPlaceAnchor::Type); + } + if parser.peek::<kw::import>() { + parser.parse::<kw::import>()?; + return Ok(CustomPlaceAnchor::Import); + } + if parser.peek::<kw::func>() { + parser.parse::<kw::func>()?; + return Ok(CustomPlaceAnchor::Func); + } + if parser.peek::<kw::table>() { + parser.parse::<kw::table>()?; + return Ok(CustomPlaceAnchor::Table); + } + if parser.peek::<kw::memory>() { + parser.parse::<kw::memory>()?; + return Ok(CustomPlaceAnchor::Memory); + } + if parser.peek::<kw::global>() { + parser.parse::<kw::global>()?; + return Ok(CustomPlaceAnchor::Global); + } + if parser.peek::<kw::export>() { + parser.parse::<kw::export>()?; + return Ok(CustomPlaceAnchor::Export); + } + if parser.peek::<kw::start>() { + parser.parse::<kw::start>()?; + return Ok(CustomPlaceAnchor::Start); + } + if parser.peek::<kw::elem>() { + parser.parse::<kw::elem>()?; + return Ok(CustomPlaceAnchor::Elem); + } + if parser.peek::<kw::code>() { + parser.parse::<kw::code>()?; + return Ok(CustomPlaceAnchor::Code); + } 
+ if parser.peek::<kw::data>() { + parser.parse::<kw::data>()?; + return Ok(CustomPlaceAnchor::Data); + } + if parser.peek::<kw::event>() { + parser.parse::<kw::event>()?; + return Ok(CustomPlaceAnchor::Event); + } + if parser.peek::<kw::instance>() { + parser.parse::<kw::instance>()?; + return Ok(CustomPlaceAnchor::Instance); + } + if parser.peek::<kw::module>() { + parser.parse::<kw::module>()?; + return Ok(CustomPlaceAnchor::Module); + } + if parser.peek::<kw::alias>() { + parser.parse::<kw::alias>()?; + return Ok(CustomPlaceAnchor::Alias); + } + + Err(parser.error("expected a valid section name")) + } +} diff --git a/third_party/rust/wast/src/ast/event.rs b/third_party/rust/wast/src/ast/event.rs new file mode 100644 index 0000000000..0746afc4c2 --- /dev/null +++ b/third_party/rust/wast/src/ast/event.rs @@ -0,0 +1,44 @@ +use crate::ast::{self, kw}; +use crate::parser::{Parse, Parser, Result}; + +/// A WebAssembly event directive, part of the exception handling proposal. +#[derive(Debug)] +pub struct Event<'a> { + /// Where this event was defined + pub span: ast::Span, + /// An optional name by which to refer to this event in name resolution. + pub id: Option<ast::Id<'a>>, + /// Optional export directives for this event. + pub exports: ast::InlineExport<'a>, + /// The type of event that is defined. + pub ty: EventType<'a>, +} + +/// Listing of various types of events that can be defined in a wasm module. +#[derive(Clone, Debug)] +pub enum EventType<'a> { + /// An exception event, where the payload is the type signature of the event + /// (constructor parameters, etc). 
+ Exception(ast::TypeUse<'a, ast::FunctionType<'a>>), +} + +impl<'a> Parse<'a> for Event<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::event>()?.0; + let id = parser.parse()?; + let exports = parser.parse()?; + let ty = parser.parse()?; + Ok(Event { + span, + id, + exports, + ty, + }) + } +} + +impl<'a> Parse<'a> for EventType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(EventType::Exception(parser.parse()?)) + } +} diff --git a/third_party/rust/wast/src/ast/export.rs b/third_party/rust/wast/src/ast/export.rs new file mode 100644 index 0000000000..eadb0b5cc3 --- /dev/null +++ b/third_party/rust/wast/src/ast/export.rs @@ -0,0 +1,141 @@ +use crate::ast::{self, kw}; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; + +/// A entry in a WebAssembly module's export section. +#[derive(Debug)] +pub struct Export<'a> { + /// Where this export was defined. + pub span: ast::Span, + /// The name of this export from the module. + pub name: &'a str, + /// What's being exported from the module. + pub index: ast::ItemRef<'a, ExportKind>, +} + +/// Different kinds of elements that can be exported from a WebAssembly module, +/// contained in an [`Export`]. 
+#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)] +#[allow(missing_docs)] +pub enum ExportKind { + Func, + Table, + Memory, + Global, + Event, + Module, + Instance, + Type, +} + +impl<'a> Parse<'a> for Export<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(Export { + span: parser.parse::<kw::export>()?.0, + name: parser.parse()?, + index: parser.parse()?, + }) + } +} + +impl<'a> Parse<'a> for ExportKind { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(ExportKind::Func) + } else if l.peek::<kw::table>() { + parser.parse::<kw::table>()?; + Ok(ExportKind::Table) + } else if l.peek::<kw::memory>() { + parser.parse::<kw::memory>()?; + Ok(ExportKind::Memory) + } else if l.peek::<kw::global>() { + parser.parse::<kw::global>()?; + Ok(ExportKind::Global) + } else if l.peek::<kw::event>() { + parser.parse::<kw::event>()?; + Ok(ExportKind::Event) + } else if l.peek::<kw::module>() { + parser.parse::<kw::module>()?; + Ok(ExportKind::Module) + } else if l.peek::<kw::instance>() { + parser.parse::<kw::instance>()?; + Ok(ExportKind::Instance) + } else if l.peek::<kw::r#type>() { + parser.parse::<kw::r#type>()?; + Ok(ExportKind::Type) + } else { + Err(l.error()) + } + } +} + +macro_rules! kw_conversions { + ($($kw:ident => $kind:ident)*) => ($( + impl From<kw::$kw> for ExportKind { + fn from(_: kw::$kw) -> ExportKind { + ExportKind::$kind + } + } + + impl Default for kw::$kw { + fn default() -> kw::$kw { + kw::$kw(ast::Span::from_offset(0)) + } + } + )*); +} + +kw_conversions! { + instance => Instance + module => Module + func => Func + table => Table + global => Global + event => Event + memory => Memory + r#type => Type +} + +/// A listing of inline `(export "foo")` statements on a WebAssembly item in +/// its textual format. +#[derive(Debug)] +pub struct InlineExport<'a> { + /// The extra names to export an item as, if any. 
+ pub names: Vec<&'a str>, +} + +impl<'a> Parse<'a> for InlineExport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut names = Vec::new(); + while parser.peek::<Self>() { + names.push(parser.parens(|p| { + p.parse::<kw::export>()?; + p.parse::<&str>() + })?); + } + Ok(InlineExport { names }) + } +} + +impl Peek for InlineExport<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let cursor = match cursor.lparen() { + Some(cursor) => cursor, + None => return false, + }; + let cursor = match cursor.keyword() { + Some(("export", cursor)) => cursor, + _ => return false, + }; + let cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => return false, + }; + cursor.rparen().is_some() + } + + fn display() -> &'static str { + "inline export" + } +} diff --git a/third_party/rust/wast/src/ast/expr.rs b/third_party/rust/wast/src/ast/expr.rs new file mode 100644 index 0000000000..dc4bfb0027 --- /dev/null +++ b/third_party/rust/wast/src/ast/expr.rs @@ -0,0 +1,1767 @@ +use crate::ast::{self, kw, HeapType}; +use crate::parser::{Parse, Parser, Result}; +use std::mem; + +/// An expression, or a list of instructions, in the WebAssembly text format. +/// +/// This expression type will parse s-expression-folded instructions into a flat +/// list of instructions for emission later on. The implicit `end` instruction +/// at the end of an expression is not included in the `instrs` field. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct Expression<'a> { + pub instrs: Box<[Instruction<'a>]>, +} + +impl<'a> Parse<'a> for Expression<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + ExpressionParser::default().parse(parser) + } +} + +/// Helper struct used to parse an `Expression` with helper methods and such. +/// +/// The primary purpose of this is to avoid defining expression parsing as a +/// call-thread-stack recursive function. 
Since we're parsing user input that +/// runs the risk of blowing the call stack, so we want to be sure to use a heap +/// stack structure wherever possible. +#[derive(Default)] +struct ExpressionParser<'a> { + /// The flat list of instructions that we've parsed so far, and will + /// eventually become the final `Expression`. + instrs: Vec<Instruction<'a>>, + + /// Descriptor of all our nested s-expr blocks. This only happens when + /// instructions themselves are nested. + stack: Vec<Level<'a>>, +} + +enum Paren { + None, + Left, + Right, +} + +/// A "kind" of nested block that we can be parsing inside of. +enum Level<'a> { + /// This is a normal `block` or `loop` or similar, where the instruction + /// payload here is pushed when the block is exited. + EndWith(Instruction<'a>), + + /// This is a pretty special variant which means that we're parsing an `if` + /// statement, and the state of the `if` parsing is tracked internally in + /// the payload. + If(If<'a>), + + /// This means we're either parsing inside of `(then ...)` or `(else ...)` + /// which don't correspond to terminating instructions, we're just in a + /// nested block. + IfArm, + + /// Similar to `If` but for `Try` statements, which has simpler parsing + /// state to track. + Try(Try<'a>), + + /// Similar to `IfArm` but for `(do ...)` and `(catch ...)` blocks. + TryArm, +} + +/// Possible states of "what should be parsed next?" in an `if` expression. +enum If<'a> { + /// Only the `if` has been parsed, next thing to parse is the clause, if + /// any, of the `if` instruction. + Clause(Instruction<'a>), + /// Next thing to parse is the `then` block + Then(Instruction<'a>), + /// Next thing to parse is the `else` block + Else, + /// This `if` statement has finished parsing and if anything remains it's a + /// syntax error. + End, +} + +/// Possible state of "what should be parsed next?" in a `try` expression. +enum Try<'a> { + /// Next thing to parse is the `do` block. 
+ Do(Instruction<'a>), + /// Next thing to parse is `catch`/`catch_all`, or `unwind`. + CatchOrUnwind, + /// Next thing to parse is a `catch` block or `catch_all`. + Catch, + /// This `try` statement has finished parsing and if anything remains it's a + /// syntax error. + End, +} + +impl<'a> ExpressionParser<'a> { + fn parse(mut self, parser: Parser<'a>) -> Result<Expression<'a>> { + // Here we parse instructions in a loop, and we do not recursively + // invoke this parse function to avoid blowing the stack on + // deeply-recursive parses. + // + // Our loop generally only finishes once there's no more input left int + // the `parser`. If there's some unclosed delimiters though (on our + // `stack`), then we also keep parsing to generate error messages if + // there's no input left. + while !parser.is_empty() || !self.stack.is_empty() { + // As a small ease-of-life adjustment here, if we're parsing inside + // of an `if block then we require that all sub-components are + // s-expressions surrounded by `(` and `)`, so verify that here. + if let Some(Level::If(_)) | Some(Level::Try(_)) = self.stack.last() { + if !parser.is_empty() && !parser.peek::<ast::LParen>() { + return Err(parser.error("expected `(`")); + } + } + + match self.paren(parser)? { + // No parenthesis seen? Then we just parse the next instruction + // and move on. + Paren::None => self.instrs.push(parser.parse()?), + + // If we see a left-parenthesis then things are a little + // special. We handle block-like instructions specially + // (`block`, `loop`, and `if`), and otherwise all other + // instructions simply get appended once we reach the end of the + // s-expression. + // + // In all cases here we push something onto the `stack` to get + // popped when the `)` character is seen. + Paren::Left => { + // First up is handling `if` parsing, which is funky in a + // whole bunch of ways. See the method internally for more + // information. + if self.handle_if_lparen(parser)? 
{ + continue; + } + // Second, we handle `try` parsing, which is simpler than + // `if` but more complicated than, e.g., `block`. + if self.handle_try_lparen(parser)? { + continue; + } + match parser.parse()? { + // If block/loop show up then we just need to be sure to + // push an `end` instruction whenever the `)` token is + // seen + i @ Instruction::Block(_) + | i @ Instruction::Loop(_) + | i @ Instruction::Let(_) => { + self.instrs.push(i); + self.stack.push(Level::EndWith(Instruction::End(None))); + } + + // Parsing an `if` instruction is super tricky, so we + // push an `If` scope and we let all our scope-based + // parsing handle the remaining items. + i @ Instruction::If(_) => { + self.stack.push(Level::If(If::Clause(i))); + } + + // Parsing a `try` is easier than `if` but we also push + // a `Try` scope to handle the required nested blocks. + i @ Instruction::Try(_) => { + self.stack.push(Level::Try(Try::Do(i))); + } + + // Anything else means that we're parsing a nested form + // such as `(i32.add ...)` which means that the + // instruction we parsed will be coming at the end. + other => self.stack.push(Level::EndWith(other)), + } + } + + // If we registered a `)` token as being seen, then we're + // guaranteed there's an item in the `stack` stack for us to + // pop. We peel that off and take a look at what it says to do. + Paren::Right => match self.stack.pop().unwrap() { + Level::EndWith(i) => self.instrs.push(i), + Level::IfArm => {} + Level::TryArm => {} + + // If an `if` statement hasn't parsed the clause or `then` + // block, then that's an error because there weren't enough + // items in the `if` statement. Otherwise we're just careful + // to terminate with an `end` instruction. 
+ Level::If(If::Clause(_)) => { + return Err(parser.error("previous `if` had no clause")); + } + Level::If(If::Then(_)) => { + return Err(parser.error("previous `if` had no `then`")); + } + Level::If(_) => { + self.instrs.push(Instruction::End(None)); + } + + // Both `do` and `catch` are required in a `try` statement, so + // we will signal those errors here. Otherwise, terminate with + // an `end` instruction. + Level::Try(Try::Do(_)) => { + return Err(parser.error("previous `try` had no `do`")); + } + Level::Try(Try::CatchOrUnwind) => { + return Err( + parser.error("previous `try` had no `catch`, `catch_all`, or `unwind`") + ); + } + Level::Try(_) => { + self.instrs.push(Instruction::End(None)); + } + }, + } + } + + Ok(Expression { + instrs: self.instrs.into(), + }) + } + + /// Parses either `(`, `)`, or nothing. + fn paren(&self, parser: Parser<'a>) -> Result<Paren> { + parser.step(|cursor| { + Ok(match cursor.lparen() { + Some(rest) => (Paren::Left, rest), + None if self.stack.is_empty() => (Paren::None, cursor), + None => match cursor.rparen() { + Some(rest) => (Paren::Right, rest), + None => (Paren::None, cursor), + }, + }) + }) + } + + /// Handles all parsing of an `if` statement. + /// + /// The syntactical form of an `if` stament looks like: + /// + /// ```wat + /// (if $clause (then $then) (else $else)) + /// ``` + /// + /// but it turns out we practically see a few things in the wild: + /// + /// * inside the `(if ...)` every sub-thing is surrounded by parens + /// * The `then` and `else` keywords are optional + /// * The `$then` and `$else` blocks don't need to be surrounded by parens + /// + /// That's all attempted to be handled here. The part about all sub-parts + /// being surrounded by `(` and `)` means that we hook into the `LParen` + /// parsing above to call this method there unconditionally. 
+ /// + /// Returns `true` if the rest of the arm above should be skipped, or + /// `false` if we should parse the next item as an instruction (because we + /// didn't handle the lparen here). + fn handle_if_lparen(&mut self, parser: Parser<'a>) -> Result<bool> { + // Only execute the code below if there's an `If` listed last. + let i = match self.stack.last_mut() { + Some(Level::If(i)) => i, + _ => return Ok(false), + }; + + // The first thing parsed in an `if` statement is the clause. If the + // clause starts with `then`, however, then we know to skip the clause + // and fall through to below. + if let If::Clause(if_instr) = i { + let instr = mem::replace(if_instr, Instruction::End(None)); + *i = If::Then(instr); + if !parser.peek::<kw::then>() { + return Ok(false); + } + } + + // All `if` statements are required to have a `then`. This is either the + // second s-expr (with or without a leading `then`) or the first s-expr + // with a leading `then`. The optionality of `then` isn't strictly what + // the text spec says but it matches wabt for now. + // + // Note that when we see the `then`, that's when we actually add the + // original `if` instruction to the stream. + if let If::Then(if_instr) = i { + let instr = mem::replace(if_instr, Instruction::End(None)); + self.instrs.push(instr); + *i = If::Else; + if parser.parse::<Option<kw::then>>()?.is_some() { + self.stack.push(Level::IfArm); + return Ok(true); + } + return Ok(false); + } + + // effectively the same as the `then` parsing above + if let If::Else = i { + self.instrs.push(Instruction::Else(None)); + if parser.parse::<Option<kw::r#else>>()?.is_some() { + if parser.is_empty() { + self.instrs.pop(); + } + self.stack.push(Level::IfArm); + return Ok(true); + } + *i = If::End; + return Ok(false); + } + + // If we made it this far then we're at `If::End` which means that there + // were too many s-expressions inside the `(if)` and we don't want to + // parse anything else. 
+ Err(parser.error("too many payloads inside of `(if)`")) + } + + /// Handles parsing of a `try` statement. A `try` statement is simpler + /// than an `if` as the syntactic form is: + /// + /// ```wat + /// (try (do $do) (catch $event $catch)) + /// ``` + /// + /// where the `do` and `catch` keywords are mandatory, even for an empty + /// $do or $catch. + /// + /// Returns `true` if the rest of the arm above should be skipped, or + /// `false` if we should parse the next item as an instruction (because we + /// didn't handle the lparen here). + fn handle_try_lparen(&mut self, parser: Parser<'a>) -> Result<bool> { + // Only execute the code below if there's a `Try` listed last. + let i = match self.stack.last_mut() { + Some(Level::Try(i)) => i, + _ => return Ok(false), + }; + + // Try statements must start with a `do` block. + if let Try::Do(try_instr) = i { + let instr = mem::replace(try_instr, Instruction::End(None)); + self.instrs.push(instr); + if parser.parse::<Option<kw::r#do>>()?.is_some() { + // The state is advanced here only if the parse succeeds in + // order to strictly require the keyword. + *i = Try::CatchOrUnwind; + self.stack.push(Level::TryArm); + return Ok(true); + } + // We return here and continue parsing instead of raising an error + // immediately because the missing keyword will be caught more + // generally in the `Paren::Right` case in `parse`. + return Ok(false); + } + + // After a try's `do`, there are several possible kinds of handlers. + if let Try::CatchOrUnwind = i { + // `catch` may be followed by more `catch`s or `catch_all`. + if parser.parse::<Option<kw::catch>>()?.is_some() { + let evt = parser.parse::<ast::Index<'a>>()?; + self.instrs.push(Instruction::Catch(evt)); + *i = Try::Catch; + self.stack.push(Level::TryArm); + return Ok(true); + } + // `catch_all` can only come at the end and has no argument. 
+ if parser.parse::<Option<kw::catch_all>>()?.is_some() { + self.instrs.push(Instruction::CatchAll); + *i = Try::End; + self.stack.push(Level::TryArm); + return Ok(true); + } + // `unwind` is similar to `catch_all`. + if parser.parse::<Option<kw::unwind>>()?.is_some() { + self.instrs.push(Instruction::Unwind); + *i = Try::End; + self.stack.push(Level::TryArm); + return Ok(true); + } + return Ok(false); + } + + if let Try::Catch = i { + if parser.parse::<Option<kw::catch>>()?.is_some() { + let evt = parser.parse::<ast::Index<'a>>()?; + self.instrs.push(Instruction::Catch(evt)); + *i = Try::Catch; + self.stack.push(Level::TryArm); + return Ok(true); + } + if parser.parse::<Option<kw::catch_all>>()?.is_some() { + self.instrs.push(Instruction::CatchAll); + *i = Try::End; + self.stack.push(Level::TryArm); + return Ok(true); + } + return Err(parser.error("unexpected items after `catch`")); + } + + Err(parser.error("too many payloads inside of `(try)`")) + } +} + +// TODO: document this obscenity +macro_rules! instructions { + (pub enum Instruction<'a> { + $( + $(#[$doc:meta])* + $name:ident $(($($arg:tt)*))? : [$($binary:tt)*] : $instr:tt $( | $deprecated:tt )?, + )* + }) => ( + /// A listing of all WebAssembly instructions that can be in a module + /// that this crate currently parses. + #[derive(Debug)] + #[allow(missing_docs)] + pub enum Instruction<'a> { + $( + $(#[$doc])* + $name $(( instructions!(@ty $($arg)*) ))?, + )* + } + + #[allow(non_snake_case)] + impl<'a> Parse<'a> for Instruction<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + $( + fn $name<'a>(_parser: Parser<'a>) -> Result<Instruction<'a>> { + Ok(Instruction::$name $(( + instructions!(@parse _parser $($arg)*)? + ))?) 
+ } + )* + let parse_remainder = parser.step(|c| { + let (kw, rest) = match c.keyword() { + Some(pair) => pair, + None => return Err(c.error("expected an instruction")), + }; + match kw { + $($instr $( | $deprecated )?=> Ok(($name as fn(_) -> _, rest)),)* + _ => return Err(c.error("unknown operator or unexpected token")), + } + })?; + parse_remainder(parser) + } + } + + impl crate::binary::Encode for Instruction<'_> { + #[allow(non_snake_case)] + fn encode(&self, v: &mut Vec<u8>) { + match self { + $( + Instruction::$name $((instructions!(@first $($arg)*)))? => { + fn encode<'a>($(arg: &instructions!(@ty $($arg)*),)? v: &mut Vec<u8>) { + instructions!(@encode v $($binary)*); + $(<instructions!(@ty $($arg)*) as crate::binary::Encode>::encode(arg, v);)? + } + encode($( instructions!(@first $($arg)*), )? v) + } + )* + } + } + } + + impl<'a> Instruction<'a> { + /// Returns the associated [`MemArg`] if one is available for this + /// instruction. + #[allow(unused_variables, non_snake_case)] + pub fn memarg_mut(&mut self) -> Option<&mut MemArg<'a>> { + match self { + $( + Instruction::$name $((instructions!(@memarg_binding a $($arg)*)))? => { + instructions!(@get_memarg a $($($arg)*)?) 
+ } + )* + } + } + } + ); + + (@ty MemArg<$amt:tt>) => (MemArg<'a>); + (@ty LoadOrStoreLane<$amt:tt>) => (LoadOrStoreLane<'a>); + (@ty $other:ty) => ($other); + + (@first $first:ident $($t:tt)*) => ($first); + + (@parse $parser:ident MemArg<$amt:tt>) => (MemArg::parse($parser, $amt)); + (@parse $parser:ident MemArg) => (compile_error!("must specify `MemArg` default")); + (@parse $parser:ident LoadOrStoreLane<$amt:tt>) => (LoadOrStoreLane::parse($parser, $amt)); + (@parse $parser:ident LoadOrStoreLane) => (compile_error!("must specify `LoadOrStoreLane` default")); + (@parse $parser:ident $other:ty) => ($parser.parse::<$other>()); + + // simd opcodes prefixed with `0xfd` get a varuint32 encoding for their payload + (@encode $dst:ident 0xfd, $simd:tt) => ({ + $dst.push(0xfd); + <u32 as crate::binary::Encode>::encode(&$simd, $dst); + }); + (@encode $dst:ident $($bytes:tt)*) => ($dst.extend_from_slice(&[$($bytes)*]);); + + (@get_memarg $name:ident MemArg<$amt:tt>) => (Some($name)); + (@get_memarg $($other:tt)*) => (None); + + (@memarg_binding $name:ident MemArg<$amt:tt>) => ($name); + (@memarg_binding $name:ident $other:ty) => (_); +} + +instructions! 
{ + pub enum Instruction<'a> { + Block(BlockType<'a>) : [0x02] : "block", + If(BlockType<'a>) : [0x04] : "if", + Else(Option<ast::Id<'a>>) : [0x05] : "else", + Loop(BlockType<'a>) : [0x03] : "loop", + End(Option<ast::Id<'a>>) : [0x0b] : "end", + + Unreachable : [0x00] : "unreachable", + Nop : [0x01] : "nop", + Br(ast::Index<'a>) : [0x0c] : "br", + BrIf(ast::Index<'a>) : [0x0d] : "br_if", + BrTable(BrTableIndices<'a>) : [0x0e] : "br_table", + Return : [0x0f] : "return", + Call(ast::IndexOrRef<'a, kw::func>) : [0x10] : "call", + CallIndirect(CallIndirect<'a>) : [0x11] : "call_indirect", + + // tail-call proposal + ReturnCall(ast::IndexOrRef<'a, kw::func>) : [0x12] : "return_call", + ReturnCallIndirect(CallIndirect<'a>) : [0x13] : "return_call_indirect", + + // function-references proposal + CallRef : [0x14] : "call_ref", + ReturnCallRef : [0x15] : "return_call_ref", + FuncBind(FuncBindType<'a>) : [0x16] : "func.bind", + Let(LetType<'a>) : [0x17] : "let", + + Drop : [0x1a] : "drop", + Select(SelectTypes<'a>) : [] : "select", + LocalGet(ast::Index<'a>) : [0x20] : "local.get" | "get_local", + LocalSet(ast::Index<'a>) : [0x21] : "local.set" | "set_local", + LocalTee(ast::Index<'a>) : [0x22] : "local.tee" | "tee_local", + GlobalGet(ast::IndexOrRef<'a, kw::global>) : [0x23] : "global.get" | "get_global", + GlobalSet(ast::IndexOrRef<'a, kw::global>) : [0x24] : "global.set" | "set_global", + + TableGet(TableArg<'a>) : [0x25] : "table.get", + TableSet(TableArg<'a>) : [0x26] : "table.set", + + I32Load(MemArg<4>) : [0x28] : "i32.load", + I64Load(MemArg<8>) : [0x29] : "i64.load", + F32Load(MemArg<4>) : [0x2a] : "f32.load", + F64Load(MemArg<8>) : [0x2b] : "f64.load", + I32Load8s(MemArg<1>) : [0x2c] : "i32.load8_s", + I32Load8u(MemArg<1>) : [0x2d] : "i32.load8_u", + I32Load16s(MemArg<2>) : [0x2e] : "i32.load16_s", + I32Load16u(MemArg<2>) : [0x2f] : "i32.load16_u", + I64Load8s(MemArg<1>) : [0x30] : "i64.load8_s", + I64Load8u(MemArg<1>) : [0x31] : "i64.load8_u", + 
I64Load16s(MemArg<2>) : [0x32] : "i64.load16_s", + I64Load16u(MemArg<2>) : [0x33] : "i64.load16_u", + I64Load32s(MemArg<4>) : [0x34] : "i64.load32_s", + I64Load32u(MemArg<4>) : [0x35] : "i64.load32_u", + I32Store(MemArg<4>) : [0x36] : "i32.store", + I64Store(MemArg<8>) : [0x37] : "i64.store", + F32Store(MemArg<4>) : [0x38] : "f32.store", + F64Store(MemArg<8>) : [0x39] : "f64.store", + I32Store8(MemArg<1>) : [0x3a] : "i32.store8", + I32Store16(MemArg<2>) : [0x3b] : "i32.store16", + I64Store8(MemArg<1>) : [0x3c] : "i64.store8", + I64Store16(MemArg<2>) : [0x3d] : "i64.store16", + I64Store32(MemArg<4>) : [0x3e] : "i64.store32", + + // Lots of bulk memory proposal here as well + MemorySize(MemoryArg<'a>) : [0x3f] : "memory.size" | "current_memory", + MemoryGrow(MemoryArg<'a>) : [0x40] : "memory.grow" | "grow_memory", + MemoryInit(MemoryInit<'a>) : [0xfc, 0x08] : "memory.init", + MemoryCopy(MemoryCopy<'a>) : [0xfc, 0x0a] : "memory.copy", + MemoryFill(MemoryArg<'a>) : [0xfc, 0x0b] : "memory.fill", + DataDrop(ast::Index<'a>) : [0xfc, 0x09] : "data.drop", + ElemDrop(ast::Index<'a>) : [0xfc, 0x0d] : "elem.drop", + TableInit(TableInit<'a>) : [0xfc, 0x0c] : "table.init", + TableCopy(TableCopy<'a>) : [0xfc, 0x0e] : "table.copy", + TableFill(TableArg<'a>) : [0xfc, 0x11] : "table.fill", + TableSize(TableArg<'a>) : [0xfc, 0x10] : "table.size", + TableGrow(TableArg<'a>) : [0xfc, 0x0f] : "table.grow", + + RefNull(HeapType<'a>) : [0xd0] : "ref.null", + RefIsNull : [0xd1] : "ref.is_null", + RefExtern(u32) : [0xff] : "ref.extern", // only used in test harness + RefFunc(ast::IndexOrRef<'a, kw::func>) : [0xd2] : "ref.func", + + // function-references proposal + RefAsNonNull : [0xd3] : "ref.as_non_null", + BrOnNull(ast::Index<'a>) : [0xd4] : "br_on_null", + + // gc proposal: eqref + RefEq : [0xd5] : "ref.eq", + + // gc proposal (moz specific, will be removed) + StructNew(ast::Index<'a>) : [0xfb, 0x0] : "struct.new", + + // gc proposal: struct + StructNewWithRtt(ast::Index<'a>) : [0xfb, 
0x01] : "struct.new_with_rtt", + StructNewDefaultWithRtt(ast::Index<'a>) : [0xfb, 0x02] : "struct.new_default_with_rtt", + StructGet(StructAccess<'a>) : [0xfb, 0x03] : "struct.get", + StructGetS(StructAccess<'a>) : [0xfb, 0x04] : "struct.get_s", + StructGetU(StructAccess<'a>) : [0xfb, 0x05] : "struct.get_u", + StructSet(StructAccess<'a>) : [0xfb, 0x06] : "struct.set", + + // gc proposal (moz specific, will be removed) + StructNarrow(StructNarrow<'a>) : [0xfb, 0x07] : "struct.narrow", + + // gc proposal: array + ArrayNewWithRtt(ast::Index<'a>) : [0xfb, 0x11] : "array.new_with_rtt", + ArrayNewDefaultWithRtt(ast::Index<'a>) : [0xfb, 0x12] : "array.new_default_with_rtt", + ArrayGet(ast::Index<'a>) : [0xfb, 0x13] : "array.get", + ArrayGetS(ast::Index<'a>) : [0xfb, 0x14] : "array.get_s", + ArrayGetU(ast::Index<'a>) : [0xfb, 0x15] : "array.get_u", + ArraySet(ast::Index<'a>) : [0xfb, 0x16] : "array.set", + ArrayLen(ast::Index<'a>) : [0xfb, 0x17] : "array.len", + + // gc proposal, i31 + I31New : [0xfb, 0x20] : "i31.new", + I31GetS : [0xfb, 0x21] : "i31.get_s", + I31GetU : [0xfb, 0x22] : "i31.get_u", + + // gc proposal, rtt/casting + RTTCanon(HeapType<'a>) : [0xfb, 0x30] : "rtt.canon", + RTTSub(RTTSub<'a>) : [0xfb, 0x31] : "rtt.sub", + RefTest(RefTest<'a>) : [0xfb, 0x40] : "ref.test", + RefCast(RefTest<'a>) : [0xfb, 0x41] : "ref.cast", + BrOnCast(BrOnCast<'a>) : [0xfb, 0x42] : "br_on_cast", + + I32Const(i32) : [0x41] : "i32.const", + I64Const(i64) : [0x42] : "i64.const", + F32Const(ast::Float32) : [0x43] : "f32.const", + F64Const(ast::Float64) : [0x44] : "f64.const", + + I32Clz : [0x67] : "i32.clz", + I32Ctz : [0x68] : "i32.ctz", + I32Popcnt : [0x69] : "i32.popcnt", + I32Add : [0x6a] : "i32.add", + I32Sub : [0x6b] : "i32.sub", + I32Mul : [0x6c] : "i32.mul", + I32DivS : [0x6d] : "i32.div_s", + I32DivU : [0x6e] : "i32.div_u", + I32RemS : [0x6f] : "i32.rem_s", + I32RemU : [0x70] : "i32.rem_u", + I32And : [0x71] : "i32.and", + I32Or : [0x72] : "i32.or", + I32Xor : [0x73] : 
"i32.xor", + I32Shl : [0x74] : "i32.shl", + I32ShrS : [0x75] : "i32.shr_s", + I32ShrU : [0x76] : "i32.shr_u", + I32Rotl : [0x77] : "i32.rotl", + I32Rotr : [0x78] : "i32.rotr", + + I64Clz : [0x79] : "i64.clz", + I64Ctz : [0x7a] : "i64.ctz", + I64Popcnt : [0x7b] : "i64.popcnt", + I64Add : [0x7c] : "i64.add", + I64Sub : [0x7d] : "i64.sub", + I64Mul : [0x7e] : "i64.mul", + I64DivS : [0x7f] : "i64.div_s", + I64DivU : [0x80] : "i64.div_u", + I64RemS : [0x81] : "i64.rem_s", + I64RemU : [0x82] : "i64.rem_u", + I64And : [0x83] : "i64.and", + I64Or : [0x84] : "i64.or", + I64Xor : [0x85] : "i64.xor", + I64Shl : [0x86] : "i64.shl", + I64ShrS : [0x87] : "i64.shr_s", + I64ShrU : [0x88] : "i64.shr_u", + I64Rotl : [0x89] : "i64.rotl", + I64Rotr : [0x8a] : "i64.rotr", + + F32Abs : [0x8b] : "f32.abs", + F32Neg : [0x8c] : "f32.neg", + F32Ceil : [0x8d] : "f32.ceil", + F32Floor : [0x8e] : "f32.floor", + F32Trunc : [0x8f] : "f32.trunc", + F32Nearest : [0x90] : "f32.nearest", + F32Sqrt : [0x91] : "f32.sqrt", + F32Add : [0x92] : "f32.add", + F32Sub : [0x93] : "f32.sub", + F32Mul : [0x94] : "f32.mul", + F32Div : [0x95] : "f32.div", + F32Min : [0x96] : "f32.min", + F32Max : [0x97] : "f32.max", + F32Copysign : [0x98] : "f32.copysign", + + F64Abs : [0x99] : "f64.abs", + F64Neg : [0x9a] : "f64.neg", + F64Ceil : [0x9b] : "f64.ceil", + F64Floor : [0x9c] : "f64.floor", + F64Trunc : [0x9d] : "f64.trunc", + F64Nearest : [0x9e] : "f64.nearest", + F64Sqrt : [0x9f] : "f64.sqrt", + F64Add : [0xa0] : "f64.add", + F64Sub : [0xa1] : "f64.sub", + F64Mul : [0xa2] : "f64.mul", + F64Div : [0xa3] : "f64.div", + F64Min : [0xa4] : "f64.min", + F64Max : [0xa5] : "f64.max", + F64Copysign : [0xa6] : "f64.copysign", + + I32Eqz : [0x45] : "i32.eqz", + I32Eq : [0x46] : "i32.eq", + I32Ne : [0x47] : "i32.ne", + I32LtS : [0x48] : "i32.lt_s", + I32LtU : [0x49] : "i32.lt_u", + I32GtS : [0x4a] : "i32.gt_s", + I32GtU : [0x4b] : "i32.gt_u", + I32LeS : [0x4c] : "i32.le_s", + I32LeU : [0x4d] : "i32.le_u", + I32GeS : [0x4e] : 
"i32.ge_s", + I32GeU : [0x4f] : "i32.ge_u", + + I64Eqz : [0x50] : "i64.eqz", + I64Eq : [0x51] : "i64.eq", + I64Ne : [0x52] : "i64.ne", + I64LtS : [0x53] : "i64.lt_s", + I64LtU : [0x54] : "i64.lt_u", + I64GtS : [0x55] : "i64.gt_s", + I64GtU : [0x56] : "i64.gt_u", + I64LeS : [0x57] : "i64.le_s", + I64LeU : [0x58] : "i64.le_u", + I64GeS : [0x59] : "i64.ge_s", + I64GeU : [0x5a] : "i64.ge_u", + + F32Eq : [0x5b] : "f32.eq", + F32Ne : [0x5c] : "f32.ne", + F32Lt : [0x5d] : "f32.lt", + F32Gt : [0x5e] : "f32.gt", + F32Le : [0x5f] : "f32.le", + F32Ge : [0x60] : "f32.ge", + + F64Eq : [0x61] : "f64.eq", + F64Ne : [0x62] : "f64.ne", + F64Lt : [0x63] : "f64.lt", + F64Gt : [0x64] : "f64.gt", + F64Le : [0x65] : "f64.le", + F64Ge : [0x66] : "f64.ge", + + I32WrapI64 : [0xa7] : "i32.wrap_i64" | "i32.wrap/i64", + I32TruncF32S : [0xa8] : "i32.trunc_f32_s" | "i32.trunc_s/f32", + I32TruncF32U : [0xa9] : "i32.trunc_f32_u" | "i32.trunc_u/f32", + I32TruncF64S : [0xaa] : "i32.trunc_f64_s" | "i32.trunc_s/f64", + I32TruncF64U : [0xab] : "i32.trunc_f64_u" | "i32.trunc_u/f64", + I64ExtendI32S : [0xac] : "i64.extend_i32_s" | "i64.extend_s/i32", + I64ExtendI32U : [0xad] : "i64.extend_i32_u" | "i64.extend_u/i32", + I64TruncF32S : [0xae] : "i64.trunc_f32_s" | "i64.trunc_s/f32", + I64TruncF32U : [0xaf] : "i64.trunc_f32_u" | "i64.trunc_u/f32", + I64TruncF64S : [0xb0] : "i64.trunc_f64_s" | "i64.trunc_s/f64", + I64TruncF64U : [0xb1] : "i64.trunc_f64_u" | "i64.trunc_u/f64", + F32ConvertI32S : [0xb2] : "f32.convert_i32_s" | "f32.convert_s/i32", + F32ConvertI32U : [0xb3] : "f32.convert_i32_u" | "f32.convert_u/i32", + F32ConvertI64S : [0xb4] : "f32.convert_i64_s" | "f32.convert_s/i64", + F32ConvertI64U : [0xb5] : "f32.convert_i64_u" | "f32.convert_u/i64", + F32DemoteF64 : [0xb6] : "f32.demote_f64" | "f32.demote/f64", + F64ConvertI32S : [0xb7] : "f64.convert_i32_s" | "f64.convert_s/i32", + F64ConvertI32U : [0xb8] : "f64.convert_i32_u" | "f64.convert_u/i32", + F64ConvertI64S : [0xb9] : "f64.convert_i64_s" | 
"f64.convert_s/i64", + F64ConvertI64U : [0xba] : "f64.convert_i64_u" | "f64.convert_u/i64", + F64PromoteF32 : [0xbb] : "f64.promote_f32" | "f64.promote/f32", + I32ReinterpretF32 : [0xbc] : "i32.reinterpret_f32" | "i32.reinterpret/f32", + I64ReinterpretF64 : [0xbd] : "i64.reinterpret_f64" | "i64.reinterpret/f64", + F32ReinterpretI32 : [0xbe] : "f32.reinterpret_i32" | "f32.reinterpret/i32", + F64ReinterpretI64 : [0xbf] : "f64.reinterpret_i64" | "f64.reinterpret/i64", + + // non-trapping float to int + I32TruncSatF32S : [0xfc, 0x00] : "i32.trunc_sat_f32_s" | "i32.trunc_s:sat/f32", + I32TruncSatF32U : [0xfc, 0x01] : "i32.trunc_sat_f32_u" | "i32.trunc_u:sat/f32", + I32TruncSatF64S : [0xfc, 0x02] : "i32.trunc_sat_f64_s" | "i32.trunc_s:sat/f64", + I32TruncSatF64U : [0xfc, 0x03] : "i32.trunc_sat_f64_u" | "i32.trunc_u:sat/f64", + I64TruncSatF32S : [0xfc, 0x04] : "i64.trunc_sat_f32_s" | "i64.trunc_s:sat/f32", + I64TruncSatF32U : [0xfc, 0x05] : "i64.trunc_sat_f32_u" | "i64.trunc_u:sat/f32", + I64TruncSatF64S : [0xfc, 0x06] : "i64.trunc_sat_f64_s" | "i64.trunc_s:sat/f64", + I64TruncSatF64U : [0xfc, 0x07] : "i64.trunc_sat_f64_u" | "i64.trunc_u:sat/f64", + + // sign extension proposal + I32Extend8S : [0xc0] : "i32.extend8_s", + I32Extend16S : [0xc1] : "i32.extend16_s", + I64Extend8S : [0xc2] : "i64.extend8_s", + I64Extend16S : [0xc3] : "i64.extend16_s", + I64Extend32S : [0xc4] : "i64.extend32_s", + + // atomics proposal + MemoryAtomicNotify(MemArg<4>) : [0xfe, 0x00] : "memory.atomic.notify" | "atomic.notify", + MemoryAtomicWait32(MemArg<4>) : [0xfe, 0x01] : "memory.atomic.wait32" | "i32.atomic.wait", + MemoryAtomicWait64(MemArg<8>) : [0xfe, 0x02] : "memory.atomic.wait64" | "i64.atomic.wait", + AtomicFence : [0xfe, 0x03, 0x00] : "atomic.fence", + + I32AtomicLoad(MemArg<4>) : [0xfe, 0x10] : "i32.atomic.load", + I64AtomicLoad(MemArg<8>) : [0xfe, 0x11] : "i64.atomic.load", + I32AtomicLoad8u(MemArg<1>) : [0xfe, 0x12] : "i32.atomic.load8_u", + I32AtomicLoad16u(MemArg<2>) : [0xfe, 
0x13] : "i32.atomic.load16_u", + I64AtomicLoad8u(MemArg<1>) : [0xfe, 0x14] : "i64.atomic.load8_u", + I64AtomicLoad16u(MemArg<2>) : [0xfe, 0x15] : "i64.atomic.load16_u", + I64AtomicLoad32u(MemArg<4>) : [0xfe, 0x16] : "i64.atomic.load32_u", + I32AtomicStore(MemArg<4>) : [0xfe, 0x17] : "i32.atomic.store", + I64AtomicStore(MemArg<8>) : [0xfe, 0x18] : "i64.atomic.store", + I32AtomicStore8(MemArg<1>) : [0xfe, 0x19] : "i32.atomic.store8", + I32AtomicStore16(MemArg<2>) : [0xfe, 0x1a] : "i32.atomic.store16", + I64AtomicStore8(MemArg<1>) : [0xfe, 0x1b] : "i64.atomic.store8", + I64AtomicStore16(MemArg<2>) : [0xfe, 0x1c] : "i64.atomic.store16", + I64AtomicStore32(MemArg<4>) : [0xfe, 0x1d] : "i64.atomic.store32", + + I32AtomicRmwAdd(MemArg<4>) : [0xfe, 0x1e] : "i32.atomic.rmw.add", + I64AtomicRmwAdd(MemArg<8>) : [0xfe, 0x1f] : "i64.atomic.rmw.add", + I32AtomicRmw8AddU(MemArg<1>) : [0xfe, 0x20] : "i32.atomic.rmw8.add_u", + I32AtomicRmw16AddU(MemArg<2>) : [0xfe, 0x21] : "i32.atomic.rmw16.add_u", + I64AtomicRmw8AddU(MemArg<1>) : [0xfe, 0x22] : "i64.atomic.rmw8.add_u", + I64AtomicRmw16AddU(MemArg<2>) : [0xfe, 0x23] : "i64.atomic.rmw16.add_u", + I64AtomicRmw32AddU(MemArg<4>) : [0xfe, 0x24] : "i64.atomic.rmw32.add_u", + + I32AtomicRmwSub(MemArg<4>) : [0xfe, 0x25] : "i32.atomic.rmw.sub", + I64AtomicRmwSub(MemArg<8>) : [0xfe, 0x26] : "i64.atomic.rmw.sub", + I32AtomicRmw8SubU(MemArg<1>) : [0xfe, 0x27] : "i32.atomic.rmw8.sub_u", + I32AtomicRmw16SubU(MemArg<2>) : [0xfe, 0x28] : "i32.atomic.rmw16.sub_u", + I64AtomicRmw8SubU(MemArg<1>) : [0xfe, 0x29] : "i64.atomic.rmw8.sub_u", + I64AtomicRmw16SubU(MemArg<2>) : [0xfe, 0x2a] : "i64.atomic.rmw16.sub_u", + I64AtomicRmw32SubU(MemArg<4>) : [0xfe, 0x2b] : "i64.atomic.rmw32.sub_u", + + I32AtomicRmwAnd(MemArg<4>) : [0xfe, 0x2c] : "i32.atomic.rmw.and", + I64AtomicRmwAnd(MemArg<8>) : [0xfe, 0x2d] : "i64.atomic.rmw.and", + I32AtomicRmw8AndU(MemArg<1>) : [0xfe, 0x2e] : "i32.atomic.rmw8.and_u", + I32AtomicRmw16AndU(MemArg<2>) : [0xfe, 0x2f] : 
"i32.atomic.rmw16.and_u", + I64AtomicRmw8AndU(MemArg<1>) : [0xfe, 0x30] : "i64.atomic.rmw8.and_u", + I64AtomicRmw16AndU(MemArg<2>) : [0xfe, 0x31] : "i64.atomic.rmw16.and_u", + I64AtomicRmw32AndU(MemArg<4>) : [0xfe, 0x32] : "i64.atomic.rmw32.and_u", + + I32AtomicRmwOr(MemArg<4>) : [0xfe, 0x33] : "i32.atomic.rmw.or", + I64AtomicRmwOr(MemArg<8>) : [0xfe, 0x34] : "i64.atomic.rmw.or", + I32AtomicRmw8OrU(MemArg<1>) : [0xfe, 0x35] : "i32.atomic.rmw8.or_u", + I32AtomicRmw16OrU(MemArg<2>) : [0xfe, 0x36] : "i32.atomic.rmw16.or_u", + I64AtomicRmw8OrU(MemArg<1>) : [0xfe, 0x37] : "i64.atomic.rmw8.or_u", + I64AtomicRmw16OrU(MemArg<2>) : [0xfe, 0x38] : "i64.atomic.rmw16.or_u", + I64AtomicRmw32OrU(MemArg<4>) : [0xfe, 0x39] : "i64.atomic.rmw32.or_u", + + I32AtomicRmwXor(MemArg<4>) : [0xfe, 0x3a] : "i32.atomic.rmw.xor", + I64AtomicRmwXor(MemArg<8>) : [0xfe, 0x3b] : "i64.atomic.rmw.xor", + I32AtomicRmw8XorU(MemArg<1>) : [0xfe, 0x3c] : "i32.atomic.rmw8.xor_u", + I32AtomicRmw16XorU(MemArg<2>) : [0xfe, 0x3d] : "i32.atomic.rmw16.xor_u", + I64AtomicRmw8XorU(MemArg<1>) : [0xfe, 0x3e] : "i64.atomic.rmw8.xor_u", + I64AtomicRmw16XorU(MemArg<2>) : [0xfe, 0x3f] : "i64.atomic.rmw16.xor_u", + I64AtomicRmw32XorU(MemArg<4>) : [0xfe, 0x40] : "i64.atomic.rmw32.xor_u", + + I32AtomicRmwXchg(MemArg<4>) : [0xfe, 0x41] : "i32.atomic.rmw.xchg", + I64AtomicRmwXchg(MemArg<8>) : [0xfe, 0x42] : "i64.atomic.rmw.xchg", + I32AtomicRmw8XchgU(MemArg<1>) : [0xfe, 0x43] : "i32.atomic.rmw8.xchg_u", + I32AtomicRmw16XchgU(MemArg<2>) : [0xfe, 0x44] : "i32.atomic.rmw16.xchg_u", + I64AtomicRmw8XchgU(MemArg<1>) : [0xfe, 0x45] : "i64.atomic.rmw8.xchg_u", + I64AtomicRmw16XchgU(MemArg<2>) : [0xfe, 0x46] : "i64.atomic.rmw16.xchg_u", + I64AtomicRmw32XchgU(MemArg<4>) : [0xfe, 0x47] : "i64.atomic.rmw32.xchg_u", + + I32AtomicRmwCmpxchg(MemArg<4>) : [0xfe, 0x48] : "i32.atomic.rmw.cmpxchg", + I64AtomicRmwCmpxchg(MemArg<8>) : [0xfe, 0x49] : "i64.atomic.rmw.cmpxchg", + I32AtomicRmw8CmpxchgU(MemArg<1>) : [0xfe, 0x4a] : 
"i32.atomic.rmw8.cmpxchg_u", + I32AtomicRmw16CmpxchgU(MemArg<2>) : [0xfe, 0x4b] : "i32.atomic.rmw16.cmpxchg_u", + I64AtomicRmw8CmpxchgU(MemArg<1>) : [0xfe, 0x4c] : "i64.atomic.rmw8.cmpxchg_u", + I64AtomicRmw16CmpxchgU(MemArg<2>) : [0xfe, 0x4d] : "i64.atomic.rmw16.cmpxchg_u", + I64AtomicRmw32CmpxchgU(MemArg<4>) : [0xfe, 0x4e] : "i64.atomic.rmw32.cmpxchg_u", + + // proposal: simd + V128Load(MemArg<16>) : [0xfd, 0x00] : "v128.load", + V128Load8x8S(MemArg<8>) : [0xfd, 0x01] : "v128.load8x8_s", + V128Load8x8U(MemArg<8>) : [0xfd, 0x02] : "v128.load8x8_u", + V128Load16x4S(MemArg<8>) : [0xfd, 0x03] : "v128.load16x4_s", + V128Load16x4U(MemArg<8>) : [0xfd, 0x04] : "v128.load16x4_u", + V128Load32x2S(MemArg<8>) : [0xfd, 0x05] : "v128.load32x2_s", + V128Load32x2U(MemArg<8>) : [0xfd, 0x06] : "v128.load32x2_u", + V128Load8Splat(MemArg<1>) : [0xfd, 0x07] : "v128.load8_splat", + V128Load16Splat(MemArg<2>) : [0xfd, 0x08] : "v128.load16_splat", + V128Load32Splat(MemArg<4>) : [0xfd, 0x09] : "v128.load32_splat", + V128Load64Splat(MemArg<8>) : [0xfd, 0x0a] : "v128.load64_splat", + V128Store(MemArg<16>) : [0xfd, 0x0b] : "v128.store", + + V128Const(V128Const) : [0xfd, 0x0c] : "v128.const", + I8x16Shuffle(I8x16Shuffle) : [0xfd, 0x0d] : "i8x16.shuffle", + I8x16Swizzle : [0xfd, 0x0e] : "i8x16.swizzle", + + I8x16Splat : [0xfd, 0x0f] : "i8x16.splat", + I16x8Splat : [0xfd, 0x10] : "i16x8.splat", + I32x4Splat : [0xfd, 0x11] : "i32x4.splat", + I64x2Splat : [0xfd, 0x12] : "i64x2.splat", + F32x4Splat : [0xfd, 0x13] : "f32x4.splat", + F64x2Splat : [0xfd, 0x14] : "f64x2.splat", + + I8x16ExtractLaneS(LaneArg) : [0xfd, 0x15] : "i8x16.extract_lane_s", + I8x16ExtractLaneU(LaneArg) : [0xfd, 0x16] : "i8x16.extract_lane_u", + I8x16ReplaceLane(LaneArg) : [0xfd, 0x17] : "i8x16.replace_lane", + I16x8ExtractLaneS(LaneArg) : [0xfd, 0x18] : "i16x8.extract_lane_s", + I16x8ExtractLaneU(LaneArg) : [0xfd, 0x19] : "i16x8.extract_lane_u", + I16x8ReplaceLane(LaneArg) : [0xfd, 0x1a] : "i16x8.replace_lane", + 
I32x4ExtractLane(LaneArg) : [0xfd, 0x1b] : "i32x4.extract_lane", + I32x4ReplaceLane(LaneArg) : [0xfd, 0x1c] : "i32x4.replace_lane", + I64x2ExtractLane(LaneArg) : [0xfd, 0x1d] : "i64x2.extract_lane", + I64x2ReplaceLane(LaneArg) : [0xfd, 0x1e] : "i64x2.replace_lane", + F32x4ExtractLane(LaneArg) : [0xfd, 0x1f] : "f32x4.extract_lane", + F32x4ReplaceLane(LaneArg) : [0xfd, 0x20] : "f32x4.replace_lane", + F64x2ExtractLane(LaneArg) : [0xfd, 0x21] : "f64x2.extract_lane", + F64x2ReplaceLane(LaneArg) : [0xfd, 0x22] : "f64x2.replace_lane", + + I8x16Eq : [0xfd, 0x23] : "i8x16.eq", + I8x16Ne : [0xfd, 0x24] : "i8x16.ne", + I8x16LtS : [0xfd, 0x25] : "i8x16.lt_s", + I8x16LtU : [0xfd, 0x26] : "i8x16.lt_u", + I8x16GtS : [0xfd, 0x27] : "i8x16.gt_s", + I8x16GtU : [0xfd, 0x28] : "i8x16.gt_u", + I8x16LeS : [0xfd, 0x29] : "i8x16.le_s", + I8x16LeU : [0xfd, 0x2a] : "i8x16.le_u", + I8x16GeS : [0xfd, 0x2b] : "i8x16.ge_s", + I8x16GeU : [0xfd, 0x2c] : "i8x16.ge_u", + I16x8Eq : [0xfd, 0x2d] : "i16x8.eq", + I16x8Ne : [0xfd, 0x2e] : "i16x8.ne", + I16x8LtS : [0xfd, 0x2f] : "i16x8.lt_s", + I16x8LtU : [0xfd, 0x30] : "i16x8.lt_u", + I16x8GtS : [0xfd, 0x31] : "i16x8.gt_s", + I16x8GtU : [0xfd, 0x32] : "i16x8.gt_u", + I16x8LeS : [0xfd, 0x33] : "i16x8.le_s", + I16x8LeU : [0xfd, 0x34] : "i16x8.le_u", + I16x8GeS : [0xfd, 0x35] : "i16x8.ge_s", + I16x8GeU : [0xfd, 0x36] : "i16x8.ge_u", + I32x4Eq : [0xfd, 0x37] : "i32x4.eq", + I32x4Ne : [0xfd, 0x38] : "i32x4.ne", + I32x4LtS : [0xfd, 0x39] : "i32x4.lt_s", + I32x4LtU : [0xfd, 0x3a] : "i32x4.lt_u", + I32x4GtS : [0xfd, 0x3b] : "i32x4.gt_s", + I32x4GtU : [0xfd, 0x3c] : "i32x4.gt_u", + I32x4LeS : [0xfd, 0x3d] : "i32x4.le_s", + I32x4LeU : [0xfd, 0x3e] : "i32x4.le_u", + I32x4GeS : [0xfd, 0x3f] : "i32x4.ge_s", + I32x4GeU : [0xfd, 0x40] : "i32x4.ge_u", + + F32x4Eq : [0xfd, 0x41] : "f32x4.eq", + F32x4Ne : [0xfd, 0x42] : "f32x4.ne", + F32x4Lt : [0xfd, 0x43] : "f32x4.lt", + F32x4Gt : [0xfd, 0x44] : "f32x4.gt", + F32x4Le : [0xfd, 0x45] : "f32x4.le", + F32x4Ge : [0xfd, 0x46] 
: "f32x4.ge", + F64x2Eq : [0xfd, 0x47] : "f64x2.eq", + F64x2Ne : [0xfd, 0x48] : "f64x2.ne", + F64x2Lt : [0xfd, 0x49] : "f64x2.lt", + F64x2Gt : [0xfd, 0x4a] : "f64x2.gt", + F64x2Le : [0xfd, 0x4b] : "f64x2.le", + F64x2Ge : [0xfd, 0x4c] : "f64x2.ge", + + V128Not : [0xfd, 0x4d] : "v128.not", + V128And : [0xfd, 0x4e] : "v128.and", + V128Andnot : [0xfd, 0x4f] : "v128.andnot", + V128Or : [0xfd, 0x50] : "v128.or", + V128Xor : [0xfd, 0x51] : "v128.xor", + V128Bitselect : [0xfd, 0x52] : "v128.bitselect", + V128Load8Lane(LoadOrStoreLane<1>) : [0xfd, 0x58] : "v128.load8_lane", + V128Load16Lane(LoadOrStoreLane<2>) : [0xfd, 0x59] : "v128.load16_lane", + V128Load32Lane(LoadOrStoreLane<4>) : [0xfd, 0x5a] : "v128.load32_lane", + V128Load64Lane(LoadOrStoreLane<8>): [0xfd, 0x5b] : "v128.load64_lane", + V128Store8Lane(LoadOrStoreLane<1>) : [0xfd, 0x5c] : "v128.store8_lane", + V128Store16Lane(LoadOrStoreLane<2>) : [0xfd, 0x5d] : "v128.store16_lane", + V128Store32Lane(LoadOrStoreLane<4>) : [0xfd, 0x5e] : "v128.store32_lane", + V128Store64Lane(LoadOrStoreLane<8>) : [0xfd, 0x5f] : "v128.store64_lane", + + I8x16Abs : [0xfd, 0x60] : "i8x16.abs", + I8x16Neg : [0xfd, 0x61] : "i8x16.neg", + V128AnyTrue : [0xfd, 0x62] : "v128.any_true", + I8x16AllTrue : [0xfd, 0x63] : "i8x16.all_true", + I8x16Bitmask : [0xfd, 0x64] : "i8x16.bitmask", + I8x16NarrowI16x8S : [0xfd, 0x65] : "i8x16.narrow_i16x8_s", + I8x16NarrowI16x8U : [0xfd, 0x66] : "i8x16.narrow_i16x8_u", + I8x16Shl : [0xfd, 0x6b] : "i8x16.shl", + I8x16ShrS : [0xfd, 0x6c] : "i8x16.shr_s", + I8x16ShrU : [0xfd, 0x6d] : "i8x16.shr_u", + I8x16Add : [0xfd, 0x6e] : "i8x16.add", + I8x16AddSatS : [0xfd, 0x6f] : "i8x16.add_sat_s", + I8x16AddSatU : [0xfd, 0x70] : "i8x16.add_sat_u", + I8x16Sub : [0xfd, 0x71] : "i8x16.sub", + I8x16SubSatS : [0xfd, 0x72] : "i8x16.sub_sat_s", + I8x16SubSatU : [0xfd, 0x73] : "i8x16.sub_sat_u", + I8x16MinS : [0xfd, 0x76] : "i8x16.min_s", + I8x16MinU : [0xfd, 0x77] : "i8x16.min_u", + I8x16MaxS : [0xfd, 0x78] : "i8x16.max_s", + 
I8x16MaxU : [0xfd, 0x79] : "i8x16.max_u", + I8x16AvgrU : [0xfd, 0x7b] : "i8x16.avgr_u", + + I16x8Abs : [0xfd, 0x80] : "i16x8.abs", + I16x8Neg : [0xfd, 0x81] : "i16x8.neg", + I16x8AllTrue : [0xfd, 0x83] : "i16x8.all_true", + I16x8Bitmask : [0xfd, 0x84] : "i16x8.bitmask", + I16x8NarrowI32x4S : [0xfd, 0x85] : "i16x8.narrow_i32x4_s", + I16x8NarrowI32x4U : [0xfd, 0x86] : "i16x8.narrow_i32x4_u", + I16x8WidenLowI8x16S : [0xfd, 0x87] : "i16x8.widen_low_i8x16_s", + I16x8WidenHighI8x16S : [0xfd, 0x88] : "i16x8.widen_high_i8x16_s", + I16x8WidenLowI8x16U : [0xfd, 0x89] : "i16x8.widen_low_i8x16_u", + I16x8WidenHighI8x16u : [0xfd, 0x8a] : "i16x8.widen_high_i8x16_u", + I16x8Shl : [0xfd, 0x8b] : "i16x8.shl", + I16x8ShrS : [0xfd, 0x8c] : "i16x8.shr_s", + I16x8ShrU : [0xfd, 0x8d] : "i16x8.shr_u", + I16x8Add : [0xfd, 0x8e] : "i16x8.add", + I16x8AddSatS : [0xfd, 0x8f] : "i16x8.add_sat_s", + I16x8AddSatU : [0xfd, 0x90] : "i16x8.add_sat_u", + I16x8Sub : [0xfd, 0x91] : "i16x8.sub", + I16x8SubSatS : [0xfd, 0x92] : "i16x8.sub_sat_s", + I16x8SubSatU : [0xfd, 0x93] : "i16x8.sub_sat_u", + I16x8Mul : [0xfd, 0x95] : "i16x8.mul", + I16x8MinS : [0xfd, 0x96] : "i16x8.min_s", + I16x8MinU : [0xfd, 0x97] : "i16x8.min_u", + I16x8MaxS : [0xfd, 0x98] : "i16x8.max_s", + I16x8MaxU : [0xfd, 0x99] : "i16x8.max_u", + I16x8ExtMulLowI8x16S : [0xfd, 0x9a] : "i16x8.extmul_low_i8x16_s", + I16x8AvgrU : [0xfd, 0x9b] : "i16x8.avgr_u", + I16x8Q15MulrSatS : [0xfd, 0x9c] : "i16x8.q15mulr_sat_s", + I16x8ExtMulHighI8x16S : [0xfd, 0x9d] : "i16x8.extmul_high_i8x16_s", + I16x8ExtMulLowI8x16U : [0xfd, 0x9e] : "i16x8.extmul_low_i8x16_u", + I16x8ExtMulHighI8x16U : [0xfd, 0x9f] : "i16x8.extmul_high_i8x16_u", + + I32x4Abs : [0xfd, 0xa0] : "i32x4.abs", + I32x4Neg : [0xfd, 0xa1] : "i32x4.neg", + I32x4AllTrue : [0xfd, 0xa3] : "i32x4.all_true", + I32x4Bitmask : [0xfd, 0xa4] : "i32x4.bitmask", + I32x4WidenLowI16x8S : [0xfd, 0xa7] : "i32x4.widen_low_i16x8_s", + I32x4WidenHighI16x8S : [0xfd, 0xa8] : "i32x4.widen_high_i16x8_s", + 
I32x4WidenLowI16x8U : [0xfd, 0xa9] : "i32x4.widen_low_i16x8_u", + I32x4WidenHighI16x8U : [0xfd, 0xaa] : "i32x4.widen_high_i16x8_u", + I32x4Shl : [0xfd, 0xab] : "i32x4.shl", + I32x4ShrS : [0xfd, 0xac] : "i32x4.shr_s", + I32x4ShrU : [0xfd, 0xad] : "i32x4.shr_u", + I32x4Add : [0xfd, 0xae] : "i32x4.add", + I32x4Sub : [0xfd, 0xb1] : "i32x4.sub", + I32x4Mul : [0xfd, 0xb5] : "i32x4.mul", + I32x4MinS : [0xfd, 0xb6] : "i32x4.min_s", + I32x4MinU : [0xfd, 0xb7] : "i32x4.min_u", + I32x4MaxS : [0xfd, 0xb8] : "i32x4.max_s", + I32x4MaxU : [0xfd, 0xb9] : "i32x4.max_u", + I32x4DotI16x8S : [0xfd, 0xba] : "i32x4.dot_i16x8_s", + I32x4ExtMulLowI16x8S : [0xfd, 0xbb] : "i32x4.extmul_low_i16x8_s", + I32x4ExtMulHighI16x8S : [0xfd, 0xbd] : "i32x4.extmul_high_i16x8_s", + I32x4ExtMulLowI16x8U : [0xfd, 0xbe] : "i32x4.extmul_low_i16x8_u", + I32x4ExtMulHighI16x8U : [0xfd, 0xbf] : "i32x4.extmul_high_i16x8_u", + + I64x2Neg : [0xfd, 0xc1] : "i64x2.neg", + I64x2Shl : [0xfd, 0xcb] : "i64x2.shl", + I64x2Bitmask : [0xfd, 0xc4] : "i64x2.bitmask", + I64x2WidenLowI32x4S : [0xfd, 0xc7] : "i64x2.widen_low_i32x4_s", + I64x2WidenHighI32x4S : [0xfd, 0xc8] : "i64x2.widen_high_i32x4_s", + I64x2WidenLowI32x4U : [0xfd, 0xc9] : "i64x2.widen_low_i32x4_u", + I64x2WidenHighI32x4U : [0xfd, 0xca] : "i64x2.widen_high_i32x4_u", + I64x2ShrS : [0xfd, 0xcc] : "i64x2.shr_s", + I64x2ShrU : [0xfd, 0xcd] : "i64x2.shr_u", + I64x2Add : [0xfd, 0xce] : "i64x2.add", + I64x2Sub : [0xfd, 0xd1] : "i64x2.sub", + I64x2ExtMulLowI32x4S : [0xfd, 0xd2] : "i64x2.extmul_low_i32x4_s", + I64x2ExtMulHighI32x4S : [0xfd, 0xd3] : "i64x2.extmul_high_i32x4_s", + I64x2Mul : [0xfd, 0xd5] : "i64x2.mul", + I64x2ExtMulLowI32x4U : [0xfd, 0xd6] : "i64x2.extmul_low_i32x4_u", + I64x2ExtMulHighI32x4U : [0xfd, 0xd7] : "i64x2.extmul_high_i32x4_u", + + F32x4Ceil : [0xfd, 0xd8] : "f32x4.ceil", + F32x4Floor : [0xfd, 0xd9] : "f32x4.floor", + F32x4Trunc : [0xfd, 0xda] : "f32x4.trunc", + F32x4Nearest : [0xfd, 0xdb] : "f32x4.nearest", + F64x2Ceil : [0xfd, 0xdc] : 
"f64x2.ceil", + F64x2Floor : [0xfd, 0xdd] : "f64x2.floor", + F64x2Trunc : [0xfd, 0xde] : "f64x2.trunc", + F64x2Nearest : [0xfd, 0xdf] : "f64x2.nearest", + + F32x4Abs : [0xfd, 0xe0] : "f32x4.abs", + F32x4Neg : [0xfd, 0xe1] : "f32x4.neg", + F32x4Sqrt : [0xfd, 0xe3] : "f32x4.sqrt", + F32x4Add : [0xfd, 0xe4] : "f32x4.add", + F32x4Sub : [0xfd, 0xe5] : "f32x4.sub", + F32x4Mul : [0xfd, 0xe6] : "f32x4.mul", + F32x4Div : [0xfd, 0xe7] : "f32x4.div", + F32x4Min : [0xfd, 0xe8] : "f32x4.min", + F32x4Max : [0xfd, 0xe9] : "f32x4.max", + F32x4PMin : [0xfd, 0xea] : "f32x4.pmin", + F32x4PMax : [0xfd, 0xeb] : "f32x4.pmax", + + F64x2Abs : [0xfd, 0xec] : "f64x2.abs", + F64x2Neg : [0xfd, 0xed] : "f64x2.neg", + F64x2Sqrt : [0xfd, 0xef] : "f64x2.sqrt", + F64x2Add : [0xfd, 0xf0] : "f64x2.add", + F64x2Sub : [0xfd, 0xf1] : "f64x2.sub", + F64x2Mul : [0xfd, 0xf2] : "f64x2.mul", + F64x2Div : [0xfd, 0xf3] : "f64x2.div", + F64x2Min : [0xfd, 0xf4] : "f64x2.min", + F64x2Max : [0xfd, 0xf5] : "f64x2.max", + F64x2PMin : [0xfd, 0xf6] : "f64x2.pmin", + F64x2PMax : [0xfd, 0xf7] : "f64x2.pmax", + + I32x4TruncSatF32x4S : [0xfd, 0xf8] : "i32x4.trunc_sat_f32x4_s", + I32x4TruncSatF32x4U : [0xfd, 0xf9] : "i32x4.trunc_sat_f32x4_u", + F32x4ConvertI32x4S : [0xfd, 0xfa] : "f32x4.convert_i32x4_s", + F32x4ConvertI32x4U : [0xfd, 0xfb] : "f32x4.convert_i32x4_u", + + V128Load32Zero(MemArg<4>) : [0xfd, 0xfc] : "v128.load32_zero", + V128Load64Zero(MemArg<8>) : [0xfd, 0xfd] : "v128.load64_zero", + + // Exception handling proposal + CatchAll : [0x05] : "catch_all", // Reuses the else opcode. + Try(BlockType<'a>) : [0x06] : "try", + Catch(ast::Index<'a>) : [0x07] : "catch", + Throw(ast::Index<'a>) : [0x08] : "throw", + Rethrow(ast::Index<'a>) : [0x09] : "rethrow", + Unwind : [0x0a] : "unwind", + } +} + +/// Extra information associated with block-related instructions. +/// +/// This is used to label blocks and also annotate what types are expected for +/// the block. 
#[derive(Debug)]
#[allow(missing_docs)]
pub struct BlockType<'a> {
    pub label: Option<ast::Id<'a>>,
    pub ty: ast::TypeUse<'a, ast::FunctionType<'a>>,
}

impl<'a> Parse<'a> for BlockType<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(BlockType {
            // Optional `$label` naming the block for `br`/`br_if` targets.
            label: parser.parse()?,
            // Block signatures may not name their parameters, so parse the
            // no-names form and convert it into the general `FunctionType`.
            ty: parser
                .parse::<ast::TypeUse<'a, ast::FunctionTypeNoNames<'a>>>()?
                .into(),
        })
    }
}

/// Extra information associated with the func.bind instruction.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct FuncBindType<'a> {
    pub ty: ast::TypeUse<'a, ast::FunctionType<'a>>,
}

impl<'a> Parse<'a> for FuncBindType<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(FuncBindType {
            // Same no-names-then-convert dance as `BlockType` above.
            ty: parser
                .parse::<ast::TypeUse<'a, ast::FunctionTypeNoNames<'a>>>()?
                .into(),
        })
    }
}

/// Extra information associated with the let instruction.
#[derive(Debug)]
#[allow(missing_docs)]
pub struct LetType<'a> {
    pub block: BlockType<'a>,
    pub locals: Vec<ast::Local<'a>>,
}

impl<'a> Parse<'a> for LetType<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(LetType {
            // A `let` starts like any other block (label + type) ...
            block: parser.parse()?,
            // ... followed by zero or more `(local ...)` declarations.
            locals: ast::Local::parse_remainder(parser)?,
        })
    }
}

/// Extra information associated with the `br_table` instruction.
#[allow(missing_docs)]
#[derive(Debug)]
pub struct BrTableIndices<'a> {
    pub labels: Vec<ast::Index<'a>>,
    pub default: ast::Index<'a>,
}

impl<'a> Parse<'a> for BrTableIndices<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // `br_table` requires at least one target; the text format does not
        // distinguish the default label syntactically, so collect every index
        // and then treat the final one as the default.
        let mut labels = Vec::new();
        labels.push(parser.parse()?);
        while parser.peek::<ast::Index>() {
            labels.push(parser.parse()?);
        }
        // Safe: we unconditionally pushed one label above.
        let default = labels.pop().unwrap();
        Ok(BrTableIndices { labels, default })
    }
}

/// Payload for lane-related instructions. Unsigned with no + prefix.
#[derive(Debug)]
pub struct LaneArg {
    /// The lane argument.
    pub lane: u8,
}

impl<'a> Parse<'a> for LaneArg {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let lane = parser.step(|c| {
            if let Some((i, rest)) = c.integer() {
                // Lane indices must be plain unsigned literals: any explicit
                // `+`/`-` sign is rejected here.
                if i.sign() == None {
                    let (src, radix) = i.val();
                    // Out-of-range (> u8::MAX) values surface as a parse error
                    // rather than silently truncating.
                    let val = u8::from_str_radix(src, radix)
                        .map_err(|_| c.error("malformed lane index"))?;
                    Ok((val, rest))
                } else {
                    Err(c.error("unexpected token"))
                }
            } else {
                Err(c.error("expected a lane index"))
            }
        })?;
        Ok(LaneArg { lane })
    }
}

/// Payload for memory-related instructions indicating offset/alignment of
/// memory accesses.
#[derive(Debug)]
pub struct MemArg<'a> {
    /// The alignment of this access.
    ///
    /// This is not stored as a log, this is the actual alignment (e.g. 1, 2, 4,
    /// 8, etc).
    pub align: u32,
    /// The offset, in bytes, of this access.
    pub offset: u32,
    /// The memory index we're accessing.
    pub memory: ast::ItemRef<'a, kw::memory>,
}

impl<'a> MemArg<'a> {
    /// Parses `[$memidx] [offset=N] [align=N]`, where every piece is optional.
    ///
    /// `default_align` is the instruction's natural alignment in bytes, used
    /// when no explicit `align=` field is written.
    fn parse(parser: Parser<'a>, default_align: u32) -> Result<Self> {
        // In the text format `offset=123` lexes as one *keyword* token, so we
        // match the `name=` prefix on a keyword and parse the numeric suffix
        // by hand (decimal or `0x` hex). Returns Ok(None) — consuming nothing —
        // when the next token isn't a matching field.
        fn parse_field(name: &str, parser: Parser<'_>) -> Result<Option<u32>> {
            parser.step(|c| {
                let (kw, rest) = match c.keyword() {
                    Some(p) => p,
                    None => return Ok((None, c)),
                };
                if !kw.starts_with(name) {
                    return Ok((None, c));
                }
                let kw = &kw[name.len()..];
                if !kw.starts_with("=") {
                    return Ok((None, c));
                }
                let num = &kw[1..];
                let num = if num.starts_with("0x") {
                    match u32::from_str_radix(&num[2..], 16) {
                        Ok(n) => n,
                        Err(_) => return Err(c.error("i32 constant out of range")),
                    }
                } else {
                    match num.parse() {
                        Ok(n) => n,
                        Err(_) => return Err(c.error("i32 constant out of range")),
                    }
                };

                Ok((Some(num), rest))
            })
        }
        // multi-memory: an optional leading memory index; memory 0 when absent.
        let memory = parser
            .parse::<Option<ast::ItemRef<'a, kw::memory>>>()?
            .unwrap_or(idx_zero(parser.prev_span(), kw::memory));
        let offset = parse_field("offset", parser)?.unwrap_or(0);
        // Note: order matters — `offset=` must precede `align=` in the source.
        let align = match parse_field("align", parser)? {
            Some(n) if !n.is_power_of_two() => {
                return Err(parser.error("alignment must be a power of two"))
            }
            n => n.unwrap_or(default_align),
        };

        Ok(MemArg {
            offset,
            align,
            memory,
        })
    }
}

/// Builds an implicit reference to item index 0 (the default table/memory/etc.
/// used when the text omits an explicit index), anchored at `span` for error
/// reporting.
fn idx_zero<T>(span: ast::Span, mk_kind: fn(ast::Span) -> T) -> ast::ItemRef<'static, T> {
    ast::ItemRef::Item {
        kind: mk_kind(span),
        idx: ast::Index::Num(0, span),
        exports: Vec::new(),
    }
}

/// Extra data associated with the `loadN_lane` and `storeN_lane` instructions.
#[derive(Debug)]
pub struct LoadOrStoreLane<'a> {
    /// The memory argument for this instruction.
    pub memarg: MemArg<'a>,
    /// The lane argument for this instruction.
    pub lane: LaneArg
}

impl<'a> LoadOrStoreLane<'a> {
    /// Parses a memarg (with the given natural alignment) followed by a lane
    /// index.
    fn parse(parser: Parser<'a>, default_align: u32) -> Result<Self> {
        Ok(LoadOrStoreLane {
            memarg: MemArg::parse(parser, default_align)?,
            lane: LaneArg::parse(parser)?
        })
    }
}

/// Extra data associated with the `call_indirect` instruction.
#[derive(Debug)]
pub struct CallIndirect<'a> {
    /// The table that this call is going to be indexing.
    pub table: ast::ItemRef<'a, kw::table>,
    /// The type signature that this `call_indirect` instruction is using.
    pub ty: ast::TypeUse<'a, ast::FunctionType<'a>>,
}

impl<'a> Parse<'a> for CallIndirect<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let prev_span = parser.prev_span();
        let mut table: Option<ast::IndexOrRef<_>> = parser.parse()?;
        let ty = parser.parse::<ast::TypeUse<'a, ast::FunctionTypeNoNames<'a>>>()?;
        // Turns out the official test suite at this time thinks table
        // identifiers comes first but wabt's test suites asserts differently
        // putting them second. Let's just handle both.
        if table.is_none() {
            table = parser.parse()?;
        }
        Ok(CallIndirect {
            // No table index in either position means table 0.
            table: table.map(|i| i.0).unwrap_or(idx_zero(prev_span, kw::table)),
            ty: ty.into(),
        })
    }
}

/// Extra data associated with the `table.init` instruction
#[derive(Debug)]
pub struct TableInit<'a> {
    /// The index of the table we're copying into.
    pub table: ast::ItemRef<'a, kw::table>,
    /// The index of the element segment we're copying into a table.
    pub elem: ast::Index<'a>,
}

impl<'a> Parse<'a> for TableInit<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let prev_span = parser.prev_span();
        // `(table.init $elem)` and `(table.init $table $elem)` are both legal.
        // A single index alone is ambiguous, so peek two tokens ahead: only if
        // a second index follows is the first one the table.
        let (elem, table) =
            if parser.peek::<ast::ItemRef<kw::table>>() || parser.peek2::<ast::Index>() {
                let table = parser.parse::<ast::IndexOrRef<_>>()?.0;
                (parser.parse()?, table)
            } else {
                (parser.parse()?, idx_zero(prev_span, kw::table))
            };
        Ok(TableInit { table, elem })
    }
}

/// Extra data associated with the `table.copy` instruction.
#[derive(Debug)]
pub struct TableCopy<'a> {
    /// The index of the destination table to copy into.
    pub dst: ast::ItemRef<'a, kw::table>,
    /// The index of the source table to copy from.
    pub src: ast::ItemRef<'a, kw::table>,
}

impl<'a> Parse<'a> for TableCopy<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // Text order is destination then source; if the destination is absent
        // the source must be too, and both default to table 0.
        let (dst, src) = match parser.parse::<Option<ast::IndexOrRef<_>>>()? {
            Some(dst) => (dst.0, parser.parse::<ast::IndexOrRef<_>>()?.0),
            None => (
                idx_zero(parser.prev_span(), kw::table),
                idx_zero(parser.prev_span(), kw::table),
            ),
        };
        Ok(TableCopy { dst, src })
    }
}

/// Extra data associated with unary table instructions.
#[derive(Debug)]
pub struct TableArg<'a> {
    /// The index of the table argument.
    pub dst: ast::ItemRef<'a, kw::table>,
}

impl<'a> Parse<'a> for TableArg<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // Optional table index; table 0 when omitted.
        let dst = if let Some(dst) = parser.parse::<Option<ast::IndexOrRef<_>>>()? {
            dst.0
        } else {
            idx_zero(parser.prev_span(), kw::table)
        };
        Ok(TableArg { dst })
    }
}

/// Extra data associated with unary memory instructions.
#[derive(Debug)]
pub struct MemoryArg<'a> {
    /// The index of the memory space.
    pub mem: ast::ItemRef<'a, kw::memory>,
}

impl<'a> Parse<'a> for MemoryArg<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // Optional memory index; memory 0 when omitted.
        let mem = if let Some(mem) = parser.parse::<Option<ast::IndexOrRef<_>>>()? {
            mem.0
        } else {
            idx_zero(parser.prev_span(), kw::memory)
        };
        Ok(MemoryArg { mem })
    }
}

/// Extra data associated with the `memory.init` instruction
#[derive(Debug)]
pub struct MemoryInit<'a> {
    /// The index of the data segment we're copying into memory.
    pub data: ast::Index<'a>,
    /// The index of the memory we're copying into.
    pub mem: ast::ItemRef<'a, kw::memory>,
}

impl<'a> Parse<'a> for MemoryInit<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // NOTE(review): unlike `table.init`, the data segment index comes
        // first here and the optional memory index second — presumably
        // matching the multi-memory text format; confirm against the spec
        // tests before changing.
        let data = parser.parse()?;
        let mem = parser
            .parse::<Option<ast::IndexOrRef<_>>>()?
            .map(|i| i.0)
            .unwrap_or(idx_zero(parser.prev_span(), kw::memory));
        Ok(MemoryInit { data, mem })
    }
}

/// Extra data associated with the `memory.copy` instruction
#[derive(Debug)]
pub struct MemoryCopy<'a> {
    /// The index of the memory we're copying from.
    pub src: ast::ItemRef<'a, kw::memory>,
    /// The index of the memory we're copying to.
    pub dst: ast::ItemRef<'a, kw::memory>,
}

impl<'a> Parse<'a> for MemoryCopy<'a> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // Text order is destination first, then source (mirroring
        // `table.copy`); both default to memory 0 when the first is absent.
        let (src, dst) = match parser.parse::<Option<ast::IndexOrRef<_>>>()? {
            Some(dst) => (parser.parse::<ast::IndexOrRef<_>>()?.0, dst.0),
            None => (
                idx_zero(parser.prev_span(), kw::memory),
                idx_zero(parser.prev_span(), kw::memory),
            ),
        };
        Ok(MemoryCopy { src, dst })
    }
}

/// Extra data associated with the `struct.get/set` instructions
#[derive(Debug)]
pub struct StructAccess<'a> {
    /// The index of the struct type we're accessing.
+ pub r#struct: ast::Index<'a>, + /// The index of the field of the struct we're accessing + pub field: ast::Index<'a>, +} + +impl<'a> Parse<'a> for StructAccess<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(StructAccess { + r#struct: parser.parse()?, + field: parser.parse()?, + }) + } +} + +/// Extra data associated with the `struct.narrow` instruction +#[derive(Debug)] +pub struct StructNarrow<'a> { + /// The type of the struct we're casting from + pub from: ast::ValType<'a>, + /// The type of the struct we're casting to + pub to: ast::ValType<'a>, +} + +impl<'a> Parse<'a> for StructNarrow<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(StructNarrow { + from: parser.parse()?, + to: parser.parse()?, + }) + } +} + +/// Different ways to specify a `v128.const` instruction +#[derive(Debug)] +#[rustfmt::skip] +#[allow(missing_docs)] +pub enum V128Const { + I8x16([i8; 16]), + I16x8([i16; 8]), + I32x4([i32; 4]), + I64x2([i64; 2]), + F32x4([ast::Float32; 4]), + F64x2([ast::Float64; 2]), +} + +impl V128Const { + /// Returns the raw little-ended byte sequence used to represent this + /// `v128` constant` + /// + /// This is typically suitable for encoding as the payload of the + /// `v128.const` instruction. 
+ #[rustfmt::skip] + pub fn to_le_bytes(&self) -> [u8; 16] { + match self { + V128Const::I8x16(arr) => [ + arr[0] as u8, + arr[1] as u8, + arr[2] as u8, + arr[3] as u8, + arr[4] as u8, + arr[5] as u8, + arr[6] as u8, + arr[7] as u8, + arr[8] as u8, + arr[9] as u8, + arr[10] as u8, + arr[11] as u8, + arr[12] as u8, + arr[13] as u8, + arr[14] as u8, + arr[15] as u8, + ], + V128Const::I16x8(arr) => { + let a1 = arr[0].to_le_bytes(); + let a2 = arr[1].to_le_bytes(); + let a3 = arr[2].to_le_bytes(); + let a4 = arr[3].to_le_bytes(); + let a5 = arr[4].to_le_bytes(); + let a6 = arr[5].to_le_bytes(); + let a7 = arr[6].to_le_bytes(); + let a8 = arr[7].to_le_bytes(); + [ + a1[0], a1[1], + a2[0], a2[1], + a3[0], a3[1], + a4[0], a4[1], + a5[0], a5[1], + a6[0], a6[1], + a7[0], a7[1], + a8[0], a8[1], + ] + } + V128Const::I32x4(arr) => { + let a1 = arr[0].to_le_bytes(); + let a2 = arr[1].to_le_bytes(); + let a3 = arr[2].to_le_bytes(); + let a4 = arr[3].to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], + a2[0], a2[1], a2[2], a2[3], + a3[0], a3[1], a3[2], a3[3], + a4[0], a4[1], a4[2], a4[3], + ] + } + V128Const::I64x2(arr) => { + let a1 = arr[0].to_le_bytes(); + let a2 = arr[1].to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], a1[4], a1[5], a1[6], a1[7], + a2[0], a2[1], a2[2], a2[3], a2[4], a2[5], a2[6], a2[7], + ] + } + V128Const::F32x4(arr) => { + let a1 = arr[0].bits.to_le_bytes(); + let a2 = arr[1].bits.to_le_bytes(); + let a3 = arr[2].bits.to_le_bytes(); + let a4 = arr[3].bits.to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], + a2[0], a2[1], a2[2], a2[3], + a3[0], a3[1], a3[2], a3[3], + a4[0], a4[1], a4[2], a4[3], + ] + } + V128Const::F64x2(arr) => { + let a1 = arr[0].bits.to_le_bytes(); + let a2 = arr[1].bits.to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], a1[4], a1[5], a1[6], a1[7], + a2[0], a2[1], a2[2], a2[3], a2[4], a2[5], a2[6], a2[7], + ] + } + } + } +} + +impl<'a> Parse<'a> for V128Const { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); 
+ if l.peek::<kw::i8x16>() { + parser.parse::<kw::i8x16>()?; + Ok(V128Const::I8x16([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i16x8>() { + parser.parse::<kw::i16x8>()?; + Ok(V128Const::I16x8([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i32x4>() { + parser.parse::<kw::i32x4>()?; + Ok(V128Const::I32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i64x2>() { + parser.parse::<kw::i64x2>()?; + Ok(V128Const::I64x2([parser.parse()?, parser.parse()?])) + } else if l.peek::<kw::f32x4>() { + parser.parse::<kw::f32x4>()?; + Ok(V128Const::F32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::f64x2>() { + parser.parse::<kw::f64x2>()?; + Ok(V128Const::F64x2([parser.parse()?, parser.parse()?])) + } else { + Err(l.error()) + } + } +} + +/// Lanes being shuffled in the `i8x16.shuffle` instruction +#[derive(Debug)] +pub struct I8x16Shuffle { + #[allow(missing_docs)] + pub lanes: [u8; 16], +} + +impl<'a> Parse<'a> for I8x16Shuffle { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(I8x16Shuffle { + lanes: [ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ], + }) + } +} + +/// Payload of the `select` instructions +#[derive(Debug)] +pub struct SelectTypes<'a> { + 
#[allow(missing_docs)] + pub tys: Option<Vec<ast::ValType<'a>>>, +} + +impl<'a> Parse<'a> for SelectTypes<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut tys = None; + while parser.peek2::<kw::result>() { + let mut list = Vec::new(); + parser.parens(|p| { + p.parse::<kw::result>()?; + while !p.is_empty() { + list.push(p.parse()?); + } + Ok(()) + })?; + tys = Some(list); + } + Ok(SelectTypes { tys }) + } +} + +/// Payload of the `br_on_exn` instruction +#[derive(Debug)] +#[allow(missing_docs)] +pub struct BrOnExn<'a> { + pub label: ast::Index<'a>, + pub exn: ast::Index<'a>, +} + +impl<'a> Parse<'a> for BrOnExn<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let label = parser.parse()?; + let exn = parser.parse()?; + Ok(BrOnExn { label, exn }) + } +} + +/// Payload of the `br_on_cast` instruction +#[derive(Debug)] +#[allow(missing_docs)] +pub struct BrOnCast<'a> { + pub label: ast::Index<'a>, + pub val: HeapType<'a>, + pub rtt: HeapType<'a>, +} + +impl<'a> Parse<'a> for BrOnCast<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let label = parser.parse()?; + let val = parser.parse()?; + let rtt = parser.parse()?; + Ok(BrOnCast { label, val, rtt }) + } +} + +/// Payload of the `rtt.sub` instruction +#[derive(Debug)] +#[allow(missing_docs)] +pub struct RTTSub<'a> { + pub depth: u32, + pub input_rtt: HeapType<'a>, + pub output_rtt: HeapType<'a>, +} + +impl<'a> Parse<'a> for RTTSub<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let depth = parser.parse()?; + let input_rtt = parser.parse()?; + let output_rtt = parser.parse()?; + Ok(RTTSub { + depth, + input_rtt, + output_rtt, + }) + } +} + +/// Payload of the `ref.test/cast` instruction +#[derive(Debug)] +#[allow(missing_docs)] +pub struct RefTest<'a> { + pub val: HeapType<'a>, + pub rtt: HeapType<'a>, +} + +impl<'a> Parse<'a> for RefTest<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let val = parser.parse()?; + let rtt = parser.parse()?; + Ok(RefTest { val, rtt }) + } 
+} diff --git a/third_party/rust/wast/src/ast/func.rs b/third_party/rust/wast/src/ast/func.rs new file mode 100644 index 0000000000..6bc6105552 --- /dev/null +++ b/third_party/rust/wast/src/ast/func.rs @@ -0,0 +1,115 @@ +use crate::ast::{self, kw}; +use crate::parser::{Parse, Parser, Result}; + +/// A WebAssembly function to be inserted into a module. +/// +/// This is a member of both the function and code sections. +#[derive(Debug)] +pub struct Func<'a> { + /// Where this `func` was defined. + pub span: ast::Span, + /// An identifier that this function is resolved with (optionally) for name + /// resolution. + pub id: Option<ast::Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<ast::NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: ast::InlineExport<'a>, + /// What kind of function this is, be it an inline-defined or imported + /// function. + pub kind: FuncKind<'a>, + /// The type that this function will have. + pub ty: ast::TypeUse<'a, ast::FunctionType<'a>>, +} + +/// Possible ways to define a function in the text format. +#[derive(Debug)] +pub enum FuncKind<'a> { + /// A function which is actually defined as an import, such as: + /// + /// ```text + /// (func (type 3) (import "foo" "bar")) + /// ``` + Import(ast::InlineImport<'a>), + + /// Almost all functions, those defined inline in a wasm module. + Inline { + /// The list of locals, if any, for this function. + locals: Vec<Local<'a>>, + + /// The instructions of the function. + expression: ast::Expression<'a>, + }, +} + +impl<'a> Parse<'a> for Func<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::func>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + let (ty, kind) = if let Some(import) = parser.parse()? 
{ + (parser.parse()?, FuncKind::Import(import)) + } else { + let ty = parser.parse()?; + let locals = Local::parse_remainder(parser)?; + ( + ty, + FuncKind::Inline { + locals, + expression: parser.parse()?, + }, + ) + }; + + Ok(Func { + span, + id, + name, + exports, + ty, + kind, + }) + } +} + +/// A local for a `func` or `let` instruction. +/// +/// Each local has an optional identifier for name resolution, an optional name +/// for the custom `name` section, and a value type. +#[derive(Debug)] +pub struct Local<'a> { + /// An identifier that this local is resolved with (optionally) for name + /// resolution. + pub id: Option<ast::Id<'a>>, + /// An optional name for this local stored in the custom `name` section. + pub name: Option<ast::NameAnnotation<'a>>, + /// The value type of this local. + pub ty: ast::ValType<'a>, +} + +impl<'a> Local<'a> { + pub(crate) fn parse_remainder(parser: Parser<'a>) -> Result<Vec<Local<'a>>> { + let mut locals = Vec::new(); + while parser.peek2::<kw::local>() { + parser.parens(|p| { + p.parse::<kw::local>()?; + if p.is_empty() { + return Ok(()); + } + let id: Option<_> = p.parse()?; + let name: Option<_> = p.parse()?; + let ty = p.parse()?; + let parse_more = id.is_none() && name.is_none(); + locals.push(Local { id, name, ty }); + while parse_more && !p.is_empty() { + locals.push(Local { id: None, name: None, ty: p.parse()? }); + } + Ok(()) + })?; + } + Ok(locals) + } +} diff --git a/third_party/rust/wast/src/ast/global.rs b/third_party/rust/wast/src/ast/global.rs new file mode 100644 index 0000000000..78ca637488 --- /dev/null +++ b/third_party/rust/wast/src/ast/global.rs @@ -0,0 +1,53 @@ +use crate::ast::{self, kw}; +use crate::parser::{Parse, Parser, Result}; + +/// A WebAssembly global in a module +#[derive(Debug)] +pub struct Global<'a> { + /// Where this `global` was defined. 
+ pub span: ast::Span, + /// An optional name to reference this global by + pub id: Option<ast::Id<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: ast::InlineExport<'a>, + /// The type of this global, both its value type and whether it's mutable. + pub ty: ast::GlobalType<'a>, + /// What kind of global this defined as. + pub kind: GlobalKind<'a>, +} + +/// Different kinds of globals that can be defined in a module. +#[derive(Debug)] +pub enum GlobalKind<'a> { + /// A global which is actually defined as an import, such as: + /// + /// ```text + /// (global i32 (import "foo" "bar")) + /// ``` + Import(ast::InlineImport<'a>), + + /// A global defined inline in the module itself + Inline(ast::Expression<'a>), +} + +impl<'a> Parse<'a> for Global<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::global>()?.0; + let id = parser.parse()?; + let exports = parser.parse()?; + + let (ty, kind) = if let Some(import) = parser.parse()? { + (parser.parse()?, GlobalKind::Import(import)) + } else { + (parser.parse()?, GlobalKind::Inline(parser.parse()?)) + }; + Ok(Global { + span, + id, + exports, + ty, + kind, + }) + } +} diff --git a/third_party/rust/wast/src/ast/import.rs b/third_party/rust/wast/src/ast/import.rs new file mode 100644 index 0000000000..62e5fb91ee --- /dev/null +++ b/third_party/rust/wast/src/ast/import.rs @@ -0,0 +1,176 @@ +use crate::ast::{self, kw}; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; + +/// An `import` statement and entry in a WebAssembly module. +#[derive(Debug, Clone)] +pub struct Import<'a> { + /// Where this `import` was defined + pub span: ast::Span, + /// The module that this statement is importing from + pub module: &'a str, + /// The name of the field in the module this statement imports from. + pub field: Option<&'a str>, + /// The item that's being imported. 
+ pub item: ItemSig<'a>, +} + +impl<'a> Parse<'a> for Import<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::import>()?.0; + let module = parser.parse()?; + let field = parser.parse()?; + let item = parser.parens(|p| p.parse())?; + Ok(Import { + span, + module, + field, + item, + }) + } +} + +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub struct ItemSig<'a> { + /// Where this item is defined in the source. + pub span: ast::Span, + /// An optional identifier used during name resolution to refer to this item + /// from the rest of the module. + pub id: Option<ast::Id<'a>>, + /// An optional name which, for functions, will be stored in the + /// custom `name` section. + pub name: Option<ast::NameAnnotation<'a>>, + /// What kind of item this is. + pub kind: ItemKind<'a>, +} + +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub enum ItemKind<'a> { + Func(ast::TypeUse<'a, ast::FunctionType<'a>>), + Table(ast::TableType<'a>), + Memory(ast::MemoryType), + Global(ast::GlobalType<'a>), + Event(ast::EventType<'a>), + Module(ast::TypeUse<'a, ast::ModuleType<'a>>), + Instance(ast::TypeUse<'a, ast::InstanceType<'a>>), +} + +impl<'a> Parse<'a> for ItemSig<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + let span = parser.parse::<kw::func>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: parser.parse()?, + kind: ItemKind::Func(parser.parse()?), + }) + } else if l.peek::<kw::table>() { + let span = parser.parse::<kw::table>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Table(parser.parse()?), + }) + } else if l.peek::<kw::memory>() { + let span = parser.parse::<kw::memory>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Memory(parser.parse()?), + }) + } else if l.peek::<kw::global>() { + let span = parser.parse::<kw::global>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + 
name: None, + kind: ItemKind::Global(parser.parse()?), + }) + } else if l.peek::<kw::event>() { + let span = parser.parse::<kw::event>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Event(parser.parse()?), + }) + } else if l.peek::<kw::module>() { + let span = parser.parse::<kw::module>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Module(parser.parse()?), + }) + } else if l.peek::<kw::instance>() { + let span = parser.parse::<kw::instance>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Instance(parser.parse()?), + }) + } else { + Err(l.error()) + } + } +} + +/// A listing of a inline `(import "foo")` statement. +/// +/// Note that when parsing this type it is somewhat unconventional that it +/// parses its own surrounding parentheses. This is typically an optional type, +/// so it's so far been a bit nicer to have the optionality handled through +/// `Peek` rather than `Option<T>`. 
+#[derive(Debug, Copy, Clone)] +#[allow(missing_docs)] +pub struct InlineImport<'a> { + pub module: &'a str, + pub field: Option<&'a str>, +} + +impl<'a> Parse<'a> for InlineImport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|p| { + p.parse::<kw::import>()?; + Ok(InlineImport { + module: p.parse()?, + field: p.parse()?, + }) + }) + } +} + +impl Peek for InlineImport<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let cursor = match cursor.lparen() { + Some(cursor) => cursor, + None => return false, + }; + let cursor = match cursor.keyword() { + Some(("import", cursor)) => cursor, + _ => return false, + }; + let cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => return false, + }; + + // optional field + let cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => cursor, + }; + + cursor.rparen().is_some() + } + + fn display() -> &'static str { + "inline import" + } +} diff --git a/third_party/rust/wast/src/ast/instance.rs b/third_party/rust/wast/src/ast/instance.rs new file mode 100644 index 0000000000..6b41f477a9 --- /dev/null +++ b/third_party/rust/wast/src/ast/instance.rs @@ -0,0 +1,86 @@ +use crate::ast::{self, kw}; +use crate::parser::{Parse, Parser, Result}; + +/// A nested WebAssembly instance to be created as part of a module. +#[derive(Debug)] +pub struct Instance<'a> { + /// Where this `instance` was defined. + pub span: ast::Span, + /// An identifier that this instance is resolved with (optionally) for name + /// resolution. + pub id: Option<ast::Id<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: ast::InlineExport<'a>, + /// What kind of instance this is, be it an inline-defined or imported one. + pub kind: InstanceKind<'a>, +} + +/// Possible ways to define a instance in the text format. 
+#[derive(Debug)] +pub enum InstanceKind<'a> { + /// An instance which is actually defined as an import, such as: + Import { + /// Where we're importing from + import: ast::InlineImport<'a>, + /// The type that this instance will have. + ty: ast::TypeUse<'a, ast::InstanceType<'a>>, + }, + + /// Instances whose instantiation is defined inline. + Inline { + /// Module that we're instantiating + module: ast::ItemRef<'a, kw::module>, + /// Arguments used to instantiate the instance + args: Vec<InstanceArg<'a>>, + }, +} + +/// Arguments to the `instantiate` instruction +#[derive(Debug)] +#[allow(missing_docs)] +pub struct InstanceArg<'a> { + pub name: &'a str, + pub index: ast::ItemRef<'a, ast::ExportKind>, +} + +impl<'a> Parse<'a> for Instance<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::instance>()?.0; + let id = parser.parse()?; + let exports = parser.parse()?; + + let kind = if let Some(import) = parser.parse()? { + InstanceKind::Import { + import, + ty: parser.parse()?, + } + } else { + parser.parens(|p| { + p.parse::<kw::instantiate>()?; + let module = p.parse::<ast::IndexOrRef<_>>()?.0; + let mut args = Vec::new(); + while !p.is_empty() { + args.push(p.parse()?); + } + Ok(InstanceKind::Inline { module, args }) + })? + }; + + Ok(Instance { + span, + id, + exports, + kind, + }) + } +} + +impl<'a> Parse<'a> for InstanceArg<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(InstanceArg { + name: parser.parse()?, + index: parser.parse()?, + }) + } +} diff --git a/third_party/rust/wast/src/ast/memory.rs b/third_party/rust/wast/src/ast/memory.rs new file mode 100644 index 0000000000..ed3d907af4 --- /dev/null +++ b/third_party/rust/wast/src/ast/memory.rs @@ -0,0 +1,250 @@ +use crate::ast::{self, kw}; +use crate::parser::{Lookahead1, Parse, Parser, Peek, Result}; + +/// A defined WebAssembly memory instance inside of a module. 
+#[derive(Debug)] +pub struct Memory<'a> { + /// Where this `memory` was defined + pub span: ast::Span, + /// An optional name to refer to this memory by. + pub id: Option<ast::Id<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: ast::InlineExport<'a>, + /// How this memory is defined in the module. + pub kind: MemoryKind<'a>, +} + +/// Different syntactical ways a memory can be defined in a module. +#[derive(Debug)] +pub enum MemoryKind<'a> { + /// This memory is actually an inlined import definition. + #[allow(missing_docs)] + Import { + import: ast::InlineImport<'a>, + ty: ast::MemoryType, + }, + + /// A typical memory definition which simply says the limits of the memory + Normal(ast::MemoryType), + + /// The data of this memory, starting from 0, explicitly listed + Inline { + /// Whether or not this will be creating a 32-bit memory + is_32: bool, + /// The inline data specified for this memory + data: Vec<DataVal<'a>>, + }, +} + +impl<'a> Parse<'a> for Memory<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::memory>()?.0; + let id = parser.parse()?; + let exports = parser.parse()?; + + // Afterwards figure out which style this is, either: + // + // * `(import "a" "b") limits` + // * `(data ...)` + // * `limits` + let mut l = parser.lookahead1(); + let kind = if let Some(import) = parser.parse()? 
{ + MemoryKind::Import { + import, + ty: parser.parse()?, + } + } else if l.peek::<ast::LParen>() || parser.peek2::<ast::LParen>() { + let is_32 = if parser.parse::<Option<kw::i32>>()?.is_some() { + true + } else if parser.parse::<Option<kw::i64>>()?.is_some() { + false + } else { + true + }; + let data = parser.parens(|parser| { + parser.parse::<kw::data>()?; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + Ok(data) + })?; + MemoryKind::Inline { data, is_32 } + } else if l.peek::<u32>() || l.peek::<kw::i32>() || l.peek::<kw::i64>() { + MemoryKind::Normal(parser.parse()?) + } else { + return Err(l.error()); + }; + Ok(Memory { + span, + id, + exports, + kind, + }) + } +} + +/// A `data` directive in a WebAssembly module. +#[derive(Debug)] +pub struct Data<'a> { + /// Where this `data` was defined + pub span: ast::Span, + + /// The optional name of this data segment + pub id: Option<ast::Id<'a>>, + + /// Whether this data segment is passive or active + pub kind: DataKind<'a>, + + /// Bytes for this `Data` segment, viewed as the concatenation of all the + /// contained slices. + pub data: Vec<DataVal<'a>>, +} + +/// Different kinds of data segments, either passive or active. +#[derive(Debug)] +pub enum DataKind<'a> { + /// A passive data segment which isn't associated with a memory and is + /// referenced from various instructions. + Passive, + + /// An active data segment which is associated and loaded into a particular + /// memory on module instantiation. + Active { + /// The memory that this `Data` will be associated with. 
+ memory: ast::ItemRef<'a, kw::memory>, + + /// Initial offset to load this data segment at + offset: ast::Expression<'a>, + }, +} + +impl<'a> Parse<'a> for Data<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::data>()?.0; + let id = parser.parse()?; + + // The `passive` keyword is mentioned in the current spec but isn't + // mentioned in `wabt` tests, so consider it optional for now + let kind = if parser.peek::<kw::passive>() { + parser.parse::<kw::passive>()?; + DataKind::Passive + + // If data directly follows then assume this is a passive segment + } else if parser.peek::<&[u8]>() { + DataKind::Passive + + // ... and otherwise we must be attached to a particular memory as well + // as having an initialization offset. + } else { + let memory = if let Some(index) = parser.parse::<Option<ast::IndexOrRef<_>>>()? { + index.0 + } else { + ast::ItemRef::Item { + kind: kw::memory(parser.prev_span()), + idx: ast::Index::Num(0, span), + exports: Vec::new(), + } + }; + let offset = parser.parens(|parser| { + if parser.peek::<kw::offset>() { + parser.parse::<kw::offset>()?; + } + parser.parse() + })?; + DataKind::Active { memory, offset } + }; + + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + Ok(Data { + span, + id, + kind, + data, + }) + } +} + +/// Differnet ways the value of a data segment can be defined. +#[derive(Debug)] +#[allow(missing_docs)] +pub enum DataVal<'a> { + String(&'a [u8]), + Integral(Vec<u8>), +} + +impl DataVal<'_> { + /// Returns the length, in bytes, of the memory used to represent this data + /// value. + pub fn len(&self) -> usize { + match self { + DataVal::String(s) => s.len(), + DataVal::Integral(s) => s.len(), + } + } + + /// Pushes the value of this data value onto the provided list of bytes. 
+ pub fn push_onto(&self, dst: &mut Vec<u8>) { + match self { + DataVal::String(s) => dst.extend_from_slice(s), + DataVal::Integral(s) => dst.extend_from_slice(s), + } + } +} + +impl<'a> Parse<'a> for DataVal<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if !parser.peek::<ast::LParen>() { + return Ok(DataVal::String(parser.parse()?)); + } + + return parser.parens(|p| { + let mut result = Vec::new(); + let mut lookahead = p.lookahead1(); + let l = &mut lookahead; + let r = &mut result; + if consume::<kw::i8, i8, _>(p, l, r, |u, v| v.push(u as u8))? + || consume::<kw::i16, i16, _>(p, l, r, |u, v| v.extend(&u.to_le_bytes()))? + || consume::<kw::i32, i32, _>(p, l, r, |u, v| v.extend(&u.to_le_bytes()))? + || consume::<kw::i64, i64, _>(p, l, r, |u, v| v.extend(&u.to_le_bytes()))? + || consume::<kw::f32, ast::Float32, _>(p, l, r, |u, v| { + v.extend(&u.bits.to_le_bytes()) + })? + || consume::<kw::f64, ast::Float64, _>(p, l, r, |u, v| { + v.extend(&u.bits.to_le_bytes()) + })? + || consume::<kw::v128, ast::V128Const, _>(p, l, r, |u, v| { + v.extend(&u.to_le_bytes()) + })? + { + Ok(DataVal::Integral(result)) + } else { + Err(lookahead.error()) + } + }); + + fn consume<'a, T: Peek + Parse<'a>, U: Parse<'a>, F>( + parser: Parser<'a>, + lookahead: &mut Lookahead1<'a>, + dst: &mut Vec<u8>, + push: F, + ) -> Result<bool> + where + F: Fn(U, &mut Vec<u8>), + { + if !lookahead.peek::<T>() { + return Ok(false); + } + parser.parse::<T>()?; + while !parser.is_empty() { + let val = parser.parse::<U>()?; + push(val, dst); + } + Ok(true) + } + } +} diff --git a/third_party/rust/wast/src/ast/mod.rs b/third_party/rust/wast/src/ast/mod.rs new file mode 100644 index 0000000000..7b708cd403 --- /dev/null +++ b/third_party/rust/wast/src/ast/mod.rs @@ -0,0 +1,430 @@ +/// A macro to create a custom keyword parser. 
+/// +/// This macro is invoked in one of two forms: +/// +/// ``` +/// // keyword derived from the Rust identifier: +/// wast::custom_keyword!(foo); +/// +/// // or an explicitly specified string representation of the keyword: +/// wast::custom_keyword!(my_keyword = "the-wasm-keyword"); +/// ``` +/// +/// This can then be used to parse custom keyword for custom items, such as: +/// +/// ``` +/// use wast::parser::{Parser, Result, Parse}; +/// +/// struct InlineModule<'a> { +/// inline_text: &'a str, +/// } +/// +/// mod kw { +/// wast::custom_keyword!(inline); +/// } +/// +/// // Parse an inline string module of the form: +/// // +/// // (inline "(module (func))") +/// impl<'a> Parse<'a> for InlineModule<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// parser.parse::<kw::inline>()?; +/// Ok(InlineModule { +/// inline_text: parser.parse()?, +/// }) +/// } +/// } +/// ``` +/// +/// Note that the keyword name can only start with a lower-case letter, i.e. 'a'..'z'. +#[macro_export] +macro_rules! custom_keyword { + ($name:ident) => { + $crate::custom_keyword!($name = stringify!($name)); + }; + ($name:ident = $kw:expr) => { + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + #[derive(Debug, Copy, Clone)] + pub struct $name(pub $crate::Span); + + impl<'a> $crate::parser::Parse<'a> for $name { + fn parse(parser: $crate::parser::Parser<'a>) -> $crate::parser::Result<Self> { + parser.step(|c| { + if let Some((kw, rest)) = c.keyword() { + if kw == $kw { + return Ok(($name(c.cur_span()), rest)); + } + } + Err(c.error(concat!("expected keyword `", $kw, "`"))) + }) + } + } + + impl $crate::parser::Peek for $name { + fn peek(cursor: $crate::parser::Cursor<'_>) -> bool { + if let Some((kw, _rest)) = cursor.keyword() { + kw == $kw + } else { + false + } + } + + fn display() -> &'static str { + concat!("`", $kw, "`") + } + } + }; +} + +/// A macro for defining custom reserved symbols. 
+/// +/// This is like `custom_keyword!` but for reserved symbols (`Token::Reserved`) +/// instead of keywords (`Token::Keyword`). +/// +/// ``` +/// use wast::parser::{Parser, Result, Parse}; +/// +/// // Define a custom reserved symbol, the "spaceship" operator: `<=>`. +/// wast::custom_reserved!(spaceship = "<=>"); +/// +/// /// A "three-way comparison" like `(<=> a b)` that returns -1 if `a` is less +/// /// than `b`, 0 if they're equal, and 1 if `a` is greater than `b`. +/// struct ThreeWayComparison<'a> { +/// lhs: wast::Expression<'a>, +/// rhs: wast::Expression<'a>, +/// } +/// +/// impl<'a> Parse<'a> for ThreeWayComparison<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// parser.parse::<spaceship>()?; +/// let lhs = parser.parse()?; +/// let rhs = parser.parse()?; +/// Ok(ThreeWayComparison { lhs, rhs }) +/// } +/// } +/// ``` +#[macro_export] +macro_rules! custom_reserved { + ($name:ident) => { + $crate::custom_reserved!($name = stringify!($name)); + }; + ($name:ident = $rsv:expr) => { + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + #[derive(Debug)] + pub struct $name(pub $crate::Span); + + impl<'a> $crate::parser::Parse<'a> for $name { + fn parse(parser: $crate::parser::Parser<'a>) -> $crate::parser::Result<Self> { + parser.step(|c| { + if let Some((rsv, rest)) = c.reserved() { + if rsv == $rsv { + return Ok(($name(c.cur_span()), rest)); + } + } + Err(c.error(concat!("expected reserved symbol `", $rsv, "`"))) + }) + } + } + + impl $crate::parser::Peek for $name { + fn peek(cursor: $crate::parser::Cursor<'_>) -> bool { + if let Some((rsv, _rest)) = cursor.reserved() { + rsv == $rsv + } else { + false + } + } + + fn display() -> &'static str { + concat!("`", $rsv, "`") + } + } + }; +} + +/// A macro, like [`custom_keyword`], to create a type which can be used to +/// parse/peek annotation directives. 
+/// +/// Note that when you're parsing custom annotations it can be somewhat tricky +/// due to the nature that most of them are skipped. You'll want to be sure to +/// consult the documentation of [`Parser::register_annotation`][register] when +/// using this macro. +/// +/// # Examples +/// +/// To see an example of how to use this macro, let's invent our own syntax for +/// the [producers section][section] which looks like: +/// +/// ```wat +/// (@producer "wat" "1.0.2") +/// ``` +/// +/// Here, for simplicity, we'll assume everything is a `processed-by` directive, +/// but you could get much more fancy with this as well. +/// +/// ``` +/// # use wast::*; +/// # use wast::parser::*; +/// +/// // First we define the custom annotation keyword we're using, and by +/// // convention we define it in an `annotation` module. +/// mod annotation { +/// wast::annotation!(producer); +/// } +/// +/// struct Producer<'a> { +/// name: &'a str, +/// version: &'a str, +/// } +/// +/// impl<'a> Parse<'a> for Producer<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // Remember that parser conventionally parse the *interior* of an +/// // s-expression, so we parse our `@producer` annotation and then we +/// // parse the payload of our annotation. +/// parser.parse::<annotation::producer>()?; +/// Ok(Producer { +/// name: parser.parse()?, +/// version: parser.parse()?, +/// }) +/// } +/// } +/// ``` +/// +/// Note though that this is only half of the parser for annotations. The other +/// half is calling the [`register_annotation`][register] method at the right +/// time to ensure the parser doesn't automatically skip our `@producer` +/// directive. Note that we *can't* call it inside the `Parse for Producer` +/// definition because that's too late and the annotation would already have +/// been skipped. 
+/// +/// Instead we'll need to call it from a higher level-parser before the +/// parenthesis have been parsed, like so: +/// +/// ``` +/// # use wast::*; +/// # use wast::parser::*; +/// struct Module<'a> { +/// fields: Vec<ModuleField<'a>>, +/// } +/// +/// impl<'a> Parse<'a> for Module<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // .. parse module header here ... +/// +/// // register our custom `@producer` annotation before we start +/// // parsing the parentheses of each field +/// let _r = parser.register_annotation("producer"); +/// +/// let mut fields = Vec::new(); +/// while !parser.is_empty() { +/// fields.push(parser.parens(|p| p.parse())?); +/// } +/// Ok(Module { fields }) +/// } +/// } +/// +/// enum ModuleField<'a> { +/// Producer(Producer<'a>), +/// // ... +/// } +/// # struct Producer<'a>(&'a str); +/// # impl<'a> Parse<'a> for Producer<'a> { +/// # fn parse(parser: Parser<'a>) -> Result<Self> { Ok(Producer(parser.parse()?)) } +/// # } +/// # mod annotation { wast::annotation!(producer); } +/// +/// impl<'a> Parse<'a> for ModuleField<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // and here `peek` works and our delegated parsing works because the +/// // annotation has been registered. +/// if parser.peek::<annotation::producer>() { +/// return Ok(ModuleField::Producer(parser.parse()?)); +/// } +/// +/// // .. typically we'd parse other module fields here... +/// +/// Err(parser.error("unknown module field")) +/// } +/// } +/// ``` +/// +/// [register]: crate::parser::Parser::register_annotation +/// [section]: https://github.com/WebAssembly/tool-conventions/blob/master/ProducersSection.md +#[macro_export] +macro_rules! 
annotation { + ($name:ident) => { + $crate::annotation!($name = stringify!($name)); + }; + ($name:ident = $annotation:expr) => { + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + #[derive(Debug)] + pub struct $name(pub $crate::Span); + + impl<'a> $crate::parser::Parse<'a> for $name { + fn parse(parser: $crate::parser::Parser<'a>) -> $crate::parser::Result<Self> { + parser.step(|c| { + if let Some((a, rest)) = c.annotation() { + if a == $annotation { + return Ok(($name(c.cur_span()), rest)); + } + } + Err(c.error(concat!("expected annotation `@", $annotation, "`"))) + }) + } + } + + impl $crate::parser::Peek for $name { + fn peek(cursor: $crate::parser::Cursor<'_>) -> bool { + if let Some((a, _rest)) = cursor.annotation() { + a == $annotation + } else { + false + } + } + + fn display() -> &'static str { + concat!("`@", $annotation, "`") + } + } + }; +} + +macro_rules! reexport { + ($(mod $name:ident;)*) => ($(mod $name; pub use self::$name::*;)*); +} + +reexport! { + mod token; +} + +#[cfg(feature = "wasm-module")] +reexport! { + mod alias; + mod assert_expr; + mod custom; + mod event; + mod export; + mod expr; + mod func; + mod global; + mod import; + mod instance; + mod memory; + mod module; + mod nested_module; + mod table; + mod types; + mod wast; +} + +/// Common keyword used to parse WebAssembly text files. 
+pub mod kw { + custom_keyword!(after); + custom_keyword!(alias); + custom_keyword!(any); + custom_keyword!(anyfunc); + custom_keyword!(anyref); + custom_keyword!(arg); + custom_keyword!(array); + custom_keyword!(assert_exhaustion); + custom_keyword!(assert_invalid); + custom_keyword!(assert_malformed); + custom_keyword!(assert_return); + custom_keyword!(assert_return_arithmetic_nan); + custom_keyword!(assert_return_arithmetic_nan_f32x4); + custom_keyword!(assert_return_arithmetic_nan_f64x2); + custom_keyword!(assert_return_canonical_nan); + custom_keyword!(assert_return_canonical_nan_f32x4); + custom_keyword!(assert_return_canonical_nan_f64x2); + custom_keyword!(assert_return_func); + custom_keyword!(assert_trap); + custom_keyword!(assert_unlinkable); + custom_keyword!(before); + custom_keyword!(binary); + custom_keyword!(block); + custom_keyword!(catch); + custom_keyword!(catch_all); + custom_keyword!(code); + custom_keyword!(data); + custom_keyword!(declare); + custom_keyword!(r#do = "do"); + custom_keyword!(elem); + custom_keyword!(end); + custom_keyword!(event); + custom_keyword!(exn); + custom_keyword!(exnref); + custom_keyword!(export); + custom_keyword!(r#extern = "extern"); + custom_keyword!(externref); + custom_keyword!(eq); + custom_keyword!(eqref); + custom_keyword!(f32); + custom_keyword!(f32x4); + custom_keyword!(f64); + custom_keyword!(f64x2); + custom_keyword!(field); + custom_keyword!(first); + custom_keyword!(func); + custom_keyword!(funcref); + custom_keyword!(get); + custom_keyword!(global); + custom_keyword!(i16); + custom_keyword!(i16x8); + custom_keyword!(i31); + custom_keyword!(i31ref); + custom_keyword!(i32); + custom_keyword!(i32x4); + custom_keyword!(i64); + custom_keyword!(i64x2); + custom_keyword!(i8); + custom_keyword!(i8x16); + custom_keyword!(import); + custom_keyword!(instance); + custom_keyword!(instantiate); + custom_keyword!(invoke); + custom_keyword!(item); + custom_keyword!(last); + custom_keyword!(local); + 
custom_keyword!(memory); + custom_keyword!(module); + custom_keyword!(modulecode); + custom_keyword!(nan_arithmetic = "nan:arithmetic"); + custom_keyword!(nan_canonical = "nan:canonical"); + custom_keyword!(null); + custom_keyword!(nullref); + custom_keyword!(offset); + custom_keyword!(outer); + custom_keyword!(param); + custom_keyword!(parent); + custom_keyword!(passive); + custom_keyword!(quote); + custom_keyword!(r#else = "else"); + custom_keyword!(r#if = "if"); + custom_keyword!(r#loop = "loop"); + custom_keyword!(r#mut = "mut"); + custom_keyword!(r#type = "type"); + custom_keyword!(r#ref = "ref"); + custom_keyword!(ref_func = "ref.func"); + custom_keyword!(ref_null = "ref.null"); + custom_keyword!(register); + custom_keyword!(result); + custom_keyword!(rtt); + custom_keyword!(shared); + custom_keyword!(start); + custom_keyword!(r#struct = "struct"); + custom_keyword!(table); + custom_keyword!(then); + custom_keyword!(r#try = "try"); + custom_keyword!(unwind); + custom_keyword!(v128); +} + +/// Common annotations used to parse WebAssembly text files. +pub mod annotation { + annotation!(custom); + annotation!(name); +} diff --git a/third_party/rust/wast/src/ast/module.rs b/third_party/rust/wast/src/ast/module.rs new file mode 100644 index 0000000000..e3bee2d050 --- /dev/null +++ b/third_party/rust/wast/src/ast/module.rs @@ -0,0 +1,239 @@ +use crate::ast::{self, annotation, kw}; +use crate::parser::{Parse, Parser, Result}; + +pub use crate::resolve::Names; + +/// A `*.wat` file parser, or a parser for one parenthesized module. +/// +/// This is the top-level type which you'll frequently parse when working with +/// this crate. A `*.wat` file is either one `module` s-expression or a sequence +/// of s-expressions that are module fields. 
+pub struct Wat<'a> { + #[allow(missing_docs)] + pub module: Module<'a>, +} + +impl<'a> Parse<'a> for Wat<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if !parser.has_meaningful_tokens() { + return Err(parser.error("expected at least one module field")); + } + let _r = parser.register_annotation("custom"); + let module = if !parser.peek2::<kw::module>() { + let fields = ModuleField::parse_remaining(parser)?; + Module { + span: ast::Span { offset: 0 }, + id: None, + name: None, + kind: ModuleKind::Text(fields), + } + } else { + parser.parens(|parser| parser.parse())? + }; + module.validate(parser)?; + Ok(Wat { module }) + } +} + +/// A parsed WebAssembly module. +pub struct Module<'a> { + /// Where this `module` was defined + pub span: ast::Span, + /// An optional identifier this module is known by + pub id: Option<ast::Id<'a>>, + /// An optional `@name` annotation for this module + pub name: Option<ast::NameAnnotation<'a>>, + /// What kind of module this was parsed as. + pub kind: ModuleKind<'a>, +} + +/// The different kinds of ways to define a module. +pub enum ModuleKind<'a> { + /// A module defined in the textual s-expression format. + Text(Vec<ModuleField<'a>>), + /// A module that had its raw binary bytes defined via the `binary` + /// directive. + Binary(Vec<&'a [u8]>), +} + +impl<'a> Module<'a> { + /// Performs a name resolution pass on this [`Module`], resolving all + /// symbolic names to indices. + /// + /// The WAT format contains a number of shorthands to make it easier to + /// write, such as inline exports, inline imports, inline type definitions, + /// etc. Additionally it allows using symbolic names such as `$foo` instead + /// of using indices. This module will postprocess an AST to remove all of + /// this syntactic sugar, preparing the AST for binary emission. This is + /// where expansion and name resolution happens. 
+ /// + /// This function will mutate the AST of this [`Module`] and replace all + /// [`super::Index`] arguments with `Index::Num`. This will also expand inline + /// exports/imports listed on fields and handle various other shorthands of + /// the text format. + /// + /// If successful the AST was modified to be ready for binary encoding. A + /// [`Names`] structure is also returned so if you'd like to do your own + /// name lookups on the result you can do so as well. + /// + /// # Errors + /// + /// If an error happens during resolution, such a name resolution error or + /// items are found in the wrong order, then an error is returned. + pub fn resolve(&mut self) -> std::result::Result<Names<'a>, crate::Error> { + crate::resolve::resolve(self) + } + + /// Encodes this [`Module`] to its binary form. + /// + /// This function will take the textual representation in [`Module`] and + /// perform all steps necessary to convert it to a binary WebAssembly + /// module, suitable for writing to a `*.wasm` file. This function may + /// internally modify the [`Module`], for example: + /// + /// * Name resolution is performed to ensure that `Index::Id` isn't present + /// anywhere in the AST. + /// + /// * Inline shorthands such as imports/exports/types are all expanded to be + /// dedicated fields of the module. + /// + /// * Module fields may be shuffled around to preserve index ordering from + /// expansions. + /// + /// After all of this expansion has happened the module will be converted to + /// its binary form and returned as a `Vec<u8>`. This is then suitable to + /// hand off to other wasm runtimes and such. + /// + /// # Errors + /// + /// This function can return an error for name resolution errors and other + /// expansion-related errors. 
+ pub fn encode(&mut self) -> std::result::Result<Vec<u8>, crate::Error> { + self.resolve()?; + Ok(crate::binary::encode(self)) + } + + fn validate(&self, parser: Parser<'_>) -> Result<()> { + let mut starts = 0; + if let ModuleKind::Text(fields) = &self.kind { + for item in fields.iter() { + if let ModuleField::Start(_) = item { + starts += 1; + } + } + } + if starts > 1 { + return Err(parser.error("multiple start sections found")); + } + Ok(()) + } +} + +impl<'a> Parse<'a> for Module<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let _r = parser.register_annotation("custom"); + let span = parser.parse::<kw::module>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + + let kind = if parser.peek::<kw::binary>() { + parser.parse::<kw::binary>()?; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + ModuleKind::Binary(data) + } else { + ModuleKind::Text(ModuleField::parse_remaining(parser)?) + }; + Ok(Module { + span, + id, + name, + kind, + }) + } +} + +/// A listing of all possible fields that can make up a WebAssembly module. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub enum ModuleField<'a> { + Type(ast::Type<'a>), + Import(ast::Import<'a>), + Func(ast::Func<'a>), + Table(ast::Table<'a>), + Memory(ast::Memory<'a>), + Global(ast::Global<'a>), + Export(ast::Export<'a>), + Start(ast::ItemRef<'a, kw::func>), + Elem(ast::Elem<'a>), + Data(ast::Data<'a>), + Event(ast::Event<'a>), + Custom(ast::Custom<'a>), + Instance(ast::Instance<'a>), + NestedModule(ast::NestedModule<'a>), + Alias(ast::Alias<'a>), +} + +impl<'a> ModuleField<'a> { + fn parse_remaining(parser: Parser<'a>) -> Result<Vec<ModuleField>> { + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parens(ModuleField::parse)?); + } + Ok(fields) + } +} + +impl<'a> Parse<'a> for ModuleField<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::r#type>() { + return Ok(ModuleField::Type(parser.parse()?)); + } + if parser.peek::<kw::import>() { + return Ok(ModuleField::Import(parser.parse()?)); + } + if parser.peek::<kw::func>() { + return Ok(ModuleField::Func(parser.parse()?)); + } + if parser.peek::<kw::table>() { + return Ok(ModuleField::Table(parser.parse()?)); + } + if parser.peek::<kw::memory>() { + return Ok(ModuleField::Memory(parser.parse()?)); + } + if parser.peek::<kw::global>() { + return Ok(ModuleField::Global(parser.parse()?)); + } + if parser.peek::<kw::export>() { + return Ok(ModuleField::Export(parser.parse()?)); + } + if parser.peek::<kw::start>() { + parser.parse::<kw::start>()?; + return Ok(ModuleField::Start(parser.parse::<ast::IndexOrRef<_>>()?.0)); + } + if parser.peek::<kw::elem>() { + return Ok(ModuleField::Elem(parser.parse()?)); + } + if parser.peek::<kw::data>() { + return Ok(ModuleField::Data(parser.parse()?)); + } + if parser.peek::<kw::event>() { + return Ok(ModuleField::Event(parser.parse()?)); + } + if parser.peek::<annotation::custom>() { + return Ok(ModuleField::Custom(parser.parse()?)); + } + if parser.peek::<kw::instance>() { + return 
Ok(ModuleField::Instance(parser.parse()?)); + } + if parser.peek::<kw::module>() { + return Ok(ModuleField::NestedModule(parser.parse()?)); + } + if parser.peek::<kw::alias>() { + return Ok(ModuleField::Alias(parser.parse()?)); + } + Err(parser.error("expected valid module field")) + } +} diff --git a/third_party/rust/wast/src/ast/nested_module.rs b/third_party/rust/wast/src/ast/nested_module.rs new file mode 100644 index 0000000000..af3f1a95a4 --- /dev/null +++ b/third_party/rust/wast/src/ast/nested_module.rs @@ -0,0 +1,115 @@ +use crate::ast::{self, kw}; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; + +/// A nested WebAssembly nested module to be created as part of a module. +#[derive(Debug)] +pub struct NestedModule<'a> { + /// Where this `nested module` was defined. + pub span: ast::Span, + /// An identifier that this nested module is resolved with (optionally) for name + /// resolution. + pub id: Option<ast::Id<'a>>, + /// An optional name for this module stored in the custom `name` section. + pub name: Option<ast::NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: ast::InlineExport<'a>, + /// What kind of nested module this is, be it an inline-defined or imported one. + pub kind: NestedModuleKind<'a>, +} + +/// Possible ways to define a nested module in the text format. +#[derive(Debug)] +pub enum NestedModuleKind<'a> { + /// An nested module which is actually defined as an import, such as: + Import { + /// Where this nested module is imported from + import: ast::InlineImport<'a>, + /// The type that this nested module will have. + ty: ast::TypeUse<'a, ast::ModuleType<'a>>, + }, + + /// Nested modules whose instantiation is defined inline. + Inline { + /// Fields in the nested module. 
+        fields: Vec<ast::ModuleField<'a>>,
+    },
+}
+
+impl<'a> Parse<'a> for NestedModule<'a> {
+    fn parse(parser: Parser<'a>) -> Result<Self> {
+        // Recursive descent means each level of module nesting consumes real
+        // stack, so bail out with an error once the parenthesis depth becomes
+        // unreasonable rather than overflowing the stack. This is a
+        // fundamental limitation of this crate's recursive parsing strategy;
+        // it would be great to not return an error here, though!
+        if parser.parens_depth() > 100 {
+            return Err(parser.error("module nesting too deep"));
+        }
+
+        let span = parser.parse::<kw::module>()?.0;
+        let id = parser.parse()?;
+        let name = parser.parse()?;
+        let exports = parser.parse()?;
+
+        let kind = match parser.parse()? {
+            // `(module (import "a" "b") (type ...))` — an imported module.
+            Some(import) => NestedModuleKind::Import {
+                import,
+                ty: parser.parse()?,
+            },
+            // Otherwise the module's fields are defined inline.
+            None => {
+                let mut fields = Vec::new();
+                while !parser.is_empty() {
+                    fields.push(parser.parens(|p| p.parse())?);
+                }
+                NestedModuleKind::Inline { fields }
+            }
+        };
+
+        Ok(NestedModule {
+            span,
+            id,
+            name,
+            exports,
+            kind,
+        })
+    }
+}
+
+// Custom multi-token lookahead used to distinguish an inline type annotation
+// `(type $x)` / `(type 0)` from a type definition such as `(type $x (func))`
+// inside an inline module. Only the two-token variant is recognized here.
+struct InlineType;
+
+impl Peek for InlineType {
+    fn peek(cursor: Cursor<'_>) -> bool {
+        // `(`
+        let cursor = match cursor.lparen() {
+            Some(cursor) => cursor,
+            None => return false,
+        };
+        // `type`
+        let cursor = match cursor.keyword() {
+            Some(("type", cursor)) => cursor,
+            _ => return false,
+        };
+        // ... followed by an index: either an identifier or an integer.
+        let cursor = if let Some((_, cursor)) = cursor.id() {
+            cursor
+        } else if let Some((_, cursor)) = cursor.integer() {
+            cursor
+        } else {
+            return false;
+        };
+        // `)` closes the two-token form.
+        cursor.rparen().is_some()
+    }
+
+    fn display() -> &'static str {
+        "inline type"
+    }
+}
diff --git a/third_party/rust/wast/src/ast/table.rs b/third_party/rust/wast/src/ast/table.rs
new file mode 100644
index 0000000000..d5e71531eb
--- /dev/null
+++ b/third_party/rust/wast/src/ast/table.rs
@@ -0,0 +1,234 @@
+use crate::ast::{self, kw};
+use crate::parser::{Parse, Parser, Result};
+
+/// A WebAssembly `table` directive in a module.
+#[derive(Debug)]
+pub struct Table<'a> {
+    /// Where this table was defined.
+    pub span: ast::Span,
+    /// An optional name to refer to this table by.
+    pub id: Option<ast::Id<'a>>,
+    /// If present, inline export annotations which indicate names this
+    /// definition should be exported under.
+    pub exports: ast::InlineExport<'a>,
+    /// How this table is textually defined in the module.
+    pub kind: TableKind<'a>,
+}
+
+/// Different ways to textually define a table.
+#[derive(Debug)]
+pub enum TableKind<'a> {
+    /// This table is actually an inlined import definition.
+    #[allow(missing_docs)]
+    Import {
+        import: ast::InlineImport<'a>,
+        ty: ast::TableType<'a>,
+    },
+
+    /// A typical memory definition which simply says the limits of the table
+    Normal(ast::TableType<'a>),
+
+    /// The elem segments of this table, starting from 0, explicitly listed
+    Inline {
+        /// The element type of this table.
+        elem: ast::RefType<'a>,
+        /// The element table entries to have, and the length of this list is
+        /// the limits of the table as well.
+        payload: ElemPayload<'a>,
+    },
+}
+
+impl<'a> Parse<'a> for Table<'a> {
+    fn parse(parser: Parser<'a>) -> Result<Self> {
+        let span = parser.parse::<kw::table>()?.0;
+        let id = parser.parse()?;
+        let exports = parser.parse()?;
+
+        // Three syntactic forms are possible at this point:
+        //
+        // * `elemtype (elem ...)` — an inline element segment
+        // * `(import "a" "b") limits` — an inline import
+        // * `limits` — a plain table definition
+        let mut lookahead = parser.lookahead1();
+        let kind = if lookahead.peek::<ast::RefType>() {
+            let elem = parser.parse()?;
+            let payload = parser.parens(|p| {
+                p.parse::<kw::elem>()?;
+                // An expression-style payload `(elem (ref.func $f) ...)`
+                // carries the table's element type along; an index-style
+                // payload `(elem $f $g)` does not.
+                let ty = if p.peek::<ast::LParen>() {
+                    Some(elem)
+                } else {
+                    None
+                };
+                ElemPayload::parse_tail(p, ty)
+            })?;
+            TableKind::Inline { elem, payload }
+        } else if lookahead.peek::<u32>() {
+            TableKind::Normal(parser.parse()?)
+        } else if let Some(import) = parser.parse()? {
+            TableKind::Import {
+                import,
+                ty: parser.parse()?,
+            }
+        } else {
+            return Err(lookahead.error());
+        };
+        Ok(Table {
+            span,
+            id,
+            exports,
+            kind,
+        })
+    }
+}
+
+/// An `elem` segment in a WebAssembly module.
+#[derive(Debug)]
+pub struct Elem<'a> {
+    /// Where this `elem` was defined.
+    pub span: ast::Span,
+    /// An optional name by which to refer to this segment.
+    pub id: Option<ast::Id<'a>>,
+    /// The way this segment was defined in the module.
+    pub kind: ElemKind<'a>,
+    /// The payload of this element segment, typically a list of functions.
+    pub payload: ElemPayload<'a>,
+}
+
+/// Different ways to define an element segment in a module.
+#[derive(Debug)]
+pub enum ElemKind<'a> {
+    /// A passive segment that isn't associated with a table and can be used in
+    /// various bulk-memory instructions.
+    Passive,
+
+    /// A declared element segment that is purely used to declare function
+    /// references.
+    Declared,
+
+    /// An active segment associated with a table.
+    Active {
+        /// The table this `elem` is initializing.
+        table: ast::ItemRef<'a, kw::table>,
+        /// The offset within `table` that we'll initialize at.
+        offset: ast::Expression<'a>,
+    },
+}
+
+/// Different ways to define the element segment payload in a module.
+#[derive(Debug, Clone)]
+pub enum ElemPayload<'a> {
+    /// This element segment has a contiguous list of function indices
+    Indices(Vec<ast::ItemRef<'a, kw::func>>),
+
+    /// This element segment has a list of optional function indices,
+    /// represented as expressions using `ref.func` and `ref.null`.
+    Exprs {
+        /// The desired type of each expression below.
+        ty: ast::RefType<'a>,
+        /// The expressions, currently optional function indices, in this
+        /// segment.
+        exprs: Vec<Option<ast::ItemRef<'a, kw::func>>>,
+    },
+}
+
+impl<'a> Parse<'a> for Elem<'a> {
+    fn parse(parser: Parser<'a>) -> Result<Self> {
+        let span = parser.parse::<kw::elem>()?.0;
+        let id = parser.parse()?;
+
+        // An active segment starts with either a table index or a
+        // parenthesized item reference — but a paren that opens a `RefType`
+        // belongs to the payload instead, not to a table reference.
+        let active = parser.peek::<u32>()
+            || (parser.peek::<ast::LParen>() && !parser.peek::<ast::RefType>());
+        let kind = if active {
+            // Default to table 0 when no explicit table is written.
+            let table = match parser.parse::<Option<ast::IndexOrRef<_>>>()? {
+                Some(index) => index.0,
+                None => ast::ItemRef::Item {
+                    kind: kw::table(parser.prev_span()),
+                    idx: ast::Index::Num(0, span),
+                    exports: Vec::new(),
+                },
+            };
+            let offset = parser.parens(|p| {
+                // The `offset` keyword is optional sugar around the offset
+                // expression.
+                if p.peek::<kw::offset>() {
+                    p.parse::<kw::offset>()?;
+                }
+                p.parse()
+            })?;
+            ElemKind::Active { table, offset }
+        } else if parser.peek::<kw::declare>() {
+            parser.parse::<kw::declare>()?;
+            ElemKind::Declared
+        } else {
+            ElemKind::Passive
+        };
+        let payload = parser.parse()?;
+        Ok(Elem {
+            span,
+            id,
+            kind,
+            payload,
+        })
+    }
+}
+
+impl<'a> Parse<'a> for ElemPayload<'a> {
+    fn parse(parser: Parser<'a>) -> Result<Self> {
+        ElemPayload::parse_tail(parser, parser.parse()?)
+    }
+}
+
+impl<'a> ElemPayload<'a> {
+    /// Parses the payload after an optional leading element type `ty` has
+    /// already been consumed (or determined to be absent).
+    fn parse_tail(parser: Parser<'a>, ty: Option<ast::RefType<'a>>) -> Result<Self> {
+        let ty = match ty {
+            Some(ty) => ty,
+            None => {
+                // Legacy syntax: an optional bare `func` keyword implies
+                // `funcref`.
+                parser.parse::<Option<kw::func>>()?;
+                ast::RefType::func()
+            }
+        };
+        // Bare indices are only allowed for `funcref`-typed payloads.
+        if let ast::HeapType::Func = ty.heap {
+            if parser.peek::<ast::IndexOrRef<kw::func>>() {
+                let mut elems = Vec::new();
+                while !parser.is_empty() {
+                    elems.push(parser.parse::<ast::IndexOrRef<_>>()?.0);
+                }
+                return Ok(ElemPayload::Indices(elems));
+            }
+        }
+        // Otherwise each element is a parenthesized expression, optionally
+        // wrapped in `(item ...)`.
+        let mut exprs = Vec::new();
+        while !parser.is_empty() {
+            let func = parser.parens(|p| match p.parse::<Option<kw::item>>()? {
+                Some(_) => {
+                    if p.peek::<ast::LParen>() {
+                        p.parens(|p| parse_ref_func(p, ty))
+                    } else {
+                        parse_ref_func(p, ty)
+                    }
+                }
+                None => parse_ref_func(p, ty),
+            })?;
+            exprs.push(func);
+        }
+        Ok(ElemPayload::Exprs { exprs, ty })
+    }
+}
+
+/// Parses a single `ref.null <ty>` or `ref.func <idx>` element expression,
+/// checking that a `ref.null` heap type matches the segment's element type.
+fn parse_ref_func<'a>(
+    parser: Parser<'a>,
+    ty: ast::RefType<'a>,
+) -> Result<Option<ast::ItemRef<'a, kw::func>>> {
+    let mut lookahead = parser.lookahead1();
+    if lookahead.peek::<kw::ref_null>() {
+        parser.parse::<kw::ref_null>()?;
+        let null_ty: ast::HeapType = parser.parse()?;
+        if ty.heap != null_ty {
+            return Err(parser.error("elem segment item doesn't match elem segment type"));
+        }
+        Ok(None)
+    } else if lookahead.peek::<kw::ref_func>() {
+        parser.parse::<kw::ref_func>()?;
+        Ok(Some(parser.parse::<ast::IndexOrRef<_>>()?.0))
+    } else {
+        Err(lookahead.error())
+    }
+}
diff --git a/third_party/rust/wast/src/ast/token.rs b/third_party/rust/wast/src/ast/token.rs
new file mode 100644
index 0000000000..581627cb3d
--- /dev/null
+++ b/third_party/rust/wast/src/ast/token.rs
@@ -0,0 +1,757 @@
+use crate::ast::{annotation, kw};
+use crate::lexer::FloatVal;
+use crate::parser::{Cursor, Parse, Parser, Peek, Result};
+use std::fmt;
+use std::hash::{Hash, Hasher};
+use std::str;
+
+/// A position in the original source stream, used to render errors.
+#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)]
+pub struct Span {
+    pub(crate) offset: usize,
+}
+
+impl Span {
+    /// Construct a `Span` from a byte offset in the source file.
+    pub fn from_offset(offset: usize) -> Self {
+        Span { offset }
+    }
+
+    /// Returns the line/column information of this span within `text`.
+    /// Line and column numbers are 0-indexed. User presentation is typically
+    /// 1-indexed, but 0-indexing is appropriate for internal use with
+    /// iterators and slices.
+    pub fn linecol_in(&self, text: &str) -> (usize, usize) {
+        let mut line_start = 0;
+        // Use `split_terminator` instead of `lines` so that any `\r` stays in
+        // the offset arithmetic; the `+ 1` accounts for the `\n` terminator.
+        for (line_idx, line) in text.split_terminator('\n').enumerate() {
+            let next_start = line_start + line.len() + 1;
+            if next_start > self.offset {
+                return (line_idx, self.offset - line_start);
+            }
+            line_start = next_start;
+        }
+        // The offset is past the end of `text`: report one line past the last.
+        (text.lines().count(), 0)
+    }
+}
+
+/// An identifier in a WebAssembly module, prefixed by `$` in the textual
+/// format.
+///
+/// An identifier is used to symbolically refer to items in a wasm module,
+/// typically via the [`Index`] type.
+#[derive(Copy, Clone)]
+pub struct Id<'a> {
+    name: &'a str,
+    gen: u32,
+    span: Span,
+}
+
+impl<'a> Id<'a> {
+    fn new(name: &'a str, span: Span) -> Id<'a> {
+        Id { name, gen: 0, span }
+    }
+
+    pub(crate) fn gensym(span: Span, gen: u32) -> Id<'a> {
+        // Generated identifiers all share the placeholder name "gensym" and
+        // are distinguished from each other by their nonzero `gen` counter.
+        Id {
+            name: "gensym",
+            gen,
+            span,
+        }
+    }
+
+    /// Returns the underlying name of this identifier.
+    ///
+    /// The name returned does not contain the leading `$`.
+    pub fn name(&self) -> &'a str {
+        self.name
+    }
+
+    /// Returns span of this identifier in the original source
+    pub fn span(&self) -> Span {
+        self.span
+    }
+
+    pub(crate) fn is_gensym(&self) -> bool {
+        self.gen != 0
+    }
+}
+
+impl<'a> Hash for Id<'a> {
+    fn hash<H: Hasher>(&self, state: &mut H) {
+        // `span` is deliberately excluded: identity is (name, gen) only,
+        // mirroring `PartialEq` below.
+        self.name.hash(state);
+        self.gen.hash(state);
+    }
+}
+
+impl<'a> PartialEq for Id<'a> {
+    fn eq(&self, other: &Id<'a>) -> bool {
+        (self.name, self.gen) == (other.name, other.gen)
+    }
+}
+
+impl<'a> Eq for Id<'a> {}
+
+impl<'a> Parse<'a> for Id<'a> {
+    fn parse(parser: Parser<'a>) -> Result<Self> {
+        parser.step(|c| match c.id() {
+            Some((name, rest)) => Ok((Id::new(name, c.cur_span()), rest)),
+            None => Err(c.error("expected an identifier")),
+        })
+    }
+}
+
+impl fmt::Debug for Id<'_> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        if self.is_gensym() {
+            f.debug_struct("Id").field("gen", &self.gen).finish()
+        } else {
+            self.name.fmt(f)
+        }
+    }
+}
+
+impl Peek for Id<'_> {
+    fn peek(cursor: Cursor<'_>) -> bool {
+        cursor.id().is_some()
+    }
+
+    fn display() -> &'static str {
+        "an identifier"
+    }
+}
+
+/// A reference to another item in a wasm module.
+///
+/// This type is used for items referring to other items (such as `call $foo`
+/// referencing function `$foo`). References can be either an index (u32) or an
+/// [`Id`] in the textual format.
+///
+/// The emission phase of a module will ensure that `Index::Id` is never used
+/// and switch them all to `Index::Num`.
+#[derive(Copy, Clone, Debug)]
+pub enum Index<'a> {
+    /// A numerical index that this references. The index space this is
+    /// referencing is implicit based on where this [`Index`] is stored.
+    Num(u32, Span),
+    /// A human-readable identifier this references. Like `Num`, the namespace
+    /// this references is based on where this is stored.
+    Id(Id<'a>),
+}
+
+impl Index<'_> {
+    /// Returns the source location where this `Index` was defined.
+ pub fn span(&self) -> Span { + match self { + Index::Num(_, span) => *span, + Index::Id(id) => id.span(), + } + } + + pub(crate) fn is_resolved(&self) -> bool { + match self { + Index::Num(..) => true, + _ => false, + } + } +} + +impl<'a> Parse<'a> for Index<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<Id>() { + Ok(Index::Id(parser.parse()?)) + } else if l.peek::<u32>() { + let (val, span) = parser.parse()?; + Ok(Index::Num(val, span)) + } else { + Err(l.error()) + } + } +} + +impl Peek for Index<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + u32::peek(cursor) || Id::peek(cursor) + } + + fn display() -> &'static str { + "an index" + } +} + +impl<'a> From<Id<'a>> for Index<'a> { + fn from(id: Id<'a>) -> Index<'a> { + Index::Id(id) + } +} + +impl PartialEq for Index<'_> { + fn eq(&self, other: &Index<'_>) -> bool { + match (self, other) { + (Index::Num(a, _), Index::Num(b, _)) => a == b, + (Index::Id(a), Index::Id(b)) => a == b, + _ => false, + } + } +} + +impl Eq for Index<'_> {} + +impl Hash for Index<'_> { + fn hash<H: Hasher>(&self, hasher: &mut H) { + match self { + Index::Num(a, _) => { + 0u8.hash(hasher); + a.hash(hasher); + } + Index::Id(a) => { + 1u8.hash(hasher); + a.hash(hasher); + } + } + } +} + +/// Parses `(func $foo)` +/// +/// Optionally includes export strings for module-linking sugar syntax for alias +/// injection. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub enum ItemRef<'a, K> { + Outer { + kind: K, + module: Index<'a>, + idx: Index<'a>, + }, + Item { + kind: K, + idx: Index<'a>, + exports: Vec<&'a str>, + }, +} + +impl<'a, K> ItemRef<'a, K> { + /// Unwraps the underlying `Index` for `ItemRef::Item`. + /// + /// Panics if this is `ItemRef::Outer` or if exports haven't been expanded + /// yet. + pub fn unwrap_index(&self) -> &Index<'a> { + match self { + ItemRef::Item { idx, exports, .. } => { + debug_assert!(exports.len() == 0); + idx + } + ItemRef::Outer { .. 
} => panic!("unwrap_index called on Parent"), + } + } +} + +impl<'a, K: Parse<'a>> Parse<'a> for ItemRef<'a, K> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|parser| { + let kind = parser.parse::<K>()?; + if parser.peek::<kw::outer>() { + parser.parse::<kw::outer>()?; + let module = parser.parse()?; + let idx = parser.parse()?; + Ok(ItemRef::Outer { kind, module, idx }) + } else { + let idx = parser.parse()?; + let mut exports = Vec::new(); + while !parser.is_empty() { + exports.push(parser.parse()?); + } + Ok(ItemRef::Item { kind, idx, exports }) + } + }) + } +} + +impl<'a, K: Peek> Peek for ItemRef<'a, K> { + fn peek(cursor: Cursor<'_>) -> bool { + match cursor.lparen() { + Some(remaining) => K::peek(remaining), + None => false, + } + } + + fn display() -> &'static str { + "an item reference" + } +} + +/// Convenience structure to parse `$f` or `(item $f)`. +#[derive(Clone, Debug)] +pub struct IndexOrRef<'a, K>(pub ItemRef<'a, K>); + +impl<'a, K> Parse<'a> for IndexOrRef<'a, K> +where + K: Parse<'a> + Default, +{ + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<Index<'_>>() { + Ok(IndexOrRef(ItemRef::Item { + kind: K::default(), + idx: parser.parse()?, + exports: Vec::new(), + })) + } else { + Ok(IndexOrRef(parser.parse()?)) + } + } +} + +impl<'a, K: Peek> Peek for IndexOrRef<'a, K> { + fn peek(cursor: Cursor<'_>) -> bool { + Index::peek(cursor) || ItemRef::<K>::peek(cursor) + } + + fn display() -> &'static str { + "an item reference" + } +} + +/// An `@name` annotation in source, currently of the form `@name "foo"` +#[derive(Copy, Clone, PartialEq, Debug)] +pub struct NameAnnotation<'a> { + /// The name specified for the item + pub name: &'a str, +} + +impl<'a> Parse<'a> for NameAnnotation<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<annotation::name>()?; + let name = parser.parse()?; + Ok(NameAnnotation { name }) + } +} + +impl<'a> Parse<'a> for Option<NameAnnotation<'a>> { + fn parse(parser: 
Parser<'a>) -> Result<Self> { + let _r = parser.register_annotation("name"); + Ok(if parser.peek2::<annotation::name>() { + Some(parser.parens(|p| p.parse())?) + } else { + None + }) + } +} + +macro_rules! integers { + ($($i:ident($u:ident))*) => ($( + impl<'a> Parse<'a> for $i { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(parser.parse::<($i, Span)>()?.0) + } + } + + impl<'a> Parse<'a> for ($i, Span) { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + if let Some((i, rest)) = c.integer() { + let (s, base) = i.val(); + let val = $i::from_str_radix(s, base) + .or_else(|_| { + $u::from_str_radix(s, base).map(|i| i as $i) + }); + return match val { + Ok(n) => Ok(((n, c.cur_span()), rest)), + Err(_) => Err(c.error(concat!( + "invalid ", + stringify!($i), + " number: constant out of range", + ))), + }; + } + Err(c.error(concat!("expected a ", stringify!($i)))) + }) + } + } + + impl Peek for $i { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.integer().is_some() + } + + fn display() -> &'static str { + stringify!($i) + } + } + )*) +} + +integers! 
{ + u8(u8) u16(u16) u32(u32) u64(u64) + i8(u8) i16(u16) i32(u32) i64(u64) +} + +impl<'a> Parse<'a> for &'a [u8] { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + if let Some((i, rest)) = c.string() { + return Ok((i, rest)); + } + Err(c.error("expected a string")) + }) + } +} + +impl Peek for &'_ [u8] { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.string().is_some() + } + + fn display() -> &'static str { + "string" + } +} + +impl<'a> Parse<'a> for &'a str { + fn parse(parser: Parser<'a>) -> Result<Self> { + str::from_utf8(parser.parse()?).map_err(|_| parser.error("malformed UTF-8 encoding")) + } +} + +impl Parse<'_> for String { + fn parse(parser: Parser<'_>) -> Result<Self> { + Ok(<&str>::parse(parser)?.to_string()) + } +} + +impl Peek for &'_ str { + fn peek(cursor: Cursor<'_>) -> bool { + <&[u8]>::peek(cursor) + } + + fn display() -> &'static str { + <&[u8]>::display() + } +} + +macro_rules! float { + ($($name:ident => { + bits: $int:ident, + float: $float:ident, + exponent_bits: $exp_bits:tt, + name: $parse:ident, + })*) => ($( + /// A parsed floating-point type + #[derive(Debug)] + pub struct $name { + /// The raw bits that this floating point number represents. + pub bits: $int, + } + + impl<'a> Parse<'a> for $name { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + let (val, rest) = if let Some((f, rest)) = c.float() { + ($parse(f.val()), rest) + } else if let Some((i, rest)) = c.integer() { + let (s, base) = i.val(); + ( + $parse(&FloatVal::Val { + hex: base == 16, + integral: s.into(), + decimal: None, + exponent: None, + }), + rest, + ) + } else { + return Err(c.error("expected a float")); + }; + match val { + Some(bits) => Ok(($name { bits }, rest)), + None => Err(c.error("invalid float value: constant out of range")), + } + }) + } + } + + fn $parse(val: &FloatVal<'_>) -> Option<$int> { + // Compute a few well-known constants about the float representation + // given the parameters to the macro here. 
+ let width = std::mem::size_of::<$int>() * 8; + let neg_offset = width - 1; + let exp_offset = neg_offset - $exp_bits; + let signif_bits = width - 1 - $exp_bits; + let signif_mask = (1 << exp_offset) - 1; + let bias = (1 << ($exp_bits - 1)) - 1; + + let (hex, integral, decimal, exponent_str) = match val { + // Infinity is when the exponent bits are all set and + // the significand is zero. + FloatVal::Inf { negative } => { + let exp_bits = (1 << $exp_bits) - 1; + let neg_bit = *negative as $int; + return Some( + (neg_bit << neg_offset) | + (exp_bits << exp_offset) + ); + } + + // NaN is when the exponent bits are all set and + // the significand is nonzero. The default of NaN is + // when only the highest bit of the significand is set. + FloatVal::Nan { negative, val } => { + let exp_bits = (1 << $exp_bits) - 1; + let neg_bit = *negative as $int; + let signif = val.unwrap_or(1 << (signif_bits - 1)) as $int; + // If the significand is zero then this is actually infinity + // so we fail to parse it. + if signif & signif_mask == 0 { + return None; + } + return Some( + (neg_bit << neg_offset) | + (exp_bits << exp_offset) | + (signif & signif_mask) + ); + } + + // This is trickier, handle this below + FloatVal::Val { hex, integral, decimal, exponent } => { + (hex, integral, decimal, exponent) + } + }; + + // Rely on Rust's standard library to parse base 10 floats + // correctly. + if !*hex { + let mut s = integral.to_string(); + if let Some(decimal) = decimal { + s.push_str("."); + s.push_str(&decimal); + } + if let Some(exponent) = exponent_str { + s.push_str("e"); + s.push_str(&exponent); + } + let float = s.parse::<$float>().ok()?; + // looks like the `*.wat` format considers infinite overflow to + // be invalid. + if float.is_infinite() { + return None; + } + return Some(float.to_bits()); + } + + // Parsing hex floats is... hard! I don't really know what most of + // this below does. It was copied from Gecko's implementation in + // `WasmTextToBinary.cpp`. 
Would love comments on this if you have + // them! + let decimal = decimal.as_ref().map(|s| &**s).unwrap_or(""); + let negative = integral.starts_with('-'); + let integral = integral.trim_start_matches('-').trim_start_matches('0'); + + // Do a bunch of work up front to locate the first non-zero digit + // to determine the initial exponent. There's a number of + // adjustments depending on where the digit was found, but the + // general idea here is that I'm not really sure why things are + // calculated the way they are but it should match Gecko. + let decimal_no_leading = decimal.trim_start_matches('0'); + let decimal_iter = if integral.is_empty() { + decimal_no_leading.chars() + } else { + decimal.chars() + }; + let mut digits = integral.chars() + .map(|c| (to_hex(c) as $int, false)) + .chain(decimal_iter.map(|c| (to_hex(c) as $int, true))); + let lead_nonzero_digit = match digits.next() { + Some((c, _)) => c, + // No digits? Must be `+0` or `-0`, being careful to handle the + // sign encoding here. + None if negative => return Some(1 << (width - 1)), + None => return Some(0), + }; + let mut significand = 0 as $int; + let mut exponent = if !integral.is_empty() { + 1 + } else { + -((decimal.len() - decimal_no_leading.len() + 1) as i32) + 1 + }; + let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4; + exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?; + let mut significand_pos = (width - (4 - (lz as usize))) as isize; + assert!(significand_pos >= 0); + significand |= lead_nonzero_digit << significand_pos; + + // Now that we've got an anchor in the string we parse the remaining + // digits. Again, not entirely sure why everything is the way it is + // here! This is copied frmo gecko. 
+ let mut discarded_extra_nonzero = false; + for (digit, decimal) in digits { + if !decimal { + exponent += 4; + } + if significand_pos > -4 { + significand_pos -= 4; + } + + if significand_pos >= 0 { + significand |= digit << significand_pos; + } else if significand_pos > -4 { + significand |= digit >> (4 - significand_pos); + discarded_extra_nonzero = (digit & !((!0) >> (4 - significand_pos))) != 0; + } else if digit != 0 { + discarded_extra_nonzero = true; + } + } + + exponent = exponent.checked_add(match exponent_str { + Some(s) => s.parse::<i32>().ok()?, + None => 0, + })?; + debug_assert!(significand != 0); + + let (encoded_exponent, encoded_significand, discarded_significand) = + if exponent <= -bias { + // Underflow to subnormal or zero. + let shift = exp_offset as i32 + exponent + bias; + if shift == 0 { + (0, 0, significand) + } else if shift < 0 || shift >= width as i32 { + (0, 0, 0) + } else { + ( + 0, + significand >> (width as i32 - shift), + significand << shift, + ) + } + } else if exponent <= bias { + // Normal (non-zero). The significand's leading 1 is encoded + // implicitly. + ( + ((exponent + bias) as $int) << exp_offset, + (significand >> (width - exp_offset - 1)) & signif_mask, + significand << (exp_offset + 1), + ) + } else { + // Overflow to infinity. + ( + ((1 << $exp_bits) - 1) << exp_offset, + 0, + 0, + ) + }; + + let bits = encoded_exponent | encoded_significand; + + // Apply rounding. If this overflows the significand, it carries + // into the exponent bit according to the magic of the IEEE 754 + // encoding. + // + // Or rather, the comment above is what Gecko says so it's copied + // here too. + let msb = 1 << (width - 1); + let bits = bits + + (((discarded_significand & msb != 0) + && ((discarded_significand & !msb != 0) || + discarded_extra_nonzero || + // ties to even + (encoded_significand & 1 != 0))) as $int); + + // Just before we return the bits be sure to handle the sign bit we + // found at the beginning. 
+ let bits = if negative { + bits | (1 << (width - 1)) + } else { + bits + }; + // looks like the `*.wat` format considers infinite overflow to + // be invalid. + if $float::from_bits(bits).is_infinite() { + return None; + } + Some(bits) + } + + )*) +} + +float! { + Float32 => { + bits: u32, + float: f32, + exponent_bits: 8, + name: strtof, + } + Float64 => { + bits: u64, + float: f64, + exponent_bits: 11, + name: strtod, + } +} + +fn to_hex(c: char) -> u8 { + match c { + 'a'..='f' => c as u8 - b'a' + 10, + 'A'..='F' => c as u8 - b'A' + 10, + _ => c as u8 - b'0', + } +} + +/// A convenience type to use with [`Parser::peek`](crate::parser::Parser::peek) +/// to see if the next token is an s-expression. +pub struct LParen { + _priv: (), +} + +impl Peek for LParen { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.lparen().is_some() + } + + fn display() -> &'static str { + "left paren" + } +} + +#[cfg(test)] +mod tests { + #[test] + fn hex_strtof() { + macro_rules! f { + ($a:tt) => (f!(@mk $a, None, None)); + ($a:tt p $e:tt) => (f!(@mk $a, None, Some($e.into()))); + ($a:tt . $b:tt) => (f!(@mk $a, Some($b.into()), None)); + ($a:tt . $b:tt p $e:tt) => (f!(@mk $a, Some($b.into()), Some($e.into()))); + (@mk $a:tt, $b:expr, $e:expr) => (crate::lexer::FloatVal::Val { + hex: true, + integral: $a.into(), + decimal: $b, + exponent: $e + }); + } + assert_eq!(super::strtof(&f!("0")), Some(0)); + assert_eq!(super::strtof(&f!("0" . "0")), Some(0)); + assert_eq!(super::strtof(&f!("0" . "0" p "2354")), Some(0)); + assert_eq!(super::strtof(&f!("-0")), Some(1 << 31)); + assert_eq!(super::strtof(&f!("f32")), Some(0x45732000)); + assert_eq!(super::strtof(&f!("0" . "f32")), Some(0x3f732000)); + assert_eq!(super::strtof(&f!("1" . "2")), Some(0x3f900000)); + assert_eq!( + super::strtof(&f!("0" . "00000100000000000" p "-126")), + Some(0) + ); + assert_eq!( + super::strtof(&f!("1" . 
"fffff4" p "-106")), + Some(0x0afffffa) + ); + assert_eq!(super::strtof(&f!("fffff98" p "-133")), Some(0x0afffffa)); + assert_eq!(super::strtof(&f!("0" . "081" p "023")), Some(0x48810000)); + assert_eq!( + super::strtof(&f!("1" . "00000100000000000" p "-50")), + Some(0x26800000) + ); + } +} diff --git a/third_party/rust/wast/src/ast/types.rs b/third_party/rust/wast/src/ast/types.rs new file mode 100644 index 0000000000..8a27c13bde --- /dev/null +++ b/third_party/rust/wast/src/ast/types.rs @@ -0,0 +1,807 @@ +use crate::ast::{self, kw}; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use std::mem; + +/// The value types for a wasm module. +#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub enum ValType<'a> { + I32, + I64, + F32, + F64, + V128, + Ref(RefType<'a>), + Rtt(u32, ast::Index<'a>), +} + +impl<'a> Parse<'a> for ValType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::i32>() { + parser.parse::<kw::i32>()?; + Ok(ValType::I32) + } else if l.peek::<kw::i64>() { + parser.parse::<kw::i64>()?; + Ok(ValType::I64) + } else if l.peek::<kw::f32>() { + parser.parse::<kw::f32>()?; + Ok(ValType::F32) + } else if l.peek::<kw::f64>() { + parser.parse::<kw::f64>()?; + Ok(ValType::F64) + } else if l.peek::<kw::v128>() { + parser.parse::<kw::v128>()?; + Ok(ValType::V128) + } else if l.peek::<RefType>() { + Ok(ValType::Ref(parser.parse()?)) + } else if l.peek::<ast::LParen>() { + parser.parens(|p| { + let mut l = p.lookahead1(); + if l.peek::<kw::rtt>() { + p.parse::<kw::rtt>()?; + Ok(ValType::Rtt(p.parse()?, p.parse()?)) + } else { + Err(l.error()) + } + }) + } else { + Err(l.error()) + } + } +} + +impl<'a> Peek for ValType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::i32::peek(cursor) + || kw::i64::peek(cursor) + || kw::f32::peek(cursor) + || kw::f64::peek(cursor) + || kw::v128::peek(cursor) + || (ast::LParen::peek(cursor) && kw::rtt::peek2(cursor)) + || 
RefType::peek(cursor) + } + fn display() -> &'static str { + "valtype" + } +} + +/// A heap type for a reference type +#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub enum HeapType<'a> { + /// An untyped function reference: funcref. This is part of the reference + /// types proposal. + Func, + /// A reference to any host value: externref. This is part of the reference + /// types proposal. + Extern, + /// A reference to any reference value: anyref. This is part of the GC + /// proposal. + Any, + /// A reference to an exception: exnref. This is part of the exception + /// handling proposal. + Exn, + /// A reference that has an identity that can be compared: eqref. This is + /// part of the GC proposal. + Eq, + /// An unboxed 31-bit integer: i31ref. This may be going away if there is no common + /// supertype of all reference types. Part of the GC proposal. + I31, + /// A reference to a function, struct, or array: ref T. This is part of the + /// GC proposal. 
+ Index(ast::Index<'a>), +} + +impl<'a> Parse<'a> for HeapType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(HeapType::Func) + } else if l.peek::<kw::r#extern>() { + parser.parse::<kw::r#extern>()?; + Ok(HeapType::Extern) + } else if l.peek::<kw::r#any>() { + parser.parse::<kw::r#any>()?; + Ok(HeapType::Any) + } else if l.peek::<kw::exn>() { + parser.parse::<kw::exn>()?; + Ok(HeapType::Exn) + } else if l.peek::<kw::eq>() { + parser.parse::<kw::eq>()?; + Ok(HeapType::Eq) + } else if l.peek::<kw::i31>() { + parser.parse::<kw::i31>()?; + Ok(HeapType::I31) + } else if l.peek::<ast::Index>() { + Ok(HeapType::Index(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +impl<'a> Peek for HeapType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::func::peek(cursor) + || kw::r#extern::peek(cursor) + || kw::any::peek(cursor) + || kw::exn::peek(cursor) + || kw::eq::peek(cursor) + || kw::i31::peek(cursor) + || (ast::LParen::peek(cursor) && kw::r#type::peek2(cursor)) + } + fn display() -> &'static str { + "heaptype" + } +} + +/// A reference type in a wasm module. +#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub struct RefType<'a> { + pub nullable: bool, + pub heap: HeapType<'a>, +} + +impl<'a> RefType<'a> { + /// A `funcref` as an abbreviation for `(ref null func)`. + pub fn func() -> Self { + RefType { + nullable: true, + heap: HeapType::Func, + } + } + + /// An `externref` as an abbreviation for `(ref null extern)`. + pub fn r#extern() -> Self { + RefType { + nullable: true, + heap: HeapType::Extern, + } + } + + /// An `anyref` as an abbreviation for `(ref null any)`. + pub fn any() -> Self { + RefType { + nullable: true, + heap: HeapType::Any, + } + } + + /// An `exnref` as an abbreviation for `(ref null exn)`. 
+ pub fn exn() -> Self { + RefType { + nullable: true, + heap: HeapType::Exn, + } + } + + /// An `eqref` as an abbreviation for `(ref null eq)`. + pub fn eq() -> Self { + RefType { + nullable: true, + heap: HeapType::Eq, + } + } + + /// An `i31ref` as an abbreviation for `(ref null i31)`. + pub fn i31() -> Self { + RefType { + nullable: true, + heap: HeapType::I31, + } + } +} + +impl<'a> Parse<'a> for RefType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::funcref>() { + parser.parse::<kw::funcref>()?; + Ok(RefType::func()) + } else if l.peek::<kw::anyfunc>() { + parser.parse::<kw::anyfunc>()?; + Ok(RefType::func()) + } else if l.peek::<kw::externref>() { + parser.parse::<kw::externref>()?; + Ok(RefType::r#extern()) + } else if l.peek::<kw::anyref>() { + parser.parse::<kw::anyref>()?; + Ok(RefType::any()) + } else if l.peek::<kw::exnref>() { + parser.parse::<kw::exnref>()?; + Ok(RefType::exn()) + } else if l.peek::<kw::eqref>() { + parser.parse::<kw::eqref>()?; + Ok(RefType::eq()) + } else if l.peek::<kw::i31ref>() { + parser.parse::<kw::i31ref>()?; + Ok(RefType::i31()) + } else if l.peek::<ast::LParen>() { + parser.parens(|p| { + let mut l = parser.lookahead1(); + if l.peek::<kw::r#ref>() { + p.parse::<kw::r#ref>()?; + + let mut nullable = false; + if parser.peek::<kw::null>() { + parser.parse::<kw::null>()?; + nullable = true; + } + + Ok(RefType { + nullable, + heap: parser.parse()?, + }) + } else { + Err(l.error()) + } + }) + } else { + Err(l.error()) + } + } +} + +impl<'a> Peek for RefType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::funcref::peek(cursor) + || /* legacy */ kw::anyfunc::peek(cursor) + || kw::externref::peek(cursor) + || kw::anyref::peek(cursor) + || kw::exnref::peek(cursor) + || kw::eqref::peek(cursor) + || kw::i31ref::peek(cursor) + || (ast::LParen::peek(cursor) && kw::r#ref::peek2(cursor)) + } + fn display() -> &'static str { + "reftype" + } +} + +/// The types of values that 
may be used in a struct or array. +#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub enum StorageType<'a> { + I8, + I16, + Val(ValType<'a>), +} + +impl<'a> Parse<'a> for StorageType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::i8>() { + parser.parse::<kw::i8>()?; + Ok(StorageType::I8) + } else if l.peek::<kw::i16>() { + parser.parse::<kw::i16>()?; + Ok(StorageType::I16) + } else if l.peek::<ValType>() { + Ok(StorageType::Val(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +/// Type for a `global` in a wasm module +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct GlobalType<'a> { + /// The element type of this `global` + pub ty: ValType<'a>, + /// Whether or not the global is mutable or not. + pub mutable: bool, +} + +impl<'a> Parse<'a> for GlobalType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek2::<kw::r#mut>() { + parser.parens(|p| { + p.parse::<kw::r#mut>()?; + Ok(GlobalType { + ty: parser.parse()?, + mutable: true, + }) + }) + } else { + Ok(GlobalType { + ty: parser.parse()?, + mutable: false, + }) + } + } +} + +/// Min/max limits used for tables/memories. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct Limits { + /// The minimum number of units for this type. + pub min: u32, + /// An optional maximum number of units for this type. + pub max: Option<u32>, +} + +impl<'a> Parse<'a> for Limits { + fn parse(parser: Parser<'a>) -> Result<Self> { + let min = parser.parse()?; + let max = if parser.peek::<u32>() { + Some(parser.parse()?) + } else { + None + }; + Ok(Limits { min, max }) + } +} + +/// Min/max limits used for 64-bit memories +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct Limits64 { + /// The minimum number of units for this type. + pub min: u64, + /// An optional maximum number of units for this type. 
+ pub max: Option<u64>, +} + +impl<'a> Parse<'a> for Limits64 { + fn parse(parser: Parser<'a>) -> Result<Self> { + let min = parser.parse()?; + let max = if parser.peek::<u64>() { + Some(parser.parse()?) + } else { + None + }; + Ok(Limits64 { min, max }) + } +} + +/// Configuration for a table of a wasm mdoule +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct TableType<'a> { + /// Limits on the element sizes of this table + pub limits: Limits, + /// The type of element stored in this table + pub elem: RefType<'a>, +} + +impl<'a> Parse<'a> for TableType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(TableType { + limits: parser.parse()?, + elem: parser.parse()?, + }) + } +} + +/// Configuration for a memory of a wasm module +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum MemoryType { + /// A 32-bit memory + B32 { + /// Limits on the page sizes of this memory + limits: Limits, + /// Whether or not this is a shared (atomic) memory type + shared: bool, + }, + /// A 64-bit memory + B64 { + /// Limits on the page sizes of this memory + limits: Limits64, + /// Whether or not this is a shared (atomic) memory type + shared: bool, + }, +} + +impl<'a> Parse<'a> for MemoryType { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::i64>() { + parser.parse::<kw::i64>()?; + let limits = parser.parse()?; + let shared = parser.parse::<Option<kw::shared>>()?.is_some(); + Ok(MemoryType::B64 { limits, shared }) + } else { + parser.parse::<Option<kw::i32>>()?; + let limits = parser.parse()?; + let shared = parser.parse::<Option<kw::shared>>()?.is_some(); + Ok(MemoryType::B32 { limits, shared }) + } + } +} + +/// A function type with parameters and results. +#[derive(Clone, Debug, Default)] +pub struct FunctionType<'a> { + /// The parameters of a function, optionally each having an identifier for + /// name resolution and a name for the custom `name` section. 
+ pub params: Box< + [( + Option<ast::Id<'a>>, + Option<ast::NameAnnotation<'a>>, + ValType<'a>, + )], + >, + /// The results types of a function. + pub results: Box<[ValType<'a>]>, +} + +impl<'a> FunctionType<'a> { + fn finish_parse(&mut self, allow_names: bool, parser: Parser<'a>) -> Result<()> { + let mut params = Vec::from(mem::take(&mut self.params)); + let mut results = Vec::from(mem::take(&mut self.results)); + while parser.peek2::<kw::param>() || parser.peek2::<kw::result>() { + parser.parens(|p| { + let mut l = p.lookahead1(); + if l.peek::<kw::param>() { + if results.len() > 0 { + return Err(p.error( + "result before parameter (or unexpected token): \ + cannot list params after results", + )); + } + p.parse::<kw::param>()?; + if p.is_empty() { + return Ok(()); + } + let (id, name) = if allow_names { + (p.parse::<Option<_>>()?, p.parse::<Option<_>>()?) + } else { + (None, None) + }; + let parse_more = id.is_none() && name.is_none(); + let ty = p.parse()?; + params.push((id, name, ty)); + while parse_more && !p.is_empty() { + params.push((None, None, p.parse()?)); + } + } else if l.peek::<kw::result>() { + p.parse::<kw::result>()?; + while !p.is_empty() { + results.push(p.parse()?); + } + } else { + return Err(l.error()); + } + Ok(()) + })?; + } + self.params = params.into(); + self.results = results.into(); + Ok(()) + } +} + +impl<'a> Parse<'a> for FunctionType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut ret = FunctionType { + params: Box::new([]), + results: Box::new([]), + }; + ret.finish_parse(true, parser)?; + Ok(ret) + } +} + +impl<'a> Peek for FunctionType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + if let Some(next) = cursor.lparen() { + match next.keyword() { + Some(("param", _)) | Some(("result", _)) => return true, + _ => {} + } + } + + false + } + + fn display() -> &'static str { + "function type" + } +} + +/// A function type with parameters and results. 
+#[derive(Clone, Debug, Default)] +pub struct FunctionTypeNoNames<'a>(pub FunctionType<'a>); + +impl<'a> Parse<'a> for FunctionTypeNoNames<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut ret = FunctionType { + params: Box::new([]), + results: Box::new([]), + }; + ret.finish_parse(false, parser)?; + Ok(FunctionTypeNoNames(ret)) + } +} + +impl<'a> Peek for FunctionTypeNoNames<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + FunctionType::peek(cursor) + } + + fn display() -> &'static str { + FunctionType::display() + } +} + +impl<'a> From<FunctionTypeNoNames<'a>> for FunctionType<'a> { + fn from(ty: FunctionTypeNoNames<'a>) -> FunctionType<'a> { + ty.0 + } +} + +/// A struct type with fields. +#[derive(Clone, Debug)] +pub struct StructType<'a> { + /// The fields of the struct + pub fields: Vec<StructField<'a>>, +} + +impl<'a> Parse<'a> for StructType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut ret = StructType { fields: Vec::new() }; + while !parser.is_empty() { + let field = if parser.peek2::<kw::field>() { + parser.parens(|parser| { + parser.parse::<kw::field>()?; + StructField::parse(parser, true) + }) + } else { + StructField::parse(parser, false) + }; + ret.fields.push(field?); + } + Ok(ret) + } +} + +/// A field of a struct type. +#[derive(Clone, Debug)] +pub struct StructField<'a> { + /// An optional identifier for name resolution. + pub id: Option<ast::Id<'a>>, + /// Whether this field may be mutated or not. + pub mutable: bool, + /// The storage type stored in this field. + pub ty: StorageType<'a>, +} + +impl<'a> StructField<'a> { + fn parse(parser: Parser<'a>, with_id: bool) -> Result<Self> { + let id = if with_id { parser.parse()? 
} else { None }; + let (ty, mutable) = if parser.peek2::<kw::r#mut>() { + let ty = parser.parens(|parser| { + parser.parse::<kw::r#mut>()?; + parser.parse() + })?; + (ty, true) + } else { + (parser.parse::<StorageType<'a>>()?, false) + }; + Ok(StructField { id, mutable, ty }) + } +} + +/// An array type with fields. +#[derive(Clone, Debug)] +pub struct ArrayType<'a> { + /// Whether this field may be mutated or not. + pub mutable: bool, + /// The storage type stored in this field. + pub ty: StorageType<'a>, +} + +impl<'a> Parse<'a> for ArrayType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let (ty, mutable) = if parser.peek2::<kw::r#mut>() { + let ty = parser.parens(|parser| { + parser.parse::<kw::r#mut>()?; + parser.parse() + })?; + (ty, true) + } else { + (parser.parse::<StorageType<'a>>()?, false) + }; + Ok(ArrayType { mutable, ty }) + } +} + +/// A type for a nested module +#[derive(Clone, Debug, Default)] +pub struct ModuleType<'a> { + /// The imports that are expected for this module type. + pub imports: Vec<ast::Import<'a>>, + /// The exports that this module type is expected to have. 
+ pub exports: Vec<ExportType<'a>>, +} + +impl<'a> Parse<'a> for ModuleType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut imports = Vec::new(); + while parser.peek2::<kw::import>() { + imports.push(parser.parens(|p| p.parse())?); + } + let mut exports = Vec::new(); + while parser.peek2::<kw::export>() { + parser.parens(|p| { + exports.push(p.parse()?); + Ok(()) + })?; + } + Ok(ModuleType { imports, exports }) + } +} + +impl<'a> Peek for ModuleType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + if let Some(next) = cursor.lparen() { + match next.keyword() { + Some(("import", _)) | Some(("export", _)) => return true, + _ => {} + } + } + + false + } + + fn display() -> &'static str { + "module type" + } +} + +/// A type for a nested instance +#[derive(Clone, Debug, Default)] +pub struct InstanceType<'a> { + /// The exported types from this instance + pub exports: Vec<ExportType<'a>>, +} + +impl<'a> Parse<'a> for InstanceType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut exports = Vec::new(); + while !parser.is_empty() { + exports.push(parser.parens(|p| p.parse())?); + } + Ok(InstanceType { exports }) + } +} + +impl<'a> Peek for InstanceType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + if let Some(next) = cursor.lparen() { + match next.keyword() { + Some(("export", _)) => return true, + _ => {} + } + } + + false + } + + fn display() -> &'static str { + "instance type" + } +} + +/// The type of an exported item from a module or instance. +#[derive(Debug, Clone)] +pub struct ExportType<'a> { + /// Where this export was defined. + pub span: ast::Span, + /// The name of this export. + pub name: &'a str, + /// The signature of the item that's exported. 
+ pub item: ast::ItemSig<'a>, +} + +impl<'a> Parse<'a> for ExportType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::export>()?.0; + let name = parser.parse()?; + let item = parser.parens(|p| p.parse())?; + Ok(ExportType { span, name, item }) + } +} + +/// A definition of a type. +#[derive(Debug)] +pub enum TypeDef<'a> { + /// A function type definition. + Func(FunctionType<'a>), + /// A struct type definition. + Struct(StructType<'a>), + /// An array type definition. + Array(ArrayType<'a>), + /// A module type definition. + Module(ModuleType<'a>), + /// An instance type definition. + Instance(InstanceType<'a>), +} + +/// A type declaration in a module +#[derive(Debug)] +pub struct Type<'a> { + /// Where this type was defined. + pub span: ast::Span, + /// An optional identifer to refer to this `type` by as part of name + /// resolution. + pub id: Option<ast::Id<'a>>, + /// The type that we're declaring. + pub def: TypeDef<'a>, +} + +impl<'a> Parse<'a> for Type<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::r#type>()?.0; + let id = parser.parse()?; + let def = parser.parens(|parser| { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(TypeDef::Func(parser.parse()?)) + } else if l.peek::<kw::r#struct>() { + parser.parse::<kw::r#struct>()?; + Ok(TypeDef::Struct(parser.parse()?)) + } else if l.peek::<kw::array>() { + parser.parse::<kw::array>()?; + Ok(TypeDef::Array(parser.parse()?)) + } else if l.peek::<kw::module>() { + parser.parse::<kw::module>()?; + Ok(TypeDef::Module(parser.parse()?)) + } else if l.peek::<kw::instance>() { + parser.parse::<kw::instance>()?; + Ok(TypeDef::Instance(parser.parse()?)) + } else { + Err(l.error()) + } + })?; + Ok(Type { span, id, def }) + } +} + +/// A reference to a type defined in this module. +#[derive(Clone, Debug)] +pub struct TypeUse<'a, T> { + /// The type that we're referencing, if it was present. 
+ pub index: Option<ast::ItemRef<'a, kw::r#type>>, + /// The inline type, if present. + pub inline: Option<T>, +} + +impl<'a, T> TypeUse<'a, T> { + /// Constructs a new instance of `TypeUse` without an inline definition but + /// with an index specified. + pub fn new_with_index(idx: ast::Index<'a>) -> TypeUse<'a, T> { + TypeUse { + index: Some(ast::ItemRef::Item { + idx, + kind: kw::r#type::default(), + exports: Vec::new(), + }), + inline: None, + } + } +} + +impl<'a, T: Peek + Parse<'a>> Parse<'a> for TypeUse<'a, T> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let index = if parser.peek2::<kw::r#type>() { + Some(parser.parse()?) + } else { + None + }; + let inline = parser.parse()?; + + Ok(TypeUse { index, inline }) + } +} + +impl<'a> From<TypeUse<'a, FunctionTypeNoNames<'a>>> for TypeUse<'a, FunctionType<'a>> { + fn from(src: TypeUse<'a, FunctionTypeNoNames<'a>>) -> TypeUse<'a, FunctionType<'a>> { + TypeUse { + index: src.index, + inline: src.inline.map(|x| x.into()), + } + } +} diff --git a/third_party/rust/wast/src/ast/wast.rs b/third_party/rust/wast/src/ast/wast.rs new file mode 100644 index 0000000000..dd373d40e4 --- /dev/null +++ b/third_party/rust/wast/src/ast/wast.rs @@ -0,0 +1,356 @@ +use crate::ast::{self, kw}; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::{AssertExpression, NanPattern, V128Pattern}; + +/// A parsed representation of a `*.wast` file. +/// +/// WAST files are not officially specified but are used in the official test +/// suite to write official spec tests for wasm. This type represents a parsed +/// `*.wast` file which parses a list of directives in a file. 
+pub struct Wast<'a> { + #[allow(missing_docs)] + pub directives: Vec<WastDirective<'a>>, +} + +impl<'a> Parse<'a> for Wast<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut directives = Vec::new(); + + // If it looks like a directive token is in the stream then we parse a + // bunch of directives, otherwise assume this is an inline module. + if parser.peek2::<WastDirectiveToken>() { + while !parser.is_empty() { + directives.push(parser.parens(|p| p.parse())?); + } + } else { + let module = parser.parse::<ast::Wat>()?.module; + directives.push(WastDirective::Module(module)); + } + Ok(Wast { directives }) + } +} + +struct WastDirectiveToken; + +impl Peek for WastDirectiveToken { + fn peek(cursor: Cursor<'_>) -> bool { + let kw = match cursor.keyword() { + Some((kw, _)) => kw, + None => return false, + }; + kw.starts_with("assert_") || kw == "module" || kw == "register" || kw == "invoke" + } + + fn display() -> &'static str { + unimplemented!() + } +} + +/// The different kinds of directives found in a `*.wast` file. +/// +/// It's not entirely clear to me what all of these are per se, but they're only +/// really interesting to test harnesses mostly. 
+#[allow(missing_docs)] +pub enum WastDirective<'a> { + Module(ast::Module<'a>), + QuoteModule { + span: ast::Span, + source: Vec<&'a [u8]>, + }, + AssertMalformed { + span: ast::Span, + module: QuoteModule<'a>, + message: &'a str, + }, + AssertInvalid { + span: ast::Span, + module: ast::Module<'a>, + message: &'a str, + }, + Register { + span: ast::Span, + name: &'a str, + module: Option<ast::Id<'a>>, + }, + Invoke(WastInvoke<'a>), + AssertTrap { + span: ast::Span, + exec: WastExecute<'a>, + message: &'a str, + }, + AssertReturn { + span: ast::Span, + exec: WastExecute<'a>, + results: Vec<ast::AssertExpression<'a>>, + }, + AssertExhaustion { + span: ast::Span, + call: WastInvoke<'a>, + message: &'a str, + }, + AssertUnlinkable { + span: ast::Span, + module: ast::Module<'a>, + message: &'a str, + }, +} + +impl WastDirective<'_> { + /// Returns the location in the source that this directive was defined at + pub fn span(&self) -> ast::Span { + match self { + WastDirective::Module(m) => m.span, + WastDirective::AssertMalformed { span, .. } + | WastDirective::Register { span, .. } + | WastDirective::QuoteModule{ span, .. } + | WastDirective::AssertTrap { span, .. } + | WastDirective::AssertReturn { span, .. } + | WastDirective::AssertExhaustion { span, .. } + | WastDirective::AssertUnlinkable { span, .. } + | WastDirective::AssertInvalid { span, .. 
} => *span, + WastDirective::Invoke(i) => i.span, + } + } +} + +impl<'a> Parse<'a> for WastDirective<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::module>() { + if parser.peek2::<kw::quote>() { + parser.parse::<kw::module>()?; + let span = parser.parse::<kw::quote>()?.0; + let mut source = Vec::new(); + while !parser.is_empty() { + source.push(parser.parse()?); + } + Ok(WastDirective::QuoteModule { span, source }) + } else { + Ok(WastDirective::Module(parser.parse()?)) + } + } else if l.peek::<kw::assert_malformed>() { + let span = parser.parse::<kw::assert_malformed>()?.0; + Ok(WastDirective::AssertMalformed { + span, + module: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::assert_invalid>() { + let span = parser.parse::<kw::assert_invalid>()?.0; + Ok(WastDirective::AssertInvalid { + span, + module: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::register>() { + let span = parser.parse::<kw::register>()?.0; + Ok(WastDirective::Register { + span, + name: parser.parse()?, + module: parser.parse()?, + }) + } else if l.peek::<kw::invoke>() { + Ok(WastDirective::Invoke(parser.parse()?)) + } else if l.peek::<kw::assert_trap>() { + let span = parser.parse::<kw::assert_trap>()?.0; + Ok(WastDirective::AssertTrap { + span, + exec: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::assert_return>() { + let span = parser.parse::<kw::assert_return>()?.0; + let exec = parser.parens(|p| p.parse())?; + let mut results = Vec::new(); + while !parser.is_empty() { + results.push(parser.parens(|p| p.parse())?); + } + Ok(WastDirective::AssertReturn { + span, + exec, + results, + }) + } else if l.peek::<kw::assert_return_canonical_nan>() { + let span = parser.parse::<kw::assert_return_canonical_nan>()?.0; + Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: 
vec![AssertExpression::LegacyCanonicalNaN], + }) + } else if l.peek::<kw::assert_return_canonical_nan_f32x4>() { + let span = parser.parse::<kw::assert_return_canonical_nan_f32x4>()?.0; + let pat = V128Pattern::F32x4([ + NanPattern::CanonicalNan, + NanPattern::CanonicalNan, + NanPattern::CanonicalNan, + NanPattern::CanonicalNan, + ]); + Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: vec![AssertExpression::V128(pat)], + }) + } else if l.peek::<kw::assert_return_canonical_nan_f64x2>() { + let span = parser.parse::<kw::assert_return_canonical_nan_f64x2>()?.0; + let pat = V128Pattern::F64x2([NanPattern::CanonicalNan, NanPattern::CanonicalNan]); + Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: vec![AssertExpression::V128(pat)], + }) + } else if l.peek::<kw::assert_return_arithmetic_nan>() { + let span = parser.parse::<kw::assert_return_arithmetic_nan>()?.0; + Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: vec![AssertExpression::LegacyArithmeticNaN], + }) + } else if l.peek::<kw::assert_return_arithmetic_nan_f32x4>() { + let span = parser.parse::<kw::assert_return_arithmetic_nan_f32x4>()?.0; + let pat = V128Pattern::F32x4([ + NanPattern::ArithmeticNan, + NanPattern::ArithmeticNan, + NanPattern::ArithmeticNan, + NanPattern::ArithmeticNan, + ]); + Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: vec![AssertExpression::V128(pat)], + }) + } else if l.peek::<kw::assert_return_arithmetic_nan_f64x2>() { + let span = parser.parse::<kw::assert_return_arithmetic_nan_f64x2>()?.0; + let pat = V128Pattern::F64x2([NanPattern::ArithmeticNan, NanPattern::ArithmeticNan]); + Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: vec![AssertExpression::V128(pat)], + }) + } else if l.peek::<kw::assert_return_func>() { + let span = parser.parse::<kw::assert_return_func>()?.0; + 
Ok(WastDirective::AssertReturn { + span, + exec: parser.parens(|p| p.parse())?, + results: vec![AssertExpression::RefFunc(None)], + }) + } else if l.peek::<kw::assert_exhaustion>() { + let span = parser.parse::<kw::assert_exhaustion>()?.0; + Ok(WastDirective::AssertExhaustion { + span, + call: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::assert_unlinkable>() { + let span = parser.parse::<kw::assert_unlinkable>()?.0; + Ok(WastDirective::AssertUnlinkable { + span, + module: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else { + Err(l.error()) + } + } +} + +#[allow(missing_docs)] +pub enum WastExecute<'a> { + Invoke(WastInvoke<'a>), + Module(ast::Module<'a>), + Get { + module: Option<ast::Id<'a>>, + global: &'a str, + }, +} + +impl<'a> Parse<'a> for WastExecute<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::invoke>() { + Ok(WastExecute::Invoke(parser.parse()?)) + } else if l.peek::<kw::module>() { + Ok(WastExecute::Module(parser.parse()?)) + } else if l.peek::<kw::get>() { + parser.parse::<kw::get>()?; + Ok(WastExecute::Get { + module: parser.parse()?, + global: parser.parse()?, + }) + } else { + Err(l.error()) + } + } +} + +#[allow(missing_docs)] +pub struct WastInvoke<'a> { + pub span: ast::Span, + pub module: Option<ast::Id<'a>>, + pub name: &'a str, + pub args: Vec<ast::Expression<'a>>, +} + +impl<'a> Parse<'a> for WastInvoke<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::invoke>()?.0; + let module = parser.parse()?; + let name = parser.parse()?; + let mut args = Vec::new(); + while !parser.is_empty() { + args.push(parser.parens(|p| p.parse())?); + } + Ok(WastInvoke { + span, + module, + name, + args, + }) + } +} + +#[allow(missing_docs)] +pub enum QuoteModule<'a> { + Module(ast::Module<'a>), + Quote(Vec<&'a [u8]>), +} + +impl<'a> Parse<'a> for QuoteModule<'a> { + fn parse(parser: Parser<'a>) -> 
Result<Self> { + if parser.peek2::<kw::quote>() { + parser.parse::<kw::module>()?; + parser.parse::<kw::quote>()?; + let mut src = Vec::new(); + while !parser.is_empty() { + src.push(parser.parse()?); + } + Ok(QuoteModule::Quote(src)) + } else { + Ok(QuoteModule::Module(parser.parse()?)) + } + } +} + +#[cfg(test)] +mod tests { + use crate::ast::wast::WastDirective; + use crate::parser::{parse, ParseBuffer}; + + macro_rules! assert_parses_to_directive { + ($text:expr, $pattern:pat) => {{ + let buffer = ParseBuffer::new($text).unwrap(); + let directive: WastDirective = parse(&buffer).unwrap(); + if let $pattern = directive { + } else { + panic!("assertion failed") + } + }}; + } + + #[test] + fn assert_nan() { + assert_parses_to_directive!("assert_return_canonical_nan_f32x4 (invoke \"foo\" (f32.const 0))", WastDirective::AssertReturn { .. }); + assert_parses_to_directive!("assert_return_canonical_nan_f64x2 (invoke \"foo\" (f32.const 0))", WastDirective::AssertReturn { .. }); + assert_parses_to_directive!("assert_return_arithmetic_nan_f32x4 (invoke \"foo\" (f32.const 0))", WastDirective::AssertReturn { .. }); + assert_parses_to_directive!("assert_return_arithmetic_nan_f64x2 (invoke \"foo\" (f32.const 0))", WastDirective::AssertReturn { .. 
}); + } +} diff --git a/third_party/rust/wast/src/binary.rs b/third_party/rust/wast/src/binary.rs new file mode 100644 index 0000000000..8c559734b5 --- /dev/null +++ b/third_party/rust/wast/src/binary.rs @@ -0,0 +1,1242 @@ +use crate::ast::*; + +pub fn encode(module: &Module<'_>) -> Vec<u8> { + match &module.kind { + ModuleKind::Text(fields) => encode_fields(&module.id, &module.name, fields), + ModuleKind::Binary(bytes) => bytes.iter().flat_map(|b| b.iter().cloned()).collect(), + } +} + +fn encode_fields( + module_id: &Option<Id<'_>>, + module_name: &Option<NameAnnotation<'_>>, + fields: &[ModuleField<'_>], +) -> Vec<u8> { + use crate::ast::CustomPlace::*; + use crate::ast::CustomPlaceAnchor::*; + + let mut types = Vec::new(); + let mut imports = Vec::new(); + let mut funcs = Vec::new(); + let mut tables = Vec::new(); + let mut memories = Vec::new(); + let mut globals = Vec::new(); + let mut exports = Vec::new(); + let mut start = Vec::new(); + let mut elem = Vec::new(); + let mut data = Vec::new(); + let mut events = Vec::new(); + let mut customs = Vec::new(); + let mut instances = Vec::new(); + let mut modules = Vec::new(); + let mut aliases = Vec::new(); + for field in fields { + match field { + ModuleField::Type(i) => types.push(i), + ModuleField::Import(i) => imports.push(i), + ModuleField::Func(i) => funcs.push(i), + ModuleField::Table(i) => tables.push(i), + ModuleField::Memory(i) => memories.push(i), + ModuleField::Global(i) => globals.push(i), + ModuleField::Export(i) => exports.push(i), + ModuleField::Start(i) => start.push(i), + ModuleField::Elem(i) => elem.push(i), + ModuleField::Data(i) => data.push(i), + ModuleField::Event(i) => events.push(i), + ModuleField::Custom(i) => customs.push(i), + ModuleField::Instance(i) => instances.push(i), + ModuleField::NestedModule(i) => modules.push(i), + ModuleField::Alias(a) => aliases.push(a), + } + } + + let mut e = Encoder { + wasm: Vec::new(), + tmp: Vec::new(), + customs: &customs, + }; + 
e.wasm.extend(b"\0asm"); + e.wasm.extend(b"\x01\0\0\0"); + + e.custom_sections(BeforeFirst); + + let mut items = fields + .iter() + .filter(|i| match i { + ModuleField::Alias(_) + | ModuleField::Type(_) + | ModuleField::Import(_) + | ModuleField::NestedModule(_) + | ModuleField::Instance(_) => true, + _ => false, + }) + .peekable(); + + // A special path is used for now to handle non-module-linking modules to + // work around WebAssembly/annotations#11 + if aliases.len() == 0 && modules.len() == 0 && instances.len() == 0 { + e.section_list(1, Type, &types); + e.section_list(2, Import, &imports); + } else { + while let Some(field) = items.next() { + macro_rules! list { + ($code:expr, $name:ident) => { + list!($code, $name, $name) + }; + ($code:expr, $field:ident, $custom:ident) => { + if let ModuleField::$field(f) = field { + let mut list = vec![f]; + while let Some(ModuleField::$field(f)) = items.peek() { + list.push(f); + items.next(); + } + e.section_list($code, $custom, &list); + } + }; + } + list!(1, Type); + list!(2, Import); + list!(14, NestedModule, Module); + list!(15, Instance); + list!(16, Alias); + } + } + + let functys = funcs.iter().map(|f| &f.ty).collect::<Vec<_>>(); + e.section_list(3, Func, &functys); + e.section_list(4, Table, &tables); + e.section_list(5, Memory, &memories); + e.section_list(13, Event, &events); + e.section_list(6, Global, &globals); + e.section_list(7, Export, &exports); + e.custom_sections(Before(Start)); + if let Some(start) = start.get(0) { + e.section(8, start); + } + e.custom_sections(After(Start)); + e.section_list(9, Elem, &elem); + if contains_bulk_memory(&funcs) { + e.section(12, &data.len()); + } + e.section_list(10, Code, &funcs); + e.section_list(11, Data, &data); + + let names = find_names(module_id, module_name, fields); + if !names.is_empty() { + e.section(0, &("name", names)); + } + e.custom_sections(AfterLast); + + return e.wasm; + + fn contains_bulk_memory(funcs: &[&crate::ast::Func<'_>]) -> bool { + funcs + 
.iter() + .filter_map(|f| match &f.kind { + FuncKind::Inline { expression, .. } => Some(expression), + _ => None, + }) + .flat_map(|e| e.instrs.iter()) + .any(|i| match i { + Instruction::MemoryInit(_) | Instruction::DataDrop(_) => true, + _ => false, + }) + } +} + +struct Encoder<'a> { + wasm: Vec<u8>, + tmp: Vec<u8>, + customs: &'a [&'a Custom<'a>], +} + +impl Encoder<'_> { + fn section(&mut self, id: u8, section: &dyn Encode) { + self.tmp.truncate(0); + section.encode(&mut self.tmp); + self.wasm.push(id); + self.tmp.encode(&mut self.wasm); + } + + fn custom_sections(&mut self, place: CustomPlace) { + for entry in self.customs.iter() { + if entry.place == place { + self.section(0, &(entry.name, entry)); + } + } + } + + fn section_list(&mut self, id: u8, anchor: CustomPlaceAnchor, list: &[impl Encode]) { + self.custom_sections(CustomPlace::Before(anchor)); + if !list.is_empty() { + self.section(id, &list) + } + self.custom_sections(CustomPlace::After(anchor)); + } +} + +pub(crate) trait Encode { + fn encode(&self, e: &mut Vec<u8>); +} + +impl<T: Encode + ?Sized> Encode for &'_ T { + fn encode(&self, e: &mut Vec<u8>) { + T::encode(self, e) + } +} + +impl<T: Encode> Encode for [T] { + fn encode(&self, e: &mut Vec<u8>) { + self.len().encode(e); + for item in self { + item.encode(e); + } + } +} + +impl<T: Encode> Encode for Vec<T> { + fn encode(&self, e: &mut Vec<u8>) { + <[T]>::encode(self, e) + } +} + +impl Encode for str { + fn encode(&self, e: &mut Vec<u8>) { + self.len().encode(e); + e.extend_from_slice(self.as_bytes()); + } +} + +impl Encode for usize { + fn encode(&self, e: &mut Vec<u8>) { + assert!(*self <= u32::max_value() as usize); + (*self as u32).encode(e) + } +} + +impl Encode for u8 { + fn encode(&self, e: &mut Vec<u8>) { + e.push(*self); + } +} + +impl Encode for u32 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::unsigned(e, (*self).into()).unwrap(); + } +} + +impl Encode for i32 { + fn encode(&self, e: &mut Vec<u8>) { + 
leb128::write::signed(e, (*self).into()).unwrap(); + } +} + +impl Encode for u64 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::unsigned(e, (*self).into()).unwrap(); + } +} + +impl Encode for i64 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::signed(e, *self).unwrap(); + } +} + +impl Encode for FunctionType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.params.len().encode(e); + for (_, _, ty) in self.params.iter() { + ty.encode(e); + } + self.results.encode(e); + } +} + +impl Encode for StructType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.fields.len().encode(e); + for field in self.fields.iter() { + field.ty.encode(e); + (field.mutable as i32).encode(e); + } + } +} + +impl Encode for ArrayType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + (self.mutable as i32).encode(e); + } +} + +impl Encode for ModuleType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.imports.encode(e); + self.exports.encode(e); + } +} + +impl Encode for InstanceType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.exports.encode(e); + } +} + +impl Encode for ExportType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.name.encode(e); + self.item.encode(e); + } +} + +impl Encode for Type<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.def { + TypeDef::Func(func) => { + e.push(0x60); + func.encode(e) + } + TypeDef::Struct(r#struct) => { + e.push(0x5f); + r#struct.encode(e) + } + TypeDef::Array(array) => { + e.push(0x5e); + array.encode(e) + } + TypeDef::Module(module) => { + e.push(0x61); + module.encode(e) + } + TypeDef::Instance(instance) => { + e.push(0x62); + instance.encode(e) + } + } + } +} + +impl Encode for Option<Id<'_>> { + fn encode(&self, _e: &mut Vec<u8>) { + // used for parameters in the tuple impl as well as instruction labels + } +} + +impl<T: Encode, U: Encode> Encode for (T, U) { + fn encode(&self, e: &mut Vec<u8>) { + self.0.encode(e); + self.1.encode(e); + } +} + +impl<'a> Encode for ValType<'a> { + fn 
encode(&self, e: &mut Vec<u8>) { + match self { + ValType::I32 => e.push(0x7f), + ValType::I64 => e.push(0x7e), + ValType::F32 => e.push(0x7d), + ValType::F64 => e.push(0x7c), + ValType::V128 => e.push(0x7b), + ValType::Rtt(depth, index) => { + e.push(0x69); + depth.encode(e); + index.encode(e); + } + ValType::Ref(ty) => { + ty.encode(e); + } + } + } +} + +impl<'a> Encode for HeapType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + HeapType::Func => e.push(0x70), + HeapType::Extern => e.push(0x6f), + HeapType::Any => e.push(0x6e), + HeapType::Eq => e.push(0x6d), + HeapType::I31 => e.push(0x6a), + HeapType::Exn => e.push(0x68), + HeapType::Index(index) => { + index.encode(e); + } + } + } +} + +impl<'a> Encode for RefType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + // The 'funcref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Func, + } => e.push(0x70), + // The 'externref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Extern, + } => e.push(0x6f), + // The 'eqref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Eq, + } => e.push(0x6d), + // The 'i31ref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::I31, + } => e.push(0x6a), + // The 'exnref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Exn, + } => e.push(0x68), + + // Generic 'ref opt <heaptype>' encoding + RefType { + nullable: true, + heap, + } => { + e.push(0x6c); + heap.encode(e); + } + // Generic 'ref <heaptype>' encoding + RefType { + nullable: false, + heap, + } => { + e.push(0x6b); + heap.encode(e); + } + } + } +} + +impl<'a> Encode for StorageType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + StorageType::I8 => e.push(0x7a), + StorageType::I16 => e.push(0x79), + StorageType::Val(ty) => { + ty.encode(e); + } + } + } +} + +impl Encode for Import<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.module.encode(e); + match self.field { + Some(s) => s.encode(e), 
+ None => { + e.push(0x00); + e.push(0xff); + } + } + self.item.encode(e); + } +} + +impl Encode for ItemSig<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.kind { + ItemKind::Func(f) => { + e.push(0x00); + f.encode(e); + } + ItemKind::Table(f) => { + e.push(0x01); + f.encode(e); + } + ItemKind::Memory(f) => { + e.push(0x02); + f.encode(e); + } + ItemKind::Global(f) => { + e.push(0x03); + f.encode(e); + } + ItemKind::Event(f) => { + e.push(0x04); + f.encode(e); + } + ItemKind::Module(m) => { + e.push(0x05); + m.encode(e); + } + ItemKind::Instance(i) => { + e.push(0x06); + i.encode(e); + } + } + } +} + +impl<T> Encode for TypeUse<'_, T> { + fn encode(&self, e: &mut Vec<u8>) { + self.index + .as_ref() + .expect("TypeUse should be filled in by this point") + .encode(e) + } +} + +impl Encode for Index<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + Index::Num(n, _) => n.encode(e), + Index::Id(n) => panic!("unresolved index in emission: {:?}", n), + } + } +} + +impl<T> Encode for IndexOrRef<'_, T> { + fn encode(&self, e: &mut Vec<u8>) { + self.0.encode(e); + } +} + +impl<T> Encode for ItemRef<'_, T> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + ItemRef::Outer { .. } => panic!("should be expanded previously"), + ItemRef::Item { idx, exports, .. 
} => { + assert!(exports.is_empty()); + idx.encode(e); + } + } + } +} + +impl<'a> Encode for TableType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + self.elem.encode(e); + self.limits.encode(e); + } +} + +impl Encode for Limits { + fn encode(&self, e: &mut Vec<u8>) { + match self.max { + Some(max) => { + e.push(0x01); + self.min.encode(e); + max.encode(e); + } + None => { + e.push(0x00); + self.min.encode(e); + } + } + } +} + +impl Encode for MemoryType { + fn encode(&self, e: &mut Vec<u8>) { + match self { + MemoryType::B32 { limits, shared } => { + let flag_max = limits.max.is_some() as u8; + let flag_shared = *shared as u8; + let flags = flag_max | (flag_shared << 1); + e.push(flags); + limits.min.encode(e); + if let Some(max) = limits.max { + max.encode(e); + } + } + MemoryType::B64 { limits, shared } => { + let flag_max = limits.max.is_some() as u8; + let flag_shared = *shared as u8; + let flags = flag_max | (flag_shared << 1) | 0x04; + e.push(flags); + limits.min.encode(e); + if let Some(max) = limits.max { + max.encode(e); + } + } + } + } +} + +impl<'a> Encode for GlobalType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + if self.mutable { + e.push(0x01); + } else { + e.push(0x00); + } + } +} + +impl Encode for Table<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + match &self.kind { + TableKind::Normal(t) => t.encode(e), + _ => panic!("TableKind should be normal during encoding"), + } + } +} + +impl Encode for Memory<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + match &self.kind { + MemoryKind::Normal(t) => t.encode(e), + _ => panic!("MemoryKind should be normal during encoding"), + } + } +} + +impl Encode for Global<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + self.ty.encode(e); + match &self.kind { + GlobalKind::Inline(expr) => expr.encode(e), + _ => panic!("GlobalKind should be inline during encoding"), + } + } +} 
+ +impl Encode for Export<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.name.encode(e); + if let ItemRef::Item { kind, .. } = &self.index { + kind.encode(e); + } + self.index.encode(e); + } +} + +impl Encode for ExportKind { + fn encode(&self, e: &mut Vec<u8>) { + match self { + ExportKind::Func => e.push(0x00), + ExportKind::Table => e.push(0x01), + ExportKind::Memory => e.push(0x02), + ExportKind::Global => e.push(0x03), + ExportKind::Event => e.push(0x04), + ExportKind::Module => e.push(0x05), + ExportKind::Instance => e.push(0x06), + ExportKind::Type => e.push(0x07), + } + } +} + +impl Encode for Elem<'_> { + fn encode(&self, e: &mut Vec<u8>) { + // Try to switch element expressions to indices if we can which uses a + // more MVP-compatible encoding. + // + // FIXME(WebAssembly/wabt#1447) ideally we wouldn't do this so we could + // be faithful to the original format. + let mut to_encode = self.payload.clone(); + if let ElemPayload::Exprs { + ty: + RefType { + nullable: true, + heap: HeapType::Func, + }, + exprs, + } = &to_encode + { + if let Some(indices) = extract_indices(exprs) { + to_encode = ElemPayload::Indices(indices); + } + } + + match (&self.kind, &to_encode) { + ( + ElemKind::Active { + table: + ItemRef::Item { + idx: Index::Num(0, _), + .. + }, + offset, + }, + ElemPayload::Indices(_), + ) => { + e.push(0x00); + offset.encode(e); + } + (ElemKind::Passive, ElemPayload::Indices(_)) => { + e.push(0x01); // flags + e.push(0x00); // extern_kind + } + (ElemKind::Active { table, offset }, ElemPayload::Indices(_)) => { + e.push(0x02); // flags + table.encode(e); + offset.encode(e); + e.push(0x00); // extern_kind + } + ( + ElemKind::Active { + table: + ItemRef::Item { + idx: Index::Num(0, _), + .. + }, + offset, + }, + ElemPayload::Exprs { + ty: + RefType { + nullable: true, + heap: HeapType::Func, + }, + .. + }, + ) => { + e.push(0x04); + offset.encode(e); + } + (ElemKind::Passive, ElemPayload::Exprs { ty, .. 
}) => { + e.push(0x05); + ty.encode(e); + } + (ElemKind::Active { table, offset }, ElemPayload::Exprs { ty, .. }) => { + e.push(0x06); + table.encode(e); + offset.encode(e); + ty.encode(e); + } + (ElemKind::Declared, ElemPayload::Indices(_)) => { + e.push(0x03); // flags + e.push(0x00); // extern_kind + } + (ElemKind::Declared, ElemPayload::Exprs { ty, .. }) => { + e.push(0x07); // flags + ty.encode(e); + } + } + + to_encode.encode(e); + + fn extract_indices<'a>( + indices: &[Option<ItemRef<'a, kw::func>>], + ) -> Option<Vec<ItemRef<'a, kw::func>>> { + indices.iter().cloned().collect() + } + } +} + +impl Encode for ElemPayload<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + ElemPayload::Indices(v) => v.encode(e), + ElemPayload::Exprs { exprs, ty } => { + exprs.len().encode(e); + for idx in exprs { + match idx { + Some(idx) => { + Instruction::RefFunc(IndexOrRef(idx.clone())).encode(e); + } + None => { + Instruction::RefNull(ty.heap).encode(e); + } + } + Instruction::End(None).encode(e); + } + } + } + } +} + +impl Encode for Data<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.kind { + DataKind::Passive => e.push(0x01), + DataKind::Active { memory, offset } => { + if let ItemRef::Item { + idx: Index::Num(0, _), + .. 
+ } = memory + { + e.push(0x00); + } else { + e.push(0x02); + memory.encode(e); + } + offset.encode(e); + } + } + self.data.iter().map(|l| l.len()).sum::<usize>().encode(e); + for val in self.data.iter() { + val.push_onto(e); + } + } +} + +impl Encode for Func<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + let mut tmp = Vec::new(); + let (expr, locals) = match &self.kind { + FuncKind::Inline { expression, locals } => (expression, locals), + _ => panic!("should only have inline functions in emission"), + }; + + locals.encode(&mut tmp); + expr.encode(&mut tmp); + + tmp.len().encode(e); + e.extend_from_slice(&tmp); + } +} + +impl Encode for Vec<Local<'_>> { + fn encode(&self, e: &mut Vec<u8>) { + let mut locals_compressed = Vec::<(u32, ValType)>::new(); + for local in self { + if let Some((cnt, prev)) = locals_compressed.last_mut() { + if *prev == local.ty { + *cnt += 1; + continue; + } + } + locals_compressed.push((1, local.ty)); + } + locals_compressed.encode(e); + } +} + +impl Encode for Expression<'_> { + fn encode(&self, e: &mut Vec<u8>) { + for instr in self.instrs.iter() { + instr.encode(e); + } + e.push(0x0b); + } +} + +impl Encode for BlockType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + // block types using an index are encoded as an sleb, not a uleb + if let Some(ItemRef::Item { + idx: Index::Num(n, _), + .. 
+ }) = &self.ty.index + { + return i64::from(*n).encode(e); + } + let ty = self + .ty + .inline + .as_ref() + .expect("function type not filled in"); + if ty.params.is_empty() && ty.results.is_empty() { + return e.push(0x40); + } + if ty.params.is_empty() && ty.results.len() == 1 { + return ty.results[0].encode(e); + } + panic!("multi-value block types should have an index"); + } +} + +impl Encode for FuncBindType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + } +} + +impl Encode for LetType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.block.encode(e); + self.locals.encode(e); + } +} + +impl Encode for LaneArg { + fn encode(&self, e: &mut Vec<u8>) { + self.lane.encode(e); + } +} + +impl Encode for MemArg<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.memory { + ItemRef::Item { + idx: Index::Num(0, _), + .. + } => { + self.align.trailing_zeros().encode(e); + self.offset.encode(e); + } + n => { + (self.align.trailing_zeros() | (1 << 6)).encode(e); + self.offset.encode(e); + n.encode(e); + } + } + } +} + +impl Encode for LoadOrStoreLane<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.memarg.encode(e); + self.lane.encode(e); + } +} + +impl Encode for CallIndirect<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + self.table.encode(e); + } +} + +impl Encode for TableInit<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.elem.encode(e); + self.table.encode(e); + } +} + +impl Encode for TableCopy<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dst.encode(e); + self.src.encode(e); + } +} + +impl Encode for TableArg<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dst.encode(e); + } +} + +impl Encode for MemoryArg<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.mem.encode(e); + } +} + +impl Encode for MemoryInit<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.data.encode(e); + self.mem.encode(e); + } +} + +impl Encode for MemoryCopy<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dst.encode(e); + 
self.src.encode(e); + } +} + +impl Encode for BrTableIndices<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.labels.encode(e); + self.default.encode(e); + } +} + +impl Encode for Float32 { + fn encode(&self, e: &mut Vec<u8>) { + e.extend_from_slice(&self.bits.to_le_bytes()); + } +} + +impl Encode for Float64 { + fn encode(&self, e: &mut Vec<u8>) { + e.extend_from_slice(&self.bits.to_le_bytes()); + } +} + +struct Names<'a> { + module: Option<&'a str>, + funcs: Vec<(u32, &'a str)>, + locals: Vec<(u32, Vec<(u32, &'a str)>)>, +} + +fn find_names<'a>( + module_id: &Option<Id<'a>>, + module_name: &Option<NameAnnotation<'a>>, + fields: &[ModuleField<'a>], +) -> Names<'a> { + fn get_name<'a>(id: &Option<Id<'a>>, name: &Option<NameAnnotation<'a>>) -> Option<&'a str> { + name.as_ref().map(|n| n.name).or(id.and_then(|id| { + if id.is_gensym() { + None + } else { + Some(id.name()) + } + })) + } + + let mut funcs = Vec::new(); + let mut locals = Vec::new(); + let mut idx = 0; + for field in fields { + match field { + ModuleField::Import(i) => { + match i.item.kind { + ItemKind::Func(_) => {} + _ => continue, + } + + if let Some(name) = get_name(&i.item.id, &i.item.name) { + funcs.push((idx, name)); + } + + idx += 1; + } + ModuleField::Func(f) => { + if let Some(name) = get_name(&f.id, &f.name) { + funcs.push((idx, name)); + } + let mut local_names = Vec::new(); + let mut local_idx = 0; + + // Consult the inline type listed for local names of parameters. + // This is specifically preserved during the name resolution + // pass, but only for functions, so here we can look at the + // original source's names. + if let Some(ty) = &f.ty.inline { + for (id, name, _) in ty.params.iter() { + if let Some(name) = get_name(id, name) { + local_names.push((local_idx, name)); + } + local_idx += 1; + } + } + if let FuncKind::Inline { locals, .. 
} = &f.kind { + for local in locals { + if let Some(name) = get_name(&local.id, &local.name) { + local_names.push((local_idx, name)); + } + local_idx += 1; + } + } + if local_names.len() > 0 { + locals.push((idx, local_names)); + } + idx += 1; + } + ModuleField::Alias(Alias { + id, + name, + kind: + AliasKind::InstanceExport { + kind: ExportKind::Func, + .. + }, + .. + }) => { + if let Some(name) = get_name(id, name) { + funcs.push((idx, name)); + } + idx += 1; + } + _ => {} + } + } + + Names { + module: get_name(module_id, module_name), + funcs, + locals, + } +} + +impl Names<'_> { + fn is_empty(&self) -> bool { + self.module.is_none() && self.funcs.is_empty() && self.locals.is_empty() + } +} + +impl Encode for Names<'_> { + fn encode(&self, dst: &mut Vec<u8>) { + let mut tmp = Vec::new(); + + let mut subsec = |id: u8, data: &mut Vec<u8>| { + dst.push(id); + data.encode(dst); + data.truncate(0); + }; + + if let Some(id) = self.module { + id.encode(&mut tmp); + subsec(0, &mut tmp); + } + if self.funcs.len() > 0 { + self.funcs.encode(&mut tmp); + subsec(1, &mut tmp); + } + if self.locals.len() > 0 { + self.locals.encode(&mut tmp); + subsec(2, &mut tmp); + } + } +} + +impl Encode for Id<'_> { + fn encode(&self, dst: &mut Vec<u8>) { + assert!(!self.is_gensym()); + self.name().encode(dst); + } +} + +impl Encode for V128Const { + fn encode(&self, dst: &mut Vec<u8>) { + dst.extend_from_slice(&self.to_le_bytes()); + } +} + +impl Encode for I8x16Shuffle { + fn encode(&self, dst: &mut Vec<u8>) { + dst.extend_from_slice(&self.lanes); + } +} + +impl<'a> Encode for SelectTypes<'a> { + fn encode(&self, dst: &mut Vec<u8>) { + match &self.tys { + Some(list) => { + dst.push(0x1c); + list.encode(dst); + } + None => dst.push(0x1b), + } + } +} + +impl Encode for Custom<'_> { + fn encode(&self, e: &mut Vec<u8>) { + for list in self.data.iter() { + e.extend_from_slice(list); + } + } +} + +impl Encode for Event<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + } +} + 
impl Encode for EventType<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        match self {
            EventType::Exception(ty) => {
                // 0x00 is the exception attribute byte, followed by the
                // function type of the exception.
                e.push(0x00);
                ty.encode(e);
            }
        }
    }
}

impl Encode for BrOnExn<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.label.encode(e);
        self.exn.encode(e);
    }
}

impl Encode for BrOnCast<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.label.encode(e);
        self.val.encode(e);
        self.rtt.encode(e);
    }
}

impl Encode for RTTSub<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.depth.encode(e);
        self.input_rtt.encode(e);
        self.output_rtt.encode(e);
    }
}

impl Encode for RefTest<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.val.encode(e);
        self.rtt.encode(e);
    }
}

impl Encode for StructAccess<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.r#struct.encode(e);
        self.field.encode(e);
    }
}

impl Encode for StructNarrow<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.from.encode(e);
        self.to.encode(e);
    }
}

impl Encode for NestedModule<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        // By emission time all nested modules must have been expanded to
        // their inline form; `Import`-style nested modules are a bug here.
        let fields = match &self.kind {
            NestedModuleKind::Inline { fields, .. } => fields,
            _ => panic!("should only have inline modules in emission"),
        };

        encode_fields(&self.id, &self.name, fields).encode(e);
    }
}

impl Encode for Instance<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        // Exports should have been flattened away during resolution.
        assert!(self.exports.names.is_empty());
        let (module, args) = match &self.kind {
            InstanceKind::Inline { module, args } => (module, args),
            _ => panic!("should only have inline instances in emission"),
        };
        // 0x00 tags the "instantiate module" form of the instance section.
        e.push(0x00);
        module.encode(e);
        args.encode(e);
    }
}

impl Encode for InstanceArg<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        self.name.encode(e);
        // Fully-resolved item references carry an item kind byte before the
        // index itself.
        if let ItemRef::Item { kind, .. } = &self.index {
            kind.encode(e);
        }
        self.index.encode(e);
    }
}

impl Encode for Alias<'_> {
    fn encode(&self, e: &mut Vec<u8>) {
        match &self.kind {
            // 0x00: alias of an export of a local instance.
            AliasKind::InstanceExport {
                instance,
                export,
                kind,
            } => {
                e.push(0x00);
                instance.encode(e);
                kind.encode(e);
                export.encode(e);
            }
            // 0x01: alias of an item in an outer (enclosing) module.
            AliasKind::Outer {
                module,
                index,
                kind,
            } => {
                e.push(0x01);
                module.encode(e);
                kind.encode(e);
                index.encode(e);
            }
        }
    }
}
diff --git a/third_party/rust/wast/src/lexer.rs b/third_party/rust/wast/src/lexer.rs
new file mode 100644
index 0000000000..99c46239f1
--- /dev/null
+++ b/third_party/rust/wast/src/lexer.rs
@@ -0,0 +1,1125 @@
//! Definition of a lexer for the WebAssembly text format.
//!
//! This module provides a [`Lexer`][] type which is an iterator over the raw
//! tokens of a WebAssembly text file. A [`Lexer`][] accounts for every single
//! byte in a WebAssembly text file, returning tokens even for comments and
//! whitespace. Typically you'll ignore comments and whitespace, however.
//!
//! If you'd like to iterate over the tokens in a file you can do so via:
//!
//! ```
//! # fn foo() -> Result<(), wast::Error> {
//! use wast::lexer::Lexer;
//!
//! let wat = "(module (func $foo))";
//! for token in Lexer::new(wat) {
//!     println!("{:?}", token?);
//! }
//! # Ok(())
//! # }
//! ```
//!
//! Note that you'll typically not use this module but will rather use
//! [`ParseBuffer`](crate::parser::ParseBuffer) instead.
//!
//! [`Lexer`]: crate::lexer::Lexer

use crate::{Error, Span};
use std::borrow::Cow;
use std::char;
use std::fmt;
use std::iter;
use std::str;

/// A structure used to lex the s-expression syntax of WAT files.
///
/// This structure is used to generate [`Token`] items, which should account for
/// every single byte of the input as we iterate over it. A [`LexError`] is
/// returned for any non-lexable text.
#[derive(Clone)]
pub struct Lexer<'a> {
    // Peekable char iterator over `input`; peeking gives one token of
    // lookahead without committing to consuming a character.
    it: iter::Peekable<str::CharIndices<'a>>,
    // The full original source text; tokens borrow slices out of this.
    input: &'a str,
}

/// A fragment of source lex'd from an input string.
///
/// This enumeration contains all kinds of fragments, including comments and
/// whitespace. For most cases you'll probably ignore these and simply look at
/// tokens.
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    /// A line comment, preceded with `;;`
    LineComment(&'a str),

    /// A block comment, surrounded by `(;` and `;)`. Note that these can be
    /// nested.
    BlockComment(&'a str),

    /// A fragment of source that represents whitespace.
    Whitespace(&'a str),

    /// A left-parenthesis, including the source text for where it comes from.
    LParen(&'a str),
    /// A right-parenthesis, including the source text for where it comes from.
    RParen(&'a str),

    /// A string literal, which is actually a list of bytes.
    String(WasmString<'a>),

    /// An identifier (like `$foo`).
    ///
    /// All identifiers start with `$` and the payload here is the original
    /// source text.
    Id(&'a str),

    /// A keyword, or something that starts with an alphabetic character.
    ///
    /// The payload here is the original source text.
    Keyword(&'a str),

    /// A reserved series of `idchar` symbols. The spec reserves these for
    /// future use; you'll probably generate an error about an unexpected
    /// token when encountering one.
    Reserved(&'a str),

    /// An integer.
    Integer(Integer<'a>),

    /// A float.
    Float(Float<'a>),
}

/// Errors that can be generated while lexing.
///
/// All lexing errors have line/column/position information as well as a
/// `LexError` indicating what kind of error happened while lexing.
#[derive(Debug, Clone, PartialEq)]
pub enum LexError {
    /// A dangling block comment was found with an unbalanced `(;` which was
    /// never terminated in the file.
    DanglingBlockComment,

    /// An unexpected character was encountered when generally parsing and
    /// looking for something else.
    Unexpected(char),

    /// An invalid `char` in a string literal was found.
    InvalidStringElement(char),

    /// An invalid string escape letter was found (the thing after the `\` in
    /// string literals)
    InvalidStringEscape(char),

    /// An invalid hexadecimal digit was found.
    InvalidHexDigit(char),

    /// An invalid base-10 digit was found.
    InvalidDigit(char),

    /// Parsing expected `wanted` but ended up finding `found` instead where the
    /// two characters aren't the same.
    Expected {
        /// The character that was expected to be found
        wanted: char,
        /// The character that was actually found
        found: char,
    },

    /// We needed to parse more but EOF (or end of the string) was encountered.
    UnexpectedEof,

    /// A number failed to parse because it was too big to fit within the target
    /// type.
    NumberTooBig,

    /// An invalid unicode value was found in a `\u{...}` escape in a string,
    /// only valid unicode scalars can be escaped that way.
    InvalidUnicodeValue(u32),

    /// A lone underscore was found when parsing a number, since underscores
    /// should always be preceded and succeeded with a digit of some form.
    LoneUnderscore,

    #[doc(hidden)]
    __Nonexhaustive,
}

/// A sign token for an integer.
#[derive(Clone, Copy, Debug, PartialEq)]
pub enum SignToken {
    /// Plus sign: "+",
    Plus,
    /// Minus sign: "-",
    Minus,
}

/// A parsed integer, signed or unsigned.
///
/// Methods can be used to access the value of the integer.
#[derive(Debug, PartialEq)]
pub struct Integer<'a>(Box<IntegerInner<'a>>);

#[derive(Debug, PartialEq)]
struct IntegerInner<'a> {
    // Leading sign, if one was written in the source.
    sign: Option<SignToken>,
    // The original source text of the whole literal.
    src: &'a str,
    // The digits with underscores removed; borrowed when no underscores were
    // present, owned otherwise.
    val: Cow<'a, str>,
    // Whether `val` should be parsed as base-16 (a `0x` literal).
    hex: bool,
}

/// A parsed float.
///
/// Methods can be used to access the value of the float.
#[derive(Debug, PartialEq)]
pub struct Float<'a>(Box<FloatInner<'a>>);

#[derive(Debug, PartialEq)]
struct FloatInner<'a> {
    // The original source text of the whole literal.
    src: &'a str,
    // The decomposed value of the float; see `FloatVal`.
    val: FloatVal<'a>,
}

/// A parsed string.
+#[derive(Debug, PartialEq)] +pub struct WasmString<'a>(Box<WasmStringInner<'a>>); + +#[derive(Debug, PartialEq)] +struct WasmStringInner<'a> { + src: &'a str, + val: Cow<'a, [u8]>, +} + +/// Possible parsed float values +#[derive(Debug, PartialEq)] +pub enum FloatVal<'a> { + /// A float `NaN` representation + Nan { + /// The specific bits to encode for this float, optionally + val: Option<u64>, + /// Whether or not this is a negative `NaN` or not. + negative: bool, + }, + /// An float infinite representation, + Inf { + #[allow(missing_docs)] + negative: bool, + }, + /// A parsed and separated floating point value + Val { + /// Whether or not the `integral` and `decimal` are specified in hex + hex: bool, + /// The float parts before the `.` + integral: Cow<'a, str>, + /// The float parts after the `.` + decimal: Option<Cow<'a, str>>, + /// The exponent to multiple this `integral.decimal` portion of the + /// float by. If `hex` is true this is `2^exponent` and otherwise it's + /// `10^exponent` + exponent: Option<Cow<'a, str>>, + }, +} + +impl<'a> Lexer<'a> { + /// Creates a new lexer which will lex the `input` source string. + pub fn new(input: &str) -> Lexer<'_> { + Lexer { + it: input.char_indices().peekable(), + input, + } + } + + /// Returns the original source input that we're lexing. + pub fn input(&self) -> &'a str { + self.input + } + + /// Lexes the next token in the input. + /// + /// Returns `Some` if a token is found or `None` if we're at EOF. + /// + /// # Errors + /// + /// Returns an error if the input is malformed. + pub fn parse(&mut self) -> Result<Option<Token<'a>>, Error> { + if let Some(ws) = self.ws() { + return Ok(Some(Token::Whitespace(ws))); + } + if let Some(comment) = self.comment()? { + return Ok(Some(comment)); + } + if let Some(token) = self.token()? 
{ + return Ok(Some(token)); + } + match self.it.next() { + Some((i, ch)) => Err(self.error(i, LexError::Unexpected(ch))), + None => Ok(None), + } + } + + fn token(&mut self) -> Result<Option<Token<'a>>, Error> { + // First two are easy, they're just parens + if let Some(pos) = self.eat_char('(') { + return Ok(Some(Token::LParen(&self.input[pos..pos + 1]))); + } + if let Some(pos) = self.eat_char(')') { + return Ok(Some(Token::RParen(&self.input[pos..pos + 1]))); + } + + // Strings are also pretty easy, leading `"` is a dead giveaway + if let Some(pos) = self.eat_char('"') { + let val = self.string()?; + let src = &self.input[pos..self.cur()]; + return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner { + val, + src, + }))))); + } + + let (start, prefix) = match self.it.peek().cloned() { + Some((i, ch)) if is_idchar(ch) => (i, ch), + Some((i, ch)) if is_reserved_extra(ch) => { + self.it.next(); + return Ok(Some(Token::Reserved(&self.input[i..self.cur()]))); + } + Some((i, ch)) => return Err(self.error(i, LexError::Unexpected(ch))), + None => return Ok(None), + }; + + while let Some((_, ch)) = self.it.peek().cloned() { + if is_idchar(ch) { + self.it.next(); + } else { + break; + } + } + + let reserved = &self.input[start..self.cur()]; + if let Some(number) = self.number(reserved) { + Ok(Some(number)) + } else if prefix == '$' && reserved.len() > 1 { + Ok(Some(Token::Id(reserved))) + } else if 'a' <= prefix && prefix <= 'z' { + Ok(Some(Token::Keyword(reserved))) + } else { + Ok(Some(Token::Reserved(reserved))) + } + } + + fn number(&self, src: &'a str) -> Option<Token<'a>> { + let (sign, num) = if src.starts_with('+') { + (Some(SignToken::Plus), &src[1..]) + } else if src.starts_with('-') { + (Some(SignToken::Minus), &src[1..]) + } else { + (None, src) + }; + + let negative = sign == Some(SignToken::Minus); + + // Handle `inf` and `nan` which are special numbers here + if num == "inf" { + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: 
FloatVal::Inf { negative }, + })))); + } else if num == "nan" { + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Nan { + val: None, + negative, + }, + })))); + } else if num.starts_with("nan:0x") { + let mut it = num[6..].chars(); + let to_parse = skip_undescores(&mut it, false, char::is_ascii_hexdigit)?; + if it.next().is_some() { + return None; + } + let n = u64::from_str_radix(&to_parse, 16).ok()?; + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Nan { + val: Some(n), + negative, + }, + })))); + } + + // Figure out if we're a hex number or not + let (mut it, hex, test_valid) = if num.starts_with("0x") { + ( + num[2..].chars(), + true, + char::is_ascii_hexdigit as fn(&char) -> bool, + ) + } else { + ( + num.chars(), + false, + char::is_ascii_digit as fn(&char) -> bool, + ) + }; + + // Evaluate the first part, moving out all underscores + let val = skip_undescores(&mut it, negative, test_valid)?; + + match it.clone().next() { + // If we're followed by something this may be a float so keep going. + Some(_) => {} + + // Otherwise this is a valid integer literal! + None => { + return Some(Token::Integer(Integer(Box::new(IntegerInner { + sign, + src, + val, + hex, + })))) + } + } + + // A number can optionally be after the decimal so only actually try to + // parse one if it's there. + let decimal = if it.clone().next() == Some('.') { + it.next(); + match it.clone().next() { + Some(c) if test_valid(&c) => Some(skip_undescores(&mut it, false, test_valid)?), + Some(_) | None => None, + } + } else { + None + }; + + // Figure out if there's an exponential part here to make a float, and + // if so parse it but defer its actual calculation until later. 
+ let exponent = match (hex, it.next()) { + (true, Some('p')) | (true, Some('P')) | (false, Some('e')) | (false, Some('E')) => { + let negative = match it.clone().next() { + Some('-') => { + it.next(); + true + } + Some('+') => { + it.next(); + false + } + _ => false, + }; + Some(skip_undescores(&mut it, negative, char::is_ascii_digit)?) + } + (_, None) => None, + _ => return None, + }; + + // We should have eaten everything by now, if not then this is surely + // not a float or integer literal. + if it.next().is_some() { + return None; + } + + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Val { + hex, + integral: val, + exponent, + decimal, + }, + })))); + + fn skip_undescores<'a>( + it: &mut str::Chars<'a>, + negative: bool, + good: fn(&char) -> bool, + ) -> Option<Cow<'a, str>> { + enum State { + Raw, + Collecting(String), + } + let mut last_underscore = false; + let mut state = if negative { + State::Collecting("-".to_string()) + } else { + State::Raw + }; + let input = it.as_str(); + let first = it.next()?; + if !good(&first) { + return None; + } + if let State::Collecting(s) = &mut state { + s.push(first); + } + let mut last = 1; + while let Some(c) = it.clone().next() { + if c == '_' && !last_underscore { + if let State::Raw = state { + state = State::Collecting(input[..last].to_string()); + } + it.next(); + last_underscore = true; + continue; + } + if !good(&c) { + break; + } + if let State::Collecting(s) = &mut state { + s.push(c); + } + last_underscore = false; + it.next(); + last += 1; + } + if last_underscore { + return None; + } + Some(match state { + State::Raw => input[..last].into(), + State::Collecting(s) => s.into(), + }) + } + } + + /// Attempts to consume whitespace from the input stream, returning `None` + /// if there's no whitespace to consume + fn ws(&mut self) -> Option<&'a str> { + let start = self.cur(); + loop { + match self.it.peek() { + Some((_, ' ')) | Some((_, '\n')) | Some((_, '\r')) | Some((_, '\t')) 
=> { + drop(self.it.next()) + } + _ => break, + } + } + let end = self.cur(); + if start != end { + Some(&self.input[start..end]) + } else { + None + } + } + + /// Attempts to read a comment from the input stream + fn comment(&mut self) -> Result<Option<Token<'a>>, Error> { + if let Some(start) = self.eat_str(";;") { + loop { + match self.it.peek() { + None | Some((_, '\n')) => break, + _ => drop(self.it.next()), + } + } + let end = self.cur(); + return Ok(Some(Token::LineComment(&self.input[start..end]))); + } + if let Some(start) = self.eat_str("(;") { + let mut level = 1; + while let Some((_, ch)) = self.it.next() { + if ch == '(' && self.eat_char(';').is_some() { + level += 1; + } + if ch == ';' && self.eat_char(')').is_some() { + level -= 1; + if level == 0 { + let end = self.cur(); + return Ok(Some(Token::BlockComment(&self.input[start..end]))); + } + } + } + + return Err(self.error(start, LexError::DanglingBlockComment)); + } + Ok(None) + } + + /// Reads everything for a literal string except the leading `"`. Returns + /// the string value that has been read. 
    fn string(&mut self) -> Result<Cow<'a, [u8]>, Error> {
        // `Start(pos)` means no escape has been seen yet, so the value can be
        // borrowed straight from the input; the first `\` switches to
        // `String(buf)` which owns the decoded bytes.
        enum State {
            Start(usize),
            String(Vec<u8>),
        }
        let mut state = State::Start(self.cur());
        loop {
            match self.it.next() {
                Some((i, '\\')) => {
                    // Lazily copy everything seen so far into an owned buffer
                    // the first time an escape appears.
                    match state {
                        State::String(_) => {}
                        State::Start(start) => {
                            state = State::String(self.input[start..i].as_bytes().to_vec());
                        }
                    }
                    let buf = match &mut state {
                        State::String(b) => b,
                        State::Start(_) => unreachable!(),
                    };
                    match self.it.next() {
                        Some((_, '"')) => buf.push(b'"'),
                        Some((_, '\'')) => buf.push(b'\''),
                        Some((_, 't')) => buf.push(b'\t'),
                        Some((_, 'n')) => buf.push(b'\n'),
                        Some((_, 'r')) => buf.push(b'\r'),
                        Some((_, '\\')) => buf.push(b'\\'),
                        Some((i, 'u')) => {
                            // `\u{...}` escapes a unicode scalar value.
                            self.must_eat_char('{')?;
                            let n = self.hexnum()?;
                            let c = char::from_u32(n)
                                .ok_or_else(|| self.error(i, LexError::InvalidUnicodeValue(n)))?;
                            buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
                            self.must_eat_char('}')?;
                        }
                        Some((_, c1)) if c1.is_ascii_hexdigit() => {
                            // `\XY` is a raw byte given as two hex digits.
                            let (_, c2) = self.hexdigit()?;
                            buf.push(to_hex(c1) * 16 + c2);
                        }
                        Some((i, c)) => return Err(self.error(i, LexError::InvalidStringEscape(c))),
                        None => return Err(self.error(self.input.len(), LexError::UnexpectedEof)),
                    }
                }
                Some((_, '"')) => break,
                Some((i, c)) => {
                    // Control characters must be written with escapes.
                    if (c as u32) < 0x20 || c as u32 == 0x7f {
                        return Err(self.error(i, LexError::InvalidStringElement(c)));
                    }
                    match &mut state {
                        State::Start(_) => {}
                        State::String(v) => {
                            v.extend(c.encode_utf8(&mut [0; 4]).as_bytes());
                        }
                    }
                }
                None => return Err(self.error(self.input.len(), LexError::UnexpectedEof)),
            }
        }
        match state {
            // `- 1` drops the closing quote from the borrowed slice.
            State::Start(pos) => Ok(self.input[pos..self.cur() - 1].as_bytes().into()),
            State::String(s) => Ok(s.into()),
        }
    }

    // Parses a hex number (one or more hex digits with optional interior
    // underscores) into a `u32`, e.g. for `\u{...}` escapes.
    fn hexnum(&mut self) -> Result<u32, Error> {
        let (_, n) = self.hexdigit()?;
        let mut last_underscore = false;
        let mut n = n as u32;
        while let Some((i, c)) = self.it.peek().cloned() {
            if c == '_' {
                self.it.next();
                last_underscore = true;
                continue;
            }
            if !c.is_ascii_hexdigit() {
                break;
            }
            last_underscore = false;
            self.it.next();
            n = n
                .checked_mul(16)
                .and_then(|n| n.checked_add(to_hex(c) as u32))
                .ok_or_else(|| self.error(i, LexError::NumberTooBig))?;
        }
        // Underscores must sit between digits, never at the end.
        if last_underscore {
            let cur = self.cur();
            return Err(self.error(cur - 1, LexError::LoneUnderscore));
        }
        Ok(n)
    }

    /// Reads a hexadecimal digit from the input stream, returning where it's
    /// defined and the hex value. Returns an error on EOF or an invalid hex
    /// digit.
    fn hexdigit(&mut self) -> Result<(usize, u8), Error> {
        let (i, ch) = self.must_char()?;
        if ch.is_ascii_hexdigit() {
            Ok((i, to_hex(ch)))
        } else {
            Err(self.error(i, LexError::InvalidHexDigit(ch)))
        }
    }

    /// Returns where the match started, if any
    fn eat_str(&mut self, s: &str) -> Option<usize> {
        if !self.cur_str().starts_with(s) {
            return None;
        }
        let ret = self.cur();
        for _ in s.chars() {
            self.it.next();
        }
        Some(ret)
    }

    /// Returns where the match happened, if any
    fn eat_char(&mut self, needle: char) -> Option<usize> {
        match self.it.peek() {
            Some((i, c)) if *c == needle => {
                let ret = *i;
                self.it.next();
                Some(ret)
            }
            _ => None,
        }
    }

    /// Reads the next character from the input string and where it's located,
    /// returning an error if the input stream is empty.
    fn must_char(&mut self) -> Result<(usize, char), Error> {
        self.it
            .next()
            .ok_or_else(|| self.error(self.input.len(), LexError::UnexpectedEof))
    }

    /// Expects that a specific character must be read next
    fn must_eat_char(&mut self, wanted: char) -> Result<usize, Error> {
        let (pos, found) = self.must_char()?;
        if wanted == found {
            Ok(pos)
        } else {
            Err(self.error(pos, LexError::Expected { wanted, found }))
        }
    }

    /// Returns the current position of our iterator through the input string
    fn cur(&mut self) -> usize {
        // At EOF there's nothing to peek, so the position is the input length.
        self.it.peek().map(|p| p.0).unwrap_or(self.input.len())
    }

    /// Returns the remaining string that we have left to parse
    fn cur_str(&mut self) -> &'a str {
        &self.input[self.cur()..]
    }

    /// Creates an error at `pos` with the specified `kind`
    fn error(&self, pos: usize, kind: LexError) -> Error {
        Error::lex(Span { offset: pos }, self.input, kind)
    }
}

impl<'a> Iterator for Lexer<'a> {
    type Item = Result<Token<'a>, Error>;

    fn next(&mut self) -> Option<Self::Item> {
        // `transpose` turns `Result<Option<T>>` into `Option<Result<T>>` so
        // EOF cleanly ends the iteration.
        self.parse().transpose()
    }
}

impl<'a> Token<'a> {
    /// Returns the original source text for this token.
    pub fn src(&self) -> &'a str {
        match self {
            Token::Whitespace(s) => s,
            Token::BlockComment(s) => s,
            Token::LineComment(s) => s,
            Token::LParen(s) => s,
            Token::RParen(s) => s,
            Token::String(s) => s.src(),
            Token::Id(s) => s,
            Token::Keyword(s) => s,
            Token::Reserved(s) => s,
            Token::Integer(i) => i.src(),
            Token::Float(f) => f.src(),
        }
    }
}

impl<'a> Integer<'a> {
    /// Returns the sign token for this integer.
    pub fn sign(&self) -> Option<SignToken> {
        self.0.sign
    }

    /// Returns the original source text for this integer.
    pub fn src(&self) -> &'a str {
        self.0.src
    }

    /// Returns the value string that can be parsed for this integer, as well as
    /// the base that it should be parsed in
    pub fn val(&self) -> (&str, u32) {
        (&self.0.val, if self.0.hex { 16 } else { 10 })
    }
}

impl<'a> Float<'a> {
    /// Returns the original source text for this float.
    pub fn src(&self) -> &'a str {
        self.0.src
    }

    /// Returns a parsed value of this float with all of the components still
    /// listed as strings.
    pub fn val(&self) -> &FloatVal<'a> {
        &self.0.val
    }
}

impl<'a> WasmString<'a> {
    /// Returns the original source text for this string.
    pub fn src(&self) -> &'a str {
        self.0.src
    }

    /// Returns a parsed value, as a list of bytes, for this string.
    pub fn val(&self) -> &[u8] {
        &self.0.val
    }
}

// Converts a single hex digit character to its numeric value. Callers only
// pass characters already validated with `is_ascii_hexdigit`.
fn to_hex(c: char) -> u8 {
    match c {
        'a'..='f' => c as u8 - b'a' + 10,
        'A'..='F' => c as u8 - b'A' + 10,
        _ => c as u8 - b'0',
    }
}

// Whether `c` is an `idchar` per the WebAssembly text format grammar: the
// characters allowed in identifiers, keywords, and number literals.
fn is_idchar(c: char) -> bool {
    match c {
        '0'..='9'
        | 'a'..='z'
        | 'A'..='Z'
        | '!'
        | '#'
        | '$'
        | '%'
        | '&'
        | '\''
        | '*'
        | '+'
        | '-'
        | '.'
        | '/'
        | ':'
        | '<'
        | '='
        | '>'
        | '?'
        | '@'
        | '\\'
        | '^'
        | '_'
        | '`'
        | '|'
        | '~' => true,
        _ => false,
    }
}

// Punctuation that isn't an `idchar` but still lexes as a one-character
// `Reserved` token rather than an immediate error.
fn is_reserved_extra(c: char) -> bool {
    match c {
        ',' | ';' | '[' | ']' | '{' | '}' => true,
        _ => false,
    }
}

impl fmt::Display for LexError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use LexError::*;
        match self {
            DanglingBlockComment => f.write_str("unterminated block comment")?,
            Unexpected(c) => write!(f, "unexpected character {:?}", c)?,
            InvalidStringElement(c) => write!(f, "invalid character in string {:?}", c)?,
            InvalidStringEscape(c) => write!(f, "invalid string escape {:?}", c)?,
            InvalidHexDigit(c) => write!(f, "invalid hex digit {:?}", c)?,
            InvalidDigit(c) => write!(f, "invalid decimal digit {:?}", c)?,
            Expected { wanted, found } => write!(f, "expected {:?} but found {:?}", wanted, found)?,
            UnexpectedEof => write!(f, "unexpected end-of-file")?,
            NumberTooBig => f.write_str("number is too big to parse")?,
            InvalidUnicodeValue(c) => write!(f, "invalid unicode scalar value 0x{:x}", c)?,
            LoneUnderscore => write!(f, "bare underscore in numeric literal")?,
            // Hidden variant; never constructed.
            __Nonexhaustive => unreachable!(),
        }
        Ok(())
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn ws_smoke() {
        fn get_whitespace(input: &str) -> &str {
            match Lexer::new(input).parse().expect("no first token") {
                Some(Token::Whitespace(s)) => s,
                other => panic!("unexpected {:?}", other),
            }
        }
        assert_eq!(get_whitespace(" "), " ");
        assert_eq!(get_whitespace("  "), "  ");
        assert_eq!(get_whitespace("  \n "), "  \n ");
        assert_eq!(get_whitespace("  x"), "  ");
        assert_eq!(get_whitespace("  ;"), "  ");
    }

    #[test]
    fn line_comment_smoke() {
        fn get_line_comment(input: &str) -> &str {
            match Lexer::new(input).parse().expect("no first token") {
                Some(Token::LineComment(s)) => s,
                other => panic!("unexpected {:?}", other),
            }
        }
        assert_eq!(get_line_comment(";;"), ";;");
        assert_eq!(get_line_comment(";; xyz"), ";; xyz");
        assert_eq!(get_line_comment(";; xyz\nabc"), ";; xyz");
        assert_eq!(get_line_comment(";;\nabc"), ";;");
        assert_eq!(get_line_comment(";; \nabc"), ";; ");
    }

    #[test]
    fn block_comment_smoke() {
        fn get_block_comment(input: &str) -> &str {
            match Lexer::new(input).parse().expect("no first token") {
                Some(Token::BlockComment(s)) => s,
                other => panic!("unexpected {:?}", other),
            }
        }
        assert_eq!(get_block_comment("(;;)"), "(;;)");
        assert_eq!(get_block_comment("(; ;)"), "(; ;)");
        // Block comments nest.
        assert_eq!(get_block_comment("(; (;;) ;)"), "(; (;;) ;)");
    }

    // Helper for the token-level tests: lexes exactly one token out of
    // `input`, panicking on lex errors or EOF.
    fn get_token(input: &str) -> Token<'_> {
        Lexer::new(input)
            .parse()
            .expect("no first token")
            .expect("no token")
    }

    #[test]
    fn lparen() {
        assert_eq!(get_token("(("), Token::LParen("("));
    }

    #[test]
    fn rparen() {
        assert_eq!(get_token(")("), Token::RParen(")"));
    }

    #[test]
    fn strings() {
        fn get_string(input: &str) -> Vec<u8> {
            match get_token(input) {
                Token::String(s) => {
                    assert_eq!(input, s.src());
                    s.val().to_vec()
                }
                other => panic!("not string {:?}", other),
            }
        }
        assert_eq!(&*get_string("\"\""), b"");
        assert_eq!(&*get_string("\"a\""), b"a");
        assert_eq!(&*get_string("\"a b c d\""), b"a b c d");
        assert_eq!(&*get_string("\"\\\"\""), b"\"");
        assert_eq!(&*get_string("\"\\'\""), b"'");
        assert_eq!(&*get_string("\"\\n\""), b"\n");
        assert_eq!(&*get_string("\"\\t\""), b"\t");
        assert_eq!(&*get_string("\"\\r\""), b"\r");
        assert_eq!(&*get_string("\"\\\\\""), b"\\");
        assert_eq!(&*get_string("\"\\01\""), &[1]);
        assert_eq!(&*get_string("\"\\u{1}\""), &[1]);
        assert_eq!(
            &*get_string("\"\\u{0f3}\""),
            '\u{0f3}'.encode_utf8(&mut [0; 4]).as_bytes()
        );
        assert_eq!(
            &*get_string("\"\\u{0_f_3}\""),
            '\u{0f3}'.encode_utf8(&mut [0; 4]).as_bytes()
        );

        // Every possible `\XY` hex byte escape round-trips.
        for i in 0..=255i32 {
            let s = format!("\"\\{:02x}\"", i);
            assert_eq!(&*get_string(&s), &[i as u8]);
        }
    }

    #[test]
    fn id() {
        fn get_id(input: &str) -> &str {
            match get_token(input) {
                Token::Id(s) => s,
                other => panic!("not id {:?}", other),
            }
        }
        assert_eq!(get_id("$x"), "$x");
        assert_eq!(get_id("$xyz"), "$xyz");
        assert_eq!(get_id("$x_z"), "$x_z");
        assert_eq!(get_id("$0^"), "$0^");
        assert_eq!(get_id("$0^;;"), "$0^");
        assert_eq!(get_id("$0^ ;;"), "$0^");
    }

    #[test]
    fn keyword() {
        fn get_keyword(input: &str) -> &str {
            match get_token(input) {
                Token::Keyword(s) => s,
                other => panic!("not id {:?}", other),
            }
        }
        assert_eq!(get_keyword("x"), "x");
        assert_eq!(get_keyword("xyz"), "xyz");
        assert_eq!(get_keyword("x_z"), "x_z");
        assert_eq!(get_keyword("x_z "), "x_z");
        assert_eq!(get_keyword("x_z "), "x_z");
    }

    #[test]
    fn reserved() {
        fn get_reserved(input: &str) -> &str {
            match get_token(input) {
                Token::Reserved(s) => s,
                other => panic!("not reserved {:?}", other),
            }
        }
        assert_eq!(get_reserved("$ "), "$");
        assert_eq!(get_reserved("^_x "), "^_x");
    }

    #[test]
    fn integer() {
        fn get_integer(input: &str) -> String {
            match get_token(input) {
                Token::Integer(i) => {
                    assert_eq!(input, i.src());
                    i.val().0.to_string()
                }
                other => panic!("not integer {:?}", other),
            }
        }
        assert_eq!(get_integer("1"), "1");
        assert_eq!(get_integer("0"), "0");
        assert_eq!(get_integer("-1"), "-1");
        // A leading `+` is dropped from the parsed value.
        assert_eq!(get_integer("+1"), "1");
        // Underscores are stripped.
        assert_eq!(get_integer("+1_000"), "1000");
        assert_eq!(get_integer("+1_0_0_0"), "1000");
        // The `0x` prefix is stripped; the base is reported separately.
        assert_eq!(get_integer("+0x10"), "10");
        assert_eq!(get_integer("-0x10"), "-10");
        assert_eq!(get_integer("0x10"), "10");
    }

    #[test]
    fn float() {
        fn get_float(input: &str) -> FloatVal<'_> {
            match get_token(input) {
                Token::Float(i) => {
                    assert_eq!(input, i.src());
                    i.0.val
                }
                other => panic!("not reserved {:?}", other),
            }
        }
        assert_eq!(
            get_float("nan"),
            FloatVal::Nan {
                val: None,
                negative: false
            },
        );
        assert_eq!(
            get_float("-nan"),
            FloatVal::Nan {
                val: None,
                negative: true,
            },
        );
        assert_eq!(
            get_float("+nan"),
            FloatVal::Nan {
                val: None,
                negative: false,
            },
        );
        assert_eq!(
            get_float("+nan:0x1"),
            FloatVal::Nan {
                val: Some(1),
                negative: false,
            },
        );
        assert_eq!(
            get_float("nan:0x7f_ffff"),
            FloatVal::Nan {
                val: Some(0x7fffff),
                negative: false,
            },
        );
        assert_eq!(get_float("inf"), FloatVal::Inf { negative: false });
        assert_eq!(get_float("-inf"), FloatVal::Inf { negative: true });
        assert_eq!(get_float("+inf"), FloatVal::Inf { negative: false });

        assert_eq!(
            get_float("1.2"),
            FloatVal::Val {
                integral: "1".into(),
                decimal: Some("2".into()),
                exponent: None,
                hex: false,
            },
        );
        assert_eq!(
            get_float("1.2e3"),
            FloatVal::Val {
                integral: "1".into(),
                decimal: Some("2".into()),
                exponent: Some("3".into()),
                hex: false,
            },
        );
        assert_eq!(
            get_float("-1_2.1_1E+0_1"),
            FloatVal::Val {
                integral: "-12".into(),
                decimal: Some("11".into()),
                exponent: Some("01".into()),
                hex: false,
            },
        );
        assert_eq!(
            get_float("+1_2.1_1E-0_1"),
            FloatVal::Val {
                integral: "12".into(),
                decimal: Some("11".into()),
                exponent: Some("-01".into()),
                hex: false,
            },
        );
        assert_eq!(
            get_float("0x1_2.3_4p5_6"),
            FloatVal::Val {
                integral: "12".into(),
                decimal: Some("34".into()),
                exponent: Some("56".into()),
                hex: true,
            },
        );
        assert_eq!(
            get_float("+0x1_2.3_4P-5_6"),
            FloatVal::Val {
                integral: "12".into(),
                decimal: Some("34".into()),
                exponent: Some("-56".into()),
                hex: true,
            },
        );
        assert_eq!(
            get_float("1."),
            FloatVal::Val {
                integral: "1".into(),
                decimal: None,
                exponent: None,
                hex: false,
            },
        );
        assert_eq!(
            get_float("0x1p-24"),
            FloatVal::Val {
                integral: "1".into(),
                decimal: None,
                exponent: Some("-24".into()),
                hex: true,
            },
        );
    }
}
diff --git a/third_party/rust/wast/src/lib.rs b/third_party/rust/wast/src/lib.rs
new file mode 100644
index 0000000000..e8da13b64b
--- /dev/null
+++ b/third_party/rust/wast/src/lib.rs
@@ -0,0 +1,235 @@
//! A crate for low-level parsing of the WebAssembly text formats: WAT and WAST.
//!
//! This crate is intended to be a low-level detail of the `wat` crate,
//! providing a low-level parsing API for parsing WebAssembly text format
//! structures. The API provided by this crate is very similar to
//! [`syn`](https://docs.rs/syn) and provides the ability to write customized
//! parsers which may be an extension to the core WebAssembly text format. For
//! more documentation see the [`parser`] module.
//!
//! # High-level Overview
//!
//! This crate provides a few major pieces of functionality
//!
//! * [`lexer`] - this is a raw lexer for the wasm text format. This is not
//!   customizable, but if you'd like to iterate over raw tokens this is the
//!   module for you. You likely won't use this much.
//!
//! * [`parser`] - this is the workhorse of this crate. The [`parser`] module
//!   provides the [`Parse`][] trait primarily and utilities
//!   around working with a [`Parser`](`parser::Parser`) to parse streams of
//!   tokens.
//!
//! * [`Module`] - this contains an Abstract Syntax Tree (AST) of the
//!   WebAssembly Text format (WAT) as well as the unofficial WAST format. This
//!   also has a [`Module::encode`] method to emit a module in its binary form.
//!
//! # Stability and WebAssembly Features
//!
//! This crate provides support for many in-progress WebAssembly features such
//! as reference types, multi-value, etc. Be sure to check out the documentation
//! of the [`wast` crate](https://docs.rs/wast) for policy information on crate
//! stability vs WebAssembly Features. The tl;dr; version is that this crate
//! will issue semver-non-breaking releases which will break the parsing of the
//! text format. This crate, unlike `wast`, is expected to have numerous Rust
//! public API changes, all of which will be accompanied with a semver-breaking
//! release.
//!
//! # Compile-time Cargo features
//!
//! This crate has a `wasm-module` feature which is turned on by default which
//! includes all necessary support to parse full WebAssembly modules. If you
//! don't need this (for example you're parsing your own s-expression format)
//! then this feature can be disabled.
//!
//! [`Parse`]: parser::Parse
//! [`LexError`]: lexer::LexError

#![deny(missing_docs, broken_intra_doc_links)]

use std::fmt;
use std::path::{Path, PathBuf};

// Binary emission and name resolution are only needed when parsing full
// modules, so they're gated behind the `wasm-module` feature.
#[cfg(feature = "wasm-module")]
mod binary;
#[cfg(feature = "wasm-module")]
mod resolve;

mod ast;
pub use self::ast::*;

pub mod lexer;
pub mod parser;

/// A convenience error type to tie together all the detailed errors produced by
/// this crate.
///
/// This type can be created from a [`lexer::LexError`] or [`parser::Error`].
/// This also contains storage for file/text information so a nice error can be
/// rendered along the same lines of rustc's own error messages (minus the
/// color).
///
/// This type is typically suitable for use in public APIs for consumers of this
/// crate.
+#[derive(Debug)] +pub struct Error { + inner: Box<ErrorInner>, +} + +#[derive(Debug)] +struct ErrorInner { + text: Option<Text>, + file: Option<PathBuf>, + span: Span, + kind: ErrorKind, +} + +#[derive(Debug)] +struct Text { + line: usize, + col: usize, + snippet: String, +} + +#[derive(Debug)] +enum ErrorKind { + Lex(lexer::LexError), + Custom(String), +} + +impl Error { + fn lex(span: Span, content: &str, kind: lexer::LexError) -> Error { + let mut ret = Error { + inner: Box::new(ErrorInner { + text: None, + file: None, + span, + kind: ErrorKind::Lex(kind), + }), + }; + ret.set_text(content); + return ret; + } + + fn parse(span: Span, content: &str, message: String) -> Error { + let mut ret = Error { + inner: Box::new(ErrorInner { + text: None, + file: None, + span, + kind: ErrorKind::Custom(message), + }), + }; + ret.set_text(content); + return ret; + } + + /// Creates a new error with the given `message` which is targeted at the + /// given `span` + /// + /// Note that you'll want to ensure that `set_text` or `set_path` is called + /// on the resulting error to improve the rendering of the error message. + pub fn new(span: Span, message: String) -> Error { + Error { + inner: Box::new(ErrorInner { + text: None, + file: None, + span, + kind: ErrorKind::Custom(message), + }), + } + } + + /// Return the `Span` for this error. + pub fn span(&self) -> Span { + self.inner.span + } + + /// To provide a more useful error this function can be used to extract + /// relevant textual information about this error into the error itself. + /// + /// The `contents` here should be the full text of the original file being + /// parsed, and this will extract a sub-slice as necessary to render in the + /// `Display` implementation later on. 
+ pub fn set_text(&mut self, contents: &str) { + if self.inner.text.is_some() { + return; + } + self.inner.text = Some(Text::new(contents, self.inner.span)); + } + + /// To provide a more useful error this function can be used to set + /// the file name that this error is associated with. + /// + /// The `path` here will be stored in this error and later rendered in the + /// `Display` implementation. + pub fn set_path(&mut self, path: &Path) { + if self.inner.file.is_some() { + return; + } + self.inner.file = Some(path.to_path_buf()); + } + + /// Returns the underlying `LexError`, if any, that describes this error. + pub fn lex_error(&self) -> Option<&lexer::LexError> { + match &self.inner.kind { + ErrorKind::Lex(e) => Some(e), + _ => None, + } + } + + /// Returns the underlying message, if any, that describes this error. + pub fn message(&self) -> String { + match &self.inner.kind { + ErrorKind::Lex(e) => e.to_string(), + ErrorKind::Custom(e) => e.clone(), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let err = match &self.inner.kind { + ErrorKind::Lex(e) => e as &dyn fmt::Display, + ErrorKind::Custom(e) => e as &dyn fmt::Display, + }; + let text = match &self.inner.text { + Some(text) => text, + None => { + return write!(f, "{} at byte offset {}", err, self.inner.span.offset); + } + }; + let file = self + .inner + .file + .as_ref() + .and_then(|p| p.to_str()) + .unwrap_or("<anon>"); + write!( + f, + "\ +{err} + --> {file}:{line}:{col} + | + {line:4} | {text} + | {marker:>0$}", + text.col + 1, + file = file, + line = text.line + 1, + col = text.col + 1, + err = err, + text = text.snippet, + marker = "^", + ) + } +} + +impl std::error::Error for Error {} + +impl Text { + fn new(content: &str, span: Span) -> Text { + let (line, col) = span.linecol_in(content); + let snippet = content.lines().nth(line).unwrap_or("").to_string(); + Text { line, col, snippet } + } +} diff --git 
a/third_party/rust/wast/src/parser.rs b/third_party/rust/wast/src/parser.rs new file mode 100644 index 0000000000..cb6f0b5879 --- /dev/null +++ b/third_party/rust/wast/src/parser.rs @@ -0,0 +1,1263 @@ +//! Traits for parsing the WebAssembly Text format +//! +//! This module contains the traits, abstractions, and utilities needed to +//! define custom parsers for WebAssembly text format items. This module exposes +//! a recursive descent parsing strategy and centers around the +//! [`Parse`](crate::parser::Parse) trait for defining new fragments of +//! WebAssembly text syntax. +//! +//! The top-level [`parse`](crate::parser::parse) function can be used to fully parse AST fragments: +//! +//! ``` +//! use wast::Wat; +//! use wast::parser::{self, ParseBuffer}; +//! +//! # fn foo() -> Result<(), wast::Error> { +//! let wat = "(module (func))"; +//! let buf = ParseBuffer::new(wat)?; +//! let module = parser::parse::<Wat>(&buf)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! and you can also define your own new syntax with the +//! [`Parse`](crate::parser::Parse) trait: +//! +//! ``` +//! use wast::{kw, Import, Func}; +//! use wast::parser::{Parser, Parse, Result}; +//! +//! // Fields of a WebAssembly which only allow imports and functions, and all +//! // imports must come before all the functions +//! struct OnlyImportsAndFunctions<'a> { +//! imports: Vec<Import<'a>>, +//! functions: Vec<Func<'a>>, +//! } +//! +//! impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> { +//! fn parse(parser: Parser<'a>) -> Result<Self> { +//! // While the second token is `import` (the first is `(`, so we care +//! // about the second) we parse an `ast::ModuleImport` inside of +//! // parentheses. The `parens` function here ensures that what we +//! // parse inside of it is surrounded by `(` and `)`. +//! let mut imports = Vec::new(); +//! while parser.peek2::<kw::import>() { +//! let import = parser.parens(|p| p.parse())?; +//! imports.push(import); +//! } +//! +//! 
// Afterwards we assume everything else is a function. Note that +//! // `parse` here is a generic function and type inference figures out +//! // that we're parsing functions here and imports above. +//! let mut functions = Vec::new(); +//! while !parser.is_empty() { +//! let func = parser.parens(|p| p.parse())?; +//! functions.push(func); +//! } +//! +//! Ok(OnlyImportsAndFunctions { imports, functions }) +//! } +//! } +//! ``` +//! +//! This module is heavily inspired by [`syn`](https://docs.rs/syn) so you can +//! likely also draw inspiration from the excellent examples in the `syn` crate. + +use crate::lexer::{Float, Integer, Lexer, Token}; +use crate::{Error, Span}; +use std::cell::{Cell, RefCell}; +use std::collections::HashMap; +use std::fmt; +use std::usize; + +/// A top-level convenience parsing function that parses a `T` from `buf` and +/// requires that all tokens in `buf` are consumed. +/// +/// This generic parsing function can be used to parse any `T` implementing the +/// [`Parse`] trait. It is not used from [`Parse`] trait implementations.
+/// +/// # Examples +/// +/// ``` +/// use wast::Wat; +/// use wast::parser::{self, ParseBuffer}; +/// +/// # fn foo() -> Result<(), wast::Error> { +/// let wat = "(module (func))"; +/// let buf = ParseBuffer::new(wat)?; +/// let module = parser::parse::<Wat>(&buf)?; +/// # Ok(()) +/// # } +/// ``` +/// +/// or parsing simply a fragment +/// +/// ``` +/// use wast::parser::{self, ParseBuffer}; +/// +/// # fn foo() -> Result<(), wast::Error> { +/// let wat = "12"; +/// let buf = ParseBuffer::new(wat)?; +/// let val = parser::parse::<u32>(&buf)?; +/// assert_eq!(val, 12); +/// # Ok(()) +/// # } +/// ``` +pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> { + let parser = buf.parser(); + let result = parser.parse()?; + if parser.cursor().advance_token().is_none() { + Ok(result) + } else { + Err(parser.error("extra tokens remaining after parse")) + } +} + +/// A trait for parsing a fragment of syntax in a recursive descent fashion. +/// +/// The [`Parse`] trait is the main abstraction you'll be working with when defining +/// custom parsers or custom syntax for your WebAssembly text format (or when +/// using the official format items). Almost all items in the +/// [`ast`](crate::ast) module implement the [`Parse`] trait, and you'll +/// commonly use this with: +/// +/// * The top-level [`parse`] function to parse an entire input. +/// * The intermediate [`Parser::parse`] function to parse an item out of an +/// input stream and then parse remaining items. +/// +/// Implementations of [`Parse`] take a [`Parser`] as input and will mutate the +/// parser as they parse syntax. Once a token is consumed it cannot be +/// "un-consumed". Utilities such as [`Parser::peek`] and [`Parser::lookahead1`] +/// can be used to determine what to parse next. +/// +/// ## When to parse `(` and `)`? +/// +/// Conventionally types are not responsible for parsing their own `(` and `)` +/// tokens which surround the type.
For example WebAssembly imports look like: +/// +/// ```text +/// (import "foo" "bar" (func (type 0))) +/// ``` +/// +/// but the [`Import`](crate::ast::Import) type parser looks like: +/// +/// ``` +/// # use wast::kw; +/// # use wast::parser::{Parser, Parse, Result}; +/// # struct Import<'a>(&'a str); +/// impl<'a> Parse<'a> for Import<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// parser.parse::<kw::import>()?; +/// // ... +/// # panic!() +/// } +/// } +/// ``` +/// +/// It is assumed here that the `(` and `)` tokens which surround an `import` +/// statement in the WebAssembly text format are parsed by the parent item +/// parsing `Import`. +/// +/// Note that this is just a convention, so it's not necessarily required for +/// all types. It's recommended that your types stick to this convention where +/// possible to avoid nested calls to [`Parser::parens`] or accidentally trying +/// to parse too many parenthesis. +/// +/// # Examples +/// +/// Let's say you want to define your own WebAssembly text format which only +/// contains imports and functions. You also require all imports to be listed +/// before all functions. An example [`Parse`] implementation might look like: +/// +/// ``` +/// use wast::{Import, Func, kw}; +/// use wast::parser::{Parser, Parse, Result}; +/// +/// // Fields of a WebAssembly which only allow imports and functions, and all +/// // imports must come before all the functions +/// struct OnlyImportsAndFunctions<'a> { +/// imports: Vec<Import<'a>>, +/// functions: Vec<Func<'a>>, +/// } +/// +/// impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // While the second token is `import` (the first is `(`, so we care +/// // about the second) we parse an `ast::ModuleImport` inside of +/// // parentheses. The `parens` function here ensures that what we +/// // parse inside of it is surrounded by `(` and `)`. 
+/// let mut imports = Vec::new(); +/// while parser.peek2::<kw::import>() { +/// let import = parser.parens(|p| p.parse())?; +/// imports.push(import); +/// } +/// +/// // Afterwards we assume everything else is a function. Note that +/// // `parse` here is a generic function and type inference figures out +/// // that we're parsing functions here and imports above. +/// let mut functions = Vec::new(); +/// while !parser.is_empty() { +/// let func = parser.parens(|p| p.parse())?; +/// functions.push(func); +/// } +/// +/// Ok(OnlyImportsAndFunctions { imports, functions }) +/// } +/// } +/// ``` +pub trait Parse<'a>: Sized { + /// Attempts to parse `Self` from `parser`, returning an error if it could + /// not be parsed. + /// + /// This method will mutate the state of `parser` after attempting to parse + /// an instance of `Self`. If an error happens then it is likely fatal and + /// there is no guarantee of how many tokens have been consumed from + /// `parser`. + /// + /// As recommended in the documentation of [`Parse`], implementations of + /// this function should not start out by parsing `(` and `)` tokens, but + /// rather parents calling recursive parsers should parse the `(` and `)` + /// tokens for their child item that's being parsed. + /// + /// # Errors + /// + /// This function will return an error if `Self` could not be parsed. Note + /// that creating an [`Error`] is not exactly a cheap operation, so + /// [`Error`] is typically fatal and propagated all the way back to the top + /// parse call site. + fn parse(parser: Parser<'a>) -> Result<Self>; +} + +/// A trait for types which be used to "peek" to see if they're the next token +/// in an input stream of [`Parser`]. +/// +/// Often when implementing [`Parse`] you'll need to query what the next token +/// in the stream is to figure out what to parse next. This [`Peek`] trait +/// defines the set of types that can be tested whether they're the next token +/// in the input stream. 
+/// +/// Implementations of [`Peek`] should only be present on types that consume +/// exactly one token (not zero, not more, exactly one). Types implementing +/// [`Peek`] should also typically +/// implement [`Parse`]. +/// +/// See the documentation of [`Parser::peek`] for example usage. +pub trait Peek { + /// Tests to see whether this token is the first token within the [`Cursor`] + /// specified. + /// + /// Returns `true` if [`Parse`] for this type is highly likely to succeed + /// failing no other error conditions happening (like an integer literal + /// being too big). + fn peek(cursor: Cursor<'_>) -> bool; + + /// The same as `peek`, except it checks the token immediately following + /// the current token. + fn peek2(mut cursor: Cursor<'_>) -> bool { + if cursor.advance_token().is_some() { + Self::peek(cursor) + } else { + false + } + } + + /// Returns a human-readable name of this token to display when generating + /// errors about this token missing. + fn display() -> &'static str; +} + +/// A convenience type definition for `Result` where the error is hardwired to +/// [`Error`]. +pub type Result<T> = std::result::Result<T, Error>; + +/// A low-level buffer of tokens which represents a completely lexed file. +/// +/// A `ParseBuffer` will immediately lex an entire file and then store all +/// tokens internally. A `ParseBuffer` is only used to pass to the top-level +/// [`parse`] function. +pub struct ParseBuffer<'a> { + // list of tokens from the tokenized source (including whitespace and + // comments), and the second element is how to skip this token, if it can be + // skipped. + tokens: Box<[(Token<'a>, Cell<NextTokenAt>)]>, + input: &'a str, + cur: Cell<usize>, + known_annotations: RefCell<HashMap<String, usize>>, + depth: Cell<usize>, +} + +#[derive(Copy, Clone, Debug)] +enum NextTokenAt { + /// Haven't computed where the next token is yet. + Unknown, + /// Previously computed the index of the next token.
+ Index(usize), + /// There is no next token, this is the last token. + Eof, +} + +/// An in-progress parser for the tokens of a WebAssembly text file. +/// +/// A `Parser` is the argument to the [`Parse`] trait and is how the input stream is +/// interacted with to parse new items. Cloning [`Parser`] or copying a parser +/// refers to the same stream of tokens to parse, you cannot clone a [`Parser`] +/// and clone two items. +/// +/// For more information about a [`Parser`] see its methods. +#[derive(Copy, Clone)] +pub struct Parser<'a> { + buf: &'a ParseBuffer<'a>, +} + +/// A helpful structure to perform a lookahead of one token to determine what to +/// parse. +/// +/// For more information see the [`Parser::lookahead1`] method. +pub struct Lookahead1<'a> { + parser: Parser<'a>, + attempts: Vec<&'static str>, +} + +/// An immutable cursor into a list of tokens. +/// +/// This cursor cannot be mutated but can be used to parse more tokens in a list +/// of tokens. Cursors are created from the [`Parser::step`] method. This is a +/// very low-level parsing structure and you likely won't use it much. +#[derive(Copy, Clone)] +pub struct Cursor<'a> { + parser: Parser<'a>, + cur: usize, +} + +impl ParseBuffer<'_> { + /// Creates a new [`ParseBuffer`] by lexing the given `input` completely. + /// + /// # Errors + /// + /// Returns an error if `input` fails to lex. + pub fn new(input: &str) -> Result<ParseBuffer<'_>> { + let mut tokens = Vec::new(); + for token in Lexer::new(input) { + tokens.push((token?, Cell::new(NextTokenAt::Unknown))); + } + let ret = ParseBuffer { + tokens: tokens.into_boxed_slice(), + cur: Cell::new(0), + depth: Cell::new(0), + input, + known_annotations: Default::default(), + }; + ret.validate_annotations()?; + Ok(ret) + } + + fn parser(&self) -> Parser<'_> { + Parser { buf: self } + } + + // Validates that all annotations properly parse in that they have balanced + // delimiters.
This is required since while parsing we generally skip + // annotations and there's no real opportunity to return a parse error. + fn validate_annotations(&self) -> Result<()> { + use crate::lexer::Token::*; + enum State { + None, + LParen, + Annotation { depth: usize, span: Span }, + } + let mut state = State::None; + for token in self.tokens.iter() { + state = match (&token.0, state) { + // From nothing, a `(` starts the search for an annotation + (LParen(_), State::None) => State::LParen, + // ... otherwise in nothing we always preserve that state. + (_, State::None) => State::None, + + // If the previous state was an `LParen`, we may have an + // annotation if the next keyword is reserved + (Reserved(s), State::LParen) if s.starts_with("@") && s.len() > 0 => { + let offset = self.input_pos(s); + State::Annotation { + span: Span { offset }, + depth: 1, + } + } + // ... otherwise anything after an `LParen` kills the lparen + // state. + (_, State::LParen) => State::None, + + // Once we're in an annotation we need to balance parentheses, + // so handle the depth changes. + (LParen(_), State::Annotation { span, depth }) => State::Annotation { + span, + depth: depth + 1, + }, + (RParen(_), State::Annotation { depth: 1, .. }) => State::None, + (RParen(_), State::Annotation { span, depth }) => State::Annotation { + span, + depth: depth - 1, + }, + // ... and otherwise all tokens are allowed in annotations. + (_, s @ State::Annotation { .. }) => s, + }; + } + if let State::Annotation { span, .. } = state { + return Err(Error::new(span, format!("unclosed annotation"))); + } + Ok(()) + } + + fn input_pos(&self, src: &str) -> usize { + src.as_ptr() as usize - self.input.as_ptr() as usize + } +} + +impl<'a> Parser<'a> { + /// Returns whether there are no more `Token` tokens to parse from this + /// [`Parser`].
+ /// + /// This indicates that either we've reached the end of the input, or we're + /// a sub-[`Parser`] inside of a parenthesized expression and we've hit the + /// `)` token. + /// + /// Note that if `false` is returned there *may* be more comments. Comments + /// and whitespace are not considered for whether this parser is empty. + pub fn is_empty(self) -> bool { + match self.cursor().advance_token() { + Some(Token::RParen(_)) | None => true, + Some(_) => false, // more tokens to parse! + } + } + + pub(crate) fn has_meaningful_tokens(self) -> bool { + self.buf.tokens[self.cursor().cur..] + .iter() + .any(|(t, _)| match t { + Token::Whitespace(_) | Token::LineComment(_) | Token::BlockComment(_) => false, + _ => true, + }) + } + + /// Parses a `T` from this [`Parser`]. + /// + /// This method has a trivial definition (it simply calls + /// [`T::parse`](Parse::parse)) but is here for syntactic purposes. This is + /// what you'll call 99% of the time in a [`Parse`] implementation in order + /// to parse sub-items. + /// + /// Typically you always want to use `?` with the result of this method, you + /// should not handle errors and decide what else to parse. To handle + /// branches in parsing, use [`Parser::peek`]. + /// + /// # Examples + /// + /// A good example of using `parse` is to see how the [`TableType`] type is + /// parsed in this crate. 
A [`TableType`] is defined in the official + /// specification as [`tabletype`][spec] and is defined as: + /// + /// [spec]: https://webassembly.github.io/spec/core/text/types.html#table-types + /// + /// ```text + /// tabletype ::= lim:limits et:reftype + /// ``` + /// + /// so to parse a [`TableType`] we recursively need to parse a [`Limits`] + /// and a [`RefType`] + /// + /// ``` + /// # use wast::*; + /// # use wast::parser::*; + /// struct TableType<'a> { + /// limits: Limits, + /// elem: RefType<'a>, + /// } + /// + /// impl<'a> Parse<'a> for TableType<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // parse the `lim` then `et` in sequence + /// Ok(TableType { + /// limits: parser.parse()?, + /// elem: parser.parse()?, + /// }) + /// } + /// } + /// ``` + /// + /// [`Limits`]: crate::ast::Limits + /// [`TableType`]: crate::ast::TableType + /// [`RefType`]: crate::ast::RefType + pub fn parse<T: Parse<'a>>(self) -> Result<T> { + T::parse(self) + } + + /// Performs a cheap test to see whether the current token in this stream is + /// `T`. + /// + /// This method can be used to efficiently determine what next to parse. The + /// [`Peek`] trait is defined for types which can be used to test if they're + /// the next item in the input stream. + /// + /// Nothing is actually parsed in this method, nor does this mutate the + /// state of this [`Parser`]. Instead, this simply performs a check. + /// + /// This method is frequently combined with the [`Parser::lookahead1`] + /// method to automatically produce nice error messages if some tokens + /// aren't found. + /// + /// # Examples + /// + /// For an example of using the `peek` method let's take a look at parsing + /// the [`Limits`] type. 
This is [defined in the official spec][spec] as: + /// + /// ```text + /// limits ::= n:u32 + /// | n:u32 m:u32 + /// ``` + /// + /// which means that it's either one `u32` token or two, so we need to know + /// whether to consume two tokens or one: + /// + /// ``` + /// # use wast::parser::*; + /// struct Limits { + /// min: u32, + /// max: Option<u32>, + /// } + /// + /// impl<'a> Parse<'a> for Limits { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Always parse the first number... + /// let min = parser.parse()?; + /// + /// // ... and then test if there's a second number before parsing + /// let max = if parser.peek::<u32>() { + /// Some(parser.parse()?) + /// } else { + /// None + /// }; + /// + /// Ok(Limits { min, max }) + /// } + /// } + /// ``` + /// + /// [spec]: https://webassembly.github.io/spec/core/text/types.html#limits + /// [`Limits`]: crate::ast::Limits + pub fn peek<T: Peek>(self) -> bool { + T::peek(self.cursor()) + } + + /// Same as the [`Parser::peek`] method, except checks the next token, not + /// the current token. + pub fn peek2<T: Peek>(self) -> bool { + let mut cursor = self.cursor(); + if cursor.advance_token().is_some() { + T::peek(cursor) + } else { + false + } + } + + /// A helper structure to perform a sequence of `peek` operations and if + /// they all fail produce a nice error message. + /// + /// This method purely exists for conveniently producing error messages and + /// provides no functionality that [`Parser::peek`] doesn't already give. + /// The [`Lookahead1`] structure has one main method [`Lookahead1::peek`], + /// which is the same method as [`Parser::peek`]. The difference is that the + /// [`Lookahead1::error`] method needs no arguments. + /// + /// # Examples + /// + /// Let's look at the parsing of [`Index`]. This type is either a `u32` or + /// an [`Id`] and is used in name resolution primarily. 
The [official + /// grammar for an index][spec] is: + /// + /// ```text + /// idx ::= x:u32 + /// | v:id + /// ``` + /// + /// Which is to say that an index is either a `u32` or an [`Id`]. When + /// parsing an [`Index`] we can do: + /// + /// ``` + /// # use wast::*; + /// # use wast::parser::*; + /// enum Index<'a> { + /// Num(u32), + /// Id(Id<'a>), + /// } + /// + /// impl<'a> Parse<'a> for Index<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// let mut l = parser.lookahead1(); + /// if l.peek::<Id>() { + /// Ok(Index::Id(parser.parse()?)) + /// } else if l.peek::<u32>() { + /// Ok(Index::Num(parser.parse()?)) + /// } else { + /// // produces error message of `expected identifier or u32` + /// Err(l.error()) + /// } + /// } + /// } + /// ``` + /// + /// [spec]: https://webassembly.github.io/spec/core/text/modules.html#indices + /// [`Index`]: crate::ast::Index + /// [`Id`]: crate::ast::Id + pub fn lookahead1(self) -> Lookahead1<'a> { + Lookahead1 { + attempts: Vec::new(), + parser: self, + } + } + + /// Parse an item surrounded by parentheses. + /// + /// WebAssembly's text format is all based on s-expressions, so naturally + /// you're going to want to parse a lot of parenthesized things! As noted in + /// the documentation of [`Parse`] you typically don't parse your own + /// surrounding `(` and `)` tokens, but the parser above you parsed them for + /// you. This is the method the parser above you uses. + /// + /// This method will parse a `(` token, and then call `f` on a sub-parser + /// which when finished asserts that a `)` token is the next token. This + /// requires that `f` consumes all tokens leading up to the paired `)`. + /// + /// Usage will often simply be `parser.parens(|p| p.parse())?` to + /// automatically parse a type within parentheses, but you can, as always, + /// go crazy and do whatever you'd like too. + /// + /// # Examples + /// + /// A good example of this is to see how a `Module` is parsed.
This isn't + /// the exact definition, but it's close enough! + /// + /// ``` + /// # use wast::*; + /// # use wast::parser::*; + /// struct Module<'a> { + /// fields: Vec<ModuleField<'a>>, + /// } + /// + /// impl<'a> Parse<'a> for Module<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Modules start out with a `module` keyword + /// parser.parse::<kw::module>()?; + /// + /// // And then everything else is `(field ...)`, so while we've got + /// // items left we continuously parse parenthesized items. + /// let mut fields = Vec::new(); + /// while !parser.is_empty() { + /// fields.push(parser.parens(|p| p.parse())?); + /// } + /// Ok(Module { fields }) + /// } + /// } + /// ``` + pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> { + self.buf.depth.set(self.buf.depth.get() + 1); + let before = self.buf.cur.get(); + let res = self.step(|cursor| { + let mut cursor = match cursor.lparen() { + Some(rest) => rest, + None => return Err(cursor.error("expected `(`")), + }; + cursor.parser.buf.cur.set(cursor.cur); + let result = f(cursor.parser)?; + cursor.cur = cursor.parser.buf.cur.get(); + match cursor.rparen() { + Some(rest) => Ok((result, rest)), + None => Err(cursor.error("expected `)`")), + } + }); + self.buf.depth.set(self.buf.depth.get() - 1); + if res.is_err() { + self.buf.cur.set(before); + } + return res; + } + + /// Return the depth of nested parens we've parsed so far. + /// + /// This is a low-level method that is only useful for implementing + /// recursion limits in custom parsers. + pub fn parens_depth(&self) -> usize { + self.buf.depth.get() + } + + fn cursor(self) -> Cursor<'a> { + Cursor { + parser: self, + cur: self.buf.cur.get(), + } + } + + /// A low-level parsing method you probably won't use. + /// + /// This is used to implement parsing of the most primitive types in the + /// [`ast`](crate::ast) module. 
You probably don't want to use this, but + /// probably want to use something like [`Parser::parse`] or + /// [`Parser::parens`]. + pub fn step<F, T>(self, f: F) -> Result<T> + where + F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>, + { + let (result, cursor) = f(self.cursor())?; + self.buf.cur.set(cursor.cur); + Ok(result) + } + + /// Creates an error whose line/column information is pointing at the + /// current token. + /// + /// This is used to produce human-readable error messages which point to the + /// right location in the input stream, and the `msg` here is arbitrary text + /// used to associate with the error and indicate why it was generated. + pub fn error(self, msg: impl fmt::Display) -> Error { + self.error_at(self.cursor().cur_span(), &msg) + } + + fn error_at(self, span: Span, msg: &dyn fmt::Display) -> Error { + Error::parse(span, self.buf.input, msg.to_string()) + } + + /// Returns the span of the current token + pub fn cur_span(&self) -> Span { + self.cursor().cur_span() + } + + /// Returns the span of the previous token + pub fn prev_span(&self) -> Span { + self.cursor().prev_span().unwrap_or(Span::from_offset(0)) + } + + /// Registers a new known annotation with this parser to allow parsing + /// annotations with this name. + /// + /// [WebAssembly annotations][annotation] are a proposal for the text format + /// which allows decorating the text format with custom structured + /// information. By default all annotations are ignored when parsing, but + /// the whole purpose of them is to sometimes parse them! + /// + /// To support parsing text annotations this method is used to allow + /// annotations and their tokens to *not* be skipped. Once an annotation is + /// registered with this method, then while the return value has not been + /// dropped (e.g. the scope of where this function is called) annotations + /// with the name `annotation` will be parse of the token stream and not + /// implicitly skipped. 
+ /// + /// # Skipping annotations + /// + /// The behavior of skipping unknown/unregistered annotations can be + /// somewhat subtle and surprising, so if you're interested in parsing + /// annotations it's important to point out the importance of this method + /// and where to call it. + /// + /// Generally when parsing tokens you'll be bottoming out in various + /// `Cursor` methods. These are all documented as advancing the stream as + /// much as possible to the next token, skipping "irrelevant stuff" like + /// comments, whitespace, etc. The `Cursor` methods will also skip unknown + /// annotations. This means that if you parse *any* token, it will skip over + /// any number of annotations that are unknown at all times. + /// + /// To parse an annotation you must, before parsing any token of the + /// annotation, register the annotation via this method. This includes the + /// beginning `(` token, which is otherwise skipped if the annotation isn't + /// marked as registered. Typically parser parse the *contents* of an + /// s-expression, so this means that the outer parser of an s-expression + /// must register the custom annotation name, rather than the inner parser. + /// + /// # Return + /// + /// This function returns an RAII guard which, when dropped, will unregister + /// the `annotation` given. Parsing `annotation` is only supported while the + /// returned value is still alive, and once dropped the parser will go back + /// to skipping annotations with the name `annotation`. 
+ /// + /// # Example + /// + /// Let's see an example of how the `@name` annotation is parsed for modules + /// to get an idea of how this works: + /// + /// ``` + /// # use wast::*; + /// # use wast::parser::*; + /// struct Module<'a> { + /// name: Option<NameAnnotation<'a>>, + /// } + /// + /// impl<'a> Parse<'a> for Module<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Modules start out with a `module` keyword + /// parser.parse::<kw::module>()?; + /// + /// // Next may be `(@name "foo")`. Typically this annotation would + /// // skipped, but we don't want it skipped, so we register it. + /// // Note that the parse implementation of + /// // `Option<NameAnnotation>` is the one that consumes the + /// // parentheses here. + /// let _r = parser.register_annotation("name"); + /// let name = parser.parse()?; + /// + /// // ... and normally you'd otherwise parse module fields here ... + /// + /// Ok(Module { name }) + /// } + /// } + /// ``` + /// + /// Another example is how we parse the `@custom` annotation. Note that this + /// is parsed as part of `ModuleField`, so note how the annotation is + /// registered *before* we parse the parentheses of the annotation. + /// + /// ``` + /// # use wast::*; + /// # use wast::parser::*; + /// struct Module<'a> { + /// fields: Vec<ModuleField<'a>>, + /// } + /// + /// impl<'a> Parse<'a> for Module<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Modules start out with a `module` keyword + /// parser.parse::<kw::module>()?; + /// + /// // register the `@custom` annotation *first* before we start + /// // parsing fields, because each field is contained in + /// // parentheses and to parse the parentheses of an annotation we + /// // have to known to not skip it. 
+    ///         // Note that because we have previously registered the `@custom`
+    ///         // annotation with the parser we know that `peek` methods like
+    ///         // this, working on the annotation token, are able to return
+    ///         // `true`.
+ /// + /// Does not take into account whitespace or comments. + pub(crate) fn prev_span(&self) -> Option<Span> { + let (token, _) = self.parser.buf.tokens.get(self.cur.checked_sub(1)?)?; + Some(Span { + offset: self.parser.buf.input_pos(token.src()), + }) + } + + /// Same as [`Parser::error`], but works with the current token in this + /// [`Cursor`] instead. + pub fn error(&self, msg: impl fmt::Display) -> Error { + self.parser.error_at(self.cur_span(), &msg) + } + + /// Attempts to advance this cursor if the current token is a `(`. + /// + /// If the current token is `(`, returns a new [`Cursor`] pointing at the + /// rest of the tokens in the stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn lparen(mut self) -> Option<Self> { + match self.advance_token()? { + Token::LParen(_) => Some(self), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a `)`. + /// + /// If the current token is `)`, returns a new [`Cursor`] pointing at the + /// rest of the tokens in the stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn rparen(mut self) -> Option<Self> { + match self.advance_token()? { + Token::RParen(_) => Some(self), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Id`](crate::lexer::Token) + /// + /// If the current token is `Id`, returns the identifier minus the leading + /// `$` character as well as a new [`Cursor`] pointing at the rest of the + /// tokens in the stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn id(mut self) -> Option<(&'a str, Self)> { + match self.advance_token()? 
{ + Token::Id(id) => Some((&id[1..], self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Keyword`](crate::lexer::Token) + /// + /// If the current token is `Keyword`, returns the keyword as well as a new + /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise + /// returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn keyword(mut self) -> Option<(&'a str, Self)> { + match self.advance_token()? { + Token::Keyword(id) => Some((id, self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Reserved`](crate::lexer::Token) + /// + /// If the current token is `Reserved`, returns the reserved token as well + /// as a new [`Cursor`] pointing at the rest of the tokens in the stream. + /// Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn reserved(mut self) -> Option<(&'a str, Self)> { + match self.advance_token()? { + Token::Reserved(id) => Some((id, self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Integer`](crate::lexer::Token) + /// + /// If the current token is `Integer`, returns the integer as well as a new + /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise + /// returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn integer(mut self) -> Option<(&'a Integer<'a>, Self)> { + match self.advance_token()? 
+    /// Note that this will skip *unknown* annotations. Only pre-registered
+    /// annotations will be returned here.
+ /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + /// + /// [annotation]: https://github.com/WebAssembly/annotations + pub fn annotation(self) -> Option<(&'a str, Self)> { + let (token, cursor) = self.reserved()?; + if !token.starts_with("@") || token.len() <= 1 { + return None; + } + match &self.parser.buf.tokens.get(self.cur.wrapping_sub(1))?.0 { + Token::LParen(_) => Some((&token[1..], cursor)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::LineComment`](crate::lexer::Token) or a + /// [`Token::BlockComment`](crate::lexer::Token) + /// + /// This function will only skip whitespace, no other tokens. + pub fn comment(mut self) -> Option<(&'a str, Self)> { + let comment = loop { + match &self.parser.buf.tokens.get(self.cur)?.0 { + Token::LineComment(c) | Token::BlockComment(c) => { + self.cur += 1; + break c; + } + Token::Whitespace(_) => { + self.cur += 1; + } + _ => return None, + } + }; + Some((comment, self)) + } + + fn advance_token(&mut self) -> Option<&'a Token<'a>> { + let known_annotations = self.parser.buf.known_annotations.borrow(); + let is_known_annotation = |name: &str| match known_annotations.get(name) { + Some(0) | None => false, + Some(_) => true, + }; + + loop { + let (token, next) = self.parser.buf.tokens.get(self.cur)?; + + // If we're currently pointing at a token, and it's not the start + // of an annotation, then we return that token and advance + // ourselves to just after that token. + match token { + Token::Whitespace(_) | Token::LineComment(_) | Token::BlockComment(_) => {} + _ => match self.annotation_start() { + Some(n) if !is_known_annotation(n) => {} + _ => { + self.cur += 1; + return Some(token); + } + }, + } + + // ... otherwise we need to skip the current token, and possibly + // more. Here we're skipping whitespace, comments, annotations, etc. 
+ // Basically stuff that's intended to not be that relevant to the + // text format. This is a pretty common operation, though, and we + // may do it multiple times through peeks and such. As a result + // this is somewhat cached. + // + // The `next` field, if "unknown", means we haven't calculated the + // next token. Otherwise it's an index of where to resume searching + // for the next token. + // + // Note that this entire operation happens in a loop (hence the + // "somewhat cached") because the set of known annotations is + // dynamic and we can't cache which annotations are skipped. What we + // can do though is cache the number of tokens in the annotation so + // we know how to skip ahead of it. + match next.get() { + NextTokenAt::Unknown => match self.find_next() { + Some(i) => { + next.set(NextTokenAt::Index(i)); + self.cur = i; + } + None => { + next.set(NextTokenAt::Eof); + return None; + } + }, + NextTokenAt::Eof => return None, + NextTokenAt::Index(i) => self.cur = i, + } + } + } + + fn annotation_start(&self) -> Option<&'a str> { + match self.parser.buf.tokens.get(self.cur).map(|p| &p.0) { + Some(Token::LParen(_)) => {} + _ => return None, + } + let reserved = match self.parser.buf.tokens.get(self.cur + 1).map(|p| &p.0) { + Some(Token::Reserved(n)) => n, + _ => return None, + }; + if reserved.starts_with("@") && reserved.len() > 1 { + Some(&reserved[1..]) + } else { + None + } + } + + /// Finds the next "real" token from the current position onwards. + /// + /// This is a somewhat expensive operation to call quite a lot, so it's + /// cached in the token list. See the comment above in `advance_token` for + /// how this works. + /// + /// Returns the index of the next relevant token to parse + fn find_next(mut self) -> Option<usize> { + // If we're pointing to the start of annotation we need to skip it + // in its entirety, so match the parentheses and figure out where + // the annotation ends. 
+            // and otherwise we skip all comments/whitespace and only get
+            // really interested once a normal `Token` pops up.
+ pub fn error(self) -> Error { + match self.attempts.len() { + 0 => { + if self.parser.is_empty() { + self.parser.error("unexpected end of input") + } else { + self.parser.error("unexpected token") + } + } + 1 => { + let message = format!("unexpected token, expected {}", self.attempts[0]); + self.parser.error(&message) + } + 2 => { + let message = format!( + "unexpected token, expected {} or {}", + self.attempts[0], self.attempts[1] + ); + self.parser.error(&message) + } + _ => { + let join = self.attempts.join(", "); + let message = format!("unexpected token, expected one of: {}", join); + self.parser.error(&message) + } + } + } +} + +impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> { + fn parse(parser: Parser<'a>) -> Result<Option<T>> { + if parser.peek::<T>() { + Ok(Some(parser.parse()?)) + } else { + Ok(None) + } + } +} diff --git a/third_party/rust/wast/src/resolve/aliases.rs b/third_party/rust/wast/src/resolve/aliases.rs new file mode 100644 index 0000000000..91d386517b --- /dev/null +++ b/third_party/rust/wast/src/resolve/aliases.rs @@ -0,0 +1,286 @@ +use crate::ast::*; +use crate::resolve::gensym; +use std::collections::{hash_map::Entry, HashMap}; + +pub fn run(fields: &mut Vec<ModuleField>) { + let mut cur = 0; + let mut cx = Expander::default(); + + // Note that insertion here is somewhat tricky. We're injecting aliases + // which will affect the index spaces for each kind of item being aliased. + // In the final binary aliases will come before all locally defined items, + // notably via the sorting in binary emission of this crate. To account for + // this index space behavior we need to ensure that aliases all appear at + // the right place in the module. + // + // The general algorithm here is that aliases discovered in the "header" of + // the module, e.g. imports/aliases/types/etc, are all inserted preceding + // the field that the alias is found within. 
+    // this isn't the cleanest algorithm and probably isn't the final form of
+    // this pass. It's hoped that discussion on WebAssembly/module-linking#25
+    // might lead to a good solution.
{ + fields.insert(i, item); + i += 1; + } + } + assert!(cx.to_prepend.is_empty()); +} + +#[derive(Default)] +struct Expander<'a> { + to_prepend: Vec<ModuleField<'a>>, + instances: HashMap<(Index<'a>, &'a str, ExportKind), Index<'a>>, + parents: HashMap<(Index<'a>, Index<'a>, ExportKind), Index<'a>>, +} + +impl<'a> Expander<'a> { + fn process(&mut self, field: &mut ModuleField<'a>) { + match field { + ModuleField::Alias(a) => { + let id = gensym::fill(a.span, &mut a.id); + match &mut a.kind { + AliasKind::InstanceExport { + instance, + export, + kind, + } => { + self.expand(instance); + self.instances + .insert((*instance.unwrap_index(), export, *kind), id.into()); + } + AliasKind::Outer { + module, + index, + kind, + } => { + self.parents.insert((*module, *index, *kind), id.into()); + } + } + } + + ModuleField::Instance(i) => { + if let InstanceKind::Inline { module, args } = &mut i.kind { + self.expand(module); + for arg in args { + self.expand(&mut arg.index); + } + } + } + + ModuleField::Elem(e) => { + if let ElemKind::Active { table, .. } = &mut e.kind { + self.expand(table); + } + match &mut e.payload { + ElemPayload::Indices(funcs) => { + for func in funcs { + self.expand(func); + } + } + ElemPayload::Exprs { exprs, .. } => { + for func in exprs { + if let Some(func) = func { + self.expand(func); + } + } + } + } + } + + ModuleField::Data(e) => { + if let DataKind::Active { memory, .. } = &mut e.kind { + self.expand(memory); + } + } + + ModuleField::Export(e) => self.expand(&mut e.index), + + ModuleField::Func(f) => { + self.expand_type_use(&mut f.ty); + if let FuncKind::Inline { expression, .. 
} = &mut f.kind { + self.expand_expr(expression); + } + } + + ModuleField::Import(i) => self.expand_item_sig(&mut i.item), + + ModuleField::Global(g) => { + if let GlobalKind::Inline(expr) = &mut g.kind { + self.expand_expr(expr); + } + } + + ModuleField::Start(s) => self.expand(s), + + ModuleField::Event(e) => match &mut e.ty { + EventType::Exception(t) => self.expand_type_use(t), + }, + + ModuleField::NestedModule(m) => match &mut m.kind { + NestedModuleKind::Import { ty, .. } => self.expand_type_use(ty), + NestedModuleKind::Inline { fields } => run(fields), + }, + + ModuleField::Custom(_) + | ModuleField::Memory(_) + | ModuleField::Table(_) + | ModuleField::Type(_) => {} + } + } + + fn expand_item_sig(&mut self, sig: &mut ItemSig<'a>) { + match &mut sig.kind { + ItemKind::Func(t) => self.expand_type_use(t), + ItemKind::Module(t) => self.expand_type_use(t), + ItemKind::Instance(t) => self.expand_type_use(t), + ItemKind::Table(_) => {} + ItemKind::Memory(_) => {} + ItemKind::Global(_) => {} + ItemKind::Event(_) => {} + } + } + + fn expand_type_use<T>(&mut self, ty: &mut TypeUse<'a, T>) { + if let Some(index) = &mut ty.index { + self.expand(index); + } + } + + fn expand_expr(&mut self, expr: &mut Expression<'a>) { + for instr in expr.instrs.iter_mut() { + self.expand_instr(instr); + } + } + + fn expand_instr(&mut self, instr: &mut Instruction<'a>) { + use Instruction::*; + + if let Some(m) = instr.memarg_mut() { + self.expand(&mut m.memory); + } + + match instr { + Call(i) | ReturnCall(i) | RefFunc(i) => self.expand(&mut i.0), + CallIndirect(i) | ReturnCallIndirect(i) => { + self.expand(&mut i.table); + self.expand_type_use(&mut i.ty); + } + TableInit(i) => self.expand(&mut i.table), + MemoryInit(i) => self.expand(&mut i.mem), + TableCopy(i) => { + self.expand(&mut i.src); + self.expand(&mut i.dst); + } + MemoryCopy(i) => { + self.expand(&mut i.src); + self.expand(&mut i.dst); + } + GlobalSet(g) | GlobalGet(g) => self.expand(&mut g.0), + TableGet(t) | TableSet(t) | 
TableFill(t) | TableSize(t) | TableGrow(t) => { + self.expand(&mut t.dst) + } + + MemorySize(m) | MemoryGrow(m) | MemoryFill(m) => self.expand(&mut m.mem), + + _ => {} + } + } + + fn expand<T>(&mut self, item: &mut ItemRef<'a, T>) + where + T: Into<ExportKind> + Copy, + { + match item { + ItemRef::Outer { kind, module, idx } => { + let key = (*module, *idx, (*kind).into()); + let idx = match self.parents.entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(v) => { + let span = idx.span(); + let id = gensym::gen(span); + self.to_prepend.push(ModuleField::Alias(Alias { + span, + id: Some(id), + name: None, + kind: AliasKind::Outer { + module: *module, + index: *idx, + kind: (*kind).into(), + }, + })); + *v.insert(Index::Id(id)) + } + }; + *item = ItemRef::Item { + kind: *kind, + idx, + exports: Vec::new(), + }; + } + ItemRef::Item { kind, idx, exports } => { + let mut cur = *idx; + let len = exports.len(); + for (i, export) in exports.drain(..).enumerate() { + let kind = if i < len - 1 { + ExportKind::Instance + } else { + (*kind).into() + }; + let key = (cur, export, kind); + cur = match self.instances.entry(key) { + Entry::Occupied(e) => *e.get(), + Entry::Vacant(v) => { + let span = idx.span(); + let id = gensym::gen(span); + self.to_prepend.push(ModuleField::Alias(Alias { + span, + id: Some(id), + name: None, + kind: AliasKind::InstanceExport { + kind, + instance: ItemRef::Item { + kind: kw::instance(span), + idx: cur, + exports: Vec::new(), + }, + export, + }, + })); + *v.insert(Index::Id(id)) + } + }; + } + *idx = cur; + } + } + } +} diff --git a/third_party/rust/wast/src/resolve/deinline_import_export.rs b/third_party/rust/wast/src/resolve/deinline_import_export.rs new file mode 100644 index 0000000000..107dd5c3d5 --- /dev/null +++ b/third_party/rust/wast/src/resolve/deinline_import_export.rs @@ -0,0 +1,270 @@ +use crate::ast::*; +use crate::resolve::gensym; +use std::mem; + +pub fn run(fields: &mut Vec<ModuleField>) { + let mut cur = 0; + let mut 
to_append = Vec::new(); + while cur < fields.len() { + let item = &mut fields[cur]; + match item { + ModuleField::Func(f) => { + for name in f.exports.names.drain(..) { + to_append.push(export(f.span, name, ExportKind::Func, &mut f.id)); + } + match f.kind { + FuncKind::Import(import) => { + *item = ModuleField::Import(Import { + span: f.span, + module: import.module, + field: import.field, + item: ItemSig { + span: f.span, + id: f.id, + name: f.name, + kind: ItemKind::Func(f.ty.clone()), + }, + }); + } + FuncKind::Inline { .. } => {} + } + } + + ModuleField::Memory(m) => { + for name in m.exports.names.drain(..) { + to_append.push(export(m.span, name, ExportKind::Memory, &mut m.id)); + } + match m.kind { + MemoryKind::Import { import, ty } => { + *item = ModuleField::Import(Import { + span: m.span, + module: import.module, + field: import.field, + item: ItemSig { + span: m.span, + id: m.id, + name: None, + kind: ItemKind::Memory(ty), + }, + }); + } + // If data is defined inline insert an explicit `data` module + // field here instead, switching this to a `Normal` memory. + MemoryKind::Inline { is_32, ref data } => { + let len = data.iter().map(|l| l.len()).sum::<usize>() as u32; + let pages = (len + page_size() - 1) / page_size(); + let kind = MemoryKind::Normal(if is_32 { + MemoryType::B32 { + limits: Limits { + min: pages, + max: Some(pages), + }, + shared: false, + } + } else { + MemoryType::B64 { + limits: Limits64 { + min: u64::from(pages), + max: Some(u64::from(pages)), + }, + shared: false, + } + }); + let data = match mem::replace(&mut m.kind, kind) { + MemoryKind::Inline { data, .. 
+                // If the element segment is defined inline, insert an explicit
+                // `elem` module field here instead, switching this to a
+                // `Normal` table.
{ + to_append.push(export(g.span, name, ExportKind::Global, &mut g.id)); + } + match g.kind { + GlobalKind::Import(import) => { + *item = ModuleField::Import(Import { + span: g.span, + module: import.module, + field: import.field, + item: ItemSig { + span: g.span, + id: g.id, + name: None, + kind: ItemKind::Global(g.ty), + }, + }); + } + GlobalKind::Inline { .. } => {} + } + } + + ModuleField::Event(e) => { + for name in e.exports.names.drain(..) { + to_append.push(export(e.span, name, ExportKind::Event, &mut e.id)); + } + } + + ModuleField::Instance(i) => { + for name in i.exports.names.drain(..) { + to_append.push(export(i.span, name, ExportKind::Instance, &mut i.id)); + } + match &mut i.kind { + InstanceKind::Import { import, ty } => { + *item = ModuleField::Import(Import { + span: i.span, + module: import.module, + field: import.field, + item: ItemSig { + span: i.span, + id: i.id, + name: None, + kind: ItemKind::Instance(mem::replace( + ty, + TypeUse::new_with_index(Index::Num(0, Span::from_offset(0))), + )), + }, + }); + } + InstanceKind::Inline { .. } => {} + } + } + + ModuleField::NestedModule(m) => { + for name in m.exports.names.drain(..) { + to_append.push(export(m.span, name, ExportKind::Module, &mut m.id)); + } + match &mut m.kind { + NestedModuleKind::Import { import, ty } => { + *item = ModuleField::Import(Import { + span: m.span, + module: import.module, + field: import.field, + item: ItemSig { + span: m.span, + id: m.id, + name: m.name, + kind: ItemKind::Module(mem::replace( + ty, + TypeUse::new_with_index(Index::Num(0, Span::from_offset(0))), + )), + }, + }); + } + NestedModuleKind::Inline { fields, .. 
} => { + run(fields); + } + }; + } + + ModuleField::Import(_) + | ModuleField::Type(_) + | ModuleField::Export(_) + | ModuleField::Alias(_) + | ModuleField::Start(_) + | ModuleField::Elem(_) + | ModuleField::Data(_) + | ModuleField::Custom(_) => {} + } + + fields.splice(cur..cur, to_append.drain(..)); + cur += 1; + } + + assert!(to_append.is_empty()); + + fn page_size() -> u32 { + 1 << 16 + } +} + +fn export<'a>( + span: Span, + name: &'a str, + kind: ExportKind, + id: &mut Option<Id<'a>>, +) -> ModuleField<'a> { + let id = gensym::fill(span, id); + ModuleField::Export(Export { + span, + name, + index: item_ref(kind, id), + }) +} + +fn item_ref<'a, K>(kind: K, id: impl Into<Index<'a>>) -> ItemRef<'a, K> { + ItemRef::Item { + kind, + idx: id.into(), + exports: Vec::new(), + } +} diff --git a/third_party/rust/wast/src/resolve/gensym.rs b/third_party/rust/wast/src/resolve/gensym.rs new file mode 100644 index 0000000000..5f6d94133a --- /dev/null +++ b/third_party/rust/wast/src/resolve/gensym.rs @@ -0,0 +1,20 @@ +use crate::ast::{Id, Span}; +use std::cell::Cell; + +thread_local!(static NEXT: Cell<u32> = Cell::new(0)); + +pub fn reset() { + NEXT.with(|c| c.set(0)); +} + +pub fn gen(span: Span) -> Id<'static> { + NEXT.with(|next| { + let gen = next.get() + 1; + next.set(gen); + Id::gensym(span, gen) + }) +} + +pub fn fill<'a>(span: Span, slot: &mut Option<Id<'a>>) -> Id<'a> { + *slot.get_or_insert_with(|| gen(span)) +} diff --git a/third_party/rust/wast/src/resolve/mod.rs b/third_party/rust/wast/src/resolve/mod.rs new file mode 100644 index 0000000000..d92ca5fd51 --- /dev/null +++ b/third_party/rust/wast/src/resolve/mod.rs @@ -0,0 +1,133 @@ +use crate::ast::*; +use crate::Error; + +mod aliases; +mod deinline_import_export; +mod gensym; +mod names; +mod types; + +#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] +pub enum Ns { + Func, + Table, + Global, + Memory, + Module, + Instance, + Event, + Type, +} + +impl Ns { + fn from_export(kind: &ExportKind) -> Ns { + match 
kind { + ExportKind::Func => Ns::Func, + ExportKind::Table => Ns::Table, + ExportKind::Global => Ns::Global, + ExportKind::Memory => Ns::Memory, + ExportKind::Instance => Ns::Instance, + ExportKind::Module => Ns::Module, + ExportKind::Event => Ns::Event, + ExportKind::Type => Ns::Type, + } + } +} + +pub fn resolve<'a>(module: &mut Module<'a>) -> Result<Names<'a>, Error> { + let fields = match &mut module.kind { + ModuleKind::Text(fields) => fields, + _ => return Ok(Default::default()), + }; + + // Ensure that each resolution of a module is deterministic in the names + // that it generates by resetting our thread-local symbol generator. + gensym::reset(); + + // First up, de-inline import/export annotations. + // + // This ensures we only have to deal with inline definitions and to + // calculate exports we only have to look for a particular kind of module + // field. + deinline_import_export::run(fields); + + aliases::run(fields); + + // With a canonical form of imports make sure that imports are all listed + // first. + for i in 1..fields.len() { + let span = match &fields[i] { + ModuleField::Import(i) => i.span, + _ => continue, + }; + let name = match &fields[i - 1] { + ModuleField::Memory(_) => "memory", + ModuleField::Func(_) => "function", + ModuleField::Table(_) => "table", + ModuleField::Global(_) => "global", + _ => continue, + }; + return Err(Error::new(span, format!("import after {}", name))); + } + + // Expand all `TypeUse` annotations so all necessary `type` nodes are + // present in the AST. + types::expand(fields); + + // Perform name resolution over all `Index` items to resolve them all to + // indices instead of symbolic names. + let resolver = names::resolve(module.id, fields)?; + Ok(Names { resolver }) +} + +/// Representation of the results of name resolution for a module. 
+/// +/// This structure is returned from the +/// [`Module::resolve`](crate::Module::resolve) function and can be used to +/// resolve your own name arguments if you have any. +#[derive(Default)] +pub struct Names<'a> { + resolver: names::Resolver<'a>, +} + +impl<'a> Names<'a> { + /// Resolves `idx` within the function namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the function namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. + pub fn resolve_func(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Func)?; + Ok(()) + } + + /// Resolves `idx` within the memory namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the memory namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. + pub fn resolve_memory(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Memory)?; + Ok(()) + } + + /// Resolves `idx` within the table namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the table namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. + pub fn resolve_table(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Table)?; + Ok(()) + } + + /// Resolves `idx` within the global namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the global namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. 
+ pub fn resolve_global(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Global)?; + Ok(()) + } +} diff --git a/third_party/rust/wast/src/resolve/names.rs b/third_party/rust/wast/src/resolve/names.rs new file mode 100644 index 0000000000..91d710634a --- /dev/null +++ b/third_party/rust/wast/src/resolve/names.rs @@ -0,0 +1,1058 @@ +use crate::ast::*; +use crate::resolve::Ns; +use crate::Error; +use std::collections::{HashMap, HashSet}; + +pub fn resolve<'a>( + id: Option<Id<'a>>, + fields: &mut Vec<ModuleField<'a>>, +) -> Result<Resolver<'a>, Error> { + let mut names = HashMap::new(); + let mut parents = Parents { + prev: None, + cur_id: id, + depth: 0, + names: &mut names, + }; + let mut resolver = Resolver::default(); + resolver.process(&mut parents, fields)?; + Ok(resolver) +} + +/// Context structure used to perform name resolution. +#[derive(Default)] +pub struct Resolver<'a> { + // Namespaces within each module. Note that each namespace carries with it + // information about the signature of the item in that namespace. The + // signature is later used to synthesize the type of a module and inject + // type annotations if necessary. + funcs: Namespace<'a>, + globals: Namespace<'a>, + tables: Namespace<'a>, + memories: Namespace<'a>, + types: Namespace<'a>, + events: Namespace<'a>, + modules: Namespace<'a>, + instances: Namespace<'a>, + datas: Namespace<'a>, + elems: Namespace<'a>, + fields: Namespace<'a>, + type_info: Vec<TypeInfo<'a>>, + implicit_instances: HashSet<&'a str>, +} + +impl<'a> Resolver<'a> { + fn process( + &mut self, + parents: &mut Parents<'a, '_>, + fields: &mut Vec<ModuleField<'a>>, + ) -> Result<(), Error> { + // Number everything in the module, recording what names correspond to + // what indices. + for field in fields.iter_mut() { + self.register(field)?; + } + + // Then we can replace all our `Index::Id` instances with `Index::Num` + // in the AST. Note that this also recurses into nested modules. 
+ for field in fields.iter_mut() { + self.resolve_field(field, parents)?; + } + Ok(()) + } + + fn register(&mut self, item: &ModuleField<'a>) -> Result<(), Error> { + match item { + ModuleField::Import(i) => { + // Account for implicit instances created by two-level imports + // first. At this time they never have a name. + if i.field.is_some() { + if self.implicit_instances.insert(i.module) { + self.instances.register(None, "instance")?; + } + } + match &i.item.kind { + ItemKind::Func(_) => self.funcs.register(i.item.id, "func")?, + ItemKind::Memory(_) => self.memories.register(i.item.id, "memory")?, + ItemKind::Table(_) => self.tables.register(i.item.id, "table")?, + ItemKind::Global(_) => self.globals.register(i.item.id, "global")?, + ItemKind::Event(_) => self.events.register(i.item.id, "event")?, + ItemKind::Module(_) => self.modules.register(i.item.id, "module")?, + ItemKind::Instance(_) => self.instances.register(i.item.id, "instance")?, + } + } + ModuleField::Global(i) => self.globals.register(i.id, "global")?, + ModuleField::Memory(i) => self.memories.register(i.id, "memory")?, + ModuleField::Func(i) => self.funcs.register(i.id, "func")?, + ModuleField::Table(i) => self.tables.register(i.id, "table")?, + ModuleField::NestedModule(m) => self.modules.register(m.id, "module")?, + ModuleField::Instance(i) => self.instances.register(i.id, "instance")?, + + ModuleField::Type(i) => { + match &i.def { + // For GC structure types we need to be sure to populate the + // field namespace here as well. 
+ // + // The field namespace is global, but the resolved indices + // are relative to the struct they are defined in + TypeDef::Struct(r#struct) => { + for (i, field) in r#struct.fields.iter().enumerate() { + if let Some(id) = field.id { + self.fields.register_specific(id, i as u32, "field")?; + } + } + } + + TypeDef::Instance(_) + | TypeDef::Array(_) + | TypeDef::Func(_) + | TypeDef::Module(_) => {} + } + + // Record function signatures as we see them to so we can + // generate errors for mismatches in references such as + // `call_indirect`. + match &i.def { + TypeDef::Func(f) => { + let params = f.params.iter().map(|p| p.2).collect(); + let results = f.results.clone(); + self.type_info.push(TypeInfo::Func { params, results }); + } + _ => self.type_info.push(TypeInfo::Other), + } + + self.types.register(i.id, "type")? + } + ModuleField::Elem(e) => self.elems.register(e.id, "elem")?, + ModuleField::Data(d) => self.datas.register(d.id, "data")?, + ModuleField::Event(e) => self.events.register(e.id, "event")?, + ModuleField::Alias(a) => match a.item_kind() { + ExportKind::Func => self.funcs.register(a.id, "func")?, + ExportKind::Table => self.tables.register(a.id, "table")?, + ExportKind::Memory => self.memories.register(a.id, "memory")?, + ExportKind::Global => self.globals.register(a.id, "global")?, + ExportKind::Instance => self.instances.register(a.id, "instance")?, + ExportKind::Module => self.modules.register(a.id, "module")?, + ExportKind::Event => self.events.register(a.id, "event")?, + ExportKind::Type => { + self.type_info.push(TypeInfo::Other); + self.types.register(a.id, "type")? + } + }, + + // These fields don't define any items in any index space. 
+ ModuleField::Export(_) | ModuleField::Start(_) | ModuleField::Custom(_) => { + return Ok(()) + } + }; + + Ok(()) + } + + fn resolve_field( + &self, + field: &mut ModuleField<'a>, + parents: &mut Parents<'a, '_>, + ) -> Result<(), Error> { + match field { + ModuleField::Import(i) => { + self.resolve_item_sig(&mut i.item)?; + Ok(()) + } + + ModuleField::Type(ty) => { + match &mut ty.def { + TypeDef::Func(func) => func.resolve(self)?, + TypeDef::Struct(struct_) => { + for field in &mut struct_.fields { + self.resolve_storagetype(&mut field.ty)?; + } + } + TypeDef::Array(array) => self.resolve_storagetype(&mut array.ty)?, + TypeDef::Module(m) => m.resolve(self)?, + TypeDef::Instance(i) => i.resolve(self)?, + } + Ok(()) + } + + ModuleField::Func(f) => { + let (idx, inline) = self.resolve_type_use(&mut f.ty)?; + let n = match idx { + Index::Num(n, _) => *n, + Index::Id(_) => panic!("expected `Num`"), + }; + if let FuncKind::Inline { locals, expression } = &mut f.kind { + // Resolve (ref T) in locals + for local in locals.iter_mut() { + self.resolve_valtype(&mut local.ty)?; + } + + // Build a scope with a local namespace for the function + // body + let mut scope = Namespace::default(); + + // Parameters come first in the scope... + if let Some(inline) = &inline { + for (id, _, _) in inline.params.iter() { + scope.register(*id, "local")?; + } + } else if let Some(TypeInfo::Func { params, .. }) = + self.type_info.get(n as usize) + { + for _ in 0..params.len() { + scope.register(None, "local")?; + } + } + + // .. followed by locals themselves + for local in locals { + scope.register(local.id, "local")?; + } + + // Initialize the expression resolver with this scope + let mut resolver = ExprResolver::new(self, scope); + + // and then we can resolve the expression! + resolver.resolve(expression)?; + + // specifically save the original `sig`, if it was present, + // because that's what we're using for local names. 
+ f.ty.inline = inline; + } + Ok(()) + } + + ModuleField::Elem(e) => { + match &mut e.kind { + ElemKind::Active { table, offset } => { + self.resolve_item_ref(table)?; + self.resolve_expr(offset)?; + } + ElemKind::Passive { .. } | ElemKind::Declared { .. } => {} + } + match &mut e.payload { + ElemPayload::Indices(elems) => { + for idx in elems { + self.resolve_item_ref(idx)?; + } + } + ElemPayload::Exprs { exprs, ty } => { + for funcref in exprs { + if let Some(idx) = funcref { + self.resolve_item_ref(idx)?; + } + } + self.resolve_heaptype(&mut ty.heap)?; + } + } + Ok(()) + } + + ModuleField::Data(d) => { + if let DataKind::Active { memory, offset } = &mut d.kind { + self.resolve_item_ref(memory)?; + self.resolve_expr(offset)?; + } + Ok(()) + } + + ModuleField::Start(i) => { + self.resolve_item_ref(i)?; + Ok(()) + } + + ModuleField::Export(e) => { + self.resolve_item_ref(&mut e.index)?; + Ok(()) + } + + ModuleField::Global(g) => { + self.resolve_valtype(&mut g.ty.ty)?; + if let GlobalKind::Inline(expr) = &mut g.kind { + self.resolve_expr(expr)?; + } + Ok(()) + } + + ModuleField::Event(e) => { + match &mut e.ty { + EventType::Exception(ty) => { + self.resolve_type_use(ty)?; + } + } + Ok(()) + } + + ModuleField::Instance(i) => { + if let InstanceKind::Inline { module, args } = &mut i.kind { + self.resolve_item_ref(module)?; + for arg in args { + self.resolve_item_ref(&mut arg.index)?; + } + } + Ok(()) + } + + ModuleField::NestedModule(m) => { + let fields = match &mut m.kind { + NestedModuleKind::Inline { fields } => fields, + NestedModuleKind::Import { .. } => panic!("should only be inline"), + }; + Resolver::default().process(&mut parents.push(self, m.id), fields)?; + Ok(()) + } + + ModuleField::Table(t) => { + if let TableKind::Normal(t) = &mut t.kind { + self.resolve_heaptype(&mut t.elem.heap)?; + } + Ok(()) + } + + ModuleField::Alias(a) => { + match &mut a.kind { + AliasKind::InstanceExport { instance, .. 
} => { + self.resolve_item_ref(instance)?; + } + AliasKind::Outer { + module, + index, + kind, + } => { + match (index, module) { + // If both indices are numeric then don't try to + // resolve anything since we could fail to walk up + // the parent chain, producing a wat2wasm error that + // should probably be a wasm validation error. + (Index::Num(..), Index::Num(..)) => {} + (index, module) => { + parents + .resolve(module)? + .resolve(index, Ns::from_export(kind))?; + } + } + } + } + Ok(()) + } + + ModuleField::Memory(_) | ModuleField::Custom(_) => Ok(()), + } + } + + fn resolve_valtype(&self, ty: &mut ValType<'a>) -> Result<(), Error> { + match ty { + ValType::Ref(ty) => self.resolve_heaptype(&mut ty.heap)?, + ValType::Rtt(_d, i) => { + self.resolve(i, Ns::Type)?; + } + _ => {} + } + Ok(()) + } + + fn resolve_heaptype(&self, ty: &mut HeapType<'a>) -> Result<(), Error> { + match ty { + HeapType::Index(i) => { + self.resolve(i, Ns::Type)?; + } + _ => {} + } + Ok(()) + } + + fn resolve_storagetype(&self, ty: &mut StorageType<'a>) -> Result<(), Error> { + match ty { + StorageType::Val(ty) => self.resolve_valtype(ty)?, + _ => {} + } + Ok(()) + } + + fn resolve_item_sig(&self, item: &mut ItemSig<'a>) -> Result<(), Error> { + match &mut item.kind { + ItemKind::Func(t) | ItemKind::Event(EventType::Exception(t)) => { + self.resolve_type_use(t)?; + } + ItemKind::Global(t) => self.resolve_valtype(&mut t.ty)?, + ItemKind::Instance(t) => { + self.resolve_type_use(t)?; + } + ItemKind::Module(m) => { + self.resolve_type_use(m)?; + } + ItemKind::Table(t) => { + self.resolve_heaptype(&mut t.elem.heap)?; + } + ItemKind::Memory(_) => {} + } + Ok(()) + } + + fn resolve_type_use<'b, T>( + &self, + ty: &'b mut TypeUse<'a, T>, + ) -> Result<(&'b Index<'a>, Option<T>), Error> + where + T: TypeReference<'a>, + { + let idx = ty.index.as_mut().unwrap(); + let idx = self.resolve_item_ref(idx)?; + + // If the type was listed inline *and* it was specified via a type index + // we need to 
assert they're the same. + // + // Note that we resolve the type first to transform all names to + // indices to ensure that all the indices line up. + if let Some(inline) = &mut ty.inline { + inline.resolve(self)?; + inline.check_matches(idx, self)?; + } + + Ok((idx, ty.inline.take())) + } + + fn resolve_expr(&self, expr: &mut Expression<'a>) -> Result<(), Error> { + ExprResolver::new(self, Namespace::default()).resolve(expr) + } + + pub fn resolve(&self, idx: &mut Index<'a>, ns: Ns) -> Result<u32, Error> { + match ns { + Ns::Func => self.funcs.resolve(idx, "func"), + Ns::Table => self.tables.resolve(idx, "table"), + Ns::Global => self.globals.resolve(idx, "global"), + Ns::Memory => self.memories.resolve(idx, "memory"), + Ns::Instance => self.instances.resolve(idx, "instance"), + Ns::Module => self.modules.resolve(idx, "module"), + Ns::Event => self.events.resolve(idx, "event"), + Ns::Type => self.types.resolve(idx, "type"), + } + } + + fn resolve_item_ref<'b, K>(&self, item: &'b mut ItemRef<'a, K>) -> Result<&'b Index<'a>, Error> + where + K: Into<ExportKind> + Copy, + { + match item { + ItemRef::Item { idx, kind, exports } => { + debug_assert!(exports.len() == 0); + self.resolve( + idx, + match (*kind).into() { + ExportKind::Func => Ns::Func, + ExportKind::Table => Ns::Table, + ExportKind::Global => Ns::Global, + ExportKind::Memory => Ns::Memory, + ExportKind::Instance => Ns::Instance, + ExportKind::Module => Ns::Module, + ExportKind::Event => Ns::Event, + ExportKind::Type => Ns::Type, + }, + )?; + Ok(idx) + } + // should be expanded by now + ItemRef::Outer { .. 
} => unreachable!(), + } + } +} + +#[derive(Default)] +pub struct Namespace<'a> { + names: HashMap<Id<'a>, u32>, + count: u32, +} + +impl<'a> Namespace<'a> { + fn register(&mut self, name: Option<Id<'a>>, desc: &str) -> Result<u32, Error> { + let index = self.alloc(); + if let Some(name) = name { + if let Some(_prev) = self.names.insert(name, index) { + // FIXME: temporarily allow duplicately-named data and element + // segments. This is a sort of dumb hack to get the spec test + // suite working (ironically). + // + // So as background, the text format disallows duplicate + // identifiers, causing a parse error if they're found. There + // are two tests currently upstream, however, data.wast and + // elem.wast, which *look* like they have duplicately named + // element and data segments. These tests, however, are using + // pre-bulk-memory syntax where a bare identifier was the + // table/memory being initialized. In post-bulk-memory this + // identifier is the name of the segment. Since we implement + // post-bulk-memory features that means that we're parsing the + // memory/table-to-initialize as the name of the segment. + // + // This is technically incorrect behavior but no one is + // hopefully relying on this too much. To get the spec tests + // passing we ignore errors for elem/data segments. Once the + // spec tests get updated enough we can remove this condition + // and return errors for them. 
+ if desc != "elem" && desc != "data" { + return Err(Error::new( + name.span(), + format!("duplicate {} identifier", desc), + )); + } + } + } + Ok(index) + } + + fn alloc(&mut self) -> u32 { + let index = self.count; + self.count += 1; + return index; + } + + fn register_specific(&mut self, name: Id<'a>, index: u32, desc: &str) -> Result<(), Error> { + if let Some(_prev) = self.names.insert(name, index) { + return Err(Error::new( + name.span(), + format!("duplicate identifier for {}", desc), + )); + } + Ok(()) + } + + fn resolve(&self, idx: &mut Index<'a>, desc: &str) -> Result<u32, Error> { + let id = match idx { + Index::Num(n, _) => return Ok(*n), + Index::Id(id) => id, + }; + if let Some(&n) = self.names.get(id) { + *idx = Index::Num(n, id.span()); + return Ok(n); + } + Err(resolve_error(*id, desc)) + } +} + +fn resolve_error(id: Id<'_>, ns: &str) -> Error { + assert!( + !id.is_gensym(), + "symbol generated by `wast` itself cannot be resolved {:?}", + id + ); + Error::new( + id.span(), + format!("failed to find {} named `${}`", ns, id.name()), + ) +} + +#[derive(Debug, Clone)] +struct ExprBlock<'a> { + // The label of the block + label: Option<Id<'a>>, + // Whether this block pushed a new scope for resolving locals + pushed_scope: bool, +} + +struct ExprResolver<'a, 'b> { + resolver: &'b Resolver<'a>, + // Scopes tracks the local namespace and dynamically grows as we enter/exit + // `let` blocks + scopes: Vec<Namespace<'a>>, + blocks: Vec<ExprBlock<'a>>, +} + +impl<'a, 'b> ExprResolver<'a, 'b> { + fn new(resolver: &'b Resolver<'a>, initial_scope: Namespace<'a>) -> ExprResolver<'a, 'b> { + ExprResolver { + resolver, + scopes: vec![initial_scope], + blocks: Vec::new(), + } + } + + fn resolve(&mut self, expr: &mut Expression<'a>) -> Result<(), Error> { + for instr in expr.instrs.iter_mut() { + self.resolve_instr(instr)?; + } + Ok(()) + } + + fn resolve_block_type(&mut self, bt: &mut BlockType<'a>) -> Result<(), Error> { + // Ok things get interesting here. 
First off when parsing `bt` + // *optionally* has an index and a function type listed. If + // they're both not present it's equivalent to 0 params and 0 + // results. + // + // In MVP wasm blocks can have 0 params and 0-1 results. Now + // there's also multi-value. We want to prefer MVP wasm wherever + // possible (for backcompat) so we want to list this block as + // being an "MVP" block if we can. The encoder only has + // `BlockType` to work with, so it'll be looking at `params` and + // `results` to figure out what to encode. If `params` and + // `results` fit within MVP, then it uses MVP encoding + // + // To put all that together, here we handle: + // + // * If the `index` was specified, resolve it and use it as the + // source of truth. If this turns out to be an MVP type, + // record it as such. + // * Otherwise use `params` and `results` as the source of + // truth. *If* this were a non-MVP compatible block `index` + // would be filled by by `tyexpand.rs`. + // + // tl;dr; we handle the `index` here if it's set and then fill + // out `params` and `results` if we can, otherwise no work + // happens. + if bt.ty.index.is_some() { + let (ty, _) = self.resolver.resolve_type_use(&mut bt.ty)?; + let n = match ty { + Index::Num(n, _) => *n, + Index::Id(_) => panic!("expected `Num`"), + }; + let ty = match self.resolver.type_info.get(n as usize) { + Some(TypeInfo::Func { params, results }) => (params, results), + _ => return Ok(()), + }; + if ty.0.len() == 0 && ty.1.len() <= 1 { + let mut inline = FunctionType::default(); + inline.results = ty.1.clone(); + bt.ty.inline = Some(inline); + bt.ty.index = None; + } + } + + // If the inline annotation persists to this point then resolve + // all of its inline value types. 
+ if let Some(inline) = &mut bt.ty.inline { + inline.resolve(self.resolver)?; + } + Ok(()) + } + + fn resolve_instr(&mut self, instr: &mut Instruction<'a>) -> Result<(), Error> { + use crate::ast::Instruction::*; + + if let Some(m) = instr.memarg_mut() { + self.resolver.resolve_item_ref(&mut m.memory)?; + } + + match instr { + MemorySize(i) | MemoryGrow(i) | MemoryFill(i) => { + self.resolver.resolve_item_ref(&mut i.mem)?; + } + MemoryInit(i) => { + self.resolver.datas.resolve(&mut i.data, "data")?; + self.resolver.resolve_item_ref(&mut i.mem)?; + } + MemoryCopy(i) => { + self.resolver.resolve_item_ref(&mut i.src)?; + self.resolver.resolve_item_ref(&mut i.dst)?; + } + DataDrop(i) => { + self.resolver.datas.resolve(i, "data")?; + } + + TableInit(i) => { + self.resolver.elems.resolve(&mut i.elem, "elem")?; + self.resolver.resolve_item_ref(&mut i.table)?; + } + ElemDrop(i) => { + self.resolver.elems.resolve(i, "elem")?; + } + + TableCopy(i) => { + self.resolver.resolve_item_ref(&mut i.dst)?; + self.resolver.resolve_item_ref(&mut i.src)?; + } + + TableFill(i) | TableSet(i) | TableGet(i) | TableSize(i) | TableGrow(i) => { + self.resolver.resolve_item_ref(&mut i.dst)?; + } + + GlobalSet(i) | GlobalGet(i) => { + self.resolver.resolve_item_ref(&mut i.0)?; + } + + LocalSet(i) | LocalGet(i) | LocalTee(i) => { + assert!(self.scopes.len() > 0); + // Resolve a local by iterating over scopes from most recent + // to less recent. This allows locals added by `let` blocks to + // shadow less recent locals. 
+ for (depth, scope) in self.scopes.iter().enumerate().rev() { + if let Err(e) = scope.resolve(i, "local") { + if depth == 0 { + // There are no more scopes left, report this as + // the result + return Err(e); + } + } else { + break; + } + } + // We must have taken the `break` and resolved the local + assert!(i.is_resolved()); + } + + Call(i) | RefFunc(i) | ReturnCall(i) => { + self.resolver.resolve_item_ref(&mut i.0)?; + } + + CallIndirect(c) | ReturnCallIndirect(c) => { + self.resolver.resolve_item_ref(&mut c.table)?; + self.resolver.resolve_type_use(&mut c.ty)?; + } + + FuncBind(b) => { + self.resolver.resolve_type_use(&mut b.ty)?; + } + + Let(t) => { + // Resolve (ref T) in locals + for local in &mut t.locals { + self.resolver.resolve_valtype(&mut local.ty)?; + } + + // Register all locals defined in this let + let mut scope = Namespace::default(); + for local in &t.locals { + scope.register(local.id, "local")?; + } + self.scopes.push(scope); + self.blocks.push(ExprBlock { + label: t.block.label, + pushed_scope: true, + }); + + self.resolve_block_type(&mut t.block)?; + } + + Block(bt) | If(bt) | Loop(bt) | Try(bt) => { + self.blocks.push(ExprBlock { + label: bt.label, + pushed_scope: false, + }); + self.resolve_block_type(bt)?; + } + + // On `End` instructions we pop a label from the stack, and for both + // `End` and `Else` instructions if they have labels listed we + // verify that they match the label at the beginning of the block. 
+ Else(_) | End(_) => { + let (matching_block, label) = match &instr { + Else(label) => (self.blocks.last().cloned(), label), + End(label) => (self.blocks.pop(), label), + _ => unreachable!(), + }; + let matching_block = match matching_block { + Some(l) => l, + None => return Ok(()), + }; + + // Reset the local scopes to before this block was entered + if matching_block.pushed_scope { + if let End(_) = instr { + self.scopes.pop(); + } + } + + let label = match label { + Some(l) => l, + None => return Ok(()), + }; + if Some(*label) == matching_block.label { + return Ok(()); + } + return Err(Error::new( + label.span(), + "mismatching labels between end and block".to_string(), + )); + } + + Br(i) | BrIf(i) | BrOnNull(i) => { + self.resolve_label(i)?; + } + + BrTable(i) => { + for label in i.labels.iter_mut() { + self.resolve_label(label)?; + } + self.resolve_label(&mut i.default)?; + } + + Throw(i) => { + self.resolver.resolve(i, Ns::Event)?; + } + Rethrow(i) => { + self.resolve_label(i)?; + } + Catch(i) => { + self.resolver.resolve(i, Ns::Event)?; + } + + BrOnCast(b) => { + self.resolve_label(&mut b.label)?; + self.resolver.resolve_heaptype(&mut b.val)?; + self.resolver.resolve_heaptype(&mut b.rtt)?; + } + + Select(s) => { + if let Some(list) = &mut s.tys { + for ty in list { + self.resolver.resolve_valtype(ty)?; + } + } + } + + StructNew(i) + | StructNewWithRtt(i) + | StructNewDefaultWithRtt(i) + | ArrayNewWithRtt(i) + | ArrayNewDefaultWithRtt(i) + | ArrayGet(i) + | ArrayGetS(i) + | ArrayGetU(i) + | ArraySet(i) + | ArrayLen(i) => { + self.resolver.resolve(i, Ns::Type)?; + } + RTTCanon(t) => { + self.resolver.resolve_heaptype(t)?; + } + RTTSub(s) => { + self.resolver.resolve_heaptype(&mut s.input_rtt)?; + self.resolver.resolve_heaptype(&mut s.output_rtt)?; + } + RefTest(t) | RefCast(t) => { + self.resolver.resolve_heaptype(&mut t.val)?; + self.resolver.resolve_heaptype(&mut t.rtt)?; + } + + StructSet(s) | StructGet(s) | StructGetS(s) | StructGetU(s) => { + 
self.resolver.resolve(&mut s.r#struct, Ns::Type)?; + self.resolver.fields.resolve(&mut s.field, "field")?; + } + StructNarrow(s) => { + self.resolver.resolve_valtype(&mut s.from)?; + self.resolver.resolve_valtype(&mut s.to)?; + } + + RefNull(ty) => self.resolver.resolve_heaptype(ty)?, + + _ => {} + } + Ok(()) + } + + fn resolve_label(&self, label: &mut Index<'a>) -> Result<(), Error> { + let id = match label { + Index::Num(..) => return Ok(()), + Index::Id(id) => *id, + }; + let idx = self + .blocks + .iter() + .rev() + .enumerate() + .filter_map(|(i, b)| b.label.map(|l| (i, l))) + .find(|(_, l)| *l == id); + match idx { + Some((idx, _)) => { + *label = Index::Num(idx as u32, id.span()); + Ok(()) + } + None => Err(resolve_error(id, "label")), + } + } +} + +struct Parents<'a, 'b> { + prev: Option<ParentNode<'a, 'b>>, + cur_id: Option<Id<'a>>, + depth: usize, + names: &'b mut HashMap<Id<'a>, usize>, +} + +struct ParentNode<'a, 'b> { + resolver: &'b Resolver<'a>, + id: Option<Id<'a>>, + prev: Option<&'b ParentNode<'a, 'b>>, + prev_depth: Option<usize>, +} + +impl<'a, 'b> Parents<'a, 'b> { + fn push<'c>(&'c mut self, resolver: &'c Resolver<'a>, id: Option<Id<'a>>) -> Parents<'a, 'c> + where + 'b: 'c, + { + let prev_depth = if let Some(id) = self.cur_id { + self.names.insert(id, self.depth) + } else { + None + }; + Parents { + prev: Some(ParentNode { + prev: self.prev.as_ref(), + resolver, + id: self.cur_id, + prev_depth, + }), + cur_id: id, + depth: self.depth + 1, + names: &mut *self.names, + } + } + + fn resolve(&self, index: &mut Index<'a>) -> Result<&'b Resolver<'a>, Error> { + let mut i = match *index { + Index::Num(n, _) => n, + Index::Id(id) => match self.names.get(&id) { + Some(idx) => (self.depth - *idx - 1) as u32, + None => return Err(resolve_error(id, "parent module")), + }, + }; + *index = Index::Num(i, index.span()); + let mut cur = match self.prev.as_ref() { + Some(n) => n, + None => { + return Err(Error::new( + index.span(), + "cannot use `outer` alias 
in root module".to_string(), + )) + } + }; + while i > 0 { + cur = match cur.prev { + Some(n) => n, + None => { + return Err(Error::new( + index.span(), + "alias to `outer` module index too large".to_string(), + )) + } + }; + i -= 1; + } + Ok(cur.resolver) + } +} + +impl<'a, 'b> Drop for Parents<'a, 'b> { + fn drop(&mut self) { + let (id, prev_depth) = match &self.prev { + Some(n) => (n.id, n.prev_depth), + None => return, + }; + if let Some(id) = id { + match prev_depth { + Some(i) => { + self.names.insert(id, i); + } + None => { + self.names.remove(&id); + } + } + } + } +} + +enum TypeInfo<'a> { + Func { + params: Box<[ValType<'a>]>, + results: Box<[ValType<'a>]>, + }, + Other, +} + +trait TypeReference<'a> { + fn check_matches(&mut self, idx: &Index<'a>, cx: &Resolver<'a>) -> Result<(), Error>; + fn resolve(&mut self, cx: &Resolver<'a>) -> Result<(), Error>; +} + +impl<'a> TypeReference<'a> for FunctionType<'a> { + fn check_matches(&mut self, idx: &Index<'a>, cx: &Resolver<'a>) -> Result<(), Error> { + let n = match idx { + Index::Num(n, _) => *n, + Index::Id(_) => panic!("expected `Num`"), + }; + let (params, results) = match cx.type_info.get(n as usize) { + Some(TypeInfo::Func { params, results }) => (params, results), + _ => return Ok(()), + }; + + // Here we need to check that the inline type listed (ourselves) matches + // what was listed in the module itself (the `params` and `results` + // above). The listed values in `types` are not resolved yet, although + // we should be resolved. In any case we do name resolution + // opportunistically here to see if the values are equal. 
+ + let types_not_equal = |a: &ValType, b: &ValType| { + let mut a = a.clone(); + let mut b = b.clone(); + drop(cx.resolve_valtype(&mut a)); + drop(cx.resolve_valtype(&mut b)); + a != b + }; + + let not_equal = params.len() != self.params.len() + || results.len() != self.results.len() + || params + .iter() + .zip(self.params.iter()) + .any(|(a, (_, _, b))| types_not_equal(a, b)) + || results + .iter() + .zip(self.results.iter()) + .any(|(a, b)| types_not_equal(a, b)); + if not_equal { + return Err(Error::new( + idx.span(), + format!("inline function type doesn't match type reference"), + )); + } + + Ok(()) + } + + fn resolve(&mut self, cx: &Resolver<'a>) -> Result<(), Error> { + // Resolve the (ref T) value types in the final function type + for param in self.params.iter_mut() { + cx.resolve_valtype(&mut param.2)?; + } + for result in self.results.iter_mut() { + cx.resolve_valtype(result)?; + } + Ok(()) + } +} + +impl<'a> TypeReference<'a> for InstanceType<'a> { + fn check_matches(&mut self, idx: &Index<'a>, cx: &Resolver<'a>) -> Result<(), Error> { + drop(cx); + Err(Error::new( + idx.span(), + format!("cannot specify instance type as a reference and inline"), + )) + } + + fn resolve(&mut self, cx: &Resolver<'a>) -> Result<(), Error> { + for export in self.exports.iter_mut() { + cx.resolve_item_sig(&mut export.item)?; + } + Ok(()) + } +} + +impl<'a> TypeReference<'a> for ModuleType<'a> { + fn check_matches(&mut self, idx: &Index<'a>, cx: &Resolver<'a>) -> Result<(), Error> { + drop(cx); + Err(Error::new( + idx.span(), + format!("cannot specify module type as a reference and inline"), + )) + } + + fn resolve(&mut self, cx: &Resolver<'a>) -> Result<(), Error> { + for i in self.imports.iter_mut() { + cx.resolve_item_sig(&mut i.item)?; + } + for e in self.exports.iter_mut() { + cx.resolve_item_sig(&mut e.item)?; + } + Ok(()) + } +} diff --git a/third_party/rust/wast/src/resolve/types.rs b/third_party/rust/wast/src/resolve/types.rs new file mode 100644 index 
0000000000..08f106c383 --- /dev/null +++ b/third_party/rust/wast/src/resolve/types.rs @@ -0,0 +1,471 @@ +use crate::ast::*; +use crate::resolve::gensym; +use std::collections::HashMap; + +pub fn expand<'a>(fields: &mut Vec<ModuleField<'a>>) { + let mut expander = Expander::default(); + expander.process(fields); +} + +#[derive(Default)] +struct Expander<'a> { + // See the comment in `process` for why this exists. + process_imports_early: bool, + + // Maps used to "intern" types. These maps are populated as type annotations + // are seen and inline type annotations use previously defined ones if + // there's a match. + func_type_to_idx: HashMap<FuncKey<'a>, Index<'a>>, + instance_type_to_idx: HashMap<InstanceKey<'a>, Index<'a>>, + module_type_to_idx: HashMap<ModuleKey<'a>, Index<'a>>, + + /// Fields, during processing, which should be prepended to the + /// currently-being-processed field. This should always be empty after + /// processing is complete. + to_prepend: Vec<ModuleField<'a>>, +} + +impl<'a> Expander<'a> { + fn process(&mut self, fields: &mut Vec<ModuleField<'a>>) { + // For the given list of fields this determines whether imports are + // processed as part of `expand_header` or as part of `expand`. The + // reason for this distinction is that pre-module-linking types were + // always sorted to the front of the module so new types were always + // appended to the end. After module-linking, however, types are + // interspersed with imports and order matters. This means that imports + // can't use intern'd types which appear later. + // + // This is a bit of a hack and ideally something that needs to be + // addressed in the upstream spec. WebAssembly/module-linking#25 + // represents this issue. + self.process_imports_early = fields.iter().any(|f| match f { + ModuleField::Alias(_) | ModuleField::NestedModule(_) | ModuleField::Instance(_) => true, + _ => false, + }); + + // Next we expand "header" fields which are those like types and + // imports. 
In this context "header" is defined by the previous
        // `process_imports_early` annotation.
        let mut cur = 0;
        while cur < fields.len() {
            self.expand_header(&mut fields[cur]);
            // Splice any types the header generated in front of the field
            // that needed them, keeping `cur` pointing at that field.
            for item in self.to_prepend.drain(..) {
                fields.insert(cur, item);
                cur += 1;
            }
            cur += 1;
        }

        // Next after we've done that we expand remaining fields. Note that
        // after this we actually append instead of prepend. This is because
        // injected types are intended to come at the end of the type section
        // and types will be sorted before all other items processed here in the
        // final module anyway.
        for field in fields.iter_mut() {
            self.expand(field);
        }
        fields.extend(self.to_prepend.drain(..));
    }

    // Expands a "header" field: a type definition gets an id (generated if
    // absent) and is registered in the dedup maps; imports are expanded here
    // too when `process_imports_early` is set.
    fn expand_header(&mut self, item: &mut ModuleField<'a>) {
        match item {
            ModuleField::Type(ty) => {
                let id = gensym::fill(ty.span, &mut ty.id);
                match &mut ty.def {
                    TypeDef::Func(f) => {
                        f.key().insert(self, Index::Id(id));
                    }
                    TypeDef::Instance(i) => {
                        // Instance/module types may contain inline types of
                        // their own, so expand them before registering.
                        i.expand(self);
                        i.key().insert(self, Index::Id(id));
                    }
                    TypeDef::Module(m) => {
                        m.expand(self);
                        m.key().insert(self, Index::Id(id));
                    }
                    TypeDef::Array(_) | TypeDef::Struct(_) => {}
                }
            }
            ModuleField::Import(i) if self.process_imports_early => {
                self.expand_item_sig(&mut i.item);
            }
            _ => {}
        }
    }

    // Expands inline type uses within a single non-header field into type
    // indices, recursing into inline nested modules.
    fn expand(&mut self, item: &mut ModuleField<'a>) {
        match item {
            // This is pre-expanded above
            ModuleField::Type(_) => {}

            ModuleField::Import(i) => {
                // Only expand here if not expanded above
                if !self.process_imports_early {
                    self.expand_item_sig(&mut i.item);
                }
            }
            ModuleField::Func(f) => {
                self.expand_type_use(&mut f.ty);
                if let FuncKind::Inline { expression, .. } = &mut f.kind {
                    self.expand_expression(expression);
                }
            }
            ModuleField::Global(g) => {
                if let GlobalKind::Inline(expr) = &mut g.kind {
                    self.expand_expression(expr);
                }
            }
            ModuleField::Data(d) => {
                if let DataKind::Active { offset, .. } = &mut d.kind {
                    self.expand_expression(offset);
                }
            }
            ModuleField::Elem(e) => {
                if let ElemKind::Active { offset, .. } = &mut e.kind {
                    self.expand_expression(offset);
                }
            }
            ModuleField::Event(e) => match &mut e.ty {
                EventType::Exception(ty) => {
                    self.expand_type_use(ty);
                }
            },
            ModuleField::NestedModule(m) => {
                // Nested inline modules get a fresh expander, so their type
                // dedup maps are independent of ours.
                if let NestedModuleKind::Inline { fields } = &mut m.kind {
                    Expander::default().process(fields);
                }
            }

            ModuleField::Alias(_)
            | ModuleField::Instance(_)
            | ModuleField::Table(_)
            | ModuleField::Memory(_)
            | ModuleField::Start(_)
            | ModuleField::Export(_)
            | ModuleField::Custom(_) => {}
        }
    }

    // Expands the type use inside an imported/exported item's signature;
    // instance and module items additionally drop their inline type since
    // only the resolved index is kept afterwards.
    fn expand_item_sig(&mut self, item: &mut ItemSig<'a>) {
        match &mut item.kind {
            ItemKind::Func(t) | ItemKind::Event(EventType::Exception(t)) => {
                self.expand_type_use(t);
            }
            ItemKind::Instance(t) => {
                self.expand_type_use(t);
                t.inline.take();
            }
            ItemKind::Module(m) => {
                self.expand_type_use(m);
                m.inline.take();
            }
            ItemKind::Global(_) | ItemKind::Table(_) | ItemKind::Memory(_) => {}
        }
    }

    // Expands every instruction of an expression in turn.
    fn expand_expression(&mut self, expr: &mut Expression<'a>) {
        for instr in expr.instrs.iter_mut() {
            self.expand_instr(instr);
        }
    }

    // Expands the type use, if any, carried by one instruction: block-like
    // instructions, `func.bind`, and (return_)call_indirect.
    fn expand_instr(&mut self, instr: &mut Instruction<'a>) {
        match instr {
            Instruction::Block(bt)
            | Instruction::If(bt)
            | Instruction::Loop(bt)
            | Instruction::Let(LetType { block: bt, .. })
            | Instruction::Try(bt) => {
                // No expansion necessary, a type reference is already here.
                // We'll verify that it's the same as the inline type, if any,
                // later.
                if bt.ty.index.is_some() {
                    return;
                }

                match &bt.ty.inline {
                    // Only actually expand `TypeUse` with an index which appends a
                    // type if it looks like we need one. This way if the
                    // multi-value proposal isn't enabled and/or used we won't
                    // encode it.
                    Some(inline) => {
                        if inline.params.len() == 0 && inline.results.len() <= 1 {
                            return;
                        }
                    }

                    // If we didn't have either an index or an inline type
                    // listed then assume our block has no inputs/outputs, so
                    // fill in the inline type here.
                    //
                    // Do not fall through to expanding the `TypeUse` because
                    // this doesn't force an empty function type to go into the
                    // type section.
                    None => {
                        bt.ty.inline = Some(FunctionType::default());
                        return;
                    }
                }
                self.expand_type_use(&mut bt.ty);
            }
            Instruction::FuncBind(b) => {
                self.expand_type_use(&mut b.ty);
            }
            Instruction::CallIndirect(c) | Instruction::ReturnCallIndirect(c) => {
                self.expand_type_use(&mut c.ty);
            }
            _ => {}
        }
    }

    // Resolves `item` to a type index: returns the existing index if one is
    // already recorded, otherwise keys the inline type (or `T::default()`
    // when absent), injects a new type definition as needed via
    // `key_to_idx`, and records the resulting index back into `item`.
    fn expand_type_use<T>(&mut self, item: &mut TypeUse<'a, T>) -> Index<'a>
    where
        T: TypeReference<'a>,
    {
        if let Some(idx) = &item.index {
            match idx {
                ItemRef::Item { idx, exports, .. } => {
                    debug_assert!(exports.len() == 0);
                    return idx.clone();
                }
                ItemRef::Outer { .. } => unreachable!(),
            }
        }
        let key = match item.inline.as_mut() {
            Some(ty) => {
                ty.expand(self);
                ty.key()
            }
            None => T::default().key(),
        };
        let span = Span::from_offset(0); // FIXME: don't manufacture
        let idx = self.key_to_idx(span, key);
        item.index = Some(ItemRef::Item {
            idx,
            kind: kw::r#type(span),
            exports: Vec::new(),
        });
        return idx;
    }

    // Maps a type `key` to its index, creating and queueing (in
    // `to_prepend`) a fresh type definition with a generated id the first
    // time a given key is seen.
    fn key_to_idx(&mut self, span: Span, key: impl TypeKey<'a>) -> Index<'a> {
        // First see if this `key` already exists in the type definitions we've
        // seen so far...
        if let Some(idx) = key.lookup(self) {
            return idx;
        }

        // ... and failing that we insert a new type definition.
        let id = gensym::gen(span);
        self.to_prepend.push(ModuleField::Type(Type {
            span,
            id: Some(id),
            def: key.to_def(span),
        }));
        let idx = Index::Id(id);
        key.insert(self, idx);

        return idx;
    }
}

// An inline type definition (function/instance/module type) which can be
// reduced to a hashable `Key` for deduplication.
trait TypeReference<'a>: Default {
    type Key: TypeKey<'a>;
    fn key(&self) -> Self::Key;
    fn expand(&mut self, cx: &mut Expander<'a>);
}

// A hashable key identifying a type: it can look itself up in an `Expander`,
// turn itself back into a `TypeDef`, and register its index.
trait TypeKey<'a> {
    fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>>;
    fn to_def(&self, span: Span) -> TypeDef<'a>;
    fn insert(&self, cx: &mut Expander<'a>, id: Index<'a>);
}

// A function type's shape for hashing: (parameter types, result types).
type FuncKey<'a> = (Box<[ValType<'a>]>, Box<[ValType<'a>]>);

impl<'a> TypeReference<'a> for FunctionType<'a> {
    type Key = FuncKey<'a>;

    fn key(&self) -> Self::Key {
        // Parameter ids/names don't affect type identity, so only the value
        // type (the `.2` of each param triple) is kept.
        let params = self.params.iter().map(|p| p.2).collect();
        let results = self.results.clone();
        (params, results)
    }

    fn expand(&mut self, _cx: &mut Expander<'a>) {}
}

impl<'a> TypeKey<'a> for FuncKey<'a> {
    fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>> {
        cx.func_type_to_idx.get(self).cloned()
    }

    fn to_def(&self, _span: Span) -> TypeDef<'a> {
        // Reconstruct a nameless function type from the key.
        TypeDef::Func(FunctionType {
            params: self.0.iter().map(|t| (None, None, *t)).collect(),
            results: self.1.clone(),
        })
    }

    fn insert(&self, cx: &mut Expander<'a>, idx: Index<'a>) {
        // Keep the first index assigned to this signature.
        cx.func_type_to_idx.entry(self.clone()).or_insert(idx);
    }
}

// A list of the exports of a module as well as the signature they export.
+type InstanceKey<'a> = Vec<(&'a str, Item<'a>)>; + +impl<'a> TypeReference<'a> for InstanceType<'a> { + type Key = InstanceKey<'a>; + + fn key(&self) -> Self::Key { + self.exports + .iter() + .map(|export| (export.name, Item::new(&export.item))) + .collect() + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + for export in self.exports.iter_mut() { + cx.expand_item_sig(&mut export.item); + } + } +} + +impl<'a> TypeKey<'a> for InstanceKey<'a> { + fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>> { + cx.instance_type_to_idx.get(self).cloned() + } + + fn to_def(&self, span: Span) -> TypeDef<'a> { + let exports = self + .iter() + .map(|(name, item)| ExportType { + span, + name, + item: item.to_sig(span), + }) + .collect(); + TypeDef::Instance(InstanceType { exports }) + } + + fn insert(&self, cx: &mut Expander<'a>, idx: Index<'a>) { + cx.instance_type_to_idx.entry(self.clone()).or_insert(idx); + } +} + +// The first element of this pair is the list of imports in the module, and the +// second element is the list of exports. 
+type ModuleKey<'a> = ( + Vec<(&'a str, Option<&'a str>, Item<'a>)>, + Vec<(&'a str, Item<'a>)>, +); + +impl<'a> TypeReference<'a> for ModuleType<'a> { + type Key = ModuleKey<'a>; + + fn key(&self) -> Self::Key { + let imports = self + .imports + .iter() + .map(|import| (import.module, import.field, Item::new(&import.item))) + .collect(); + let exports = self + .exports + .iter() + .map(|export| (export.name, Item::new(&export.item))) + .collect(); + (imports, exports) + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + for export in self.exports.iter_mut() { + cx.expand_item_sig(&mut export.item); + } + for import in self.imports.iter_mut() { + cx.expand_item_sig(&mut import.item); + } + } +} + +impl<'a> TypeKey<'a> for ModuleKey<'a> { + fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>> { + cx.module_type_to_idx.get(self).cloned() + } + + fn to_def(&self, span: Span) -> TypeDef<'a> { + let imports = self + .0 + .iter() + .map(|(module, field, item)| Import { + span, + module, + field: *field, + item: item.to_sig(span), + }) + .collect(); + let exports = self + .1 + .iter() + .map(|(name, item)| ExportType { + span, + name, + item: item.to_sig(span), + }) + .collect(); + TypeDef::Module(ModuleType { imports, exports }) + } + + fn insert(&self, cx: &mut Expander<'a>, idx: Index<'a>) { + cx.module_type_to_idx.entry(self.clone()).or_insert(idx); + } +} + +// A lookalike to `ItemKind` except without all non-relevant information for +// hashing. This is used as a hash key for instance/module type lookup. 
+#[derive(Clone, PartialEq, Eq, Hash)] +enum Item<'a> { + Func(Index<'a>), + Table(TableType<'a>), + Memory(MemoryType), + Global(GlobalType<'a>), + Event(Index<'a>), + Module(Index<'a>), + Instance(Index<'a>), +} + +impl<'a> Item<'a> { + fn new(item: &ItemSig<'a>) -> Item<'a> { + match &item.kind { + ItemKind::Func(f) => Item::Func(*f.index.as_ref().unwrap().unwrap_index()), + ItemKind::Instance(f) => Item::Instance(*f.index.as_ref().unwrap().unwrap_index()), + ItemKind::Module(f) => Item::Module(*f.index.as_ref().unwrap().unwrap_index()), + ItemKind::Event(EventType::Exception(f)) => { + Item::Event(*f.index.as_ref().unwrap().unwrap_index()) + } + ItemKind::Table(t) => Item::Table(t.clone()), + ItemKind::Memory(t) => Item::Memory(t.clone()), + ItemKind::Global(t) => Item::Global(t.clone()), + } + } + + fn to_sig(&self, span: Span) -> ItemSig<'a> { + let kind = match self { + Item::Func(index) => ItemKind::Func(TypeUse::new_with_index(*index)), + Item::Event(index) => { + ItemKind::Event(EventType::Exception(TypeUse::new_with_index(*index))) + } + Item::Instance(index) => ItemKind::Instance(TypeUse::new_with_index(*index)), + Item::Module(index) => ItemKind::Module(TypeUse::new_with_index(*index)), + Item::Table(t) => ItemKind::Table(t.clone()), + Item::Memory(t) => ItemKind::Memory(t.clone()), + Item::Global(t) => ItemKind::Global(t.clone()), + }; + ItemSig { + span, + id: None, + name: None, + kind, + } + } +} |