diff options
Diffstat (limited to 'third_party/rust/wast/src')
43 files changed, 17534 insertions, 0 deletions
diff --git a/third_party/rust/wast/src/component.rs b/third_party/rust/wast/src/component.rs new file mode 100644 index 0000000000..899baaa356 --- /dev/null +++ b/third_party/rust/wast/src/component.rs @@ -0,0 +1,28 @@ +//! Types and support for parsing the component model text format. + +mod alias; +mod binary; +mod component; +mod custom; +mod expand; +mod export; +mod func; +mod import; +mod instance; +mod item_ref; +mod module; +mod resolve; +mod types; +mod wast; + +pub use self::alias::*; +pub use self::component::*; +pub use self::custom::*; +pub use self::export::*; +pub use self::func::*; +pub use self::import::*; +pub use self::instance::*; +pub use self::item_ref::*; +pub use self::module::*; +pub use self::types::*; +pub use self::wast::*; diff --git a/third_party/rust/wast/src/component/alias.rs b/third_party/rust/wast/src/component/alias.rs new file mode 100644 index 0000000000..3c70a2d371 --- /dev/null +++ b/third_party/rust/wast/src/component/alias.rs @@ -0,0 +1,253 @@ +use crate::core::ExportKind; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, Index, NameAnnotation, Span}; + +/// A inline alias for component exported items. +#[derive(Debug)] +pub struct InlineExportAlias<'a> { + /// The instance to alias the export from. + pub instance: Index<'a>, + /// The name of the export to alias. + pub name: &'a str, +} + +impl<'a> Parse<'a> for InlineExportAlias<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::alias>()?; + parser.parse::<kw::export>()?; + let instance = parser.parse()?; + let name = parser.parse()?; + Ok(Self { instance, name }) + } +} + +/// An alias to a component item. +#[derive(Debug)] +pub struct Alias<'a> { + /// Where this `alias` was defined. + pub span: Span, + /// An identifier that this alias is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this alias stored in the custom `name` section. 
+ pub name: Option<NameAnnotation<'a>>, + /// The target of this alias. + pub target: AliasTarget<'a>, +} + +impl<'a> Alias<'a> { + /// Parses only an outer type alias. + pub fn parse_outer_core_type_alias(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::alias>()?.0; + parser.parse::<kw::outer>()?; + let outer = parser.parse()?; + let index = parser.parse()?; + + let (kind, id, name) = parser.parens(|parser| { + let mut kind: ComponentOuterAliasKind = parser.parse()?; + match kind { + ComponentOuterAliasKind::CoreType => { + return Err(parser.error("expected type for outer alias")) + } + ComponentOuterAliasKind::Type => { + kind = ComponentOuterAliasKind::CoreType; + } + _ => return Err(parser.error("expected core type or type for outer alias")), + } + + Ok((kind, parser.parse()?, parser.parse()?)) + })?; + + Ok(Self { + span, + target: AliasTarget::Outer { outer, index, kind }, + id, + name, + }) + } +} + +impl<'a> Parse<'a> for Alias<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::alias>()?.0; + + let mut l = parser.lookahead1(); + + let (target, id, name) = if l.peek::<kw::outer>() { + parser.parse::<kw::outer>()?; + let outer = parser.parse()?; + let index = parser.parse()?; + let (kind, id, name) = + parser.parens(|parser| Ok((parser.parse()?, parser.parse()?, parser.parse()?)))?; + + (AliasTarget::Outer { outer, index, kind }, id, name) + } else if l.peek::<kw::export>() { + parser.parse::<kw::export>()?; + let instance = parser.parse()?; + let export_name = parser.parse()?; + let (kind, id, name) = + parser.parens(|parser| Ok((parser.parse()?, parser.parse()?, parser.parse()?)))?; + + ( + AliasTarget::Export { + instance, + name: export_name, + kind, + }, + id, + name, + ) + } else if l.peek::<kw::core>() { + parser.parse::<kw::core>()?; + parser.parse::<kw::export>()?; + let instance = parser.parse()?; + let export_name = parser.parse()?; + let (kind, id, name) = parser.parens(|parser| { + 
parser.parse::<kw::core>()?; + Ok((parser.parse()?, parser.parse()?, parser.parse()?)) + })?; + + ( + AliasTarget::CoreExport { + instance, + name: export_name, + kind, + }, + id, + name, + ) + } else { + return Err(l.error()); + }; + + Ok(Self { + span, + target, + id, + name, + }) + } +} + +/// Represents the kind of instance export alias. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ComponentExportAliasKind { + /// The alias is to a core module export. + CoreModule, + /// The alias is to a function export. + Func, + /// The alias is to a value export. + Value, + /// The alias is to a type export. + Type, + /// The alias is to a component export. + Component, + /// The alias is to an instance export. + Instance, +} + +impl<'a> Parse<'a> for ComponentExportAliasKind { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::core>() { + parser.parse::<kw::core>()?; + let mut l = parser.lookahead1(); + if l.peek::<kw::module>() { + parser.parse::<kw::module>()?; + Ok(Self::CoreModule) + } else { + Err(l.error()) + } + } else if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(Self::Func) + } else if l.peek::<kw::value>() { + parser.parse::<kw::value>()?; + Ok(Self::Value) + } else if l.peek::<kw::r#type>() { + parser.parse::<kw::r#type>()?; + Ok(Self::Type) + } else if l.peek::<kw::component>() { + parser.parse::<kw::component>()?; + Ok(Self::Component) + } else if l.peek::<kw::instance>() { + parser.parse::<kw::instance>()?; + Ok(Self::Instance) + } else { + Err(l.error()) + } + } +} + +/// Represents the kind of outer alias. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ComponentOuterAliasKind { + /// The alias is to an outer core module. + CoreModule, + /// The alias is to an outer core type. + CoreType, + /// The alias is to an outer type. + Type, + /// The alias is to an outer component. 
+ Component, +} + +impl<'a> Parse<'a> for ComponentOuterAliasKind { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::core>() { + parser.parse::<kw::core>()?; + let mut l = parser.lookahead1(); + if l.peek::<kw::module>() { + parser.parse::<kw::module>()?; + Ok(Self::CoreModule) + } else if l.peek::<kw::r#type>() { + parser.parse::<kw::r#type>()?; + Ok(Self::CoreType) + } else { + Err(l.error()) + } + } else if l.peek::<kw::r#type>() { + parser.parse::<kw::r#type>()?; + Ok(Self::Type) + } else if l.peek::<kw::component>() { + parser.parse::<kw::component>()?; + Ok(Self::Component) + } else { + Err(l.error()) + } + } +} + +/// The target of a component alias. +#[derive(Debug)] +pub enum AliasTarget<'a> { + /// The alias is to an export of a component instance. + Export { + /// The component instance exporting the item. + instance: Index<'a>, + /// The name of the exported item to alias. + name: &'a str, + /// The export kind of the alias. + kind: ComponentExportAliasKind, + }, + /// The alias is to an export of a module instance. + CoreExport { + /// The module instance exporting the item. + instance: Index<'a>, + /// The name of the exported item to alias. + name: &'a str, + /// The export kind of the alias. + kind: ExportKind, + }, + /// The alias is to an item from an outer component. + Outer { + /// The number of enclosing components to skip. + outer: Index<'a>, + /// The index of the item being aliased. + index: Index<'a>, + /// The outer alias kind. 
+ kind: ComponentOuterAliasKind, + }, +} diff --git a/third_party/rust/wast/src/component/binary.rs b/third_party/rust/wast/src/component/binary.rs new file mode 100644 index 0000000000..6d57ed4380 --- /dev/null +++ b/third_party/rust/wast/src/component/binary.rs @@ -0,0 +1,972 @@ +use crate::component::*; +use crate::core; +use crate::token::{Id, Index, NameAnnotation}; +use wasm_encoder::{ + CanonicalFunctionSection, ComponentAliasSection, ComponentDefinedTypeEncoder, + ComponentExportSection, ComponentImportSection, ComponentInstanceSection, ComponentNameSection, + ComponentSection, ComponentSectionId, ComponentStartSection, ComponentTypeEncoder, + ComponentTypeSection, CoreTypeEncoder, CoreTypeSection, InstanceSection, NameMap, + NestedComponentSection, RawSection, SectionId, +}; + +pub fn encode(component: &Component<'_>) -> Vec<u8> { + match &component.kind { + ComponentKind::Text(fields) => { + encode_fields(&component.id, &component.name, fields).finish() + } + ComponentKind::Binary(bytes) => bytes.iter().flat_map(|b| b.iter().copied()).collect(), + } +} + +fn encode_fields( + // TODO: use the id and name for a future names section + component_id: &Option<Id<'_>>, + component_name: &Option<NameAnnotation<'_>>, + fields: &[ComponentField<'_>], +) -> wasm_encoder::Component { + let mut e = Encoder::default(); + + for field in fields { + match field { + ComponentField::CoreModule(m) => e.encode_core_module(m), + ComponentField::CoreInstance(i) => e.encode_core_instance(i), + ComponentField::CoreType(t) => e.encode_core_type(t), + ComponentField::Component(c) => e.encode_component(c), + ComponentField::Instance(i) => e.encode_instance(i), + ComponentField::Alias(a) => e.encode_alias(a), + ComponentField::Type(t) => e.encode_type(t), + ComponentField::CanonicalFunc(f) => e.encode_canonical_func(f), + ComponentField::CoreFunc(_) | ComponentField::Func(_) => { + unreachable!("should be expanded already") + } + ComponentField::Start(s) => e.encode_start(s), + 
ComponentField::Import(i) => e.encode_import(i), + ComponentField::Export(ex) => e.encode_export(ex), + ComponentField::Custom(c) => e.encode_custom(c), + } + } + + e.flush(None); + e.encode_names(component_id, component_name); + + e.component +} + +fn encode_core_type(encoder: CoreTypeEncoder, ty: &CoreTypeDef) { + match ty { + CoreTypeDef::Def(core::TypeDef::Func(f)) => { + encoder.function( + f.params.iter().map(|(_, _, ty)| (*ty).into()), + f.results.iter().copied().map(Into::into), + ); + } + CoreTypeDef::Def(core::TypeDef::Struct(_)) | CoreTypeDef::Def(core::TypeDef::Array(_)) => { + todo!("encoding of GC proposal types not yet implemented") + } + CoreTypeDef::Module(t) => { + encoder.module(&t.into()); + } + } +} + +fn encode_type(encoder: ComponentTypeEncoder, ty: &TypeDef) { + match ty { + TypeDef::Defined(t) => { + encode_defined_type(encoder.defined_type(), t); + } + TypeDef::Func(f) => { + let mut encoder = encoder.function(); + encoder.params(f.params.iter().map(|p| (p.name, &p.ty))); + + if f.results.len() == 1 && f.results[0].name.is_none() { + encoder.result(&f.results[0].ty); + } else { + encoder.results(f.results.iter().map(|r| (r.name.unwrap_or(""), &r.ty))); + } + } + TypeDef::Component(c) => { + encoder.component(&c.into()); + } + TypeDef::Instance(i) => { + encoder.instance(&i.into()); + } + } +} + +fn encode_defined_type(encoder: ComponentDefinedTypeEncoder, ty: &ComponentDefinedType) { + match ty { + ComponentDefinedType::Primitive(p) => encoder.primitive((*p).into()), + ComponentDefinedType::Record(r) => { + encoder.record(r.fields.iter().map(|f| (f.name, &f.ty))); + } + ComponentDefinedType::Variant(v) => { + encoder.variant(v.cases.iter().map(|c| { + ( + c.name, + c.ty.as_ref().map(Into::into), + c.refines.as_ref().map(Into::into), + ) + })); + } + ComponentDefinedType::List(l) => { + encoder.list(l.element.as_ref()); + } + ComponentDefinedType::Tuple(t) => { + encoder.tuple(t.fields.iter()); + } + ComponentDefinedType::Flags(f) => { + 
encoder.flags(f.names.iter().copied()); + } + ComponentDefinedType::Enum(e) => { + encoder.enum_type(e.names.iter().copied()); + } + ComponentDefinedType::Union(u) => encoder.union(u.types.iter()), + ComponentDefinedType::Option(o) => { + encoder.option(o.element.as_ref()); + } + ComponentDefinedType::Result(e) => { + encoder.result( + e.ok.as_deref().map(Into::into), + e.err.as_deref().map(Into::into), + ); + } + } +} + +#[derive(Default)] +struct Encoder<'a> { + component: wasm_encoder::Component, + current_section_id: Option<u8>, + + // Core sections + // Note: module sections are written immediately + core_instances: InstanceSection, + core_types: CoreTypeSection, + + // Component sections + // Note: custom, component, start sections are written immediately + instances: ComponentInstanceSection, + aliases: ComponentAliasSection, + types: ComponentTypeSection, + funcs: CanonicalFunctionSection, + imports: ComponentImportSection, + exports: ComponentExportSection, + + core_func_names: Vec<Option<&'a str>>, + core_table_names: Vec<Option<&'a str>>, + core_memory_names: Vec<Option<&'a str>>, + core_global_names: Vec<Option<&'a str>>, + core_type_names: Vec<Option<&'a str>>, + core_module_names: Vec<Option<&'a str>>, + core_instance_names: Vec<Option<&'a str>>, + func_names: Vec<Option<&'a str>>, + value_names: Vec<Option<&'a str>>, + type_names: Vec<Option<&'a str>>, + component_names: Vec<Option<&'a str>>, + instance_names: Vec<Option<&'a str>>, +} + +impl<'a> Encoder<'a> { + fn encode_custom(&mut self, custom: &Custom) { + // Flush any in-progress section before encoding the customs section + self.flush(None); + self.component.section(custom); + } + + fn encode_core_module(&mut self, module: &CoreModule<'a>) { + // Flush any in-progress section before encoding the module + self.flush(None); + + self.core_module_names + .push(get_name(&module.id, &module.name)); + + match &module.kind { + CoreModuleKind::Import { .. 
} => unreachable!("should be expanded already"), + CoreModuleKind::Inline { fields } => { + // TODO: replace this with a wasm-encoder based encoding (should return `wasm_encoder::Module`) + let data = crate::core::binary::encode(&module.id, &module.name, fields); + self.component.section(&RawSection { + id: ComponentSectionId::CoreModule.into(), + data: &data, + }); + } + } + } + + fn encode_core_instance(&mut self, instance: &CoreInstance<'a>) { + self.core_instance_names + .push(get_name(&instance.id, &instance.name)); + match &instance.kind { + CoreInstanceKind::Instantiate { module, args } => { + self.core_instances.instantiate( + module.into(), + args.iter().map(|arg| (arg.name, (&arg.kind).into())), + ); + } + CoreInstanceKind::BundleOfExports(exports) => { + self.core_instances.export_items(exports.iter().map(|e| { + let (kind, index) = (&e.item).into(); + (e.name, kind, index) + })); + } + } + + self.flush(Some(self.core_instances.id())); + } + + fn encode_core_type(&mut self, ty: &CoreType<'a>) { + self.core_type_names.push(get_name(&ty.id, &ty.name)); + encode_core_type(self.core_types.ty(), &ty.def); + self.flush(Some(self.core_types.id())); + } + + fn encode_component(&mut self, component: &NestedComponent<'a>) { + self.component_names + .push(get_name(&component.id, &component.name)); + // Flush any in-progress section before encoding the component + self.flush(None); + + match &component.kind { + NestedComponentKind::Import { .. } => unreachable!("should be expanded already"), + NestedComponentKind::Inline(fields) => { + self.component + .section(&NestedComponentSection(&encode_fields( + &component.id, + &component.name, + fields, + ))); + } + } + } + + fn encode_instance(&mut self, instance: &Instance<'a>) { + self.instance_names + .push(get_name(&instance.id, &instance.name)); + match &instance.kind { + InstanceKind::Import { .. 
} => unreachable!("should be expanded already"), + InstanceKind::Instantiate { component, args } => { + self.instances.instantiate( + component.into(), + args.iter().map(|arg| { + let (kind, index) = (&arg.kind).into(); + (arg.name, kind, index) + }), + ); + } + InstanceKind::BundleOfExports(exports) => { + self.instances.export_items(exports.iter().map(|e| { + let (kind, index) = (&e.kind).into(); + (e.name, kind, index) + })); + } + } + + self.flush(Some(self.instances.id())); + } + + fn encode_alias(&mut self, alias: &Alias<'a>) { + let name = get_name(&alias.id, &alias.name); + self.aliases.alias((&alias.target).into()); + match &alias.target { + AliasTarget::Export { kind, .. } => { + self.names_for_component_export_alias(*kind).push(name); + } + AliasTarget::CoreExport { kind, .. } => { + self.names_for_core_export_alias(*kind).push(name); + } + AliasTarget::Outer { kind, .. } => { + self.names_for_component_outer_alias(*kind).push(name); + } + } + + self.flush(Some(self.aliases.id())); + } + + fn encode_start(&mut self, start: &Start) { + // Flush any in-progress section before encoding the start section + self.flush(None); + + self.component.section(&ComponentStartSection { + function_index: start.func.into(), + args: start.args.iter().map(|a| a.idx.into()).collect::<Vec<_>>(), + results: start.results.len() as u32, + }); + } + + fn encode_type(&mut self, ty: &Type<'a>) { + self.type_names.push(get_name(&ty.id, &ty.name)); + encode_type(self.types.ty(), &ty.def); + self.flush(Some(self.types.id())); + } + + fn encode_canonical_func(&mut self, func: &CanonicalFunc<'a>) { + let name = get_name(&func.id, &func.name); + match &func.kind { + CanonicalFuncKind::Lift { ty, info } => { + self.func_names.push(name); + self.funcs.lift( + info.func.idx.into(), + ty.into(), + info.opts.iter().map(Into::into), + ); + } + CanonicalFuncKind::Lower(info) => { + self.core_func_names.push(name); + self.funcs + .lower(info.func.idx.into(), info.opts.iter().map(Into::into)); + 
} + } + + self.flush(Some(self.funcs.id())); + } + + fn encode_import(&mut self, import: &ComponentImport<'a>) { + let name = get_name(&import.item.id, &import.item.name); + self.names_for_item_kind(&import.item.kind).push(name); + self.imports.import( + import.name, + import.url.unwrap_or(""), + (&import.item.kind).into(), + ); + self.flush(Some(self.imports.id())); + } + + fn encode_export(&mut self, export: &ComponentExport<'a>) { + let name = get_name(&export.id, &export.debug_name); + let (kind, index) = (&export.kind).into(); + self.exports.export( + export.name, + export.url.unwrap_or(""), + kind, + index, + export.ty.as_ref().map(|ty| (&ty.0.kind).into()), + ); + match &export.kind { + ComponentExportKind::CoreModule(_) => self.core_module_names.push(name), + ComponentExportKind::Func(_) => self.func_names.push(name), + ComponentExportKind::Instance(_) => self.instance_names.push(name), + ComponentExportKind::Value(_) => self.value_names.push(name), + ComponentExportKind::Component(_) => self.component_names.push(name), + ComponentExportKind::Type(_) => self.type_names.push(name), + } + self.flush(Some(self.exports.id())); + } + + fn flush(&mut self, section_id: Option<u8>) { + if self.current_section_id == section_id { + return; + } + + if let Some(id) = self.current_section_id { + match id { + // 0 => custom sections are written immediately + // 1 => core modules sections are written immediately + 2 => { + assert_eq!(id, self.core_instances.id()); + self.component.section(&self.core_instances); + self.core_instances = Default::default(); + } + 3 => { + assert_eq!(id, self.core_types.id()); + self.component.section(&self.core_types); + self.core_types = Default::default(); + } + // 4 => components sections are written immediately + 5 => { + assert_eq!(id, self.instances.id()); + self.component.section(&self.instances); + self.instances = Default::default(); + } + 6 => { + assert_eq!(id, self.aliases.id()); + self.component.section(&self.aliases); + 
self.aliases = Default::default(); + } + 7 => { + assert_eq!(id, self.types.id()); + self.component.section(&self.types); + self.types = Default::default(); + } + 8 => { + assert_eq!(id, self.funcs.id()); + self.component.section(&self.funcs); + self.funcs = Default::default(); + } + // 9 => start sections are written immediately + 10 => { + assert_eq!(id, self.imports.id()); + self.component.section(&self.imports); + self.imports = Default::default(); + } + 11 => { + assert_eq!(id, self.exports.id()); + self.component.section(&self.exports); + self.exports = Default::default(); + } + _ => unreachable!("unknown incremental component section id: {}", id), + } + } + + self.current_section_id = section_id + } + + fn encode_names( + &mut self, + component_id: &Option<Id<'_>>, + component_name: &Option<NameAnnotation<'_>>, + ) { + let mut names = ComponentNameSection::new(); + if let Some(name) = get_name(component_id, component_name) { + names.component(name); + } + + let mut funcs = |list: &[Option<&str>], append: fn(&mut ComponentNameSection, &NameMap)| { + let mut map = NameMap::new(); + for (i, entry) in list.iter().enumerate() { + if let Some(name) = entry { + map.append(i as u32, name); + } + } + if !map.is_empty() { + append(&mut names, &map); + } + }; + + funcs(&self.core_func_names, ComponentNameSection::core_funcs); + funcs(&self.core_table_names, ComponentNameSection::core_tables); + funcs(&self.core_memory_names, ComponentNameSection::core_memories); + funcs(&self.core_global_names, ComponentNameSection::core_globals); + funcs(&self.core_type_names, ComponentNameSection::core_types); + funcs(&self.core_module_names, ComponentNameSection::core_modules); + funcs( + &self.core_instance_names, + ComponentNameSection::core_instances, + ); + funcs(&self.func_names, ComponentNameSection::funcs); + funcs(&self.value_names, ComponentNameSection::values); + funcs(&self.type_names, ComponentNameSection::types); + funcs(&self.component_names, 
ComponentNameSection::components); + funcs(&self.instance_names, ComponentNameSection::instances); + + if !names.is_empty() { + self.component.section(&names); + } + } + + fn names_for_component_export_alias( + &mut self, + kind: ComponentExportAliasKind, + ) -> &mut Vec<Option<&'a str>> { + match kind { + ComponentExportAliasKind::Func => &mut self.func_names, + ComponentExportAliasKind::CoreModule => &mut self.core_module_names, + ComponentExportAliasKind::Value => &mut self.value_names, + ComponentExportAliasKind::Type => &mut self.type_names, + ComponentExportAliasKind::Component => &mut self.component_names, + ComponentExportAliasKind::Instance => &mut self.instance_names, + } + } + + fn names_for_component_outer_alias( + &mut self, + kind: ComponentOuterAliasKind, + ) -> &mut Vec<Option<&'a str>> { + match kind { + ComponentOuterAliasKind::CoreModule => &mut self.core_module_names, + ComponentOuterAliasKind::CoreType => &mut self.core_type_names, + ComponentOuterAliasKind::Component => &mut self.component_names, + ComponentOuterAliasKind::Type => &mut self.type_names, + } + } + + fn names_for_core_export_alias(&mut self, kind: core::ExportKind) -> &mut Vec<Option<&'a str>> { + match kind { + core::ExportKind::Func => &mut self.core_func_names, + core::ExportKind::Global => &mut self.core_global_names, + core::ExportKind::Table => &mut self.core_table_names, + core::ExportKind::Memory => &mut self.core_memory_names, + core::ExportKind::Tag => unimplemented!(), + } + } + + fn names_for_item_kind(&mut self, kind: &ItemSigKind) -> &mut Vec<Option<&'a str>> { + match kind { + ItemSigKind::CoreModule(_) => &mut self.core_module_names, + ItemSigKind::Func(_) => &mut self.func_names, + ItemSigKind::Component(_) => &mut self.component_names, + ItemSigKind::Instance(_) => &mut self.instance_names, + ItemSigKind::Value(_) => &mut self.value_names, + ItemSigKind::Type(_) => &mut self.type_names, + } + } +} + +fn get_name<'a>(id: &Option<Id<'a>>, name: 
&Option<NameAnnotation<'a>>) -> Option<&'a str> { + name.as_ref().map(|n| n.name).or_else(|| { + id.and_then(|id| { + if id.is_gensym() { + None + } else { + Some(id.name()) + } + }) + }) +} + +// This implementation is much like `wasm_encoder::CustomSection`, except +// that it extends via a list of slices instead of a single slice. +impl wasm_encoder::Encode for Custom<'_> { + fn encode(&self, sink: &mut Vec<u8>) { + let mut buf = [0u8; 5]; + let encoded_name_len = + leb128::write::unsigned(&mut &mut buf[..], u64::try_from(self.name.len()).unwrap()) + .unwrap(); + let data_len = self.data.iter().fold(0, |acc, s| acc + s.len()); + + // name length + (encoded_name_len + self.name.len() + data_len).encode(sink); + + // name + self.name.encode(sink); + + // data + for s in &self.data { + sink.extend(*s); + } + } +} + +impl wasm_encoder::ComponentSection for Custom<'_> { + fn id(&self) -> u8 { + SectionId::Custom.into() + } +} + +// TODO: move these core conversion functions to the core module +// once we update core encoding to use wasm-encoder. 
+impl From<core::ValType<'_>> for wasm_encoder::ValType { + fn from(ty: core::ValType) -> Self { + match ty { + core::ValType::I32 => Self::I32, + core::ValType::I64 => Self::I64, + core::ValType::F32 => Self::F32, + core::ValType::F64 => Self::F64, + core::ValType::V128 => Self::V128, + core::ValType::Ref(r) => Self::Ref(r.into()), + } + } +} + +impl From<core::RefType<'_>> for wasm_encoder::RefType { + fn from(r: core::RefType<'_>) -> Self { + wasm_encoder::RefType { + nullable: r.nullable, + heap_type: r.heap.into(), + } + } +} + +impl From<core::HeapType<'_>> for wasm_encoder::HeapType { + fn from(r: core::HeapType<'_>) -> Self { + match r { + core::HeapType::Func => Self::Func, + core::HeapType::Extern => Self::Extern, + core::HeapType::Index(Index::Num(i, _)) => Self::TypedFunc(i), + core::HeapType::Index(_) => panic!("unresolved index"), + core::HeapType::Any + | core::HeapType::Eq + | core::HeapType::Struct + | core::HeapType::Array + | core::HeapType::NoFunc + | core::HeapType::NoExtern + | core::HeapType::None + | core::HeapType::I31 => { + todo!("encoding of GC proposal types not yet implemented") + } + } + } +} + +impl From<&core::ItemKind<'_>> for wasm_encoder::EntityType { + fn from(kind: &core::ItemKind) -> Self { + match kind { + core::ItemKind::Func(t) => Self::Function(t.into()), + core::ItemKind::Table(t) => Self::Table((*t).into()), + core::ItemKind::Memory(t) => Self::Memory((*t).into()), + core::ItemKind::Global(t) => Self::Global((*t).into()), + core::ItemKind::Tag(t) => Self::Tag(t.into()), + } + } +} + +impl From<core::TableType<'_>> for wasm_encoder::TableType { + fn from(ty: core::TableType) -> Self { + Self { + element_type: ty.elem.into(), + minimum: ty.limits.min, + maximum: ty.limits.max, + } + } +} + +impl From<core::MemoryType> for wasm_encoder::MemoryType { + fn from(ty: core::MemoryType) -> Self { + let (minimum, maximum, memory64, shared) = match ty { + core::MemoryType::B32 { limits, shared } => { + (limits.min.into(), 
limits.max.map(Into::into), false, shared) + } + core::MemoryType::B64 { limits, shared } => (limits.min, limits.max, true, shared), + }; + + Self { + minimum, + maximum, + memory64, + shared, + } + } +} + +impl From<core::GlobalType<'_>> for wasm_encoder::GlobalType { + fn from(ty: core::GlobalType) -> Self { + Self { + val_type: ty.ty.into(), + mutable: ty.mutable, + } + } +} + +impl From<&core::TagType<'_>> for wasm_encoder::TagType { + fn from(ty: &core::TagType) -> Self { + match ty { + core::TagType::Exception(r) => Self { + kind: wasm_encoder::TagKind::Exception, + func_type_idx: r.into(), + }, + } + } +} + +impl<T: std::fmt::Debug> From<&core::TypeUse<'_, T>> for u32 { + fn from(u: &core::TypeUse<'_, T>) -> Self { + match &u.index { + Some(i) => (*i).into(), + None => unreachable!("unresolved type use in encoding: {:?}", u), + } + } +} + +impl From<&CoreInstantiationArgKind<'_>> for wasm_encoder::ModuleArg { + fn from(kind: &CoreInstantiationArgKind) -> Self { + match kind { + CoreInstantiationArgKind::Instance(i) => { + wasm_encoder::ModuleArg::Instance(i.idx.into()) + } + CoreInstantiationArgKind::BundleOfExports(..) 
=> { + unreachable!("should be expanded already") + } + } + } +} + +impl From<&CoreItemRef<'_, core::ExportKind>> for (wasm_encoder::ExportKind, u32) { + fn from(item: &CoreItemRef<'_, core::ExportKind>) -> Self { + match &item.kind { + core::ExportKind::Func => (wasm_encoder::ExportKind::Func, item.idx.into()), + core::ExportKind::Table => (wasm_encoder::ExportKind::Table, item.idx.into()), + core::ExportKind::Memory => (wasm_encoder::ExportKind::Memory, item.idx.into()), + core::ExportKind::Global => (wasm_encoder::ExportKind::Global, item.idx.into()), + core::ExportKind::Tag => (wasm_encoder::ExportKind::Tag, item.idx.into()), + } + } +} + +impl From<core::ExportKind> for wasm_encoder::ExportKind { + fn from(kind: core::ExportKind) -> Self { + match kind { + core::ExportKind::Func => Self::Func, + core::ExportKind::Table => Self::Table, + core::ExportKind::Memory => Self::Memory, + core::ExportKind::Global => Self::Global, + core::ExportKind::Tag => Self::Tag, + } + } +} + +impl From<Index<'_>> for u32 { + fn from(i: Index<'_>) -> Self { + match i { + Index::Num(i, _) => i, + Index::Id(_) => unreachable!("unresolved index in encoding: {:?}", i), + } + } +} + +impl<T> From<&ItemRef<'_, T>> for u32 { + fn from(i: &ItemRef<'_, T>) -> Self { + assert!(i.export_names.is_empty()); + i.idx.into() + } +} + +impl<T> From<&CoreTypeUse<'_, T>> for u32 { + fn from(u: &CoreTypeUse<'_, T>) -> Self { + match u { + CoreTypeUse::Inline(_) => unreachable!("should be expanded already"), + CoreTypeUse::Ref(r) => r.idx.into(), + } + } +} + +impl<T> From<&ComponentTypeUse<'_, T>> for u32 { + fn from(u: &ComponentTypeUse<'_, T>) -> Self { + match u { + ComponentTypeUse::Inline(_) => unreachable!("should be expanded already"), + ComponentTypeUse::Ref(r) => r.idx.into(), + } + } +} + +impl From<&ComponentValType<'_>> for wasm_encoder::ComponentValType { + fn from(r: &ComponentValType) -> Self { + match r { + ComponentValType::Inline(ComponentDefinedType::Primitive(p)) => { + 
Self::Primitive((*p).into()) + } + ComponentValType::Ref(i) => Self::Type(u32::from(*i)), + ComponentValType::Inline(_) => unreachable!("should be expanded by now"), + } + } +} + +impl From<PrimitiveValType> for wasm_encoder::PrimitiveValType { + fn from(p: PrimitiveValType) -> Self { + match p { + PrimitiveValType::Bool => Self::Bool, + PrimitiveValType::S8 => Self::S8, + PrimitiveValType::U8 => Self::U8, + PrimitiveValType::S16 => Self::S16, + PrimitiveValType::U16 => Self::U16, + PrimitiveValType::S32 => Self::S32, + PrimitiveValType::U32 => Self::U32, + PrimitiveValType::S64 => Self::S64, + PrimitiveValType::U64 => Self::U64, + PrimitiveValType::Float32 => Self::Float32, + PrimitiveValType::Float64 => Self::Float64, + PrimitiveValType::Char => Self::Char, + PrimitiveValType::String => Self::String, + } + } +} + +impl From<&Refinement<'_>> for u32 { + fn from(r: &Refinement) -> Self { + match r { + Refinement::Index(..) => unreachable!("should be resolved by now"), + Refinement::Resolved(i) => *i, + } + } +} + +impl From<&ItemSigKind<'_>> for wasm_encoder::ComponentTypeRef { + fn from(k: &ItemSigKind) -> Self { + match k { + ItemSigKind::Component(c) => Self::Component(c.into()), + ItemSigKind::CoreModule(m) => Self::Module(m.into()), + ItemSigKind::Instance(i) => Self::Instance(i.into()), + ItemSigKind::Value(v) => Self::Value((&v.0).into()), + ItemSigKind::Func(f) => Self::Func(f.into()), + ItemSigKind::Type(TypeBounds::Eq(t)) => { + Self::Type(wasm_encoder::TypeBounds::Eq, (*t).into()) + } + } + } +} + +impl From<&ComponentType<'_>> for wasm_encoder::ComponentType { + fn from(ty: &ComponentType) -> Self { + let mut encoded = wasm_encoder::ComponentType::new(); + + for decl in &ty.decls { + match decl { + ComponentTypeDecl::CoreType(t) => { + encode_core_type(encoded.core_type(), &t.def); + } + ComponentTypeDecl::Type(t) => { + encode_type(encoded.ty(), &t.def); + } + ComponentTypeDecl::Alias(a) => { + encoded.alias((&a.target).into()); + } + 
ComponentTypeDecl::Import(i) => { + encoded.import(i.name, i.url.unwrap_or(""), (&i.item.kind).into()); + } + ComponentTypeDecl::Export(e) => { + encoded.export(e.name, e.url.unwrap_or(""), (&e.item.kind).into()); + } + } + } + + encoded + } +} + +impl From<&InstanceType<'_>> for wasm_encoder::InstanceType { + fn from(ty: &InstanceType) -> Self { + let mut encoded = wasm_encoder::InstanceType::new(); + + for decl in &ty.decls { + match decl { + InstanceTypeDecl::CoreType(t) => { + encode_core_type(encoded.core_type(), &t.def); + } + InstanceTypeDecl::Type(t) => { + encode_type(encoded.ty(), &t.def); + } + InstanceTypeDecl::Alias(a) => { + encoded.alias((&a.target).into()); + } + InstanceTypeDecl::Export(e) => { + encoded.export(e.name, e.url.unwrap_or(""), (&e.item.kind).into()); + } + } + } + + encoded + } +} + +impl From<&ModuleType<'_>> for wasm_encoder::ModuleType { + fn from(ty: &ModuleType) -> Self { + let mut encoded = wasm_encoder::ModuleType::new(); + + for decl in &ty.decls { + match decl { + ModuleTypeDecl::Type(t) => match &t.def { + core::TypeDef::Func(f) => encoded.ty().function( + f.params.iter().map(|(_, _, ty)| (*ty).into()), + f.results.iter().copied().map(Into::into), + ), + core::TypeDef::Struct(_) | core::TypeDef::Array(_) => { + todo!("encoding of GC proposal types not yet implemented") + } + }, + ModuleTypeDecl::Alias(a) => match &a.target { + AliasTarget::Outer { + outer, + index, + kind: ComponentOuterAliasKind::CoreType, + } => { + encoded.alias_outer_core_type(u32::from(*outer), u32::from(*index)); + } + _ => unreachable!("only outer type aliases are supported"), + }, + ModuleTypeDecl::Import(i) => { + encoded.import(i.module, i.field, (&i.item.kind).into()); + } + ModuleTypeDecl::Export(name, item) => { + encoded.export(name, (&item.kind).into()); + } + } + } + + encoded + } +} + +impl From<&InstantiationArgKind<'_>> for (wasm_encoder::ComponentExportKind, u32) { + fn from(kind: &InstantiationArgKind) -> Self { + match kind { + 
InstantiationArgKind::Item(i) => i.into(), + InstantiationArgKind::BundleOfExports(..) => unreachable!("should be expanded already"), + } + } +} + +impl From<&ComponentExportKind<'_>> for (wasm_encoder::ComponentExportKind, u32) { + fn from(kind: &ComponentExportKind) -> Self { + match kind { + ComponentExportKind::CoreModule(m) => { + (wasm_encoder::ComponentExportKind::Module, m.idx.into()) + } + ComponentExportKind::Func(f) => (wasm_encoder::ComponentExportKind::Func, f.idx.into()), + ComponentExportKind::Value(v) => { + (wasm_encoder::ComponentExportKind::Value, v.idx.into()) + } + ComponentExportKind::Type(t) => (wasm_encoder::ComponentExportKind::Type, t.idx.into()), + ComponentExportKind::Component(c) => { + (wasm_encoder::ComponentExportKind::Component, c.idx.into()) + } + ComponentExportKind::Instance(i) => { + (wasm_encoder::ComponentExportKind::Instance, i.idx.into()) + } + } + } +} + +impl From<ComponentOuterAliasKind> for wasm_encoder::ComponentOuterAliasKind { + fn from(kind: ComponentOuterAliasKind) -> Self { + match kind { + ComponentOuterAliasKind::CoreModule => Self::CoreModule, + ComponentOuterAliasKind::CoreType => Self::CoreType, + ComponentOuterAliasKind::Type => Self::Type, + ComponentOuterAliasKind::Component => Self::Component, + } + } +} + +impl From<ComponentExportAliasKind> for wasm_encoder::ComponentExportKind { + fn from(kind: ComponentExportAliasKind) -> Self { + match kind { + ComponentExportAliasKind::CoreModule => Self::Module, + ComponentExportAliasKind::Func => Self::Func, + ComponentExportAliasKind::Value => Self::Value, + ComponentExportAliasKind::Type => Self::Type, + ComponentExportAliasKind::Component => Self::Component, + ComponentExportAliasKind::Instance => Self::Instance, + } + } +} + +impl From<&CanonOpt<'_>> for wasm_encoder::CanonicalOption { + fn from(opt: &CanonOpt) -> Self { + match opt { + CanonOpt::StringUtf8 => Self::UTF8, + CanonOpt::StringUtf16 => Self::UTF16, + CanonOpt::StringLatin1Utf16 => 
Self::CompactUTF16, + CanonOpt::Memory(m) => Self::Memory(m.idx.into()), + CanonOpt::Realloc(f) => Self::Realloc(f.idx.into()), + CanonOpt::PostReturn(f) => Self::PostReturn(f.idx.into()), + } + } +} + +impl<'a> From<&AliasTarget<'a>> for wasm_encoder::Alias<'a> { + fn from(target: &AliasTarget<'a>) -> Self { + match target { + AliasTarget::Export { + instance, + name, + kind, + } => wasm_encoder::Alias::InstanceExport { + instance: (*instance).into(), + kind: (*kind).into(), + name, + }, + AliasTarget::CoreExport { + instance, + name, + kind, + } => wasm_encoder::Alias::CoreInstanceExport { + instance: (*instance).into(), + kind: (*kind).into(), + name, + }, + AliasTarget::Outer { outer, index, kind } => wasm_encoder::Alias::Outer { + count: (*outer).into(), + kind: (*kind).into(), + index: (*index).into(), + }, + } + } +} diff --git a/third_party/rust/wast/src/component/component.rs b/third_party/rust/wast/src/component/component.rs new file mode 100644 index 0000000000..8424e083b8 --- /dev/null +++ b/third_party/rust/wast/src/component/component.rs @@ -0,0 +1,313 @@ +use crate::annotation; +use crate::component::*; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::Index; +use crate::token::{Id, NameAnnotation, Span}; + +/// A parsed WebAssembly component module. +#[derive(Debug)] +pub struct Component<'a> { + /// Where this `component` was defined + pub span: Span, + /// An optional identifier this component is known by + pub id: Option<Id<'a>>, + /// An optional `@name` annotation for this component + pub name: Option<NameAnnotation<'a>>, + /// What kind of component this was parsed as. + pub kind: ComponentKind<'a>, +} + +/// The different kinds of ways to define a component. +#[derive(Debug)] +pub enum ComponentKind<'a> { + /// A component defined in the textual s-expression format. + Text(Vec<ComponentField<'a>>), + /// A component that had its raw binary bytes defined via the `binary` + /// directive. 
+ Binary(Vec<&'a [u8]>), +} + +impl<'a> Component<'a> { + /// Performs a name resolution pass on this [`Component`], resolving all + /// symbolic names to indices. + /// + /// The WAT format contains a number of shorthands to make it easier to + /// write, such as inline exports, inline imports, inline type definitions, + /// etc. Additionally it allows using symbolic names such as `$foo` instead + /// of using indices. This module will postprocess an AST to remove all of + /// this syntactic sugar, preparing the AST for binary emission. This is + /// where expansion and name resolution happens. + /// + /// This function will mutate the AST of this [`Component`] and replace all + /// [`Index`](crate::token::Index) arguments with `Index::Num`. This will + /// also expand inline exports/imports listed on fields and handle various + /// other shorthands of the text format. + /// + /// If successful the AST was modified to be ready for binary encoding. + /// + /// # Errors + /// + /// If an error happens during resolution, such a name resolution error or + /// items are found in the wrong order, then an error is returned. + pub fn resolve(&mut self) -> std::result::Result<(), crate::Error> { + match &mut self.kind { + ComponentKind::Text(fields) => { + crate::component::expand::expand(fields); + } + ComponentKind::Binary(_) => {} + } + crate::component::resolve::resolve(self) + } + + /// Encodes this [`Component`] to its binary form. + /// + /// This function will take the textual representation in [`Component`] and + /// perform all steps necessary to convert it to a binary WebAssembly + /// component, suitable for writing to a `*.wasm` file. This function may + /// internally modify the [`Component`], for example: + /// + /// * Name resolution is performed to ensure that `Index::Id` isn't present + /// anywhere in the AST. + /// + /// * Inline shorthands such as imports/exports/types are all expanded to be + /// dedicated fields of the component. 
+ /// + /// * Component fields may be shuffled around to preserve index ordering from + /// expansions. + /// + /// After all of this expansion has happened the component will be converted to + /// its binary form and returned as a `Vec<u8>`. This is then suitable to + /// hand off to other wasm runtimes and such. + /// + /// # Errors + /// + /// This function can return an error for name resolution errors and other + /// expansion-related errors. + pub fn encode(&mut self) -> std::result::Result<Vec<u8>, crate::Error> { + self.resolve()?; + Ok(crate::component::binary::encode(self)) + } + + pub(crate) fn validate(&self, parser: Parser<'_>) -> Result<()> { + let mut starts = 0; + if let ComponentKind::Text(fields) = &self.kind { + for item in fields.iter() { + if let ComponentField::Start(_) = item { + starts += 1; + } + } + } + if starts > 1 { + return Err(parser.error("multiple start sections found")); + } + Ok(()) + } +} + +impl<'a> Parse<'a> for Component<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let _r = parser.register_annotation("custom"); + + let span = parser.parse::<kw::component>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + + let kind = if parser.peek::<kw::binary>() { + parser.parse::<kw::binary>()?; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + ComponentKind::Binary(data) + } else { + ComponentKind::Text(ComponentField::parse_remaining(parser)?) + }; + Ok(Component { + span, + id, + name, + kind, + }) + } +} + +/// A listing of all possible fields that can make up a WebAssembly component. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub enum ComponentField<'a> { + CoreModule(CoreModule<'a>), + CoreInstance(CoreInstance<'a>), + CoreType(CoreType<'a>), + Component(NestedComponent<'a>), + Instance(Instance<'a>), + Alias(Alias<'a>), + Type(Type<'a>), + CanonicalFunc(CanonicalFunc<'a>), + CoreFunc(CoreFunc<'a>), // Supports inverted forms of other items + Func(Func<'a>), // Supports inverted forms of other items + Start(Start<'a>), + Import(ComponentImport<'a>), + Export(ComponentExport<'a>), + Custom(Custom<'a>), +} + +impl<'a> ComponentField<'a> { + fn parse_remaining(parser: Parser<'a>) -> Result<Vec<ComponentField>> { + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parens(ComponentField::parse)?); + } + Ok(fields) + } +} + +impl<'a> Parse<'a> for ComponentField<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::core>() { + if parser.peek2::<kw::module>() { + return Ok(Self::CoreModule(parser.parse()?)); + } + if parser.peek2::<kw::instance>() { + return Ok(Self::CoreInstance(parser.parse()?)); + } + if parser.peek2::<kw::r#type>() { + return Ok(Self::CoreType(parser.parse()?)); + } + if parser.peek2::<kw::func>() { + return Ok(Self::CoreFunc(parser.parse()?)); + } + } else { + if parser.peek::<kw::component>() { + return Ok(Self::Component(parser.parse()?)); + } + if parser.peek::<kw::instance>() { + return Ok(Self::Instance(parser.parse()?)); + } + if parser.peek::<kw::alias>() { + return Ok(Self::Alias(parser.parse()?)); + } + if parser.peek::<kw::r#type>() { + return Ok(Self::Type(parser.parse()?)); + } + if parser.peek::<kw::import>() { + return Ok(Self::Import(parser.parse()?)); + } + if parser.peek::<kw::func>() { + return Ok(Self::Func(parser.parse()?)); + } + if parser.peek::<kw::export>() { + return Ok(Self::Export(parser.parse()?)); + } + if parser.peek::<kw::start>() { + return Ok(Self::Start(parser.parse()?)); + } + if parser.peek::<annotation::custom>() { + return 
Ok(Self::Custom(parser.parse()?)); + } + } + Err(parser.error("expected valid component field")) + } +} + +/// A function to call at instantiation time. +#[derive(Debug)] +pub struct Start<'a> { + /// The function to call. + pub func: Index<'a>, + /// The arguments to pass to the function. + pub args: Vec<ItemRef<'a, kw::value>>, + /// Names of the result values. + pub results: Vec<Option<Id<'a>>>, +} + +impl<'a> Parse<'a> for Start<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::start>()?; + let func = parser.parse()?; + let mut args = Vec::new(); + while !parser.is_empty() && !parser.peek2::<kw::result>() { + args.push(parser.parens(|parser| parser.parse())?); + } + + let mut results = Vec::new(); + while !parser.is_empty() && parser.peek2::<kw::result>() { + results.push(parser.parens(|parser| { + parser.parse::<kw::result>()?; + parser.parens(|parser| { + parser.parse::<kw::value>()?; + parser.parse() + }) + })?); + } + + Ok(Start { + func, + args, + results, + }) + } +} + +/// A nested WebAssembly component. +#[derive(Debug)] +pub struct NestedComponent<'a> { + /// Where this `component` was defined + pub span: Span, + /// An optional identifier this component is known by + pub id: Option<Id<'a>>, + /// An optional `@name` annotation for this component + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// What kind of component this was parsed as. + pub kind: NestedComponentKind<'a>, +} + +/// The different kinds of ways to define a nested component. +#[derive(Debug)] +pub enum NestedComponentKind<'a> { + /// This is actually an inline import of a component + Import { + /// The information about where this is being imported from. + import: InlineImport<'a>, + /// The type of component being imported. 
+ ty: ComponentTypeUse<'a, ComponentType<'a>>, + }, + /// The component is defined inline as a local definition with its fields + /// listed here. + Inline(Vec<ComponentField<'a>>), +} + +impl<'a> Parse<'a> for NestedComponent<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.depth_check()?; + + let span = parser.parse::<kw::component>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + let kind = if let Some(import) = parser.parse()? { + NestedComponentKind::Import { + import, + ty: parser.parse()?, + } + } else { + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parens(|p| p.parse())?); + } + NestedComponentKind::Inline(fields) + }; + + Ok(NestedComponent { + span, + id, + name, + exports, + kind, + }) + } +} diff --git a/third_party/rust/wast/src/component/custom.rs b/third_party/rust/wast/src/component/custom.rs new file mode 100644 index 0000000000..b17a7fafb4 --- /dev/null +++ b/third_party/rust/wast/src/component/custom.rs @@ -0,0 +1,28 @@ +use crate::annotation; +use crate::parser::{Parse, Parser, Result}; +use crate::token::Span; + +/// A custom section within a component. +#[derive(Debug)] +pub struct Custom<'a> { + /// Where this `@custom` was defined. + pub span: Span, + + /// Name of the custom section. + pub name: &'a str, + + /// Payload of this custom section. 
+ pub data: Vec<&'a [u8]>, +} + +impl<'a> Parse<'a> for Custom<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<annotation::custom>()?.0; + let name = parser.parse()?; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + Ok(Self { span, name, data }) + } +} diff --git a/third_party/rust/wast/src/component/expand.rs b/third_party/rust/wast/src/component/expand.rs new file mode 100644 index 0000000000..2bf0b529e4 --- /dev/null +++ b/third_party/rust/wast/src/component/expand.rs @@ -0,0 +1,859 @@ +use crate::component::*; +use crate::core; +use crate::gensym; +use crate::kw; +use crate::token::Id; +use crate::token::{Index, Span}; +use std::collections::HashMap; +use std::mem; + +/// Performs an AST "expansion" pass over the component fields provided. +/// +/// This expansion is intended to desugar the AST from various parsed constructs +/// to bits and bobs amenable for name resolution as well as binary encoding. +/// For example `(import "i" (func))` is split into a type definition followed by +/// the import referencing that type definition. +/// +/// Most forms of AST expansion happen in this file and afterwards the AST will +/// be handed to the name resolution pass which will convert `Index::Id` to +/// `Index::Num` wherever it's found. 
+pub fn expand(fields: &mut Vec<ComponentField<'_>>) { + Expander::default().expand_component_fields(fields) +} + +enum AnyType<'a> { + Core(CoreType<'a>), + Component(Type<'a>), +} + +impl<'a> From<AnyType<'a>> for ComponentTypeDecl<'a> { + fn from(t: AnyType<'a>) -> Self { + match t { + AnyType::Core(t) => Self::CoreType(t), + AnyType::Component(t) => Self::Type(t), + } + } +} + +impl<'a> From<AnyType<'a>> for InstanceTypeDecl<'a> { + fn from(t: AnyType<'a>) -> Self { + match t { + AnyType::Core(t) => Self::CoreType(t), + AnyType::Component(t) => Self::Type(t), + } + } +} + +impl<'a> From<AnyType<'a>> for ComponentField<'a> { + fn from(t: AnyType<'a>) -> Self { + match t { + AnyType::Core(t) => Self::CoreType(t), + AnyType::Component(t) => Self::Type(t), + } + } +} + +#[derive(Default)] +struct Expander<'a> { + /// Fields, during processing, which should be prepended to the + /// currently-being-processed field. This should always be empty after + /// processing is complete. + types_to_prepend: Vec<AnyType<'a>>, + component_fields_to_prepend: Vec<ComponentField<'a>>, + + /// Fields that are appended to the end of the module once everything has + /// finished. 
+ component_fields_to_append: Vec<ComponentField<'a>>, +} + +impl<'a> Expander<'a> { + fn expand_component_fields(&mut self, fields: &mut Vec<ComponentField<'a>>) { + let mut cur = 0; + while cur < fields.len() { + self.expand_field(&mut fields[cur]); + let amt = self.types_to_prepend.len() + self.component_fields_to_prepend.len(); + fields.splice(cur..cur, self.component_fields_to_prepend.drain(..)); + fields.splice(cur..cur, self.types_to_prepend.drain(..).map(Into::into)); + cur += 1 + amt; + } + fields.append(&mut self.component_fields_to_append); + } + + fn expand_decls<T>(&mut self, decls: &mut Vec<T>, expand: fn(&mut Self, &mut T)) + where + T: From<AnyType<'a>>, + { + let mut cur = 0; + while cur < decls.len() { + expand(self, &mut decls[cur]); + assert!(self.component_fields_to_prepend.is_empty()); + assert!(self.component_fields_to_append.is_empty()); + let amt = self.types_to_prepend.len(); + decls.splice(cur..cur, self.types_to_prepend.drain(..).map(From::from)); + cur += 1 + amt; + } + } + + fn expand_field(&mut self, item: &mut ComponentField<'a>) { + let expanded = match item { + ComponentField::CoreModule(m) => self.expand_core_module(m), + ComponentField::CoreInstance(i) => { + self.expand_core_instance(i); + None + } + ComponentField::CoreType(t) => { + self.expand_core_type(t); + None + } + ComponentField::Component(c) => self.expand_nested_component(c), + ComponentField::Instance(i) => self.expand_instance(i), + ComponentField::Type(t) => { + self.expand_type(t); + None + } + ComponentField::CanonicalFunc(f) => { + self.expand_canonical_func(f); + None + } + ComponentField::CoreFunc(f) => self.expand_core_func(f), + ComponentField::Func(f) => self.expand_func(f), + ComponentField::Import(i) => { + self.expand_item_sig(&mut i.item); + None + } + ComponentField::Export(e) => { + if let Some(sig) = &mut e.ty { + self.expand_item_sig(&mut sig.0); + } + None + } + ComponentField::Start(_) | ComponentField::Alias(_) | ComponentField::Custom(_) => 
None, + }; + + if let Some(expanded) = expanded { + *item = expanded; + } + } + + fn expand_core_module(&mut self, module: &mut CoreModule<'a>) -> Option<ComponentField<'a>> { + for (name, url) in module.exports.names.drain(..) { + let id = gensym::fill(module.span, &mut module.id); + self.component_fields_to_append + .push(ComponentField::Export(ComponentExport { + span: module.span, + id: None, + debug_name: None, + name, + url, + kind: ComponentExportKind::module(module.span, id), + ty: None, + })); + } + match &mut module.kind { + // inline modules are expanded later during resolution + CoreModuleKind::Inline { .. } => None, + CoreModuleKind::Import { import, ty } => { + let idx = self.expand_core_type_use(ty); + Some(ComponentField::Import(ComponentImport { + span: module.span, + name: import.name, + url: import.url, + item: ItemSig { + span: module.span, + id: module.id, + name: None, + kind: ItemSigKind::CoreModule(CoreTypeUse::Ref(idx)), + }, + })) + } + } + } + + fn expand_core_instance(&mut self, instance: &mut CoreInstance<'a>) { + match &mut instance.kind { + CoreInstanceKind::Instantiate { args, .. } => { + for arg in args { + self.expand_core_instantiation_arg(&mut arg.kind); + } + } + CoreInstanceKind::BundleOfExports { .. } => {} + } + } + + fn expand_nested_component( + &mut self, + component: &mut NestedComponent<'a>, + ) -> Option<ComponentField<'a>> { + for (name, url) in component.exports.names.drain(..) 
{ + let id = gensym::fill(component.span, &mut component.id); + self.component_fields_to_append + .push(ComponentField::Export(ComponentExport { + span: component.span, + id: None, + debug_name: None, + name, + url, + kind: ComponentExportKind::component(component.span, id), + ty: None, + })); + } + match &mut component.kind { + NestedComponentKind::Inline(fields) => { + expand(fields); + None + } + NestedComponentKind::Import { import, ty } => { + let idx = self.expand_component_type_use(ty); + Some(ComponentField::Import(ComponentImport { + span: component.span, + name: import.name, + url: import.url, + item: ItemSig { + span: component.span, + id: component.id, + name: None, + kind: ItemSigKind::Component(ComponentTypeUse::Ref(idx)), + }, + })) + } + } + } + + fn expand_instance(&mut self, instance: &mut Instance<'a>) -> Option<ComponentField<'a>> { + for (name, url) in instance.exports.names.drain(..) { + let id = gensym::fill(instance.span, &mut instance.id); + self.component_fields_to_append + .push(ComponentField::Export(ComponentExport { + span: instance.span, + id: None, + debug_name: None, + name, + url, + kind: ComponentExportKind::instance(instance.span, id), + ty: None, + })); + } + match &mut instance.kind { + InstanceKind::Import { import, ty } => { + let idx = self.expand_component_type_use(ty); + Some(ComponentField::Import(ComponentImport { + span: instance.span, + name: import.name, + url: import.url, + item: ItemSig { + span: instance.span, + id: instance.id, + name: None, + kind: ItemSigKind::Instance(ComponentTypeUse::Ref(idx)), + }, + })) + } + InstanceKind::Instantiate { args, .. } => { + for arg in args { + self.expand_instantiation_arg(&mut arg.kind); + } + None + } + InstanceKind::BundleOfExports { .. } => None, + } + } + + fn expand_canonical_func(&mut self, func: &mut CanonicalFunc<'a>) { + match &mut func.kind { + CanonicalFuncKind::Lift { ty, .. 
} => { + self.expand_component_type_use(ty); + } + CanonicalFuncKind::Lower(_) => {} + } + } + + fn expand_core_func(&mut self, func: &mut CoreFunc<'a>) -> Option<ComponentField<'a>> { + match &mut func.kind { + CoreFuncKind::Alias(a) => Some(ComponentField::Alias(Alias { + span: func.span, + id: func.id, + name: func.name, + target: AliasTarget::CoreExport { + instance: a.instance, + name: a.name, + kind: core::ExportKind::Func, + }, + })), + CoreFuncKind::Lower(info) => Some(ComponentField::CanonicalFunc(CanonicalFunc { + span: func.span, + id: func.id, + name: func.name, + kind: CanonicalFuncKind::Lower(mem::take(info)), + })), + } + } + + fn expand_func(&mut self, func: &mut Func<'a>) -> Option<ComponentField<'a>> { + for (name, url) in func.exports.names.drain(..) { + let id = gensym::fill(func.span, &mut func.id); + self.component_fields_to_append + .push(ComponentField::Export(ComponentExport { + span: func.span, + id: None, + debug_name: None, + name, + url, + kind: ComponentExportKind::func(func.span, id), + ty: None, + })); + } + match &mut func.kind { + FuncKind::Import { import, ty } => { + let idx = self.expand_component_type_use(ty); + Some(ComponentField::Import(ComponentImport { + span: func.span, + name: import.name, + url: import.url, + item: ItemSig { + span: func.span, + id: func.id, + name: None, + kind: ItemSigKind::Func(ComponentTypeUse::Ref(idx)), + }, + })) + } + FuncKind::Lift { ty, info } => { + let idx = self.expand_component_type_use(ty); + Some(ComponentField::CanonicalFunc(CanonicalFunc { + span: func.span, + id: func.id, + name: func.name, + kind: CanonicalFuncKind::Lift { + ty: ComponentTypeUse::Ref(idx), + info: mem::take(info), + }, + })) + } + FuncKind::Alias(a) => Some(ComponentField::Alias(Alias { + span: func.span, + id: func.id, + name: func.name, + target: AliasTarget::Export { + instance: a.instance, + name: a.name, + kind: ComponentExportAliasKind::Func, + }, + })), + } + } + + fn expand_core_type(&mut self, field: &mut 
CoreType<'a>) { + match &mut field.def { + CoreTypeDef::Def(_) => {} + CoreTypeDef::Module(m) => self.expand_module_ty(m), + } + + let id = gensym::fill(field.span, &mut field.id); + let index = Index::Id(id); + match &field.def { + CoreTypeDef::Def(_) => {} + CoreTypeDef::Module(t) => t.key().insert(self, index), + } + } + + fn expand_type(&mut self, field: &mut Type<'a>) { + match &mut field.def { + TypeDef::Defined(d) => self.expand_defined_ty(d), + TypeDef::Func(f) => self.expand_func_ty(f), + TypeDef::Component(c) => self.expand_component_ty(c), + TypeDef::Instance(i) => self.expand_instance_ty(i), + } + + let id = gensym::fill(field.span, &mut field.id); + let index = Index::Id(id); + match &field.def { + TypeDef::Defined(t) => t.key().insert(self, index), + TypeDef::Func(t) => t.key().insert(self, index), + TypeDef::Component(t) => t.key().insert(self, index), + TypeDef::Instance(t) => t.key().insert(self, index), + } + for (name, url) in field.exports.names.drain(..) { + self.component_fields_to_append + .push(ComponentField::Export(ComponentExport { + span: field.span, + id: None, + debug_name: None, + name, + url, + kind: ComponentExportKind::ty(field.span, id), + ty: None, + })); + } + } + + fn expand_func_ty(&mut self, ty: &mut ComponentFunctionType<'a>) { + for param in ty.params.iter_mut() { + self.expand_component_val_ty(&mut param.ty); + } + + for result in ty.results.iter_mut() { + self.expand_component_val_ty(&mut result.ty); + } + } + + fn expand_module_ty(&mut self, ty: &mut ModuleType<'a>) { + use crate::core::resolve::types::{FuncKey, TypeKey, TypeReference}; + + // Note that this is a custom implementation from everything else in + // this file since this is using core wasm types instead of component + // types, so a small part of the core wasm expansion process is + // inlined here to handle the `TypeUse` from core wasm. 
+ + let mut func_type_to_idx = HashMap::new(); + let mut to_prepend = Vec::new(); + let mut i = 0; + while i < ty.decls.len() { + match &mut ty.decls[i] { + ModuleTypeDecl::Type(ty) => match &ty.def { + core::TypeDef::Func(f) => { + let id = gensym::fill(ty.span, &mut ty.id); + func_type_to_idx.insert(f.key(), Index::Id(id)); + } + core::TypeDef::Struct(_) => {} + core::TypeDef::Array(_) => {} + }, + ModuleTypeDecl::Alias(_) => {} + ModuleTypeDecl::Import(ty) => { + expand_sig(&mut ty.item, &mut to_prepend, &mut func_type_to_idx); + } + ModuleTypeDecl::Export(_, item) => { + expand_sig(item, &mut to_prepend, &mut func_type_to_idx); + } + } + ty.decls.splice(i..i, to_prepend.drain(..)); + i += 1; + } + + fn expand_sig<'a>( + item: &mut core::ItemSig<'a>, + to_prepend: &mut Vec<ModuleTypeDecl<'a>>, + func_type_to_idx: &mut HashMap<FuncKey<'a>, Index<'a>>, + ) { + match &mut item.kind { + core::ItemKind::Func(t) | core::ItemKind::Tag(core::TagType::Exception(t)) => { + // If the index is already filled in then this is skipped + if t.index.is_some() { + return; + } + + // Otherwise the inline type information is used to + // generate a type into this module if necessary. If the + // function type already exists we reuse the same key, + // otherwise a fresh type definition is created and we use + // that one instead. 
+ let ty = t.inline.take().unwrap_or_default(); + let key = ty.key(); + if let Some(idx) = func_type_to_idx.get(&key) { + t.index = Some(*idx); + return; + } + let id = gensym::gen(item.span); + to_prepend.push(ModuleTypeDecl::Type(core::Type { + span: item.span, + id: Some(id), + name: None, + def: key.to_def(item.span), + parent: None, + })); + let idx = Index::Id(id); + t.index = Some(idx); + } + core::ItemKind::Global(_) + | core::ItemKind::Table(_) + | core::ItemKind::Memory(_) => {} + } + } + } + + fn expand_component_ty(&mut self, ty: &mut ComponentType<'a>) { + Expander::default().expand_decls(&mut ty.decls, |e, decl| match decl { + ComponentTypeDecl::CoreType(t) => e.expand_core_type(t), + ComponentTypeDecl::Type(t) => e.expand_type(t), + ComponentTypeDecl::Alias(_) => {} + ComponentTypeDecl::Export(t) => e.expand_item_sig(&mut t.item), + ComponentTypeDecl::Import(t) => e.expand_item_sig(&mut t.item), + }) + } + + fn expand_instance_ty(&mut self, ty: &mut InstanceType<'a>) { + Expander::default().expand_decls(&mut ty.decls, |e, decl| match decl { + InstanceTypeDecl::CoreType(t) => e.expand_core_type(t), + InstanceTypeDecl::Type(t) => e.expand_type(t), + InstanceTypeDecl::Alias(_) => {} + InstanceTypeDecl::Export(t) => e.expand_item_sig(&mut t.item), + }) + } + + fn expand_item_sig(&mut self, ext: &mut ItemSig<'a>) { + match &mut ext.kind { + ItemSigKind::CoreModule(t) => { + self.expand_core_type_use(t); + } + ItemSigKind::Func(t) => { + self.expand_component_type_use(t); + } + ItemSigKind::Component(t) => { + self.expand_component_type_use(t); + } + ItemSigKind::Instance(t) => { + self.expand_component_type_use(t); + } + ItemSigKind::Value(t) => { + self.expand_component_val_ty(&mut t.0); + } + ItemSigKind::Type(_) => {} + } + } + + fn expand_defined_ty(&mut self, ty: &mut ComponentDefinedType<'a>) { + match ty { + ComponentDefinedType::Primitive(_) + | ComponentDefinedType::Flags(_) + | ComponentDefinedType::Enum(_) => {} + 
ComponentDefinedType::Record(r) => { + for field in r.fields.iter_mut() { + self.expand_component_val_ty(&mut field.ty); + } + } + ComponentDefinedType::Variant(v) => { + for case in v.cases.iter_mut() { + if let Some(ty) = &mut case.ty { + self.expand_component_val_ty(ty); + } + } + } + ComponentDefinedType::List(t) => { + self.expand_component_val_ty(&mut t.element); + } + ComponentDefinedType::Tuple(t) => { + for field in t.fields.iter_mut() { + self.expand_component_val_ty(field); + } + } + ComponentDefinedType::Union(u) => { + for ty in u.types.iter_mut() { + self.expand_component_val_ty(ty); + } + } + ComponentDefinedType::Option(t) => { + self.expand_component_val_ty(&mut t.element); + } + ComponentDefinedType::Result(r) => { + if let Some(ty) = &mut r.ok { + self.expand_component_val_ty(ty); + } + + if let Some(ty) = &mut r.err { + self.expand_component_val_ty(ty); + } + } + } + } + + fn expand_component_val_ty(&mut self, ty: &mut ComponentValType<'a>) { + let inline = match ty { + ComponentValType::Inline(ComponentDefinedType::Primitive(_)) + | ComponentValType::Ref(_) => return, + ComponentValType::Inline(inline) => { + self.expand_defined_ty(inline); + mem::take(inline) + } + }; + // If this inline type has already been defined within this context + // then reuse the previously defined type to avoid injecting too many + // types into the type index space. + if let Some(idx) = inline.key().lookup(self) { + *ty = ComponentValType::Ref(idx); + return; + } + + // And if this type isn't already defined we append it to the index + // space with a fresh and unique name. 
+ let span = Span::from_offset(0); // FIXME(#613): don't manufacture + let id = gensym::gen(span); + + self.types_to_prepend.push(inline.into_any_type(span, id)); + + let idx = Index::Id(id); + *ty = ComponentValType::Ref(idx); + } + + fn expand_core_type_use<T>( + &mut self, + item: &mut CoreTypeUse<'a, T>, + ) -> CoreItemRef<'a, kw::r#type> + where + T: TypeReference<'a>, + { + let span = Span::from_offset(0); // FIXME(#613): don't manufacture + let mut inline = match mem::take(item) { + // If this type-use was already a reference to an existing type + // then we put it back the way it was and return the corresponding + // index. + CoreTypeUse::Ref(idx) => { + *item = CoreTypeUse::Ref(idx.clone()); + return idx; + } + + // ... otherwise with an inline type definition we go into + // processing below. + CoreTypeUse::Inline(inline) => inline, + }; + inline.expand(self); + + // If this inline type has already been defined within this context + // then reuse the previously defined type to avoid injecting too many + // types into the type index space. + if let Some(idx) = inline.key().lookup(self) { + let ret = CoreItemRef { + idx, + kind: kw::r#type(span), + export_name: None, + }; + *item = CoreTypeUse::Ref(ret.clone()); + return ret; + } + + // And if this type isn't already defined we append it to the index + // space with a fresh and unique name. 
+ let id = gensym::gen(span); + + self.types_to_prepend.push(inline.into_any_type(span, id)); + + let idx = Index::Id(id); + let ret = CoreItemRef { + idx, + kind: kw::r#type(span), + export_name: None, + }; + + *item = CoreTypeUse::Ref(ret.clone()); + ret + } + + fn expand_component_type_use<T>( + &mut self, + item: &mut ComponentTypeUse<'a, T>, + ) -> ItemRef<'a, kw::r#type> + where + T: TypeReference<'a>, + { + let span = Span::from_offset(0); // FIXME(#613): don't manufacture + let mut inline = match mem::take(item) { + // If this type-use was already a reference to an existing type + // then we put it back the way it was and return the corresponding + // index. + ComponentTypeUse::Ref(idx) => { + *item = ComponentTypeUse::Ref(idx.clone()); + return idx; + } + + // ... otherwise with an inline type definition we go into + // processing below. + ComponentTypeUse::Inline(inline) => inline, + }; + inline.expand(self); + + // If this inline type has already been defined within this context + // then reuse the previously defined type to avoid injecting too many + // types into the type index space. + if let Some(idx) = inline.key().lookup(self) { + let ret = ItemRef { + idx, + kind: kw::r#type(span), + export_names: Vec::new(), + }; + *item = ComponentTypeUse::Ref(ret.clone()); + return ret; + } + + // And if this type isn't already defined we append it to the index + // space with a fresh and unique name. 
+ let id = gensym::gen(span); + + self.types_to_prepend.push(inline.into_any_type(span, id)); + + let idx = Index::Id(id); + let ret = ItemRef { + idx, + kind: kw::r#type(span), + export_names: Vec::new(), + }; + + *item = ComponentTypeUse::Ref(ret.clone()); + ret + } + + fn expand_core_instantiation_arg(&mut self, arg: &mut CoreInstantiationArgKind<'a>) { + let (span, exports) = match arg { + CoreInstantiationArgKind::Instance(_) => return, + CoreInstantiationArgKind::BundleOfExports(span, exports) => (*span, mem::take(exports)), + }; + let id = gensym::gen(span); + self.component_fields_to_prepend + .push(ComponentField::CoreInstance(CoreInstance { + span, + id: Some(id), + name: None, + kind: CoreInstanceKind::BundleOfExports(exports), + })); + *arg = CoreInstantiationArgKind::Instance(CoreItemRef { + kind: kw::instance(span), + idx: Index::Id(id), + export_name: None, + }); + } + + fn expand_instantiation_arg(&mut self, arg: &mut InstantiationArgKind<'a>) { + let (span, exports) = match arg { + InstantiationArgKind::Item(_) => return, + InstantiationArgKind::BundleOfExports(span, exports) => (*span, mem::take(exports)), + }; + let id = gensym::gen(span); + self.component_fields_to_prepend + .push(ComponentField::Instance(Instance { + span, + id: Some(id), + name: None, + exports: Default::default(), + kind: InstanceKind::BundleOfExports(exports), + })); + *arg = InstantiationArgKind::Item(ComponentExportKind::instance(span, id)); + } +} + +trait TypeReference<'a> { + type Key: TypeKey<'a>; + fn key(&self) -> Self::Key; + fn expand(&mut self, cx: &mut Expander<'a>); + fn into_any_type(self, span: Span, id: Id<'a>) -> AnyType<'a>; +} + +impl<'a> TypeReference<'a> for ComponentDefinedType<'a> { + type Key = Todo; // FIXME(#598): should implement this + + fn key(&self) -> Self::Key { + Todo + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + cx.expand_defined_ty(self) + } + + fn into_any_type(self, span: Span, id: Id<'a>) -> AnyType<'a> { + 
AnyType::Component(Type { + span, + id: Some(id), + name: None, + exports: Default::default(), + def: TypeDef::Defined(self), + }) + } +} + +impl<'a> TypeReference<'a> for ComponentType<'a> { + type Key = Todo; // FIXME(#598): should implement this + + fn key(&self) -> Self::Key { + Todo + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + cx.expand_component_ty(self) + } + + fn into_any_type(self, span: Span, id: Id<'a>) -> AnyType<'a> { + AnyType::Component(Type { + span, + id: Some(id), + name: None, + exports: Default::default(), + def: TypeDef::Component(self), + }) + } +} + +impl<'a> TypeReference<'a> for ModuleType<'a> { + type Key = Todo; // FIXME(#598): should implement this + + fn key(&self) -> Self::Key { + Todo + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + cx.expand_module_ty(self) + } + + fn into_any_type(self, span: Span, id: Id<'a>) -> AnyType<'a> { + AnyType::Core(CoreType { + span, + id: Some(id), + name: None, + def: CoreTypeDef::Module(self), + }) + } +} + +impl<'a> TypeReference<'a> for InstanceType<'a> { + type Key = Todo; // FIXME(#598): should implement this + + fn key(&self) -> Self::Key { + Todo + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + cx.expand_instance_ty(self) + } + + fn into_any_type(self, span: Span, id: Id<'a>) -> AnyType<'a> { + AnyType::Component(Type { + span, + id: Some(id), + name: None, + exports: Default::default(), + def: TypeDef::Instance(self), + }) + } +} + +impl<'a> TypeReference<'a> for ComponentFunctionType<'a> { + type Key = Todo; // FIXME(#598): should implement this + + fn key(&self) -> Self::Key { + Todo + } + + fn expand(&mut self, cx: &mut Expander<'a>) { + cx.expand_func_ty(self) + } + + fn into_any_type(self, span: Span, id: Id<'a>) -> AnyType<'a> { + AnyType::Component(Type { + span, + id: Some(id), + name: None, + exports: Default::default(), + def: TypeDef::Func(self), + }) + } +} + +trait TypeKey<'a> { + fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>>; + fn insert(&self, 
+ cx: &mut Expander<'a>, index: Index<'a>); +} + +struct Todo; + +impl<'a> TypeKey<'a> for Todo { + fn lookup(&self, _cx: &Expander<'a>) -> Option<Index<'a>> { + None + } + + fn insert(&self, _cx: &mut Expander<'a>, _index: Index<'a>) {} +} diff --git a/third_party/rust/wast/src/component/export.rs b/third_party/rust/wast/src/component/export.rs new file mode 100644 index 0000000000..ce9baaf453 --- /dev/null +++ b/third_party/rust/wast/src/component/export.rs @@ -0,0 +1,221 @@ +use super::{ItemRef, ItemSigNoName}; +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::{Id, Index, NameAnnotation, Span}; + +/// An entry in a WebAssembly component's export section. +#[derive(Debug)] +pub struct ComponentExport<'a> { + /// Where this export was defined. + pub span: Span, + /// Optional identifier bound to this export. + pub id: Option<Id<'a>>, + /// An optional name for this export stored in the custom `name` section. + pub debug_name: Option<NameAnnotation<'a>>, + /// The name of this export from the component. + pub name: &'a str, + /// The optional URL of the export. + pub url: Option<&'a str>, + /// The kind of export. + pub kind: ComponentExportKind<'a>, + /// An optional item signature for this export, parsed from trailing parentheses. + pub ty: Option<ItemSigNoName<'a>>, +} + +impl<'a> Parse<'a> for ComponentExport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::export>()?.0; + let id = parser.parse()?; + let debug_name = parser.parse()?; + let name = parser.parse()?; + let url = parser.parse()?; + let kind = parser.parse()?; + let ty = if !parser.is_empty() { + Some(parser.parens(|p| p.parse())?) 
+ } else { + None + }; + Ok(ComponentExport { + span, + id, + debug_name, + name, + url, + kind, + ty, + }) + } +} + +impl<'a> Parse<'a> for Vec<ComponentExport<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut exports = Vec::new(); + while !parser.is_empty() { + exports.push(parser.parens(|parser| parser.parse())?); + } + Ok(exports) + } +} + +/// The kind of exported item. +#[derive(Debug)] +pub enum ComponentExportKind<'a> { + /// The export is a core module. + /// + /// Note this isn't a core item ref as currently only + /// components can export core modules. + CoreModule(ItemRef<'a, kw::module>), + /// The export is a function. + Func(ItemRef<'a, kw::func>), + /// The export is a value. + Value(ItemRef<'a, kw::value>), + /// The export is a type. + Type(ItemRef<'a, kw::r#type>), + /// The export is a component. + Component(ItemRef<'a, kw::component>), + /// The export is an instance. + Instance(ItemRef<'a, kw::instance>), +} + +impl<'a> ComponentExportKind<'a> { + pub(crate) fn module(span: Span, id: Id<'a>) -> Self { + Self::CoreModule(ItemRef { + kind: kw::module(span), + idx: Index::Id(id), + export_names: Default::default(), + }) + } + + pub(crate) fn component(span: Span, id: Id<'a>) -> Self { + Self::Component(ItemRef { + kind: kw::component(span), + idx: Index::Id(id), + export_names: Default::default(), + }) + } + + pub(crate) fn instance(span: Span, id: Id<'a>) -> Self { + Self::Instance(ItemRef { + kind: kw::instance(span), + idx: Index::Id(id), + export_names: Default::default(), + }) + } + + pub(crate) fn func(span: Span, id: Id<'a>) -> Self { + Self::Func(ItemRef { + kind: kw::func(span), + idx: Index::Id(id), + export_names: Default::default(), + }) + } + + pub(crate) fn ty(span: Span, id: Id<'a>) -> Self { + Self::Type(ItemRef { + kind: kw::r#type(span), + idx: Index::Id(id), + export_names: Default::default(), + }) + } +} + +impl<'a> Parse<'a> for ComponentExportKind<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + 
parser.parens(|parser| { + let mut l = parser.lookahead1(); + if l.peek::<kw::core>() { + // Remove core prefix + parser.parse::<kw::core>()?; + Ok(Self::CoreModule(parser.parse()?)) + } else if l.peek::<kw::func>() { + Ok(Self::Func(parser.parse()?)) + } else if l.peek::<kw::value>() { + Ok(Self::Value(parser.parse()?)) + } else if l.peek::<kw::r#type>() { + Ok(Self::Type(parser.parse()?)) + } else if l.peek::<kw::component>() { + Ok(Self::Component(parser.parse()?)) + } else if l.peek::<kw::instance>() { + Ok(Self::Instance(parser.parse()?)) + } else { + Err(l.error()) + } + }) + } +} + +impl Peek for ComponentExportKind<'_> { + fn peek(cursor: Cursor) -> bool { + let cursor = match cursor.lparen() { + Some(c) => c, + None => return false, + }; + + let cursor = match cursor.keyword() { + Some(("core", c)) => match c.keyword() { + Some(("module", c)) => c, + _ => return false, + }, + Some(("func", c)) + | Some(("value", c)) + | Some(("type", c)) + | Some(("component", c)) + | Some(("instance", c)) => c, + _ => return false, + }; + + Index::peek(cursor) + } + + fn display() -> &'static str { + "component export" + } +} + +/// A listing of inline `(export "foo" <url>)` statements on a WebAssembly +/// component item in its textual format. +#[derive(Debug, Default)] +pub struct InlineExport<'a> { + /// The extra names to export an item as, if any. 
+ pub names: Vec<(&'a str, Option<&'a str>)>, +} + +impl<'a> Parse<'a> for InlineExport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut names = Vec::new(); + while parser.peek::<Self>() { + names.push(parser.parens(|p| { + p.parse::<kw::export>()?; + Ok((p.parse()?, p.parse()?)) + })?); + } + Ok(InlineExport { names }) + } +} + +impl Peek for InlineExport<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let cursor = match cursor.lparen() { + Some(cursor) => cursor, + None => return false, + }; + let cursor = match cursor.keyword() { + Some(("export", cursor)) => cursor, + _ => return false, + }; + // Name + let mut cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => return false, + }; + // Optional URL + if let Some((_, c)) = cursor.string() { + cursor = c; + } + cursor.rparen().is_some() + } + + fn display() -> &'static str { + "inline export" + } +} diff --git a/third_party/rust/wast/src/component/func.rs b/third_party/rust/wast/src/component/func.rs new file mode 100644 index 0000000000..4edbc63171 --- /dev/null +++ b/third_party/rust/wast/src/component/func.rs @@ -0,0 +1,372 @@ +use crate::component::*; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, Index, LParen, NameAnnotation, Span}; + +/// A declared core function. +/// +/// This is a member of both the core alias and canon sections. +#[derive(Debug)] +pub struct CoreFunc<'a> { + /// Where this `core func` was defined. + pub span: Span, + /// An identifier that this function is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// The kind of core function. 
+ pub kind: CoreFuncKind<'a>, +} + +impl<'a> Parse<'a> for CoreFunc<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::core>()?.0; + parser.parse::<kw::func>()?; + let id = parser.parse()?; + let name = parser.parse()?; + let kind = parser.parse()?; + + Ok(Self { + span, + id, + name, + kind, + }) + } +} + +/// Represents the kind of core functions. +#[derive(Debug)] +pub enum CoreFuncKind<'a> { + /// The core function is defined in terms of lowering a component function. + /// + /// The core function is actually a member of the canon section. + Lower(CanonLower<'a>), + /// The core function is defined in terms of aliasing a module instance export. + /// + /// The core function is actually a member of the core alias section. + Alias(InlineExportAlias<'a>), +} + +impl<'a> Parse<'a> for CoreFuncKind<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|parser| { + let mut l = parser.lookahead1(); + if l.peek::<kw::canon>() { + parser.parse::<kw::canon>()?; + Ok(Self::Lower(parser.parse()?)) + } else if l.peek::<kw::alias>() { + Ok(Self::Alias(parser.parse()?)) + } else { + Err(l.error()) + } + }) + } +} + +/// A declared component function. +/// +/// This may be a member of the import, alias, or canon sections. +#[derive(Debug)] +pub struct Func<'a> { + /// Where this `func` was defined. + pub span: Span, + /// An identifier that this function is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// The kind of function. 
+ pub kind: FuncKind<'a>, +} + +impl<'a> Parse<'a> for Func<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::func>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + let kind = parser.parse()?; + + Ok(Self { + span, + id, + name, + exports, + kind, + }) + } +} + +/// Represents the kind of component functions. +#[derive(Debug)] +pub enum FuncKind<'a> { + /// A function which is actually defined as an import, such as: + /// + /// ```text + /// (func (import "foo") (param string)) + /// ``` + Import { + /// The import name of this import. + import: InlineImport<'a>, + /// The type that this function will have. + ty: ComponentTypeUse<'a, ComponentFunctionType<'a>>, + }, + /// The function is defined in terms of lifting a core function. + /// + /// The function is actually a member of the canon section. + Lift { + /// The lifted function's type. + ty: ComponentTypeUse<'a, ComponentFunctionType<'a>>, + /// Information relating to the lifting of the core function. + info: CanonLift<'a>, + }, + /// The function is defined in terms of aliasing a component instance export. + /// + /// The function is actually a member of the alias section. + Alias(InlineExportAlias<'a>), +} + +impl<'a> Parse<'a> for FuncKind<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if let Some(import) = parser.parse()? { + Ok(Self::Import { + import, + ty: parser.parse()?, + }) + } else if parser.peek::<LParen>() && parser.peek2::<kw::alias>() { + parser.parens(|parser| Ok(Self::Alias(parser.parse()?))) + } else { + Ok(Self::Lift { + ty: parser.parse()?, + info: parser.parens(|parser| { + parser.parse::<kw::canon>()?; + parser.parse() + })?, + }) + } + } +} + +/// A WebAssembly canonical function to be inserted into a component. +/// +/// This is a member of the canonical section. +#[derive(Debug)] +pub struct CanonicalFunc<'a> { + /// Where this `canon` was defined. 
+ pub span: Span, + /// An identifier that this function is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// What kind of function this is, be it a lowered or lifted function. + pub kind: CanonicalFuncKind<'a>, +} + +impl<'a> Parse<'a> for CanonicalFunc<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::canon>()?.0; + + if parser.peek::<kw::lift>() { + let info = parser.parse()?; + let (id, name, ty) = parser.parens(|parser| { + parser.parse::<kw::func>()?; + let id = parser.parse()?; + let name = parser.parse()?; + let ty = parser.parse()?; + Ok((id, name, ty)) + })?; + + Ok(Self { + span, + id, + name, + kind: CanonicalFuncKind::Lift { info, ty }, + }) + } else if parser.peek::<kw::lower>() { + let info = parser.parse()?; + let (id, name) = parser.parens(|parser| { + parser.parse::<kw::core>()?; + parser.parse::<kw::func>()?; + let id = parser.parse()?; + let name = parser.parse()?; + Ok((id, name)) + })?; + + Ok(Self { + span, + id, + name, + kind: CanonicalFuncKind::Lower(info), + }) + } else { + Err(parser.error("expected `canon lift` or `canon lower`")) + } + } +} + +/// Possible ways to define a canonical function in the text format. +#[derive(Debug)] +pub enum CanonicalFuncKind<'a> { + /// A canonical function that is defined in terms of lifting a core function. + Lift { + /// The lifted function's type. + ty: ComponentTypeUse<'a, ComponentFunctionType<'a>>, + /// Information relating to the lifting of the core function. + info: CanonLift<'a>, + }, + /// A canonical function that is defined in terms of lowering a component function. + Lower(CanonLower<'a>), +} + +/// Information relating to lifting a core function. +#[derive(Debug)] +pub struct CanonLift<'a> { + /// The core function being lifted. 
+ pub func: CoreItemRef<'a, kw::func>, + /// The canonical options for the lifting. + pub opts: Vec<CanonOpt<'a>>, +} + +impl<'a> Parse<'a> for CanonLift<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::lift>()?; + + Ok(Self { + func: parser.parens(|parser| { + parser.parse::<kw::core>()?; + parser.parse() + })?, + opts: parser.parse()?, + }) + } +} + +impl Default for CanonLift<'_> { + fn default() -> Self { + let span = Span::from_offset(0); + Self { + func: CoreItemRef { + kind: kw::func(span), + idx: Index::Num(0, span), + export_name: None, + }, + opts: Vec::new(), + } + } +} + +/// Information relating to lowering a component function. +#[derive(Debug)] +pub struct CanonLower<'a> { + /// The function being lowered. + pub func: ItemRef<'a, kw::func>, + /// The canonical options for the lowering. + pub opts: Vec<CanonOpt<'a>>, +} + +impl<'a> Parse<'a> for CanonLower<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::lower>()?; + + Ok(Self { + func: parser.parens(|parser| parser.parse())?, + opts: parser.parse()?, + }) + } +} + +impl Default for CanonLower<'_> { + fn default() -> Self { + let span = Span::from_offset(0); + Self { + func: ItemRef { + kind: kw::func(span), + idx: Index::Num(0, span), + export_names: Vec::new(), + }, + opts: Vec::new(), + } + } +} + +#[derive(Debug)] +/// Canonical ABI options. +pub enum CanonOpt<'a> { + /// Encode strings as UTF-8. + StringUtf8, + /// Encode strings as UTF-16. + StringUtf16, + /// Encode strings as "compact UTF-16". + StringLatin1Utf16, + /// Use the specified memory for canonical ABI memory access. + Memory(CoreItemRef<'a, kw::memory>), + /// Use the specified reallocation function for memory allocations. + Realloc(CoreItemRef<'a, kw::func>), + /// Call the specified function after the lifted function has returned. 
+ PostReturn(CoreItemRef<'a, kw::func>), +} + +impl<'a> Parse<'a> for CanonOpt<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::string_utf8>() { + parser.parse::<kw::string_utf8>()?; + Ok(Self::StringUtf8) + } else if l.peek::<kw::string_utf16>() { + parser.parse::<kw::string_utf16>()?; + Ok(Self::StringUtf16) + } else if l.peek::<kw::string_latin1_utf16>() { + parser.parse::<kw::string_latin1_utf16>()?; + Ok(Self::StringLatin1Utf16) + } else if l.peek::<LParen>() { + parser.parens(|parser| { + let mut l = parser.lookahead1(); + if l.peek::<kw::memory>() { + let span = parser.parse::<kw::memory>()?.0; + Ok(CanonOpt::Memory(parse_trailing_item_ref( + kw::memory(span), + parser, + )?)) + } else if l.peek::<kw::realloc>() { + parser.parse::<kw::realloc>()?; + Ok(CanonOpt::Realloc( + parser.parse::<IndexOrCoreRef<'_, _>>()?.0, + )) + } else if l.peek::<kw::post_return>() { + parser.parse::<kw::post_return>()?; + Ok(CanonOpt::PostReturn( + parser.parse::<IndexOrCoreRef<'_, _>>()?.0, + )) + } else { + Err(l.error()) + } + }) + } else { + Err(l.error()) + } + } +} + +fn parse_trailing_item_ref<T>(kind: T, parser: Parser) -> Result<CoreItemRef<T>> { + Ok(CoreItemRef { + kind, + idx: parser.parse()?, + export_name: parser.parse()?, + }) +} + +impl<'a> Parse<'a> for Vec<CanonOpt<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut funcs = Vec::new(); + while !parser.is_empty() { + funcs.push(parser.parse()?); + } + Ok(funcs) + } +} diff --git a/third_party/rust/wast/src/component/import.rs b/third_party/rust/wast/src/component/import.rs new file mode 100644 index 0000000000..98fec55aa7 --- /dev/null +++ b/third_party/rust/wast/src/component/import.rs @@ -0,0 +1,176 @@ +use crate::component::*; +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::Index; +use crate::token::{Id, NameAnnotation, Span}; + +/// An `import` statement and entry in a WebAssembly component. 
+#[derive(Debug)] +pub struct ComponentImport<'a> { + /// Where this `import` was defined. + pub span: Span, + /// The name of the item to import. + pub name: &'a str, + /// The optional URL of the import. + pub url: Option<&'a str>, + /// The item that's being imported. + pub item: ItemSig<'a>, +} + +impl<'a> Parse<'a> for ComponentImport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::import>()?.0; + let name = parser.parse()?; + let url = parser.parse()?; + let item = parser.parens(|p| p.parse())?; + Ok(ComponentImport { + span, + name, + url, + item, + }) + } +} + +/// An item signature for imported items. +#[derive(Debug)] +pub struct ItemSig<'a> { + /// Where this item is defined in the source. + pub span: Span, + /// An optional identifier used during name resolution to refer to this item + /// from the rest of the component. + pub id: Option<Id<'a>>, + /// An optional name which, for functions, will be stored in the + /// custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// What kind of item this is. + pub kind: ItemSigKind<'a>, +} + +impl<'a> Parse<'a> for ItemSig<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parse_item_sig(parser, true) + } +} + +/// An [`ItemSig`] that is parsed without an identifier or name annotation. 
+#[derive(Debug)] +pub struct ItemSigNoName<'a>(pub ItemSig<'a>); + +impl<'a> Parse<'a> for ItemSigNoName<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ItemSigNoName(parse_item_sig(parser, false)?)) + } +} + +fn parse_item_sig<'a>(parser: Parser<'a>, name: bool) -> Result<ItemSig<'a>> { + let mut l = parser.lookahead1(); + let (span, parse_kind): (_, fn(Parser<'a>) -> Result<ItemSigKind>) = if l.peek::<kw::core>() { + let span = parser.parse::<kw::core>()?.0; + parser.parse::<kw::module>()?; + (span, |parser| Ok(ItemSigKind::CoreModule(parser.parse()?))) + } else if l.peek::<kw::func>() { + let span = parser.parse::<kw::func>()?.0; + (span, |parser| Ok(ItemSigKind::Func(parser.parse()?))) + } else if l.peek::<kw::component>() { + let span = parser.parse::<kw::component>()?.0; + (span, |parser| Ok(ItemSigKind::Component(parser.parse()?))) + } else if l.peek::<kw::instance>() { + let span = parser.parse::<kw::instance>()?.0; + (span, |parser| Ok(ItemSigKind::Instance(parser.parse()?))) + } else if l.peek::<kw::value>() { + let span = parser.parse::<kw::value>()?.0; + (span, |parser| Ok(ItemSigKind::Value(parser.parse()?))) + } else if l.peek::<kw::r#type>() { + let span = parser.parse::<kw::r#type>()?.0; + (span, |parser| { + Ok(ItemSigKind::Type(parser.parens(|parser| parser.parse())?)) + }) + } else { + return Err(l.error()); + }; + Ok(ItemSig { + span, + id: if name { parser.parse()? } else { None }, + name: if name { parser.parse()? } else { None }, + kind: parse_kind(parser)?, + }) +} + +/// The kind of signatures for imported items. +#[derive(Debug)] +pub enum ItemSigKind<'a> { + /// The item signature is for a core module. + CoreModule(CoreTypeUse<'a, ModuleType<'a>>), + /// The item signature is for a function. + Func(ComponentTypeUse<'a, ComponentFunctionType<'a>>), + /// The item signature is for a component. + Component(ComponentTypeUse<'a, ComponentType<'a>>), + /// The item signature is for an instance. 
+ Instance(ComponentTypeUse<'a, InstanceType<'a>>), + /// The item signature is for a value. + Value(ComponentValTypeUse<'a>), + /// The item signature is for a type. + Type(TypeBounds<'a>), +} + +/// Represents the bounds applied to types being imported. +#[derive(Debug)] +pub enum TypeBounds<'a> { + /// The equality type bounds. + Eq(Index<'a>), +} + +impl<'a> Parse<'a> for TypeBounds<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + // Currently this is the only supported type bounds. + parser.parse::<kw::eq>()?; + Ok(Self::Eq(parser.parse()?)) + } +} + +/// A listing of an inline `(import "foo")` statement. +/// +/// This is the same as `core::InlineImport` except only one string import is +/// required; it may be followed by an optional URL string. +#[derive(Debug, Clone)] +pub struct InlineImport<'a> { + /// The name of the item being imported. + pub name: &'a str, + /// The optional URL of the item being imported. + pub url: Option<&'a str>, +} + +impl<'a> Parse<'a> for InlineImport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|p| { + p.parse::<kw::import>()?; + Ok(InlineImport { + name: p.parse()?, + url: p.parse()?, + }) + }) + } +} + +impl Peek for InlineImport<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let cursor = match cursor.lparen() { + Some(cursor) => cursor, + None => return false, + }; + let cursor = match cursor.keyword() { + Some(("import", cursor)) => cursor, + _ => return false, + }; + let cursor = match cursor.string() { + Some((_, c)) => c.string().map_or(c, |(_, after_url)| after_url), // skip the optional URL + None => return false, + }; + cursor.rparen().is_some() + } + + fn display() -> &'static str { + "inline import" + } +} diff --git a/third_party/rust/wast/src/component/instance.rs b/third_party/rust/wast/src/component/instance.rs new file mode 100644 index 0000000000..888f4ee9f9 --- /dev/null +++ b/third_party/rust/wast/src/component/instance.rs @@ -0,0 +1,296 @@ +use crate::component::*; +use crate::core; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, LParen, 
NameAnnotation, Span}; + +/// A core instance defined by instantiation or exporting core items. +#[derive(Debug)] +pub struct CoreInstance<'a> { + /// Where this `core instance` was defined. + pub span: Span, + /// An identifier that this instance is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this instance stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// What kind of instance this is. + pub kind: CoreInstanceKind<'a>, +} + +impl<'a> Parse<'a> for CoreInstance<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::core>()?.0; + parser.parse::<kw::instance>()?; + let id = parser.parse()?; + let name = parser.parse()?; + let kind = parser.parse()?; + + Ok(Self { + span, + id, + name, + kind, + }) + } +} + +/// The kinds of core instances in the text format. +#[derive(Debug)] +pub enum CoreInstanceKind<'a> { + /// Instantiate a core module. + Instantiate { + /// The module being instantiated. + module: ItemRef<'a, kw::module>, + /// Arguments used to instantiate the instance. + args: Vec<CoreInstantiationArg<'a>>, + }, + /// The instance is defined by exporting local items as an instance. + BundleOfExports(Vec<CoreInstanceExport<'a>>), +} + +impl<'a> Parse<'a> for CoreInstanceKind<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<LParen>() && parser.peek2::<kw::instantiate>() { + parser.parens(|parser| { + parser.parse::<kw::instantiate>()?; + Ok(Self::Instantiate { + module: parser.parse::<IndexOrRef<'_, _>>()?.0, + args: parser.parse()?, + }) + }) + } else { + Ok(Self::BundleOfExports(parser.parse()?)) + } + } +} + +impl Default for kw::module { + fn default() -> kw::module { + kw::module(Span::from_offset(0)) + } +} + +/// An argument to instantiate a core module. +#[derive(Debug)] +pub struct CoreInstantiationArg<'a> { + /// The name of the instantiation argument. 
+ pub name: &'a str, + /// The kind of core instantiation argument. + pub kind: CoreInstantiationArgKind<'a>, +} + +impl<'a> Parse<'a> for CoreInstantiationArg<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::with>()?; + Ok(Self { + name: parser.parse()?, + kind: parser.parse()?, + }) + } +} + +impl<'a> Parse<'a> for Vec<CoreInstantiationArg<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut args = Vec::new(); + while !parser.is_empty() { + args.push(parser.parens(|parser| parser.parse())?); + } + Ok(args) + } +} + +/// The kind of core instantiation argument. +#[derive(Debug)] +pub enum CoreInstantiationArgKind<'a> { + /// The argument is a reference to an instance. + Instance(CoreItemRef<'a, kw::instance>), + /// The argument is an instance created from local exported core items. + /// + /// This is syntactic sugar for defining a core instance and also using it + /// as an instantiation argument. + BundleOfExports(Span, Vec<CoreInstanceExport<'a>>), +} + +impl<'a> Parse<'a> for CoreInstantiationArgKind<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|parser| { + if let Some(r) = parser.parse()? { + Ok(Self::Instance(r)) + } else { + let span = parser.parse::<kw::instance>()?.0; + Ok(Self::BundleOfExports(span, parser.parse()?)) + } + }) + } +} + +/// An exported item as part of a core instance. +#[derive(Debug)] +pub struct CoreInstanceExport<'a> { + /// Where this export was defined. + pub span: Span, + /// The name of this export from the instance. + pub name: &'a str, + /// What's being exported from the instance. 
// NOTE(review): the struct closed below opens before this chunk; its last
// field is reproduced unchanged.
    pub item: CoreItemRef<'a, core::ExportKind>,
}

impl<'a> Parse<'a> for CoreInstanceExport<'a> {
    /// Reads the `export` keyword, then the export name, then one
    /// parenthesized item reference, in that order.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let span = parser.parse::<kw::export>()?.0;
        let name = parser.parse()?;
        let item = parser.parens(|inner| inner.parse())?;
        Ok(Self { span, name, item })
    }
}

impl<'a> Parse<'a> for Vec<CoreInstanceExport<'a>> {
    /// Collects parenthesized exports until the parser reports it is empty.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let mut collected = Vec::new();
        loop {
            if parser.is_empty() {
                return Ok(collected);
            }
            collected.push(parser.parens(|inner| inner.parse())?);
        }
    }
}

/// An instance of a component, produced either by instantiating a component
/// or by bundling up items to export.
#[derive(Debug)]
pub struct Instance<'a> {
    /// Source location of the `instance` keyword.
    pub span: Span,
    /// Optional identifier used to refer to this instance during name
    /// resolution.
    pub id: Option<Id<'a>>,
    /// Optional name for this instance, stored in the custom `name` section.
    pub name: Option<NameAnnotation<'a>>,
    /// Inline export annotations, if any, naming the exports this definition
    /// should be published under.
    pub exports: InlineExport<'a>,
    /// Which flavour of instance definition this is.
    pub kind: InstanceKind<'a>,
}

impl<'a> Parse<'a> for Instance<'a> {
    /// Reads the `instance` keyword followed by id, name annotation, inline
    /// exports and the instance kind. Fields are parsed in declaration order.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        Ok(Self {
            span: parser.parse::<kw::instance>()?.0,
            id: parser.parse()?,
            name: parser.parse()?,
            exports: parser.parse()?,
            kind: parser.parse()?,
        })
    }
}

/// The different forms an instance definition takes in the text format.
#[derive(Debug)]
pub enum InstanceKind<'a> {
    /// The `(instance (import "x"))` sugar syntax
    Import {
        /// Name of the import.
        import: InlineImport<'a>,
        /// Type of the imported instance.
        ty: ComponentTypeUse<'a, InstanceType<'a>>,
    },
    /// Instantiation of a component.
    Instantiate {
        /// Which component gets instantiated.
        component: ItemRef<'a, kw::component>,
        /// Arguments supplied to the instantiation.
        args: Vec<InstantiationArg<'a>>,
    },
    /// An instance formed by exporting local items.
    BundleOfExports(Vec<ComponentExport<'a>>),
}

impl<'a> Parse<'a> for InstanceKind<'a> {
    /// Tries the inline-import form first, then `(instantiate ...)`, and
    /// otherwise treats the remaining input as a bundle of exports.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        if let Some(import) = parser.parse()? {
            let ty = parser.parse()?;
            return Ok(Self::Import { import, ty });
        }

        let is_instantiation = parser.peek::<LParen>() && parser.peek2::<kw::instantiate>();
        if !is_instantiation {
            return Ok(Self::BundleOfExports(parser.parse()?));
        }

        parser.parens(|inner| {
            inner.parse::<kw::instantiate>()?;
            let component = inner.parse::<IndexOrRef<'_, _>>()?.0;
            let args = inner.parse()?;
            Ok(Self::Instantiate { component, args })
        })
    }
}

impl Default for kw::component {
    /// Builds the keyword with a span at offset 0. `IndexOrRef` requires
    /// `K: Default` so it can supply a kind when only a bare index is written.
    fn default() -> Self {
        kw::component(Span::from_offset(0))
    }
}

/// A single argument passed when instantiating a component.
#[derive(Debug)]
pub struct InstantiationArg<'a> {
    /// Name of the argument.
    pub name: &'a str,
    /// What the argument refers to.
    pub kind: InstantiationArgKind<'a>,
}

impl<'a> Parse<'a> for InstantiationArg<'a> {
    /// Reads the `with` keyword, then the argument name and its kind.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        parser.parse::<kw::with>()?;
        let name = parser.parse()?;
        let kind = parser.parse()?;
        Ok(Self { name, kind })
    }
}

impl<'a> Parse<'a> for Vec<InstantiationArg<'a>> {
    /// Collects parenthesized arguments until the parser reports it is empty.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let mut collected = Vec::new();
        loop {
            if parser.is_empty() {
                return Ok(collected);
            }
            collected.push(parser.parens(|inner| inner.parse())?);
        }
    }
}

/// The forms an instantiation argument can take.
#[derive(Debug)]
pub enum InstantiationArgKind<'a> {
    /// A reference to a component item.
    Item(ComponentExportKind<'a>),
    /// An instance assembled from local exported items.
    ///
    /// Syntactic sugar: it defines an instance and uses it as the
    /// instantiation argument in one go.
    BundleOfExports(Span, Vec<ComponentExport<'a>>),
}

impl<'a> Parse<'a> for InstantiationArgKind<'a> {
    /// Parses an item reference when one is present; otherwise expects a
    /// parenthesized `instance` keyword followed by a list of exports.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        match parser.parse()? {
            Some(item) => Ok(Self::Item(item)),
            None => parser.parens(|inner| {
                let span = inner.parse::<kw::instance>()?.0;
                let exports = inner.parse()?;
                Ok(Self::BundleOfExports(span, exports))
            }),
        }
    }
}

// diff --git a/third_party/rust/wast/src/component/item_ref.rs b/third_party/rust/wast/src/component/item_ref.rs
// new file mode 100644
// index 0000000000..c3bbf2f9f4
// --- /dev/null
// +++ b/third_party/rust/wast/src/component/item_ref.rs
// @@ -0,0 +1,154 @@
use crate::parser::{Cursor, Parse, Parser, Peek, Result};
use crate::token::Index;

/// Shared lookahead for item references: keyword `K`, then an id or integer
/// index, then either `)` or a string.
fn peek<K: Peek>(cursor: Cursor) -> bool {
    // The extra lookahead exists because
    //
    //      (type (component (type $foo)))
    //
    // has to be told apart from
    //
    //      (type (component (type $foo (func))))
    //
    // The former is a type reference, the latter an inline component type
    // that defines a type internally. So besides `K` itself we also look at
    // the index and what follows it.

    // The keyword must match `K`.
    if !K::peek(cursor) {
        return false;
    }

    // Step over the keyword.
    let after_kw = if let Some((_, next)) = cursor.keyword() {
        next
    } else {
        return false;
    };

    // An id is tried first, then an integer index.
    let after_index = match after_kw.id() {
        Some(found) => Some(found.1),
        None => after_kw.integer().map(|found| found.1),
    };

    // A closing paren or a string right after the index marks a reference.
    after_index.map_or(false, |rest| {
        rest.rparen().is_some() || rest.string().is_some()
    })
}

/// A reference to a core item.
#[derive(Clone, Debug)]
pub struct CoreItemRef<'a, K> {
    /// Kind of item being referenced.
    pub kind: K,
    /// Index of the item, or of the instance it is exported from.
    pub idx: Index<'a>,
    /// Export name to look the item up by, if any.
    pub export_name: Option<&'a str>,
}

impl<'a, K: Parse<'a>> Parse<'a> for CoreItemRef<'a, K> {
    fn parse(parser: Parser<'a>) -> Result<Self> {
        // The surrounding `(` and `)` are deliberately left to the caller:
        // whether a core prefix appears depends on context, so the caller
        // handles both the parens and any core prefix.
        Ok(Self {
            kind: parser.parse::<K>()?,
            idx: parser.parse()?,
            export_name: parser.parse()?,
        })
    }
}

impl<'a, K: Peek> Peek for CoreItemRef<'a, K> {
    fn peek(cursor: Cursor<'_>) -> bool {
        peek::<K>(cursor)
    }

    fn display() -> &'static str {
        "a core item reference"
    }
}

/// A reference to a component item.
#[derive(Clone, Debug)]
pub struct ItemRef<'a, K> {
    /// Kind of item being referenced.
    pub kind: K,
    /// Index of the item, or of the instance it is exported from.
    pub idx: Index<'a>,
    /// Chain of export names to look the item up by.
    pub export_names: Vec<&'a str>,
}

impl<'a, K: Parse<'a>> Parse<'a> for ItemRef<'a, K> {
    /// Reads the kind and index, then every remaining token as an export name.
    fn parse(parser: Parser<'a>) -> Result<Self> {
        let kind = parser.parse::<K>()?;
        let idx = parser.parse()?;
        let mut export_names = Vec::new();
        loop {
            if parser.is_empty() {
                break;
            }
            export_names.push(parser.parse()?);
        }
        Ok(Self {
            kind,
            idx,
            export_names,
        })
    }
}

impl<'a, K: Peek> Peek for ItemRef<'a, K> {
    fn peek(cursor: Cursor<'_>) -> bool {
        peek::<K>(cursor)
    }

    fn display() -> &'static str {
        "a component item reference"
    }
}

/// Convenience structure to parse `$f` or `(item $f)`.
+#[derive(Clone, Debug)] +pub struct IndexOrRef<'a, K>(pub ItemRef<'a, K>); + +impl<'a, K> Parse<'a> for IndexOrRef<'a, K> +where + K: Parse<'a> + Default, +{ + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<Index<'_>>() { + Ok(IndexOrRef(ItemRef { + kind: K::default(), + idx: parser.parse()?, + export_names: Vec::new(), + })) + } else { + Ok(IndexOrRef(parser.parens(|p| p.parse())?)) + } + } +} + +/// Convenience structure to parse `$f` or `(item $f)`. +#[derive(Clone, Debug)] +pub struct IndexOrCoreRef<'a, K>(pub CoreItemRef<'a, K>); + +impl<'a, K> Parse<'a> for IndexOrCoreRef<'a, K> +where + K: Parse<'a> + Default, +{ + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<Index<'_>>() { + Ok(IndexOrCoreRef(CoreItemRef { + kind: K::default(), + idx: parser.parse()?, + export_name: None, + })) + } else { + Ok(IndexOrCoreRef(parser.parens(|p| p.parse())?)) + } + } +} diff --git a/third_party/rust/wast/src/component/module.rs b/third_party/rust/wast/src/component/module.rs new file mode 100644 index 0000000000..6871af8d4c --- /dev/null +++ b/third_party/rust/wast/src/component/module.rs @@ -0,0 +1,75 @@ +use crate::component::*; +use crate::core; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, NameAnnotation, Span}; + +/// A core WebAssembly module to be created as part of a component. +/// +/// This is a member of the core module section. +#[derive(Debug)] +pub struct CoreModule<'a> { + /// Where this `core module` was defined. + pub span: Span, + /// An identifier that this module is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this module stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// What kind of module this is, be it an inline-defined or imported one. 
+ pub kind: CoreModuleKind<'a>, +} + +/// Possible ways to define a core module in the text format. +#[derive(Debug)] +pub enum CoreModuleKind<'a> { + /// A core module which is actually defined as an import + Import { + /// Where this core module is imported from + import: InlineImport<'a>, + /// The type that this core module will have. + ty: CoreTypeUse<'a, ModuleType<'a>>, + }, + + /// Modules that are defined inline. + Inline { + /// Fields in the core module. + fields: Vec<core::ModuleField<'a>>, + }, +} + +impl<'a> Parse<'a> for CoreModule<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.depth_check()?; + + let span = parser.parse::<kw::core>()?.0; + parser.parse::<kw::module>()?; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + let kind = if let Some(import) = parser.parse()? { + CoreModuleKind::Import { + import, + ty: parser.parse()?, + } + } else { + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parens(|p| p.parse())?); + } + CoreModuleKind::Inline { fields } + }; + + Ok(Self { + span, + id, + name, + exports, + kind, + }) + } +} diff --git a/third_party/rust/wast/src/component/resolve.rs b/third_party/rust/wast/src/component/resolve.rs new file mode 100644 index 0000000000..e87350e329 --- /dev/null +++ b/third_party/rust/wast/src/component/resolve.rs @@ -0,0 +1,973 @@ +use crate::component::*; +use crate::core; +use crate::kw; +use crate::names::Namespace; +use crate::token::Span; +use crate::token::{Id, Index}; +use crate::Error; + +/// Resolve the fields of a component and everything nested within it, changing +/// `Index::Id` to `Index::Num` and expanding alias syntax sugar. 
pub fn resolve(component: &mut Component<'_>) -> Result<(), Error> {
    // Binary components carry no text fields, so there is nothing to resolve.
    let fields = match &mut component.kind {
        ComponentKind::Text(fields) => fields,
        ComponentKind::Binary(_) => return Ok(()),
    };
    let mut resolver = Resolver::default();
    resolver.fields(component.id, fields)
}

// The `From<Alias>` impls below satisfy the `T: From<Alias<'a>>` bound of
// `Resolver::resolve_prepending_aliases`, which splices synthesized aliases
// into each of these declaration lists.

impl<'a> From<Alias<'a>> for ComponentField<'a> {
    fn from(a: Alias<'a>) -> Self {
        Self::Alias(a)
    }
}

impl<'a> From<Alias<'a>> for ModuleTypeDecl<'a> {
    fn from(a: Alias<'a>) -> Self {
        Self::Alias(a)
    }
}

impl<'a> From<Alias<'a>> for ComponentTypeDecl<'a> {
    fn from(a: Alias<'a>) -> Self {
        Self::Alias(a)
    }
}

impl<'a> From<Alias<'a>> for InstanceTypeDecl<'a> {
    fn from(a: Alias<'a>) -> Self {
        Self::Alias(a)
    }
}

/// Drives name resolution over a component and its nested scopes.
#[derive(Default)]
struct Resolver<'a> {
    // One entry per scope currently being resolved; the innermost scope is
    // last (see `current`).
    stack: Vec<ComponentState<'a>>,

    // When a name refers to a definition in an outer scope, we'll need to
    // insert an outer alias before it. This collects the aliases to be
    // inserted during resolution.
    aliases_to_insert: Vec<Alias<'a>>,
}

/// Context structure used to perform name resolution.
#[derive(Default)]
struct ComponentState<'a> {
    // Identifier of the scope itself; `outer_alias` compares against it when
    // an outer scope is named by id.
    id: Option<Id<'a>>,

    // Namespaces within each component. Note that each namespace carries
    // with it information about the signature of the item in that namespace.
    // The signature is later used to synthesize the type of a component and
    // inject type annotations if necessary.
    core_funcs: Namespace<'a>,
    core_globals: Namespace<'a>,
    core_tables: Namespace<'a>,
    core_memories: Namespace<'a>,
    core_types: Namespace<'a>,
    core_tags: Namespace<'a>,
    core_instances: Namespace<'a>,
    core_modules: Namespace<'a>,

    funcs: Namespace<'a>,
    types: Namespace<'a>,
    instances: Namespace<'a>,
    components: Namespace<'a>,
    values: Namespace<'a>,
}

impl<'a> ComponentState<'a> {
    /// Creates a state with the given scope id and default namespaces.
    fn new(id: Option<Id<'a>>) -> ComponentState<'a> {
        ComponentState {
            id,
            ..ComponentState::default()
        }
    }

    /// Registers the id of an item signature in the namespace matching its
    /// kind and returns whatever `Namespace::register` yields (presumably the
    /// newly assigned index; confirm against `crate::names`).
    fn register_item_sig(&mut self, sig: &ItemSig<'a>) -> Result<u32, Error> {
        match &sig.kind {
            ItemSigKind::CoreModule(_) => self.core_modules.register(sig.id, "core module"),
            ItemSigKind::Func(_) => self.funcs.register(sig.id, "func"),
            ItemSigKind::Component(_) => self.components.register(sig.id, "component"),
            ItemSigKind::Instance(_) => self.instances.register(sig.id, "instance"),
            ItemSigKind::Value(_) => self.values.register(sig.id, "value"),
            ItemSigKind::Type(_) => self.types.register(sig.id, "type"),
        }
    }
}

impl<'a> Resolver<'a> {
    /// Returns the innermost scope.
    ///
    /// # Panics
    ///
    /// Panics if the scope stack is empty.
    fn current(&mut self) -> &mut ComponentState<'a> {
        self.stack
            .last_mut()
            .expect("should have at least one component state")
    }

    /// Resolves a list of component fields inside a fresh scope identified by
    /// `id`. The scope is popped only on success; on error the `?` returns
    /// early and the pushed state stays on the stack.
    fn fields(
        &mut self,
        id: Option<Id<'a>>,
        fields: &mut Vec<ComponentField<'a>>,
    ) -> Result<(), Error> {
        self.stack.push(ComponentState::new(id));
        self.resolve_prepending_aliases(fields, Resolver::field, ComponentState::register)?;
        self.stack.pop();
        Ok(())
    }

    /// Walks `fields` in order. For each element it runs `resolve`, splices
    /// any aliases queued in `aliases_to_insert` in front of the element, and
    /// then runs `register` on the element in the current scope.
    ///
    /// # Panics
    ///
    /// Panics if `aliases_to_insert` is not empty on entry.
    fn resolve_prepending_aliases<T>(
        &mut self,
        fields: &mut Vec<T>,
        resolve: fn(&mut Self, &mut T) -> Result<(), Error>,
        register: fn(&mut ComponentState<'a>, &T) -> Result<(), Error>,
    ) -> Result<(), Error>
    where
        T: From<Alias<'a>>,
    {
        assert!(self.aliases_to_insert.is_empty());

        // Iterate through the fields of the component. We use an index
        // instead of an iterator because we'll be inserting aliases
        // as we go.
        let mut i = 0;
        while i < fields.len() {
            // Resolve names within the field.
            resolve(self, &mut fields[i])?;

            // Name resolution may have emitted some aliases. Insert them before
            // the current definition.
            let amt = self.aliases_to_insert.len();
            fields.splice(i..i, self.aliases_to_insert.drain(..).map(T::from));
            // Skip over the aliases just inserted so `i` points at the field
            // that was resolved above.
            i += amt;

            // Definitions can't refer to themselves or to definitions that appear
            // later in the format. Now that we're done resolving this field,
            // assign it an index for later definitions to refer to.
            register(self.current(), &fields[i])?;

            i += 1;
        }

        Ok(())
    }

    /// Dispatches resolution of one component field to the matching method.
    ///
    /// # Panics
    ///
    /// Panics on `CoreFunc` and `Func` fields, which are expected to have
    /// been expanded before resolution runs.
    fn field(&mut self, field: &mut ComponentField<'a>) -> Result<(), Error> {
        match field {
            ComponentField::CoreModule(m) => self.core_module(m),
            ComponentField::CoreInstance(i) => self.core_instance(i),
            ComponentField::CoreType(t) => self.core_ty(t),
            ComponentField::Component(c) => self.component(c),
            ComponentField::Instance(i) => self.instance(i),
            ComponentField::Alias(a) => self.alias(a, false),
            ComponentField::Type(t) => self.ty(t),
            ComponentField::CanonicalFunc(f) => self.canonical_func(f),
            ComponentField::CoreFunc(_) => unreachable!("should be expanded already"),
            ComponentField::Func(_) => unreachable!("should be expanded already"),
            ComponentField::Start(s) => self.start(s),
            ComponentField::Import(i) => self.item_sig(&mut i.item),
            ComponentField::Export(e) => {
                // An export may carry a type ascription; resolve it before
                // the exported item itself.
                if let Some(ty) = &mut e.ty {
                    self.item_sig(&mut ty.0)?;
                }
                self.export(&mut e.kind)
            }
            ComponentField::Custom(_) => Ok(()),
        }
    }

    /// Resolves an inline core module by delegating to the core resolver.
    ///
    /// # Panics
    ///
    /// Panics on the `Import` form, which is expected to be expanded already.
    fn core_module(&mut self, module: &mut CoreModule) -> Result<(), Error> {
        match &mut module.kind {
            CoreModuleKind::Inline { fields } => {
                crate::core::resolve::resolve(fields)?;
            }

            CoreModuleKind::Import { .. } => {
                unreachable!("should be expanded already")
            }
        }

        Ok(())
    }

    /// Resolves a nested component's fields in a new scope.
    ///
    /// # Panics
    ///
    /// Panics on the `Import` form, which is expected to be expanded already.
    fn component(&mut self, component: &mut NestedComponent<'a>) -> Result<(), Error> {
        match &mut component.kind {
            NestedComponentKind::Import { .. } => unreachable!("should be expanded already"),
            NestedComponentKind::Inline(fields) => self.fields(component.id, fields),
        }
    }

    /// Resolves the module and argument references of a core instance, or the
    /// item references of a core bundle of exports.
    ///
    /// # Panics
    ///
    /// Panics on an inline `BundleOfExports` argument, which is expected to be
    /// expanded already.
    fn core_instance(&mut self, instance: &mut CoreInstance<'a>) -> Result<(), Error> {
        match &mut instance.kind {
            CoreInstanceKind::Instantiate { module, args } => {
                self.component_item_ref(module)?;
                for arg in args {
                    match &mut arg.kind {
                        CoreInstantiationArgKind::Instance(i) => {
                            self.core_item_ref(i)?;
                        }
                        CoreInstantiationArgKind::BundleOfExports(..) => {
                            unreachable!("should be expanded already");
                        }
                    }
                }
            }
            CoreInstanceKind::BundleOfExports(exports) => {
                for export in exports {
                    self.core_item_ref(&mut export.item)?;
                }
            }
        }
        Ok(())
    }

    /// Resolves the component and argument references of an instance, or the
    /// exports of a bundle of exports.
    ///
    /// # Panics
    ///
    /// Panics on an inline `BundleOfExports` argument and on the `Import`
    /// form, both of which are expected to be expanded already.
    fn instance(&mut self, instance: &mut Instance<'a>) -> Result<(), Error> {
        match &mut instance.kind {
            InstanceKind::Instantiate { component, args } => {
                self.component_item_ref(component)?;
                for arg in args {
                    match &mut arg.kind {
                        InstantiationArgKind::Item(e) => {
                            self.export(e)?;
                        }
                        InstantiationArgKind::BundleOfExports(..) => {
                            unreachable!("should be expanded already")
                        }
                    }
                }
            }
            InstanceKind::BundleOfExports(exports) => {
                for export in exports {
                    self.export(&mut export.kind)?;
                }
            }
            InstanceKind::Import { .. } => {
                unreachable!("should be expanded already")
            }
        }
        Ok(())
    }

    /// Resolves the type reference carried by an item signature.
    fn item_sig(&mut self, item: &mut ItemSig<'a>) -> Result<(), Error> {
        match &mut item.kind {
            // Here we must be explicit otherwise the module type reference will
            // be assumed to be in the component type namespace
            ItemSigKind::CoreModule(t) => self.core_type_use(t),
            ItemSigKind::Func(t) => self.component_type_use(t),
            ItemSigKind::Component(t) => self.component_type_use(t),
            ItemSigKind::Instance(t) => self.component_type_use(t),
            ItemSigKind::Value(t) => self.component_val_type(&mut t.0),
            ItemSigKind::Type(b) => match b {
                TypeBounds::Eq(i) => self.resolve_ns(i, Ns::Type),
            },
        }
    }

    /// Resolves the item reference of an export, whatever its kind.
    fn export(&mut self, kind: &mut ComponentExportKind<'a>) -> Result<(), Error> {
        match kind {
            // Here we do *not* have to be explicit as the item ref is to a core module
            ComponentExportKind::CoreModule(r) => self.component_item_ref(r),
            ComponentExportKind::Func(r) => self.component_item_ref(r),
            ComponentExportKind::Value(r) => self.component_item_ref(r),
            ComponentExportKind::Type(r) => self.component_item_ref(r),
            ComponentExportKind::Component(r) => self.component_item_ref(r),
            ComponentExportKind::Instance(r) => self.component_item_ref(r),
        }
    }

    /// Resolves the function index of a start definition and each argument.
    fn start(&mut self, start: &mut Start<'a>) -> Result<(), Error> {
        self.resolve_ns(&mut start.func, Ns::Func)?;
        for arg in start.args.iter_mut() {
            self.component_item_ref(arg)?;
        }
        Ok(())
    }

    /// Resolves an outer alias: turns `outer` into a numeric depth and then
    /// resolves `index` in the scope found at that depth.
    ///
    /// Errors if a named outer scope is not on the stack, if the depth is not
    /// smaller than the stack length, or if `enclosing_only` is set and the
    /// depth exceeds 1.
    fn outer_alias<T: Into<Ns>>(
        &mut self,
        outer: &mut Index<'a>,
        index: &mut Index<'a>,
        kind: T,
        span: Span,
        enclosing_only: bool,
    ) -> Result<(), Error> {
        // Short-circuit when both indices are already resolved as this
        // helps to write tests for invalid modules where wasmparser should
        // be the one returning the error.
        if let Index::Num(..) = outer {
            if let Index::Num(..) = index {
                return Ok(());
            }
        }

        // Resolve `outer`, and compute the depth at which to look up
        // `index`.
        let depth = match outer {
            Index::Id(id) => {
                // Count scopes from the innermost outward until one carries
                // the requested id.
                let mut depth = 0;
                for resolver in self.stack.iter().rev() {
                    if resolver.id == Some(*id) {
                        break;
                    }
                    depth += 1;
                }
                // Walking the whole stack without a match means the id names
                // no enclosing scope.
                if depth as usize == self.stack.len() {
                    return Err(Error::new(
                        span,
                        format!("outer component `{}` not found", id.name()),
                    ));
                }
                depth
            }
            Index::Num(n, _span) => *n,
        };

        if depth as usize >= self.stack.len() {
            return Err(Error::new(
                span,
                format!("outer count of `{}` is too large", depth),
            ));
        }

        if enclosing_only && depth > 1 {
            return Err(Error::new(
                span,
                "only the local or enclosing scope can be aliased".to_string(),
            ));
        }

        *outer = Index::Num(depth, span);

        // Resolve `index` within the computed scope depth.
        let computed = self.stack.len() - 1 - depth as usize;
        self.stack[computed].resolve(kind.into(), index)?;

        Ok(())
    }

    /// Resolves the target of an alias. Export aliases resolve their instance
    /// index; outer aliases go through `outer_alias`, which receives
    /// `enclosing_only` unchanged.
    fn alias(&mut self, alias: &mut Alias<'a>, enclosing_only: bool) -> Result<(), Error> {
        match &mut alias.target {
            AliasTarget::Export {
                instance,
                name: _,
                kind: _,
            } => self.resolve_ns(instance, Ns::Instance),
            AliasTarget::CoreExport {
                instance,
                name: _,
                kind: _,
            } => self.resolve_ns(instance, Ns::CoreInstance),
            AliasTarget::Outer { outer, index, kind } => {
                self.outer_alias(outer, index, *kind, alias.span, enclosing_only)
            }
        }
    }

    /// Resolves the type and function references of a canonical function,
    /// followed by the item references inside its canonical options.
    fn canonical_func(&mut self, func: &mut CanonicalFunc<'a>) -> Result<(), Error> {
        let opts = match &mut func.kind {
            CanonicalFuncKind::Lift { ty, info } => {
                self.component_type_use(ty)?;
                self.core_item_ref(&mut info.func)?;
                &mut info.opts
            }
            CanonicalFuncKind::Lower(info) => {
                self.component_item_ref(&mut info.func)?;
                &mut info.opts
            }
        };

        for opt in opts {
            match opt {
                // String encoding options reference no items.
                CanonOpt::StringUtf8 | CanonOpt::StringUtf16 | CanonOpt::StringLatin1Utf16 => {}
                CanonOpt::Memory(r) => self.core_item_ref(r)?,
                CanonOpt::Realloc(r) | CanonOpt::PostReturn(r) => self.core_item_ref(r)?,
            }
        }

        Ok(())
    }

    /// Resolves the reference inside a core type use.
    ///
    /// # Panics
    ///
    /// Panics on an inline type use, which is expected to be expanded by now.
    fn core_type_use<T>(&mut self, ty: &mut CoreTypeUse<'a, T>) -> Result<(), Error> {
        let item = match ty {
            CoreTypeUse::Ref(r) => r,
            CoreTypeUse::Inline(_) => {
                unreachable!("inline type-use should be expanded by now")
            }
        };
        self.core_item_ref(item)
    }

    /// Resolves the reference inside a component type use.
    ///
    /// # Panics
    ///
    /// Panics on an inline type use, which is expected to be expanded by now.
    fn component_type_use<T>(&mut self, ty: &mut ComponentTypeUse<'a, T>) -> Result<(), Error> {
        let item = match ty {
            ComponentTypeUse::Ref(r) => r,
            ComponentTypeUse::Inline(_) => {
                unreachable!("inline type-use should be expanded by now")
            }
        };
        self.component_item_ref(item)
    }

    /// Resolves every value type nested in a defined type, and turns variant
    /// case refinements given by id or index into resolved case indices.
    fn defined_type(&mut self, ty: &mut ComponentDefinedType<'a>) -> Result<(), Error> {
        match ty {
            ComponentDefinedType::Primitive(_) => {}
            ComponentDefinedType::Flags(_) => {}
            ComponentDefinedType::Enum(_) => {}
            ComponentDefinedType::Record(r) => {
                for field in r.fields.iter_mut() {
                    self.component_val_type(&mut field.ty)?;
                }
            }
            ComponentDefinedType::Variant(v) => {
                // Namespace for case identifier resolution
                let mut ns = Namespace::default();
                for case in v.cases.iter_mut() {
                    // The case is registered before its refinement is
                    // resolved, so a self-reference resolves and is then
                    // rejected by the check below.
                    let index = ns.register(case.id, "variant case")?;

                    if let Some(ty) = &mut case.ty {
                        self.component_val_type(ty)?;
                    }

                    if let Some(refines) = &mut case.refines {
                        if let Refinement::Index(span, idx) = refines {
                            let resolved = ns.resolve(idx, "variant case")?;
                            if resolved == index {
                                return Err(Error::new(
                                    *span,
                                    "variant case cannot refine itself".to_string(),
                                ));
                            }

                            *refines = Refinement::Resolved(resolved);
                        }
                    }
                }
            }
            ComponentDefinedType::List(l) => {
                self.component_val_type(&mut l.element)?;
            }
            ComponentDefinedType::Tuple(t) => {
                for field in t.fields.iter_mut() {
                    self.component_val_type(field)?;
                }
            }
            ComponentDefinedType::Union(t) => {
                for ty in t.types.iter_mut() {
                    self.component_val_type(ty)?;
                }
            }
            ComponentDefinedType::Option(o) => {
                self.component_val_type(&mut o.element)?;
            }
            ComponentDefinedType::Result(r) => {
                if let Some(ty) = &mut r.ok {
                    self.component_val_type(ty)?;
                }

                if let Some(ty) = &mut r.err {
                    self.component_val_type(ty)?;
                }
            }
        }
        Ok(())
    }

    /// Resolves a value type: references are resolved in the type namespace
    /// and inline primitives need no work.
    ///
    /// # Panics
    ///
    /// Panics on any other inline type, which is expected to be expanded by
    /// now.
    fn component_val_type(&mut self, ty: &mut ComponentValType<'a>) -> Result<(), Error> {
        match ty {
            ComponentValType::Ref(idx) => self.resolve_ns(idx, Ns::Type),
            ComponentValType::Inline(ComponentDefinedType::Primitive(_)) => Ok(()),
            ComponentValType::Inline(_) => unreachable!("should be expanded by now"),
        }
    }

    /// Resolves a core type definition. Module types get their own scope;
    /// other core definitions need no resolution here.
    fn core_ty(&mut self, field: &mut CoreType<'a>) -> Result<(), Error> {
        match &mut field.def {
            CoreTypeDef::Def(_) => {}
            CoreTypeDef::Module(t) => {
                self.stack.push(ComponentState::new(field.id));
                self.module_type(t)?;
                self.stack.pop();
            }
        }
        Ok(())
    }

    /// Resolves a component-level type definition. Component and instance
    /// types are resolved inside a new scope identified by the type's id.
    fn ty(&mut self, field: &mut Type<'a>) -> Result<(), Error> {
        match &mut field.def {
            TypeDef::Defined(t) => {
                self.defined_type(t)?;
            }
            TypeDef::Func(f) => {
                for param in f.params.iter_mut() {
                    self.component_val_type(&mut param.ty)?;
                }

                for result in f.results.iter_mut() {
                    self.component_val_type(&mut result.ty)?;
                }
            }
            TypeDef::Component(c) => {
                self.stack.push(ComponentState::new(field.id));
                self.component_type(c)?;
                self.stack.pop();
            }
            TypeDef::Instance(i) => {
                self.stack.push(ComponentState::new(field.id));
                self.instance_type(i)?;
                self.stack.pop();
            }
        }
        Ok(())
    }

    /// Resolves and registers the declarations of a component type in the
    /// current scope; the caller is responsible for pushing that scope.
    fn component_type(&mut self, c: &mut ComponentType<'a>) -> Result<(), Error> {
        self.resolve_prepending_aliases(
            &mut c.decls,
            |resolver, decl| match decl {
                ComponentTypeDecl::Alias(alias) => resolver.alias(alias, false),
                ComponentTypeDecl::CoreType(ty) => resolver.core_ty(ty),
                ComponentTypeDecl::Type(ty) => resolver.ty(ty),
                ComponentTypeDecl::Import(import) => resolver.item_sig(&mut import.item),
                ComponentTypeDecl::Export(export) => resolver.item_sig(&mut export.item),
            },
            |state, decl| {
                match decl {
                    ComponentTypeDecl::Alias(alias) => {
                        state.register_alias(alias)?;
                    }
                    ComponentTypeDecl::CoreType(ty) => {
                        state.core_types.register(ty.id, "core type")?;
                    }
                    ComponentTypeDecl::Type(ty) => {
                        state.types.register(ty.id, "type")?;
                    }
                    ComponentTypeDecl::Export(e) => {
                        state.register_item_sig(&e.item)?;
                    }
                    ComponentTypeDecl::Import(i) => {
                        state.register_item_sig(&i.item)?;
                    }
                }
                Ok(())
            },
        )
    }

    /// Resolves and registers the declarations of an instance type in the
    /// current scope; the caller is responsible for pushing that scope.
    fn instance_type(&mut self, c: &mut InstanceType<'a>) -> Result<(), Error> {
        self.resolve_prepending_aliases(
            &mut c.decls,
            |resolver, decl| match decl {
                InstanceTypeDecl::Alias(alias) => resolver.alias(alias, false),
                InstanceTypeDecl::CoreType(ty) => resolver.core_ty(ty),
                InstanceTypeDecl::Type(ty) => resolver.ty(ty),
                InstanceTypeDecl::Export(export) => resolver.item_sig(&mut export.item),
            },
            |state, decl| {
                match decl {
                    InstanceTypeDecl::Alias(alias) => {
                        state.register_alias(alias)?;
                    }
                    InstanceTypeDecl::CoreType(ty) => {
                        state.core_types.register(ty.id, "core type")?;
                    }
                    InstanceTypeDecl::Type(ty) => {
                        state.types.register(ty.id, "type")?;
                    }
                    InstanceTypeDecl::Export(export) => {
                        state.register_item_sig(&export.item)?;
                    }
                }
                Ok(())
            },
        )
    }

    /// Resolves a core item reference. A reference without an export name is
    /// resolved directly in the namespace of its kind. One with an export
    /// name is replaced by a numeric index pointing at a queued
    /// `CoreExport` alias.
    fn core_item_ref<K>(&mut self, item: &mut CoreItemRef<'a, K>) -> Result<(), Error>
    where
        K: CoreItem + Copy,
    {
        // Check for not being an instance export reference
        if item.export_name.is_none() {
            self.resolve_ns(&mut item.idx, item.kind.ns())?;
            return Ok(());
        }

        // This is a reference to a core instance export
        let mut index = item.idx;
        self.resolve_ns(&mut index, Ns::CoreInstance)?;

        // Record an alias to reference the export
        let span = item.idx.span();
        let alias = Alias {
            span,
            id: None,
            name: None,
            target: AliasTarget::CoreExport {
                instance: index,
                name: item.export_name.unwrap(),
                kind: item.kind.ns().into(),
            },
        };

        // The alias is registered right away so the reference can use its
        // index; it is spliced into the field list later by
        // `resolve_prepending_aliases`.
        index = Index::Num(self.current().register_alias(&alias)?, span);
        self.aliases_to_insert.push(alias);

        item.idx = index;
        item.export_name = None;

        Ok(())
    }

    /// Resolves a component item reference. A reference without export names
    /// is resolved directly in the namespace of its kind. Otherwise one
    /// `Export` alias is queued per name, each aliasing from the previous
    /// result, and the reference is rewritten to the final alias index.
    fn component_item_ref<K>(&mut self, item: &mut ItemRef<'a, K>) -> Result<(), Error>
    where
        K: ComponentItem + Copy,
    {
        // Check for not being an instance export reference
        if item.export_names.is_empty() {
            self.resolve_ns(&mut item.idx, item.kind.ns())?;
            return Ok(());
        }

        // This is a reference to an instance export
        let mut index = item.idx;
        self.resolve_ns(&mut index, Ns::Instance)?;

        let span = item.idx.span();
        for (pos, export_name) in item.export_names.iter().enumerate() {
            // Record an alias to reference the export
            let alias = Alias {
                span,
                id: None,
                name: None,
                target: AliasTarget::Export {
                    instance: index,
                    name: export_name,
                    // Only the last name yields the referenced kind; every
                    // intermediate name is aliased as an instance.
                    kind: if pos == item.export_names.len() - 1 {
                        item.kind.ns().into()
                    } else {
                        ComponentExportAliasKind::Instance
                    },
                },
            };

            index = Index::Num(self.current().register_alias(&alias)?, span);
            self.aliases_to_insert.push(alias);
        }

        item.idx = index;
        item.export_names = Vec::new();

        Ok(())
    }

    /// Resolves `idx` in namespace `ns`, searching from the innermost scope
    /// outward. A match in the current scope is used directly. A match in an
    /// outer scope queues an `Outer` alias and rewrites `idx` to that alias's
    /// local index. If no scope matches, the error from the current scope is
    /// returned.
    fn resolve_ns(&mut self, idx: &mut Index<'a>, ns: Ns) -> Result<(), Error> {
        // Perform resolution on a local clone walking up the stack of components
        // that we have. Note that a local clone is used since we don't want to use
        // the parent's resolved index if a parent matches, instead we want to use
        // the index of the alias that we will automatically insert.
        let mut idx_clone = *idx;
        for (depth, resolver) in self.stack.iter_mut().rev().enumerate() {
            let depth = depth as u32;
            let found = match resolver.resolve(ns, &mut idx_clone) {
                Ok(idx) => idx,
                // Try the next parent
                Err(_) => continue,
            };

            // If this is the current component then no extra alias is necessary, so
            // return success.
            if depth == 0 {
                *idx = idx_clone;
                return Ok(());
            }
            // NOTE(review): this arm assumes a numeric index always resolves
            // in the current scope (depth 0), so only ids reach this point.
            // Confirm against `Namespace::resolve`.
            let id = match idx {
                Index::Id(id) => *id,
                Index::Num(..) => unreachable!(),
            };

            // When resolution succeeds in a parent then an outer alias is
            // automatically inserted here in this component.
            let span = idx.span();
            let alias = Alias {
                span,
                id: Some(id),
                name: None,
                target: AliasTarget::Outer {
                    outer: Index::Num(depth, span),
                    index: Index::Num(found, span),
                    kind: ns.into(),
                },
            };
            let local_index = self.current().register_alias(&alias)?;
            self.aliases_to_insert.push(alias);
            *idx = Index::Num(local_index, span);
            return Ok(());
        }

        // If resolution in any parent failed then simply return the error from our
        // local namespace
        self.current().resolve(ns, idx)?;
        unreachable!()
    }

    /// Resolves and registers the declarations of a core module type in the
    /// current scope. Aliases are resolved with `enclosing_only` set to
    /// `true`; import and export signatures have their type index resolved in
    /// the scope's core type namespace.
    fn module_type(&mut self, ty: &mut ModuleType<'a>) -> Result<(), Error> {
        return self.resolve_prepending_aliases(
            &mut ty.decls,
            |resolver, decl| match decl {
                ModuleTypeDecl::Alias(alias) => resolver.alias(alias, true),
                ModuleTypeDecl::Type(_) => Ok(()),
                ModuleTypeDecl::Import(import) => resolve_item_sig(resolver, &mut import.item),
                ModuleTypeDecl::Export(_, item) => resolve_item_sig(resolver, item),
            },
            |state, decl| {
                match decl {
                    ModuleTypeDecl::Alias(alias) => {
                        state.register_alias(alias)?;
                    }
                    ModuleTypeDecl::Type(ty) => {
                        state.core_types.register(ty.id, "type")?;
                    }
                    // Only the type namespace is populated within the module type
                    // namespace so these are ignored here.
                    ModuleTypeDecl::Import(_) | ModuleTypeDecl::Export(..) => {}
                }
                Ok(())
            },
        );

        // Resolves the type index of a func or exception-tag signature
        // against the innermost scope's core types; other item kinds carry
        // nothing to resolve.
        fn resolve_item_sig<'a>(
            resolver: &Resolver<'a>,
            sig: &mut core::ItemSig<'a>,
        ) -> Result<(), Error> {
            match &mut sig.kind {
                core::ItemKind::Func(ty) | core::ItemKind::Tag(core::TagType::Exception(ty)) => {
                    let idx = ty.index.as_mut().expect("index should be filled in");
                    resolver
                        .stack
                        .last()
                        .unwrap()
                        .core_types
                        .resolve(idx, "type")?;
                }
                core::ItemKind::Memory(_)
                | core::ItemKind::Global(_)
                | core::ItemKind::Table(_) => {}
            }
            Ok(())
        }
    }
}

impl<'a> ComponentState<'a> {
    /// Resolves `idx` in the namespace selected by `ns`, forwarding the
    /// result of `Namespace::resolve`.
    fn resolve(&mut self, ns: Ns, idx: &mut Index<'a>) -> Result<u32, Error> {
        match ns {
            Ns::CoreFunc => self.core_funcs.resolve(idx, "core func"),
            Ns::CoreGlobal => self.core_globals.resolve(idx, "core global"),
            Ns::CoreTable => self.core_tables.resolve(idx, "core table"),
            Ns::CoreMemory => self.core_memories.resolve(idx, "core memory"),
            Ns::CoreType => self.core_types.resolve(idx, "core type"),
            Ns::CoreTag => self.core_tags.resolve(idx, "core tag"),
            Ns::CoreInstance => self.core_instances.resolve(idx, "core instance"),
            Ns::CoreModule => self.core_modules.resolve(idx, "core module"),
            Ns::Func => self.funcs.resolve(idx, "func"),
            Ns::Type => self.types.resolve(idx, "type"),
            Ns::Instance => self.instances.resolve(idx, "instance"),
            Ns::Component => self.components.resolve(idx, "component"),
            Ns::Value => self.values.resolve(idx, "value"),
        }
    }

    /// Assign an index to the given field.
    ///
    /// # Panics
    ///
    /// Panics on `CoreFunc` and `Func` fields, which are expected to be
    /// expanded already.
    fn register(&mut self, item: &ComponentField<'a>) -> Result<(), Error> {
        match item {
            ComponentField::CoreModule(m) => self.core_modules.register(m.id, "core module")?,
            ComponentField::CoreInstance(i) => {
                self.core_instances.register(i.id, "core instance")?
            }
            ComponentField::CoreType(t) => self.core_types.register(t.id, "core type")?,
            ComponentField::Component(c) => self.components.register(c.id, "component")?,
            ComponentField::Instance(i) => self.instances.register(i.id, "instance")?,
            ComponentField::Alias(a) => self.register_alias(a)?,
            ComponentField::Type(t) => self.types.register(t.id, "type")?,
            // A lifted function is a component function; a lowered one is a
            // core function.
            ComponentField::CanonicalFunc(f) => match &f.kind {
                CanonicalFuncKind::Lift { .. } => self.funcs.register(f.id, "func")?,
                CanonicalFuncKind::Lower(_) => self.core_funcs.register(f.id, "core func")?,
            },
            ComponentField::CoreFunc(_) | ComponentField::Func(_) => {
                unreachable!("should be expanded already")
            }
            // A start definition registers one value per result.
            ComponentField::Start(s) => {
                for r in &s.results {
                    self.values.register(*r, "value")?;
                }
                return Ok(());
            }
            ComponentField::Import(i) => self.register_item_sig(&i.item)?,
            ComponentField::Export(e) => match &e.kind {
                ComponentExportKind::CoreModule(_) => {
                    self.core_modules.register(e.id, "core module")?
                }
                ComponentExportKind::Func(_) => self.funcs.register(e.id, "func")?,
                ComponentExportKind::Instance(_) => self.instances.register(e.id, "instance")?,
                ComponentExportKind::Value(_) => self.values.register(e.id, "value")?,
                ComponentExportKind::Component(_) => self.components.register(e.id, "component")?,
                ComponentExportKind::Type(_) => self.types.register(e.id, "type")?,
            },
            ComponentField::Custom(_) => return Ok(()),
        };

        Ok(())
    }

    /// Registers the id of an alias in the namespace selected by the kind of
    /// its target and returns the result of `Namespace::register`.
    fn register_alias(&mut self, alias: &Alias<'a>) -> Result<u32, Error> {
        match alias.target {
            AliasTarget::Export { kind, .. } => match kind {
                ComponentExportAliasKind::CoreModule => {
                    self.core_modules.register(alias.id, "core module")
                }
                ComponentExportAliasKind::Func => self.funcs.register(alias.id, "func"),
                ComponentExportAliasKind::Value => self.values.register(alias.id, "value"),
                ComponentExportAliasKind::Type => self.types.register(alias.id, "type"),
                ComponentExportAliasKind::Component => {
                    self.components.register(alias.id, "component")
                }
                ComponentExportAliasKind::Instance => self.instances.register(alias.id, "instance"),
            },
            AliasTarget::CoreExport { kind, .. } => match kind {
                core::ExportKind::Func => self.core_funcs.register(alias.id, "core func"),
                core::ExportKind::Table => self.core_tables.register(alias.id, "core table"),
                core::ExportKind::Memory => self.core_memories.register(alias.id, "core memory"),
                core::ExportKind::Global => self.core_globals.register(alias.id, "core global"),
                core::ExportKind::Tag => self.core_tags.register(alias.id, "core tag"),
            },
            AliasTarget::Outer { kind, .. } => match kind {
                ComponentOuterAliasKind::CoreModule => {
                    self.core_modules.register(alias.id, "core module")
                }
                ComponentOuterAliasKind::CoreType => {
                    self.core_types.register(alias.id, "core type")
                }
                ComponentOuterAliasKind::Type => self.types.register(alias.id, "type"),
                ComponentOuterAliasKind::Component => {
                    self.components.register(alias.id, "component")
                }
            },
        }
    }
}

/// Selects one of the namespaces held by `ComponentState`.
#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)]
enum Ns {
    CoreFunc,
    CoreGlobal,
    CoreTable,
    CoreMemory,
    CoreType,
    CoreTag,
    CoreInstance,
    CoreModule,
    Func,
    Type,
    Instance,
    Component,
    Value,
}

/// Maps an item kind to its namespace when used in a component item
/// reference (`ItemRef`).
trait ComponentItem {
    fn ns(&self) -> Ns;
}

/// Maps an item kind to its namespace when used in a core item reference
/// (`CoreItemRef`).
trait CoreItem {
    fn ns(&self) -> Ns;
}

// Implements `ComponentItem` for keyword type `$kw`, mapping it to `Ns::$kind`.
macro_rules! component_item {
    ($kw:path, $kind:ident) => {
        impl ComponentItem for $kw {
            fn ns(&self) -> Ns {
                Ns::$kind
            }
        }
    };
}

// Implements `CoreItem` for keyword type `$kw`, mapping it to `Ns::$kind`.
macro_rules! core_item {
    ($kw:path, $kind:ident) => {
        impl CoreItem for $kw {
            fn ns(&self) -> Ns {
                Ns::$kind
            }
        }
    };
}

component_item!(kw::func, Func);
component_item!(kw::r#type, Type);
component_item!(kw::r#instance, Instance);
component_item!(kw::component, Component);
component_item!(kw::value, Value);
// In a component item reference the `module` keyword selects the core module
// namespace.
component_item!(kw::module, CoreModule);

core_item!(kw::func, CoreFunc);
core_item!(kw::memory, CoreMemory);
core_item!(kw::r#type, CoreType);
core_item!(kw::r#instance, CoreInstance);

impl From<Ns> for ComponentExportAliasKind {
    /// # Panics
    ///
    /// Panics for namespaces that have no component export alias kind.
    fn from(ns: Ns) -> Self {
        match ns {
            Ns::CoreModule => Self::CoreModule,
            Ns::Func => Self::Func,
            Ns::Type => Self::Type,
            Ns::Instance => Self::Instance,
            Ns::Component => Self::Component,
            Ns::Value => Self::Value,
            _ => unreachable!("not a component exportable namespace"),
        }
    }
}

impl From<Ns> for ComponentOuterAliasKind {
    /// # Panics
    ///
    /// Panics for namespaces that have no outer alias kind.
    fn from(ns: Ns) -> Self {
        match ns {
            Ns::CoreModule => Self::CoreModule,
            Ns::CoreType => Self::CoreType,
            Ns::Type => Self::Type,
            Ns::Component => Self::Component,
            _ => unreachable!("not an outer alias namespace"),
        }
    }
}

impl From<Ns> for core::ExportKind {
    /// # Panics
    ///
    /// Panics for namespaces that have no core export kind.
    fn from(ns: Ns) -> Self {
        match ns {
            Ns::CoreFunc => Self::Func,
            Ns::CoreTable => Self::Table,
            Ns::CoreGlobal => Self::Global,
            Ns::CoreMemory => Self::Memory,
            Ns::CoreTag => Self::Tag,
            _ => unreachable!("not a core exportable namespace"),
        }
    }
}

impl From<ComponentOuterAliasKind> for Ns {
    fn from(kind: ComponentOuterAliasKind) -> Self {
        match kind {
            ComponentOuterAliasKind::CoreModule => Self::CoreModule,
            ComponentOuterAliasKind::CoreType => Self::CoreType,
            ComponentOuterAliasKind::Type => Self::Type,
            ComponentOuterAliasKind::Component => Self::Component,
        }
    }
}

impl CoreItem for core::ExportKind {
    fn ns(&self) -> Ns {
        match self {
            Self::Func => Ns::CoreFunc,
            Self::Table => Ns::CoreTable,
            Self::Global => Ns::CoreGlobal,
            Self::Memory => Ns::CoreMemory,
+ Self::Tag => Ns::CoreTag, + } + } +} diff --git a/third_party/rust/wast/src/component/types.rs b/third_party/rust/wast/src/component/types.rs new file mode 100644 index 0000000000..0ff8cc0183 --- /dev/null +++ b/third_party/rust/wast/src/component/types.rs @@ -0,0 +1,965 @@ +use crate::component::*; +use crate::core; +use crate::kw; +use crate::parser::Lookahead1; +use crate::parser::Peek; +use crate::parser::{Parse, Parser, Result}; +use crate::token::Index; +use crate::token::LParen; +use crate::token::{Id, NameAnnotation, Span}; + +/// A core type declaration. +#[derive(Debug)] +pub struct CoreType<'a> { + /// Where this type was defined. + pub span: Span, + /// An optional identifier to refer to this `core type` by as part of name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this type stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// The core type's definition. + pub def: CoreTypeDef<'a>, +} + +impl<'a> Parse<'a> for CoreType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::core>()?.0; + parser.parse::<kw::r#type>()?; + let id = parser.parse()?; + let name = parser.parse()?; + let def = parser.parens(|p| p.parse())?; + + Ok(Self { + span, + id, + name, + def, + }) + } +} + +/// Represents a core type definition. +/// +/// In the future this may be removed when module types are a part of +/// a core module. +#[derive(Debug)] +pub enum CoreTypeDef<'a> { + /// The type definition is one of the core types. + Def(core::TypeDef<'a>), + /// The type definition is a module type. + Module(ModuleType<'a>), +} + +impl<'a> Parse<'a> for CoreTypeDef<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::module>() { + parser.parse::<kw::module>()?; + Ok(Self::Module(parser.parse()?)) + } else { + Ok(Self::Def(parser.parse()?)) + } + } +} + +/// A type definition for a core module. 
+#[derive(Debug)] +pub struct ModuleType<'a> { + /// The declarations of the module type. + pub decls: Vec<ModuleTypeDecl<'a>>, +} + +impl<'a> Parse<'a> for ModuleType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.depth_check()?; + Ok(Self { + decls: parser.parse()?, + }) + } +} + +/// The declarations of a [`ModuleType`]. +#[derive(Debug)] +pub enum ModuleTypeDecl<'a> { + /// A core type. + Type(core::Type<'a>), + /// An alias local to the component type. + Alias(Alias<'a>), + /// An import. + Import(core::Import<'a>), + /// An export. + Export(&'a str, core::ItemSig<'a>), +} + +impl<'a> Parse<'a> for ModuleTypeDecl<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::r#type>() { + Ok(Self::Type(parser.parse()?)) + } else if l.peek::<kw::alias>() { + Ok(Self::Alias(Alias::parse_outer_core_type_alias(parser)?)) + } else if l.peek::<kw::import>() { + Ok(Self::Import(parser.parse()?)) + } else if l.peek::<kw::export>() { + parser.parse::<kw::export>()?; + let name = parser.parse()?; + let et = parser.parens(|parser| parser.parse())?; + Ok(Self::Export(name, et)) + } else { + Err(l.error()) + } + } +} + +impl<'a> Parse<'a> for Vec<ModuleTypeDecl<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut decls = Vec::new(); + while !parser.is_empty() { + decls.push(parser.parens(|parser| parser.parse())?); + } + Ok(decls) + } +} + +/// A type declaration in a component. +#[derive(Debug)] +pub struct Type<'a> { + /// Where this type was defined. + pub span: Span, + /// An optional identifier to refer to this `type` by as part of name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this type stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// The type definition. 
+ pub def: TypeDef<'a>, +} + +impl<'a> Parse<'a> for Type<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::r#type>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + let def = parser.parse()?; + + Ok(Self { + span, + id, + name, + exports, + def, + }) + } +} + +/// A definition of a component type. +#[derive(Debug)] +pub enum TypeDef<'a> { + /// A defined value type. + Defined(ComponentDefinedType<'a>), + /// A component function type. + Func(ComponentFunctionType<'a>), + /// A component type. + Component(ComponentType<'a>), + /// An instance type. + Instance(InstanceType<'a>), +} + +impl<'a> Parse<'a> for TypeDef<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<LParen>() { + parser.parens(|parser| { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(Self::Func(parser.parse()?)) + } else if l.peek::<kw::component>() { + parser.parse::<kw::component>()?; + Ok(Self::Component(parser.parse()?)) + } else if l.peek::<kw::instance>() { + parser.parse::<kw::instance>()?; + Ok(Self::Instance(parser.parse()?)) + } else { + Ok(Self::Defined(ComponentDefinedType::parse_non_primitive( + parser, l, + )?)) + } + }) + } else { + // Only primitive types have no parens + Ok(Self::Defined(ComponentDefinedType::Primitive( + parser.parse()?, + ))) + } + } +} + +/// A primitive value type. 
+#[allow(missing_docs)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PrimitiveValType { + Bool, + S8, + U8, + S16, + U16, + S32, + U32, + S64, + U64, + Float32, + Float64, + Char, + String, +} + +impl<'a> Parse<'a> for PrimitiveValType { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::bool_>() { + parser.parse::<kw::bool_>()?; + Ok(Self::Bool) + } else if l.peek::<kw::s8>() { + parser.parse::<kw::s8>()?; + Ok(Self::S8) + } else if l.peek::<kw::u8>() { + parser.parse::<kw::u8>()?; + Ok(Self::U8) + } else if l.peek::<kw::s16>() { + parser.parse::<kw::s16>()?; + Ok(Self::S16) + } else if l.peek::<kw::u16>() { + parser.parse::<kw::u16>()?; + Ok(Self::U16) + } else if l.peek::<kw::s32>() { + parser.parse::<kw::s32>()?; + Ok(Self::S32) + } else if l.peek::<kw::u32>() { + parser.parse::<kw::u32>()?; + Ok(Self::U32) + } else if l.peek::<kw::s64>() { + parser.parse::<kw::s64>()?; + Ok(Self::S64) + } else if l.peek::<kw::u64>() { + parser.parse::<kw::u64>()?; + Ok(Self::U64) + } else if l.peek::<kw::float32>() { + parser.parse::<kw::float32>()?; + Ok(Self::Float32) + } else if l.peek::<kw::float64>() { + parser.parse::<kw::float64>()?; + Ok(Self::Float64) + } else if l.peek::<kw::char>() { + parser.parse::<kw::char>()?; + Ok(Self::Char) + } else if l.peek::<kw::string>() { + parser.parse::<kw::string>()?; + Ok(Self::String) + } else { + Err(l.error()) + } + } +} + +impl Peek for PrimitiveValType { + fn peek(cursor: crate::parser::Cursor<'_>) -> bool { + matches!( + cursor.keyword(), + Some(("bool", _)) + | Some(("s8", _)) + | Some(("u8", _)) + | Some(("s16", _)) + | Some(("u16", _)) + | Some(("s32", _)) + | Some(("u32", _)) + | Some(("s64", _)) + | Some(("u64", _)) + | Some(("float32", _)) + | Some(("float64", _)) + | Some(("char", _)) + | Some(("string", _)) + ) + } + + fn display() -> &'static str { + "primitive value type" + } +} + +/// A component value type. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub enum ComponentValType<'a> { + /// The value type is an inline defined type. + Inline(ComponentDefinedType<'a>), + /// The value type is an index reference to a defined type. + Ref(Index<'a>), +} + +impl<'a> Parse<'a> for ComponentValType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<Index<'_>>() { + Ok(Self::Ref(parser.parse()?)) + } else { + Ok(Self::Inline(InlineComponentValType::parse(parser)?.0)) + } + } +} + +impl Peek for ComponentValType<'_> { + fn peek(cursor: crate::parser::Cursor<'_>) -> bool { + Index::peek(cursor) || ComponentDefinedType::peek(cursor) + } + + fn display() -> &'static str { + "component value type" + } +} + +/// An inline-only component value type. +/// +/// This variation does not parse type indexes. +#[allow(missing_docs)] +#[derive(Debug)] +pub struct InlineComponentValType<'a>(ComponentDefinedType<'a>); + +impl<'a> Parse<'a> for InlineComponentValType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<LParen>() { + parser.parens(|parser| { + Ok(Self(ComponentDefinedType::parse_non_primitive( + parser, + parser.lookahead1(), + )?)) + }) + } else { + Ok(Self(ComponentDefinedType::Primitive(parser.parse()?))) + } + } +} + +// A component defined type. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub enum ComponentDefinedType<'a> { + Primitive(PrimitiveValType), + Record(Record<'a>), + Variant(Variant<'a>), + List(List<'a>), + Tuple(Tuple<'a>), + Flags(Flags<'a>), + Enum(Enum<'a>), + Union(Union<'a>), + Option(OptionType<'a>), + Result(ResultType<'a>), +} + +impl<'a> ComponentDefinedType<'a> { + fn parse_non_primitive(parser: Parser<'a>, mut l: Lookahead1<'a>) -> Result<Self> { + parser.depth_check()?; + if l.peek::<kw::record>() { + Ok(Self::Record(parser.parse()?)) + } else if l.peek::<kw::variant>() { + Ok(Self::Variant(parser.parse()?)) + } else if l.peek::<kw::list>() { + Ok(Self::List(parser.parse()?)) + } else if l.peek::<kw::tuple>() { + Ok(Self::Tuple(parser.parse()?)) + } else if l.peek::<kw::flags>() { + Ok(Self::Flags(parser.parse()?)) + } else if l.peek::<kw::enum_>() { + Ok(Self::Enum(parser.parse()?)) + } else if l.peek::<kw::union>() { + Ok(Self::Union(parser.parse()?)) + } else if l.peek::<kw::option>() { + Ok(Self::Option(parser.parse()?)) + } else if l.peek::<kw::result>() { + Ok(Self::Result(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +impl Default for ComponentDefinedType<'_> { + fn default() -> Self { + Self::Primitive(PrimitiveValType::Bool) + } +} + +impl Peek for ComponentDefinedType<'_> { + fn peek(cursor: crate::parser::Cursor<'_>) -> bool { + if PrimitiveValType::peek(cursor) { + return true; + } + + match cursor.lparen() { + Some(cursor) => matches!( + cursor.keyword(), + Some(("record", _)) + | Some(("variant", _)) + | Some(("list", _)) + | Some(("tuple", _)) + | Some(("flags", _)) + | Some(("enum", _)) + | Some(("union", _)) + | Some(("option", _)) + | Some(("result", _)) + ), + None => false, + } + } + + fn display() -> &'static str { + "component defined type" + } +} + +/// A record defined type. +#[derive(Debug)] +pub struct Record<'a> { + /// The fields of the record. 
+ pub fields: Vec<RecordField<'a>>, +} + +impl<'a> Parse<'a> for Record<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::record>()?; + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parens(|p| p.parse())?); + } + Ok(Self { fields }) + } +} + +/// A record type field. +#[derive(Debug)] +pub struct RecordField<'a> { + /// The name of the field. + pub name: &'a str, + /// The type of the field. + pub ty: ComponentValType<'a>, +} + +impl<'a> Parse<'a> for RecordField<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::field>()?; + Ok(Self { + name: parser.parse()?, + ty: parser.parse()?, + }) + } +} + +/// A variant defined type. +#[derive(Debug)] +pub struct Variant<'a> { + /// The cases of the variant type. + pub cases: Vec<VariantCase<'a>>, +} + +impl<'a> Parse<'a> for Variant<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::variant>()?; + let mut cases = Vec::new(); + while !parser.is_empty() { + cases.push(parser.parens(|p| p.parse())?); + } + Ok(Self { cases }) + } +} + +/// A case of a variant type. +#[derive(Debug)] +pub struct VariantCase<'a> { + /// Where this `case` was defined + pub span: Span, + /// An optional identifier to refer to this case by as part of name + /// resolution. + pub id: Option<Id<'a>>, + /// The name of the case. + pub name: &'a str, + /// The optional type of the case. + pub ty: Option<ComponentValType<'a>>, + /// The optional refinement. + pub refines: Option<Refinement<'a>>, +} + +impl<'a> Parse<'a> for VariantCase<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::case>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let ty = parser.parse()?; + let refines = if !parser.is_empty() { + Some(parser.parse()?) + } else { + None + }; + Ok(Self { + span, + id, + name, + ty, + refines, + }) + } +} + +/// A refinement for a variant case. 
+#[derive(Debug)] +pub enum Refinement<'a> { + /// The refinement is referenced by index. + Index(Span, Index<'a>), + /// The refinement has been resolved to an index into + /// the cases of the variant. + Resolved(u32), +} + +impl<'a> Parse<'a> for Refinement<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|parser| { + let span = parser.parse::<kw::refines>()?.0; + let id = parser.parse()?; + Ok(Self::Index(span, id)) + }) + } +} + +/// A list type. +#[derive(Debug)] +pub struct List<'a> { + /// The element type of the array. + pub element: Box<ComponentValType<'a>>, +} + +impl<'a> Parse<'a> for List<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::list>()?; + Ok(Self { + element: Box::new(parser.parse()?), + }) + } +} + +/// A tuple type. +#[derive(Debug)] +pub struct Tuple<'a> { + /// The types of the fields of the tuple. + pub fields: Vec<ComponentValType<'a>>, +} + +impl<'a> Parse<'a> for Tuple<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::tuple>()?; + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parse()?); + } + Ok(Self { fields }) + } +} + +/// A flags type. +#[derive(Debug)] +pub struct Flags<'a> { + /// The names of the individual flags. + pub names: Vec<&'a str>, +} + +impl<'a> Parse<'a> for Flags<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::flags>()?; + let mut names = Vec::new(); + while !parser.is_empty() { + names.push(parser.parse()?); + } + Ok(Self { names }) + } +} + +/// An enum type. +#[derive(Debug)] +pub struct Enum<'a> { + /// The tag names of the enum. + pub names: Vec<&'a str>, +} + +impl<'a> Parse<'a> for Enum<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::enum_>()?; + let mut names = Vec::new(); + while !parser.is_empty() { + names.push(parser.parse()?); + } + Ok(Self { names }) + } +} + +/// A union type. 
+#[derive(Debug)] +pub struct Union<'a> { + /// The types of the union. + pub types: Vec<ComponentValType<'a>>, +} + +impl<'a> Parse<'a> for Union<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::union>()?; + let mut types = Vec::new(); + while !parser.is_empty() { + types.push(parser.parse()?); + } + Ok(Self { types }) + } +} + +/// An optional type. +#[derive(Debug)] +pub struct OptionType<'a> { + /// The type of the value, when a value is present. + pub element: Box<ComponentValType<'a>>, +} + +impl<'a> Parse<'a> for OptionType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::option>()?; + Ok(Self { + element: Box::new(parser.parse()?), + }) + } +} + +/// A result type. +#[derive(Debug)] +pub struct ResultType<'a> { + /// The type on success. + pub ok: Option<Box<ComponentValType<'a>>>, + /// The type on failure. + pub err: Option<Box<ComponentValType<'a>>>, +} + +impl<'a> Parse<'a> for ResultType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::result>()?; + + let ok: Option<ComponentValType> = parser.parse()?; + let err: Option<ComponentValType> = if parser.peek::<LParen>() { + Some(parser.parens(|parser| { + parser.parse::<kw::error>()?; + parser.parse() + })?) + } else { + None + }; + + Ok(Self { + ok: ok.map(Box::new), + err: err.map(Box::new), + }) + } +} + +/// A component function type with parameters and result. +#[derive(Debug)] +pub struct ComponentFunctionType<'a> { + /// The parameters of a function, optionally each having an identifier for + /// name resolution and a name for the custom `name` section. + pub params: Box<[ComponentFunctionParam<'a>]>, + /// The result of a function, optionally each having an identifier for + /// name resolution and a name for the custom `name` section. 
+ pub results: Box<[ComponentFunctionResult<'a>]>, +} + +impl<'a> Parse<'a> for ComponentFunctionType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut params: Vec<ComponentFunctionParam> = Vec::new(); + while parser.peek2::<kw::param>() { + params.push(parser.parens(|p| p.parse())?); + } + + let mut results: Vec<ComponentFunctionResult> = Vec::new(); + while parser.peek2::<kw::result>() { + results.push(parser.parens(|p| p.parse())?); + } + + Ok(Self { + params: params.into(), + results: results.into(), + }) + } +} + +/// A parameter of a [`ComponentFunctionType`]. +#[derive(Debug)] +pub struct ComponentFunctionParam<'a> { + /// The name of the parameter + pub name: &'a str, + /// The type of the parameter. + pub ty: ComponentValType<'a>, +} + +impl<'a> Parse<'a> for ComponentFunctionParam<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::param>()?; + Ok(Self { + name: parser.parse()?, + ty: parser.parse()?, + }) + } +} + +/// A result of a [`ComponentFunctionType`]. +#[derive(Debug)] +pub struct ComponentFunctionResult<'a> { + /// An optionally-specified name of this result + pub name: Option<&'a str>, + /// The type of the result. + pub ty: ComponentValType<'a>, +} + +impl<'a> Parse<'a> for ComponentFunctionResult<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<kw::result>()?; + Ok(Self { + name: parser.parse()?, + ty: parser.parse()?, + }) + } +} + +/// The type of an exported item from an component or instance type. +#[derive(Debug)] +pub struct ComponentExportType<'a> { + /// Where this export was defined. + pub span: Span, + /// The name of this export. + pub name: &'a str, + /// The optional URL of this export. + pub url: Option<&'a str>, + /// The signature of the item. 
+ pub item: ItemSig<'a>, +} + +impl<'a> Parse<'a> for ComponentExportType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::export>()?.0; + let id = parser.parse()?; + let debug_name = parser.parse()?; + let name = parser.parse()?; + let url = parser.parse()?; + let item = parser.parens(|p| { + let mut item = p.parse::<ItemSigNoName<'_>>()?.0; + item.id = id; + item.name = debug_name; + Ok(item) + })?; + Ok(Self { + span, + name, + url, + item, + }) + } +} + +/// A type definition for a component type. +#[derive(Debug, Default)] +pub struct ComponentType<'a> { + /// The declarations of the component type. + pub decls: Vec<ComponentTypeDecl<'a>>, +} + +impl<'a> Parse<'a> for ComponentType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.depth_check()?; + Ok(Self { + decls: parser.parse()?, + }) + } +} + +/// A declaration of a component type. +#[derive(Debug)] +pub enum ComponentTypeDecl<'a> { + /// A core type definition local to the component type. + CoreType(CoreType<'a>), + /// A type definition local to the component type. + Type(Type<'a>), + /// An alias local to the component type. + Alias(Alias<'a>), + /// An import of the component type. + Import(ComponentImport<'a>), + /// An export of the component type. 
+ Export(ComponentExportType<'a>), +} + +impl<'a> Parse<'a> for ComponentTypeDecl<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::core>() { + Ok(Self::CoreType(parser.parse()?)) + } else if l.peek::<kw::r#type>() { + Ok(Self::Type(parser.parse()?)) + } else if l.peek::<kw::alias>() { + Ok(Self::Alias(parser.parse()?)) + } else if l.peek::<kw::import>() { + Ok(Self::Import(parser.parse()?)) + } else if l.peek::<kw::export>() { + Ok(Self::Export(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +impl<'a> Parse<'a> for Vec<ComponentTypeDecl<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut decls = Vec::new(); + while !parser.is_empty() { + decls.push(parser.parens(|parser| parser.parse())?); + } + Ok(decls) + } +} + +/// A type definition for an instance type. +#[derive(Debug)] +pub struct InstanceType<'a> { + /// The declarations of the instance type. + pub decls: Vec<InstanceTypeDecl<'a>>, +} + +impl<'a> Parse<'a> for InstanceType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.depth_check()?; + Ok(Self { + decls: parser.parse()?, + }) + } +} + +/// A declaration of an instance type. +#[derive(Debug)] +pub enum InstanceTypeDecl<'a> { + /// A core type definition local to the component type. + CoreType(CoreType<'a>), + /// A type definition local to the instance type. + Type(Type<'a>), + /// An alias local to the instance type. + Alias(Alias<'a>), + /// An export of the instance type. 
+ Export(ComponentExportType<'a>), +} + +impl<'a> Parse<'a> for InstanceTypeDecl<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::core>() { + Ok(Self::CoreType(parser.parse()?)) + } else if l.peek::<kw::r#type>() { + Ok(Self::Type(parser.parse()?)) + } else if l.peek::<kw::alias>() { + Ok(Self::Alias(parser.parse()?)) + } else if l.peek::<kw::export>() { + Ok(Self::Export(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +impl<'a> Parse<'a> for Vec<InstanceTypeDecl<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut decls = Vec::new(); + while !parser.is_empty() { + decls.push(parser.parens(|parser| parser.parse())?); + } + Ok(decls) + } +} + +/// A value type declaration used for values in import signatures. +#[derive(Debug)] +pub struct ComponentValTypeUse<'a>(pub ComponentValType<'a>); + +impl<'a> Parse<'a> for ComponentValTypeUse<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + match ComponentTypeUse::<'a, InlineComponentValType<'a>>::parse(parser)? { + ComponentTypeUse::Ref(i) => Ok(Self(ComponentValType::Ref(i.idx))), + ComponentTypeUse::Inline(t) => Ok(Self(ComponentValType::Inline(t.0))), + } + } +} + +/// A reference to a core type defined in this component. +/// +/// This is the same as `TypeUse`, but accepts `$T` as shorthand for +/// `(type $T)`. +#[derive(Debug, Clone)] +pub enum CoreTypeUse<'a, T> { + /// The type that we're referencing. + Ref(CoreItemRef<'a, kw::r#type>), + /// The inline type. 
+ Inline(T), +} + +impl<'a, T: Parse<'a>> Parse<'a> for CoreTypeUse<'a, T> { + fn parse(parser: Parser<'a>) -> Result<Self> { + // Here the core context is assumed, so no core prefix is expected + if parser.peek::<LParen>() && parser.peek2::<CoreItemRef<'a, kw::r#type>>() { + Ok(Self::Ref(parser.parens(|parser| parser.parse())?)) + } else { + Ok(Self::Inline(parser.parse()?)) + } + } +} + +impl<T> Default for CoreTypeUse<'_, T> { + fn default() -> Self { + let span = Span::from_offset(0); + Self::Ref(CoreItemRef { + idx: Index::Num(0, span), + kind: kw::r#type(span), + export_name: None, + }) + } +} + +/// A reference to a type defined in this component. +/// +/// This is the same as `TypeUse`, but accepts `$T` as shorthand for +/// `(type $T)`. +#[derive(Debug, Clone)] +pub enum ComponentTypeUse<'a, T> { + /// The type that we're referencing. + Ref(ItemRef<'a, kw::r#type>), + /// The inline type. + Inline(T), +} + +impl<'a, T: Parse<'a>> Parse<'a> for ComponentTypeUse<'a, T> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<LParen>() && parser.peek2::<ItemRef<'a, kw::r#type>>() { + Ok(Self::Ref(parser.parens(|parser| parser.parse())?)) + } else { + Ok(Self::Inline(parser.parse()?)) + } + } +} + +impl<T> Default for ComponentTypeUse<'_, T> { + fn default() -> Self { + let span = Span::from_offset(0); + Self::Ref(ItemRef { + idx: Index::Num(0, span), + kind: kw::r#type(span), + export_names: Vec::new(), + }) + } +} diff --git a/third_party/rust/wast/src/component/wast.rs b/third_party/rust/wast/src/component/wast.rs new file mode 100644 index 0000000000..8409a6c969 --- /dev/null +++ b/third_party/rust/wast/src/component/wast.rs @@ -0,0 +1,166 @@ +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::{Float32, Float64}; + +/// Expression that can be used inside of `invoke` expressions for core wasm +/// functions. 
+#[derive(Debug)] +#[allow(missing_docs)] +pub enum WastVal<'a> { + Bool(bool), + U8(u8), + S8(i8), + U16(u16), + S16(i16), + U32(u32), + S32(i32), + U64(u64), + S64(i64), + Float32(Float32), + Float64(Float64), + Char(char), + String(&'a str), + List(Vec<WastVal<'a>>), + Record(Vec<(&'a str, WastVal<'a>)>), + Tuple(Vec<WastVal<'a>>), + Variant(&'a str, Option<Box<WastVal<'a>>>), + Enum(&'a str), + Union(u32, Box<WastVal<'a>>), + Option(Option<Box<WastVal<'a>>>), + Result(Result<Option<Box<WastVal<'a>>>, Option<Box<WastVal<'a>>>>), + Flags(Vec<&'a str>), +} + +static CASES: &[(&str, fn(Parser<'_>) -> Result<WastVal<'_>>)] = { + use WastVal::*; + &[ + ("bool.const", |p| { + let mut l = p.lookahead1(); + if l.peek::<kw::true_>() { + p.parse::<kw::true_>()?; + Ok(Bool(true)) + } else if l.peek::<kw::false_>() { + p.parse::<kw::false_>()?; + Ok(Bool(false)) + } else { + Err(l.error()) + } + }), + ("u8.const", |p| Ok(U8(p.parse()?))), + ("s8.const", |p| Ok(S8(p.parse()?))), + ("u16.const", |p| Ok(U16(p.parse()?))), + ("s16.const", |p| Ok(S16(p.parse()?))), + ("u32.const", |p| Ok(U32(p.parse()?))), + ("s32.const", |p| Ok(S32(p.parse()?))), + ("u64.const", |p| Ok(U64(p.parse()?))), + ("s64.const", |p| Ok(S64(p.parse()?))), + ("f32.const", |p| Ok(Float32(p.parse()?))), + ("f64.const", |p| Ok(Float64(p.parse()?))), + ("char.const", |p| { + let s = p.parse::<&str>()?; + let mut ch = s.chars(); + let ret = match ch.next() { + Some(c) => c, + None => return Err(p.error("empty string")), + }; + if ch.next().is_some() { + return Err(p.error("more than one character")); + } + Ok(Char(ret)) + }), + ("str.const", |p| Ok(String(p.parse()?))), + ("list.const", |p| { + let mut ret = Vec::new(); + while !p.is_empty() { + ret.push(p.parens(|p| p.parse())?); + } + Ok(List(ret)) + }), + ("record.const", |p| { + let mut ret = Vec::new(); + while !p.is_empty() { + ret.push(p.parens(|p| { + p.parse::<kw::field>()?; + Ok((p.parse()?, p.parse()?)) + })?); + } + Ok(Record(ret)) + }), + 
("tuple.const", |p| { + let mut ret = Vec::new(); + while !p.is_empty() { + ret.push(p.parens(|p| p.parse())?); + } + Ok(Tuple(ret)) + }), + ("variant.const", |p| { + let name = p.parse()?; + let payload = if p.is_empty() { + None + } else { + Some(Box::new(p.parens(|p| p.parse())?)) + }; + Ok(Variant(name, payload)) + }), + ("enum.const", |p| Ok(Enum(p.parse()?))), + ("union.const", |p| { + let num = p.parse()?; + let payload = Box::new(p.parens(|p| p.parse())?); + Ok(Union(num, payload)) + }), + ("option.none", |_| Ok(Option(None))), + ("option.some", |p| { + Ok(Option(Some(Box::new(p.parens(|p| p.parse())?)))) + }), + ("result.ok", |p| { + Ok(Result(Ok(if p.is_empty() { + None + } else { + Some(Box::new(p.parens(|p| p.parse())?)) + }))) + }), + ("result.err", |p| { + Ok(Result(Err(if p.is_empty() { + None + } else { + Some(Box::new(p.parens(|p| p.parse())?)) + }))) + }), + ("flags.const", |p| { + let mut ret = Vec::new(); + while !p.is_empty() { + ret.push(p.parse()?); + } + Ok(Flags(ret)) + }), + ] +}; + +impl<'a> Parse<'a> for WastVal<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.depth_check()?; + let parse = parser.step(|c| { + if let Some((kw, rest)) = c.keyword() { + if let Some(i) = CASES.iter().position(|(name, _)| *name == kw) { + return Ok((CASES[i].1, rest)); + } + } + Err(c.error("expected a [type].const expression")) + })?; + parse(parser) + } +} + +impl Peek for WastVal<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let kw = match cursor.keyword() { + Some((kw, _)) => kw, + None => return false, + }; + CASES.iter().any(|(name, _)| *name == kw) + } + + fn display() -> &'static str { + "core wasm argument" + } +} diff --git a/third_party/rust/wast/src/core.rs b/third_party/rust/wast/src/core.rs new file mode 100644 index 0000000000..785a081853 --- /dev/null +++ b/third_party/rust/wast/src/core.rs @@ -0,0 +1,29 @@ +//! Types and support for parsing the core wasm text format. 
+ +mod custom; +mod export; +mod expr; +mod func; +mod global; +mod import; +mod memory; +mod module; +mod table; +mod tag; +mod types; +mod wast; +pub use self::custom::*; +pub use self::export::*; +pub use self::expr::*; +pub use self::func::*; +pub use self::global::*; +pub use self::import::*; +pub use self::memory::*; +pub use self::module::*; +pub use self::table::*; +pub use self::tag::*; +pub use self::types::*; +pub use self::wast::*; + +pub(crate) mod binary; +pub(crate) mod resolve; diff --git a/third_party/rust/wast/src/core/binary.rs b/third_party/rust/wast/src/core/binary.rs new file mode 100644 index 0000000000..5ae4752349 --- /dev/null +++ b/third_party/rust/wast/src/core/binary.rs @@ -0,0 +1,1180 @@ +use crate::core::*; +use crate::encode::Encode; +use crate::token::*; + +pub fn encode( + module_id: &Option<Id<'_>>, + module_name: &Option<NameAnnotation<'_>>, + fields: &[ModuleField<'_>], +) -> Vec<u8> { + use CustomPlace::*; + use CustomPlaceAnchor::*; + + let mut types = Vec::new(); + let mut imports = Vec::new(); + let mut funcs = Vec::new(); + let mut tables = Vec::new(); + let mut memories = Vec::new(); + let mut globals = Vec::new(); + let mut exports = Vec::new(); + let mut start = Vec::new(); + let mut elem = Vec::new(); + let mut data = Vec::new(); + let mut tags = Vec::new(); + let mut customs = Vec::new(); + for field in fields { + match field { + ModuleField::Type(i) => types.push(RecOrType::Type(i)), + ModuleField::Rec(i) => types.push(RecOrType::Rec(i)), + ModuleField::Import(i) => imports.push(i), + ModuleField::Func(i) => funcs.push(i), + ModuleField::Table(i) => tables.push(i), + ModuleField::Memory(i) => memories.push(i), + ModuleField::Global(i) => globals.push(i), + ModuleField::Export(i) => exports.push(i), + ModuleField::Start(i) => start.push(i), + ModuleField::Elem(i) => elem.push(i), + ModuleField::Data(i) => data.push(i), + ModuleField::Tag(i) => tags.push(i), + ModuleField::Custom(i) => customs.push(i), + } + } + + let 
mut e = Encoder { + wasm: Vec::new(), + tmp: Vec::new(), + customs: &customs, + }; + e.wasm.extend(b"\0asm"); + e.wasm.extend(b"\x01\0\0\0"); + + e.custom_sections(BeforeFirst); + + e.section_list(1, Type, &types); + e.section_list(2, Import, &imports); + + let functys = funcs.iter().map(|f| &f.ty).collect::<Vec<_>>(); + e.section_list(3, Func, &functys); + e.section_list(4, Table, &tables); + e.section_list(5, Memory, &memories); + e.section_list(13, Tag, &tags); + e.section_list(6, Global, &globals); + e.section_list(7, Export, &exports); + e.custom_sections(Before(Start)); + if let Some(start) = start.get(0) { + e.section(8, start); + } + e.custom_sections(After(Start)); + e.section_list(9, Elem, &elem); + if needs_data_count(&funcs) { + e.section(12, &data.len()); + } + e.section_list(10, Code, &funcs); + e.section_list(11, Data, &data); + + let names = find_names(module_id, module_name, fields); + if !names.is_empty() { + e.section(0, &("name", names)); + } + e.custom_sections(AfterLast); + + return e.wasm; + + fn needs_data_count(funcs: &[&crate::core::Func<'_>]) -> bool { + funcs + .iter() + .filter_map(|f| match &f.kind { + FuncKind::Inline { expression, .. 
} => Some(expression), + _ => None, + }) + .flat_map(|e| e.instrs.iter()) + .any(|i| i.needs_data_count()) + } +} + +struct Encoder<'a> { + wasm: Vec<u8>, + tmp: Vec<u8>, + customs: &'a [&'a Custom<'a>], +} + +impl Encoder<'_> { + fn section(&mut self, id: u8, section: &dyn Encode) { + self.tmp.truncate(0); + section.encode(&mut self.tmp); + self.wasm.push(id); + self.tmp.encode(&mut self.wasm); + } + + fn custom_sections(&mut self, place: CustomPlace) { + for entry in self.customs.iter() { + if entry.place == place { + self.section(0, &(entry.name, entry)); + } + } + } + + fn section_list(&mut self, id: u8, anchor: CustomPlaceAnchor, list: &[impl Encode]) { + self.custom_sections(CustomPlace::Before(anchor)); + if !list.is_empty() { + self.section(id, &list) + } + self.custom_sections(CustomPlace::After(anchor)); + } +} + +impl Encode for FunctionType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.params.len().encode(e); + for (_, _, ty) in self.params.iter() { + ty.encode(e); + } + self.results.encode(e); + } +} + +impl Encode for StructType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.fields.len().encode(e); + for field in self.fields.iter() { + field.ty.encode(e); + (field.mutable as i32).encode(e); + } + } +} + +impl Encode for ArrayType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + (self.mutable as i32).encode(e); + } +} + +impl Encode for ExportType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.name.encode(e); + self.item.encode(e); + } +} + +enum RecOrType<'a> { + Type(&'a Type<'a>), + Rec(&'a Rec<'a>), +} + +impl Encode for RecOrType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + RecOrType::Type(ty) => ty.encode(e), + RecOrType::Rec(rec) => rec.encode(e), + } + } +} + +impl Encode for Type<'_> { + fn encode(&self, e: &mut Vec<u8>) { + if let Some(parent) = &self.parent { + e.push(0x50); + (1 as usize).encode(e); + parent.encode(e); + } + match &self.def { + TypeDef::Func(func) => { + e.push(0x60); + 
func.encode(e) + } + TypeDef::Struct(r#struct) => { + e.push(0x5f); + r#struct.encode(e) + } + TypeDef::Array(array) => { + e.push(0x5e); + array.encode(e) + } + } + } +} + +impl Encode for Rec<'_> { + fn encode(&self, e: &mut Vec<u8>) { + if self.types.len() == 1 { + self.types[0].encode(e); + return; + } + + e.push(0x4f); + self.types.len().encode(e); + for ty in &self.types { + ty.encode(e); + } + } +} + +impl Encode for Option<Id<'_>> { + fn encode(&self, _e: &mut Vec<u8>) { + // used for parameters in the tuple impl as well as instruction labels + } +} + +impl<'a> Encode for ValType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + ValType::I32 => e.push(0x7f), + ValType::I64 => e.push(0x7e), + ValType::F32 => e.push(0x7d), + ValType::F64 => e.push(0x7c), + ValType::V128 => e.push(0x7b), + ValType::Ref(ty) => { + ty.encode(e); + } + } + } +} + +impl<'a> Encode for HeapType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + HeapType::Func => e.push(0x70), + HeapType::Extern => e.push(0x6f), + HeapType::Any => e.push(0x6e), + HeapType::Eq => e.push(0x6d), + HeapType::Struct => e.push(0x67), + HeapType::Array => e.push(0x66), + HeapType::I31 => e.push(0x6a), + HeapType::NoFunc => e.push(0x68), + HeapType::NoExtern => e.push(0x69), + HeapType::None => e.push(0x65), + // Note that this is encoded as a signed leb128 so be sure to cast + // to an i64 first + HeapType::Index(Index::Num(n, _)) => i64::from(*n).encode(e), + HeapType::Index(Index::Id(n)) => { + panic!("unresolved index in emission: {:?}", n) + } + } + } +} + +impl<'a> Encode for RefType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + // The 'funcref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Func, + } => e.push(0x70), + // The 'externref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Extern, + } => e.push(0x6f), + // The 'eqref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Eq, + } => e.push(0x6d), + 
// The 'structref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::Struct, + } => e.push(0x67), + // The 'i31ref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::I31, + } => e.push(0x6a), + // The 'nullfuncref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::NoFunc, + } => e.push(0x68), + // The 'nullexternref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::NoExtern, + } => e.push(0x69), + // The 'nullref' binary abbreviation + RefType { + nullable: true, + heap: HeapType::None, + } => e.push(0x65), + + // Generic 'ref null <heaptype>' encoding + RefType { + nullable: true, + heap, + } => { + e.push(0x6c); + heap.encode(e); + } + // Generic 'ref <heaptype>' encoding + RefType { + nullable: false, + heap, + } => { + e.push(0x6b); + heap.encode(e); + } + } + } +} + +impl<'a> Encode for StorageType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + StorageType::I8 => e.push(0x7a), + StorageType::I16 => e.push(0x79), + StorageType::Val(ty) => { + ty.encode(e); + } + } + } +} + +impl Encode for Import<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.module.encode(e); + self.field.encode(e); + self.item.encode(e); + } +} + +impl Encode for ItemSig<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.kind { + ItemKind::Func(f) => { + e.push(0x00); + f.encode(e); + } + ItemKind::Table(f) => { + e.push(0x01); + f.encode(e); + } + ItemKind::Memory(f) => { + e.push(0x02); + f.encode(e); + } + ItemKind::Global(f) => { + e.push(0x03); + f.encode(e); + } + ItemKind::Tag(f) => { + e.push(0x04); + f.encode(e); + } + } + } +} + +impl<T> Encode for TypeUse<'_, T> { + fn encode(&self, e: &mut Vec<u8>) { + self.index + .as_ref() + .expect("TypeUse should be filled in by this point") + .encode(e) + } +} + +impl Encode for Index<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + Index::Num(n, _) => n.encode(e), + Index::Id(n) => panic!("unresolved index in emission: {:?}", n), + } 
+ } +} + +impl<'a> Encode for TableType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + self.elem.encode(e); + self.limits.encode(e); + } +} + +impl Encode for Limits { + fn encode(&self, e: &mut Vec<u8>) { + match self.max { + Some(max) => { + e.push(0x01); + self.min.encode(e); + max.encode(e); + } + None => { + e.push(0x00); + self.min.encode(e); + } + } + } +} + +impl Encode for MemoryType { + fn encode(&self, e: &mut Vec<u8>) { + match self { + MemoryType::B32 { limits, shared } => { + let flag_max = limits.max.is_some() as u8; + let flag_shared = *shared as u8; + let flags = flag_max | (flag_shared << 1); + e.push(flags); + limits.min.encode(e); + if let Some(max) = limits.max { + max.encode(e); + } + } + MemoryType::B64 { limits, shared } => { + let flag_max = limits.max.is_some() as u8; + let flag_shared = *shared as u8; + let flags = flag_max | (flag_shared << 1) | 0x04; + e.push(flags); + limits.min.encode(e); + if let Some(max) = limits.max { + max.encode(e); + } + } + } + } +} + +impl<'a> Encode for GlobalType<'a> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + if self.mutable { + e.push(0x01); + } else { + e.push(0x00); + } + } +} + +impl Encode for Table<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + match &self.kind { + TableKind::Normal { + ty, + init_expr: None, + } => ty.encode(e), + TableKind::Normal { + ty, + init_expr: Some(init_expr), + } => { + e.push(0x40); + e.push(0x00); + ty.encode(e); + init_expr.encode(e); + } + _ => panic!("TableKind should be normal during encoding"), + } + } +} + +impl Encode for Memory<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + match &self.kind { + MemoryKind::Normal(t) => t.encode(e), + _ => panic!("MemoryKind should be normal during encoding"), + } + } +} + +impl Encode for Global<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + self.ty.encode(e); + match &self.kind { + 
GlobalKind::Inline(expr) => expr.encode(e), + _ => panic!("GlobalKind should be inline during encoding"), + } + } +} + +impl Encode for Export<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.name.encode(e); + self.kind.encode(e); + self.item.encode(e); + } +} + +impl Encode for ExportKind { + fn encode(&self, e: &mut Vec<u8>) { + match self { + ExportKind::Func => e.push(0x00), + ExportKind::Table => e.push(0x01), + ExportKind::Memory => e.push(0x02), + ExportKind::Global => e.push(0x03), + ExportKind::Tag => e.push(0x04), + } + } +} + +impl Encode for Elem<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match (&self.kind, &self.payload) { + ( + ElemKind::Active { + table: Index::Num(0, _), + offset, + }, + ElemPayload::Indices(_), + ) => { + e.push(0x00); + offset.encode(e); + } + (ElemKind::Passive, ElemPayload::Indices(_)) => { + e.push(0x01); // flags + e.push(0x00); // extern_kind + } + (ElemKind::Active { table, offset }, ElemPayload::Indices(_)) => { + e.push(0x02); // flags + table.encode(e); + offset.encode(e); + e.push(0x00); // extern_kind + } + ( + ElemKind::Active { + table: Index::Num(0, _), + offset, + }, + ElemPayload::Exprs { + ty: + RefType { + nullable: true, + heap: HeapType::Func, + }, + .. + }, + ) => { + e.push(0x04); + offset.encode(e); + } + (ElemKind::Passive, ElemPayload::Exprs { ty, .. }) => { + e.push(0x05); + ty.encode(e); + } + (ElemKind::Active { table, offset }, ElemPayload::Exprs { ty, .. }) => { + e.push(0x06); + table.encode(e); + offset.encode(e); + ty.encode(e); + } + (ElemKind::Declared, ElemPayload::Indices(_)) => { + e.push(0x03); // flags + e.push(0x00); // extern_kind + } + (ElemKind::Declared, ElemPayload::Exprs { ty, .. 
}) => { + e.push(0x07); // flags + ty.encode(e); + } + } + + self.payload.encode(e); + } +} + +impl Encode for ElemPayload<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + ElemPayload::Indices(v) => v.encode(e), + ElemPayload::Exprs { exprs, ty: _ } => { + exprs.len().encode(e); + for expr in exprs { + expr.encode(e); + } + } + } + } +} + +impl Encode for Data<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.kind { + DataKind::Passive => e.push(0x01), + DataKind::Active { + memory: Index::Num(0, _), + offset, + } => { + e.push(0x00); + offset.encode(e); + } + DataKind::Active { memory, offset } => { + e.push(0x02); + memory.encode(e); + offset.encode(e); + } + } + self.data.iter().map(|l| l.len()).sum::<usize>().encode(e); + for val in self.data.iter() { + val.push_onto(e); + } + } +} + +impl Encode for Func<'_> { + fn encode(&self, e: &mut Vec<u8>) { + assert!(self.exports.names.is_empty()); + let mut tmp = Vec::new(); + let (expr, locals) = match &self.kind { + FuncKind::Inline { expression, locals } => (expression, locals), + _ => panic!("should only have inline functions in emission"), + }; + + locals.encode(&mut tmp); + expr.encode(&mut tmp); + + tmp.len().encode(e); + e.extend_from_slice(&tmp); + } +} + +impl Encode for Vec<Local<'_>> { + fn encode(&self, e: &mut Vec<u8>) { + let mut locals_compressed = Vec::<(u32, ValType)>::new(); + for local in self { + if let Some((cnt, prev)) = locals_compressed.last_mut() { + if *prev == local.ty { + *cnt += 1; + continue; + } + } + locals_compressed.push((1, local.ty)); + } + locals_compressed.encode(e); + } +} + +impl Encode for Expression<'_> { + fn encode(&self, e: &mut Vec<u8>) { + for instr in self.instrs.iter() { + instr.encode(e); + } + e.push(0x0b); + } +} + +impl Encode for BlockType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + // block types using an index are encoded as an sleb, not a uleb + if let Some(Index::Num(n, _)) = &self.ty.index { + return i64::from(*n).encode(e); + } + let ty 
= self + .ty + .inline + .as_ref() + .expect("function type not filled in"); + if ty.params.is_empty() && ty.results.is_empty() { + return e.push(0x40); + } + if ty.params.is_empty() && ty.results.len() == 1 { + return ty.results[0].encode(e); + } + panic!("multi-value block types should have an index"); + } +} + +impl Encode for FuncBindType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + } +} + +impl Encode for LetType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.block.encode(e); + self.locals.encode(e); + } +} + +impl Encode for LaneArg { + fn encode(&self, e: &mut Vec<u8>) { + self.lane.encode(e); + } +} + +impl Encode for MemArg<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match &self.memory { + Index::Num(0, _) => { + self.align.trailing_zeros().encode(e); + self.offset.encode(e); + } + _ => { + (self.align.trailing_zeros() | (1 << 6)).encode(e); + self.memory.encode(e); + self.offset.encode(e); + } + } + } +} + +impl Encode for LoadOrStoreLane<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.memarg.encode(e); + self.lane.encode(e); + } +} + +impl Encode for CallIndirect<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + self.table.encode(e); + } +} + +impl Encode for TableInit<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.elem.encode(e); + self.table.encode(e); + } +} + +impl Encode for TableCopy<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dst.encode(e); + self.src.encode(e); + } +} + +impl Encode for TableArg<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dst.encode(e); + } +} + +impl Encode for MemoryArg<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.mem.encode(e); + } +} + +impl Encode for MemoryInit<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.data.encode(e); + self.mem.encode(e); + } +} + +impl Encode for MemoryCopy<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dst.encode(e); + self.src.encode(e); + } +} + +impl Encode for BrTableIndices<'_> { + fn encode(&self, e: &mut Vec<u8>) 
{ + self.labels.encode(e); + self.default.encode(e); + } +} + +impl Encode for Float32 { + fn encode(&self, e: &mut Vec<u8>) { + e.extend_from_slice(&self.bits.to_le_bytes()); + } +} + +impl Encode for Float64 { + fn encode(&self, e: &mut Vec<u8>) { + e.extend_from_slice(&self.bits.to_le_bytes()); + } +} + +#[derive(Default)] +struct Names<'a> { + module: Option<&'a str>, + funcs: Vec<(u32, &'a str)>, + func_idx: u32, + locals: Vec<(u32, Vec<(u32, &'a str)>)>, + labels: Vec<(u32, Vec<(u32, &'a str)>)>, + globals: Vec<(u32, &'a str)>, + global_idx: u32, + memories: Vec<(u32, &'a str)>, + memory_idx: u32, + tables: Vec<(u32, &'a str)>, + table_idx: u32, + tags: Vec<(u32, &'a str)>, + tag_idx: u32, + types: Vec<(u32, &'a str)>, + type_idx: u32, + data: Vec<(u32, &'a str)>, + data_idx: u32, + elems: Vec<(u32, &'a str)>, + elem_idx: u32, +} + +fn find_names<'a>( + module_id: &Option<Id<'a>>, + module_name: &Option<NameAnnotation<'a>>, + fields: &[ModuleField<'a>], +) -> Names<'a> { + fn get_name<'a>(id: &Option<Id<'a>>, name: &Option<NameAnnotation<'a>>) -> Option<&'a str> { + name.as_ref().map(|n| n.name).or(id.and_then(|id| { + if id.is_gensym() { + None + } else { + Some(id.name()) + } + })) + } + + enum Name { + Type, + Global, + Func, + Memory, + Table, + Tag, + Elem, + Data, + } + + let mut ret = Names::default(); + ret.module = get_name(module_id, module_name); + let mut names = Vec::new(); + for field in fields { + // Extract the kind/id/name from whatever kind of field this is... 
+ let (kind, id, name) = match field { + ModuleField::Import(i) => ( + match i.item.kind { + ItemKind::Func(_) => Name::Func, + ItemKind::Table(_) => Name::Table, + ItemKind::Memory(_) => Name::Memory, + ItemKind::Global(_) => Name::Global, + ItemKind::Tag(_) => Name::Tag, + }, + &i.item.id, + &i.item.name, + ), + ModuleField::Global(g) => (Name::Global, &g.id, &g.name), + ModuleField::Table(t) => (Name::Table, &t.id, &t.name), + ModuleField::Memory(m) => (Name::Memory, &m.id, &m.name), + ModuleField::Tag(t) => (Name::Tag, &t.id, &t.name), + ModuleField::Type(t) => (Name::Type, &t.id, &t.name), + ModuleField::Rec(r) => { + for ty in &r.types { + names.push((Name::Type, &ty.id, &ty.name, field)); + } + continue; + } + ModuleField::Elem(e) => (Name::Elem, &e.id, &e.name), + ModuleField::Data(d) => (Name::Data, &d.id, &d.name), + ModuleField::Func(f) => (Name::Func, &f.id, &f.name), + ModuleField::Export(_) | ModuleField::Start(_) | ModuleField::Custom(_) => continue, + }; + names.push((kind, id, name, field)); + } + + for (kind, id, name, field) in names { + // .. and using the kind we can figure out where to place this name + let (list, idx) = match kind { + Name::Func => (&mut ret.funcs, &mut ret.func_idx), + Name::Table => (&mut ret.tables, &mut ret.table_idx), + Name::Memory => (&mut ret.memories, &mut ret.memory_idx), + Name::Global => (&mut ret.globals, &mut ret.global_idx), + Name::Tag => (&mut ret.tags, &mut ret.tag_idx), + Name::Type => (&mut ret.types, &mut ret.type_idx), + Name::Elem => (&mut ret.elems, &mut ret.elem_idx), + Name::Data => (&mut ret.data, &mut ret.data_idx), + }; + if let Some(name) = get_name(id, name) { + list.push((*idx, name)); + } + + // Handle module locals separately from above + if let ModuleField::Func(f) = field { + let mut local_names = Vec::new(); + let mut label_names = Vec::new(); + let mut local_idx = 0; + let mut label_idx = 0; + + // Consult the inline type listed for local names of parameters. 
+ // This is specifically preserved during the name resolution + // pass, but only for functions, so here we can look at the + // original source's names. + if let Some(ty) = &f.ty.inline { + for (id, name, _) in ty.params.iter() { + if let Some(name) = get_name(id, name) { + local_names.push((local_idx, name)); + } + local_idx += 1; + } + } + if let FuncKind::Inline { + locals, expression, .. + } = &f.kind + { + for local in locals { + if let Some(name) = get_name(&local.id, &local.name) { + local_names.push((local_idx, name)); + } + local_idx += 1; + } + + for i in expression.instrs.iter() { + match i { + Instruction::If(block) + | Instruction::Block(block) + | Instruction::Loop(block) + | Instruction::Try(block) + | Instruction::Let(LetType { block, .. }) => { + if let Some(name) = get_name(&block.label, &block.label_name) { + label_names.push((label_idx, name)); + } + label_idx += 1; + } + _ => {} + } + } + } + if local_names.len() > 0 { + ret.locals.push((*idx, local_names)); + } + if label_names.len() > 0 { + ret.labels.push((*idx, label_names)); + } + } + + *idx += 1; + } + + return ret; +} + +impl Names<'_> { + fn is_empty(&self) -> bool { + self.module.is_none() + && self.funcs.is_empty() + && self.locals.is_empty() + && self.labels.is_empty() + && self.globals.is_empty() + && self.memories.is_empty() + && self.tables.is_empty() + && self.types.is_empty() + && self.data.is_empty() + && self.elems.is_empty() + // NB: specifically don't check tags/modules/instances since they're + // not encoded for now. 
+ } +} + +impl Encode for Names<'_> { + fn encode(&self, dst: &mut Vec<u8>) { + let mut tmp = Vec::new(); + + let mut subsec = |id: u8, data: &mut Vec<u8>| { + dst.push(id); + data.encode(dst); + data.truncate(0); + }; + + if let Some(id) = self.module { + id.encode(&mut tmp); + subsec(0, &mut tmp); + } + if self.funcs.len() > 0 { + self.funcs.encode(&mut tmp); + subsec(1, &mut tmp); + } + if self.locals.len() > 0 { + self.locals.encode(&mut tmp); + subsec(2, &mut tmp); + } + if self.labels.len() > 0 { + self.labels.encode(&mut tmp); + subsec(3, &mut tmp); + } + if self.types.len() > 0 { + self.types.encode(&mut tmp); + subsec(4, &mut tmp); + } + if self.tables.len() > 0 { + self.tables.encode(&mut tmp); + subsec(5, &mut tmp); + } + if self.memories.len() > 0 { + self.memories.encode(&mut tmp); + subsec(6, &mut tmp); + } + if self.globals.len() > 0 { + self.globals.encode(&mut tmp); + subsec(7, &mut tmp); + } + if self.elems.len() > 0 { + self.elems.encode(&mut tmp); + subsec(8, &mut tmp); + } + if self.data.len() > 0 { + self.data.encode(&mut tmp); + subsec(9, &mut tmp); + } + } +} + +impl Encode for Id<'_> { + fn encode(&self, dst: &mut Vec<u8>) { + assert!(!self.is_gensym()); + self.name().encode(dst); + } +} + +impl Encode for V128Const { + fn encode(&self, dst: &mut Vec<u8>) { + dst.extend_from_slice(&self.to_le_bytes()); + } +} + +impl Encode for I8x16Shuffle { + fn encode(&self, dst: &mut Vec<u8>) { + dst.extend_from_slice(&self.lanes); + } +} + +impl<'a> Encode for SelectTypes<'a> { + fn encode(&self, dst: &mut Vec<u8>) { + match &self.tys { + Some(list) => { + dst.push(0x1c); + list.encode(dst); + } + None => dst.push(0x1b), + } + } +} + +impl Encode for Custom<'_> { + fn encode(&self, e: &mut Vec<u8>) { + for list in self.data.iter() { + e.extend_from_slice(list); + } + } +} + +impl Encode for Tag<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.ty.encode(e); + match &self.kind { + TagKind::Inline() => {} + _ => panic!("TagKind should be inline during 
encoding"), + } + } +} + +impl Encode for TagType<'_> { + fn encode(&self, e: &mut Vec<u8>) { + match self { + TagType::Exception(ty) => { + e.push(0x00); + ty.encode(e); + } + } + } +} + +impl Encode for StructAccess<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.r#struct.encode(e); + self.field.encode(e); + } +} + +impl Encode for ArrayFill<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.array.encode(e); + } +} + +impl Encode for ArrayCopy<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.dest_array.encode(e); + self.src_array.encode(e); + } +} + +impl Encode for ArrayInit<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.array.encode(e); + self.segment.encode(e); + } +} + +impl Encode for ArrayNewFixed<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.array.encode(e); + self.length.encode(e); + } +} + +impl Encode for ArrayNewData<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.array.encode(e); + self.data_idx.encode(e); + } +} + +impl Encode for ArrayNewElem<'_> { + fn encode(&self, e: &mut Vec<u8>) { + self.array.encode(e); + self.elem_idx.encode(e); + } +} + +impl Encode for RefTest<'_> { + fn encode(&self, e: &mut Vec<u8>) { + e.push(0xfb); + if self.r#type.nullable { + e.push(0x48); + } else { + e.push(0x40); + } + self.r#type.heap.encode(e); + } +} + +impl Encode for RefCast<'_> { + fn encode(&self, e: &mut Vec<u8>) { + e.push(0xfb); + if self.r#type.nullable { + e.push(0x49); + } else { + e.push(0x41); + } + self.r#type.heap.encode(e); + } +} + +fn br_on_cast_flags(on_fail: bool, from_nullable: bool, to_nullable: bool) -> u8 { + let mut flag = 0; + if from_nullable { + flag |= 1 << 0; + } + if to_nullable { + flag |= 1 << 1; + } + if on_fail { + flag |= 1 << 2; + } + flag +} + +impl Encode for BrOnCast<'_> { + fn encode(&self, e: &mut Vec<u8>) { + e.push(0xfb); + e.push(0x4f); + e.push(br_on_cast_flags(false, self.from_type.nullable, self.to_type.nullable)); + self.label.encode(e); + self.from_type.heap.encode(e); + 
self.to_type.heap.encode(e); + } +} + +impl Encode for BrOnCastFail<'_> { + fn encode(&self, e: &mut Vec<u8>) { + e.push(0xfb); + e.push(0x4f); + e.push(br_on_cast_flags(true, self.from_type.nullable, self.to_type.nullable)); + self.label.encode(e); + self.from_type.heap.encode(e); + self.to_type.heap.encode(e); + } +} diff --git a/third_party/rust/wast/src/core/custom.rs b/third_party/rust/wast/src/core/custom.rs new file mode 100644 index 0000000000..40c20b1cc7 --- /dev/null +++ b/third_party/rust/wast/src/core/custom.rs @@ -0,0 +1,151 @@ +use crate::parser::{Parse, Parser, Result}; +use crate::token::{self, Span}; +use crate::{annotation, kw}; + +/// A wasm custom section within a module. +#[derive(Debug)] +pub struct Custom<'a> { + /// Where this `@custom` was defined. + pub span: Span, + + /// Name of the custom section. + pub name: &'a str, + + /// Where the custom section is being placed, + pub place: CustomPlace, + + /// Payload of this custom section. + pub data: Vec<&'a [u8]>, +} + +/// Possible locations to place a custom section within a module. +#[derive(Debug, PartialEq, Copy, Clone)] +pub enum CustomPlace { + /// This custom section will appear before the first section in the module. + BeforeFirst, + /// This custom section will be placed just before a known section. + Before(CustomPlaceAnchor), + /// This custom section will be placed just after a known section. + After(CustomPlaceAnchor), + /// This custom section will appear after the last section in the module. + AfterLast, +} + +/// Known sections that custom sections can be placed relative to. 
+#[derive(Debug, PartialEq, Eq, Copy, Clone)] +#[allow(missing_docs)] +pub enum CustomPlaceAnchor { + Type, + Import, + Func, + Table, + Memory, + Global, + Export, + Start, + Elem, + Code, + Data, + Tag, +} + +impl<'a> Parse<'a> for Custom<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<annotation::custom>()?.0; + let name = parser.parse()?; + let place = if parser.peek::<token::LParen>() { + parser.parens(|p| p.parse())? + } else { + CustomPlace::AfterLast + }; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + Ok(Custom { + span, + name, + place, + data, + }) + } +} + +impl<'a> Parse<'a> for CustomPlace { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + let ctor = if l.peek::<kw::before>() { + parser.parse::<kw::before>()?; + if l.peek::<kw::first>() { + parser.parse::<kw::first>()?; + return Ok(CustomPlace::BeforeFirst); + } + CustomPlace::Before as fn(CustomPlaceAnchor) -> _ + } else if l.peek::<kw::after>() { + parser.parse::<kw::after>()?; + if l.peek::<kw::last>() { + parser.parse::<kw::last>()?; + return Ok(CustomPlace::AfterLast); + } + CustomPlace::After + } else { + return Err(l.error()); + }; + Ok(ctor(parser.parse()?)) + } +} + +impl<'a> Parse<'a> for CustomPlaceAnchor { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::r#type>() { + parser.parse::<kw::r#type>()?; + return Ok(CustomPlaceAnchor::Type); + } + if parser.peek::<kw::import>() { + parser.parse::<kw::import>()?; + return Ok(CustomPlaceAnchor::Import); + } + if parser.peek::<kw::func>() { + parser.parse::<kw::func>()?; + return Ok(CustomPlaceAnchor::Func); + } + if parser.peek::<kw::table>() { + parser.parse::<kw::table>()?; + return Ok(CustomPlaceAnchor::Table); + } + if parser.peek::<kw::memory>() { + parser.parse::<kw::memory>()?; + return Ok(CustomPlaceAnchor::Memory); + } + if parser.peek::<kw::global>() { + parser.parse::<kw::global>()?; + return 
Ok(CustomPlaceAnchor::Global); + } + if parser.peek::<kw::export>() { + parser.parse::<kw::export>()?; + return Ok(CustomPlaceAnchor::Export); + } + if parser.peek::<kw::start>() { + parser.parse::<kw::start>()?; + return Ok(CustomPlaceAnchor::Start); + } + if parser.peek::<kw::elem>() { + parser.parse::<kw::elem>()?; + return Ok(CustomPlaceAnchor::Elem); + } + if parser.peek::<kw::code>() { + parser.parse::<kw::code>()?; + return Ok(CustomPlaceAnchor::Code); + } + if parser.peek::<kw::data>() { + parser.parse::<kw::data>()?; + return Ok(CustomPlaceAnchor::Data); + } + if parser.peek::<kw::tag>() { + parser.parse::<kw::tag>()?; + return Ok(CustomPlaceAnchor::Tag); + } + + Err(parser.error("expected a valid section name")) + } +} diff --git a/third_party/rust/wast/src/core/export.rs b/third_party/rust/wast/src/core/export.rs new file mode 100644 index 0000000000..66354d0546 --- /dev/null +++ b/third_party/rust/wast/src/core/export.rs @@ -0,0 +1,146 @@ +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::{Index, Span}; + +/// A entry in a WebAssembly module's export section. +#[derive(Debug)] +pub struct Export<'a> { + /// Where this export was defined. + pub span: Span, + /// The name of this export from the module. + pub name: &'a str, + /// The kind of item being exported. + pub kind: ExportKind, + /// What's being exported from the module. + pub item: Index<'a>, +} + +/// Different kinds of elements that can be exported from a WebAssembly module, +/// contained in an [`Export`]. 
+#[derive(Debug, Clone, Copy, Hash, Eq, PartialEq)] +#[allow(missing_docs)] +pub enum ExportKind { + Func, + Table, + Memory, + Global, + Tag, +} + +impl<'a> Parse<'a> for Export<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::export>()?.0; + let name = parser.parse()?; + let (kind, item) = parser.parens(|p| Ok((p.parse()?, p.parse()?)))?; + Ok(Export { + span, + name, + kind, + item, + }) + } +} + +impl<'a> Parse<'a> for ExportKind { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(ExportKind::Func) + } else if l.peek::<kw::table>() { + parser.parse::<kw::table>()?; + Ok(ExportKind::Table) + } else if l.peek::<kw::memory>() { + parser.parse::<kw::memory>()?; + Ok(ExportKind::Memory) + } else if l.peek::<kw::global>() { + parser.parse::<kw::global>()?; + Ok(ExportKind::Global) + } else if l.peek::<kw::tag>() { + parser.parse::<kw::tag>()?; + Ok(ExportKind::Tag) + } else { + Err(l.error()) + } + } +} + +impl Peek for ExportKind { + fn peek(cursor: Cursor<'_>) -> bool { + kw::func::peek(cursor) + || kw::table::peek(cursor) + || kw::memory::peek(cursor) + || kw::global::peek(cursor) + || kw::tag::peek(cursor) + } + fn display() -> &'static str { + "export kind" + } +} + +macro_rules! kw_conversions { + ($($kw:ident => $kind:ident)*) => ($( + impl From<kw::$kw> for ExportKind { + fn from(_: kw::$kw) -> ExportKind { + ExportKind::$kind + } + } + + impl Default for kw::$kw { + fn default() -> kw::$kw { + kw::$kw(Span::from_offset(0)) + } + } + )*); +} + +kw_conversions! { + func => Func + table => Table + global => Global + tag => Tag + memory => Memory +} + +/// A listing of inline `(export "foo")` statements on a WebAssembly item in +/// its textual format. +#[derive(Debug, Default)] +pub struct InlineExport<'a> { + /// The extra names to export an item as, if any. 
+ pub names: Vec<&'a str>, +} + +impl<'a> Parse<'a> for InlineExport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut names = Vec::new(); + while parser.peek::<Self>() { + names.push(parser.parens(|p| { + p.parse::<kw::export>()?; + p.parse::<&str>() + })?); + } + Ok(InlineExport { names }) + } +} + +impl Peek for InlineExport<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let cursor = match cursor.lparen() { + Some(cursor) => cursor, + None => return false, + }; + let cursor = match cursor.keyword() { + Some(("export", cursor)) => cursor, + _ => return false, + }; + let cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => return false, + }; + cursor.rparen().is_some() + } + + fn display() -> &'static str { + "inline export" + } +} diff --git a/third_party/rust/wast/src/core/expr.rs b/third_party/rust/wast/src/core/expr.rs new file mode 100644 index 0000000000..14d309fc0d --- /dev/null +++ b/third_party/rust/wast/src/core/expr.rs @@ -0,0 +1,1960 @@ +use crate::core::*; +use crate::encode::Encode; +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Result}; +use crate::token::*; +use std::mem; + +/// An expression, or a list of instructions, in the WebAssembly text format. +/// +/// This expression type will parse s-expression-folded instructions into a flat +/// list of instructions for emission later on. The implicit `end` instruction +/// at the end of an expression is not included in the `instrs` field. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct Expression<'a> { + pub instrs: Box<[Instruction<'a>]>, +} + +impl<'a> Parse<'a> for Expression<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut exprs = ExpressionParser::default(); + exprs.parse(parser)?; + Ok(Expression { + instrs: exprs.instrs.into(), + }) + } +} + +/// Helper struct used to parse an `Expression` with helper methods and such. 
///
/// The primary purpose of this is to avoid defining expression parsing as a
/// call-stack-recursive function. We're parsing user input, which runs the
/// risk of blowing the call stack, so we want to be sure to use a heap-based
/// stack structure wherever possible.
#[derive(Default)]
struct ExpressionParser<'a> {
    /// The flat list of instructions that we've parsed so far, and will
    /// eventually become the final `Expression`.
    instrs: Vec<Instruction<'a>>,

    /// Descriptor of all our nested s-expr blocks. This only happens when
    /// instructions themselves are nested.
    stack: Vec<Level<'a>>,
}

/// Outcome of `ExpressionParser::paren`: which parenthesis token, if any, was
/// consumed from the input.
enum Paren {
    /// Neither `(` nor `)` was consumed.
    None,
    /// A `(` was consumed.
    Left,
    /// A `)` was consumed.
    Right,
}

/// A "kind" of nested block that we can be parsing inside of.
enum Level<'a> {
    /// This is a normal `block` or `loop` or similar, where the instruction
    /// payload here is pushed when the block is exited.
    EndWith(Instruction<'a>),

    /// This is a pretty special variant which means that we're parsing an `if`
    /// statement, and the state of the `if` parsing is tracked internally in
    /// the payload.
    If(If<'a>),

    /// This means we're either parsing inside of `(then ...)` or `(else ...)`
    /// which don't correspond to terminating instructions, we're just in a
    /// nested block.
    IfArm,

    /// Similar to `If` but for `Try` statements, which has simpler parsing
    /// state to track.
    Try(Try<'a>),

    /// Similar to `IfArm` but for `(do ...)` and `(catch ...)` blocks.
    TryArm,
}

/// Possible states of "what should be parsed next?" in an `if` expression.
enum If<'a> {
    /// Only the `if` has been parsed, next thing to parse is the clause, if
    /// any, of the `if` instruction.
    Clause(Instruction<'a>),
    /// Next thing to parse is the `then` block
    Then(Instruction<'a>),
    /// Next thing to parse is the `else` block
    Else,
    /// This `if` statement has finished parsing and if anything remains it's a
    /// syntax error.
    End,
}

/// Possible state of "what should be parsed next?" in a `try` expression.
enum Try<'a> {
    /// Next thing to parse is the `do` block.
    Do(Instruction<'a>),
    /// Next thing to parse is `catch`/`catch_all`, or `delegate`.
    CatchOrDelegate,
    /// Next thing to parse is a `catch` block or `catch_all`.
    Catch,
    /// Finished parsing like the `End` case, but does not push `end` opcode.
    Delegate,
    /// This `try` statement has finished parsing and if anything remains it's a
    /// syntax error.
    End,
}

impl<'a> ExpressionParser<'a> {
    /// Parses instructions from `parser` into `self.instrs`, flattening any
    /// folded s-expression forms along the way.
    ///
    /// Nesting is tracked on `self.stack` rather than through recursion.
    /// Returns an error for malformed `if`/`try` forms or when an instruction
    /// fails to parse.
    fn parse(&mut self, parser: Parser<'a>) -> Result<()> {
        // Here we parse instructions in a loop, and we do not recursively
        // invoke this parse function to avoid blowing the stack on
        // deeply-recursive parses.
        //
        // Our loop generally only finishes once there's no more input left in
        // the `parser`. If there's some unclosed delimiters though (on our
        // `stack`), then we also keep parsing to generate error messages if
        // there's no input left.
        while !parser.is_empty() || !self.stack.is_empty() {
            // As a small ease-of-life adjustment here, if we're parsing
            // directly inside of an `if` or `try` block then we require that
            // all sub-components are s-expressions surrounded by `(` and `)`,
            // so verify that here.
            if let Some(Level::If(_)) | Some(Level::Try(_)) = self.stack.last() {
                if !parser.is_empty() && !parser.peek::<LParen>() {
                    return Err(parser.error("expected `(`"));
                }
            }

            match self.paren(parser)? {
                // No parenthesis seen? Then we just parse the next instruction
                // and move on.
                Paren::None => self.instrs.push(parser.parse()?),

                // If we see a left-parenthesis then things are a little
                // special. We handle block-like instructions specially
                // (`block`, `loop`, and `if`), and otherwise all other
                // instructions simply get appended once we reach the end of the
                // s-expression.
                //
                // In all cases here we push something onto the `stack` to get
                // popped when the `)` character is seen.
                Paren::Left => {
                    // First up is handling `if` parsing, which is funky in a
                    // whole bunch of ways. See the method internally for more
                    // information.
                    if self.handle_if_lparen(parser)? {
                        continue;
                    }
                    // Second, we handle `try` parsing, which is simpler than
                    // `if` but more complicated than, e.g., `block`.
                    if self.handle_try_lparen(parser)? {
                        continue;
                    }
                    match parser.parse()? {
                        // If block/loop show up then we just need to be sure to
                        // push an `end` instruction whenever the `)` token is
                        // seen
                        i @ Instruction::Block(_)
                        | i @ Instruction::Loop(_)
                        | i @ Instruction::Let(_) => {
                            self.instrs.push(i);
                            self.stack.push(Level::EndWith(Instruction::End(None)));
                        }

                        // Parsing an `if` instruction is super tricky, so we
                        // push an `If` scope and we let all our scope-based
                        // parsing handle the remaining items.
                        i @ Instruction::If(_) => {
                            self.stack.push(Level::If(If::Clause(i)));
                        }

                        // Parsing a `try` is easier than `if` but we also push
                        // a `Try` scope to handle the required nested blocks.
                        i @ Instruction::Try(_) => {
                            self.stack.push(Level::Try(Try::Do(i)));
                        }

                        // Anything else means that we're parsing a nested form
                        // such as `(i32.add ...)` which means that the
                        // instruction we parsed will be coming at the end.
                        other => self.stack.push(Level::EndWith(other)),
                    }
                }

                // If we registered a `)` token as being seen, then we're
                // guaranteed there's an item in the `stack` stack for us to
                // pop (`paren` never reports `Paren::Right` while the stack is
                // empty). We peel that off and take a look at what it says to
                // do.
                Paren::Right => match self.stack.pop().unwrap() {
                    Level::EndWith(i) => self.instrs.push(i),
                    Level::IfArm => {}
                    Level::TryArm => {}

                    // If an `if` statement hasn't parsed the clause or `then`
                    // block, then that's an error because there weren't enough
                    // items in the `if` statement. Otherwise we're just careful
                    // to terminate with an `end` instruction.
                    Level::If(If::Clause(_)) => {
                        return Err(parser.error("previous `if` had no clause"));
                    }
                    Level::If(If::Then(_)) => {
                        return Err(parser.error("previous `if` had no `then`"));
                    }
                    Level::If(_) => {
                        self.instrs.push(Instruction::End(None));
                    }

                    // The `do` clause is required in a `try` statement, so
                    // we will signal that error here. Otherwise, terminate with
                    // an `end` or `delegate` instruction.
                    Level::Try(Try::Do(_)) => {
                        return Err(parser.error("previous `try` had no `do`"));
                    }
                    Level::Try(Try::Delegate) => {}
                    Level::Try(_) => {
                        self.instrs.push(Instruction::End(None));
                    }
                },
            }
        }

        Ok(())
    }

    /// Parses either `(`, `)`, or nothing.
    ///
    /// A `)` is only consumed while `self.stack` is non-empty; with an empty
    /// stack anything other than `(` yields `Paren::None` and consumes no
    /// input.
    fn paren(&self, parser: Parser<'a>) -> Result<Paren> {
        parser.step(|cursor| {
            Ok(match cursor.lparen() {
                Some(rest) => (Paren::Left, rest),
                None if self.stack.is_empty() => (Paren::None, cursor),
                None => match cursor.rparen() {
                    Some(rest) => (Paren::Right, rest),
                    None => (Paren::None, cursor),
                },
            })
        })
    }

    /// Handles all parsing of an `if` statement.
    ///
    /// The syntactical form of an `if` statement looks like:
    ///
    /// ```wat
    /// (if $clause (then $then) (else $else))
    /// ```
    ///
    /// but it turns out we practically see a few things in the wild:
    ///
    /// * inside the `(if ...)` every sub-thing is surrounded by parens
    /// * The `then` and `else` keywords are optional
    /// * The `$then` and `$else` blocks don't need to be surrounded by parens
    ///
    /// That's all attempted to be handled here. The part about all sub-parts
    /// being surrounded by `(` and `)` means that we hook into the `LParen`
    /// parsing above to call this method there unconditionally.
    ///
    /// Returns `true` if the rest of the arm above should be skipped, or
    /// `false` if we should parse the next item as an instruction (because we
    /// didn't handle the lparen here).
+ fn handle_if_lparen(&mut self, parser: Parser<'a>) -> Result<bool> { + // Only execute the code below if there's an `If` listed last. + let i = match self.stack.last_mut() { + Some(Level::If(i)) => i, + _ => return Ok(false), + }; + + // The first thing parsed in an `if` statement is the clause. If the + // clause starts with `then`, however, then we know to skip the clause + // and fall through to below. + if let If::Clause(if_instr) = i { + let instr = mem::replace(if_instr, Instruction::End(None)); + *i = If::Then(instr); + if !parser.peek::<kw::then>() { + return Ok(false); + } + } + + // All `if` statements are required to have a `then`. This is either the + // second s-expr (with or without a leading `then`) or the first s-expr + // with a leading `then`. The optionality of `then` isn't strictly what + // the text spec says but it matches wabt for now. + // + // Note that when we see the `then`, that's when we actually add the + // original `if` instruction to the stream. + if let If::Then(if_instr) = i { + let instr = mem::replace(if_instr, Instruction::End(None)); + self.instrs.push(instr); + *i = If::Else; + if parser.parse::<Option<kw::then>>()?.is_some() { + self.stack.push(Level::IfArm); + return Ok(true); + } + return Ok(false); + } + + // effectively the same as the `then` parsing above + if let If::Else = i { + self.instrs.push(Instruction::Else(None)); + if parser.parse::<Option<kw::r#else>>()?.is_some() { + if parser.is_empty() { + self.instrs.pop(); + } + self.stack.push(Level::IfArm); + return Ok(true); + } + *i = If::End; + return Ok(false); + } + + // If we made it this far then we're at `If::End` which means that there + // were too many s-expressions inside the `(if)` and we don't want to + // parse anything else. + Err(parser.error("unexpected token: too many payloads inside of `(if)`")) + } + + /// Handles parsing of a `try` statement. 
    /// A `try` statement is simpler than an `if` as the syntactic form is:
    ///
    /// ```wat
    /// (try (do $do) (catch $tag $catch))
    /// ```
    ///
    /// where the `do` and `catch` keywords are mandatory, even for an empty
    /// $do or $catch.
    ///
    /// Returns `true` if the rest of the arm above should be skipped, or
    /// `false` if we should parse the next item as an instruction (because we
    /// didn't handle the lparen here).
    fn handle_try_lparen(&mut self, parser: Parser<'a>) -> Result<bool> {
        // Only execute the code below if there's a `Try` listed last.
        let i = match self.stack.last_mut() {
            Some(Level::Try(i)) => i,
            _ => return Ok(false),
        };

        // Try statements must start with a `do` block.
        if let Try::Do(try_instr) = i {
            // `Instruction::End(None)` is only a placeholder so the original
            // `try` instruction can be moved out and emitted.
            let instr = mem::replace(try_instr, Instruction::End(None));
            self.instrs.push(instr);
            if parser.parse::<Option<kw::r#do>>()?.is_some() {
                // The state is advanced here only if the parse succeeds in
                // order to strictly require the keyword.
                *i = Try::CatchOrDelegate;
                self.stack.push(Level::TryArm);
                return Ok(true);
            }
            // We return here and continue parsing instead of raising an error
            // immediately because the missing keyword will be caught more
            // generally in the `Paren::Right` case in `parse`.
            return Ok(false);
        }

        // After a try's `do`, there are several possible kinds of handlers.
        if let Try::CatchOrDelegate = i {
            // `catch` may be followed by more `catch`s or `catch_all`.
            if parser.parse::<Option<kw::catch>>()?.is_some() {
                let evt = parser.parse::<Index<'a>>()?;
                self.instrs.push(Instruction::Catch(evt));
                *i = Try::Catch;
                self.stack.push(Level::TryArm);
                return Ok(true);
            }
            // `catch_all` can only come at the end and has no argument.
            if parser.parse::<Option<kw::catch_all>>()?.is_some() {
                self.instrs.push(Instruction::CatchAll);
                *i = Try::End;
                self.stack.push(Level::TryArm);
                return Ok(true);
            }
            // `delegate` has an index, and also ends the block like `end`.
            if parser.parse::<Option<kw::delegate>>()?.is_some() {
                let depth = parser.parse::<Index<'a>>()?;
                self.instrs.push(Instruction::Delegate(depth));
                *i = Try::Delegate;
                // No `Level` is pushed for the `(delegate ...)` form, so its
                // closing `)` is consumed right here instead of going through
                // the `Paren::Right` pop in `parse`. Anything other than `)`
                // is handed back to the caller, which then tries to parse an
                // instruction.
                match self.paren(parser)? {
                    Paren::Left | Paren::None => return Ok(false),
                    Paren::Right => return Ok(true),
                }
            }
            return Err(parser.error("expected a `catch`, `catch_all`, or `delegate`"));
        }

        // Once at least one `catch` has been seen only further `catch` arms or
        // a final `catch_all` are accepted; `delegate` is no longer an option.
        if let Try::Catch = i {
            if parser.parse::<Option<kw::catch>>()?.is_some() {
                let evt = parser.parse::<Index<'a>>()?;
                self.instrs.push(Instruction::Catch(evt));
                *i = Try::Catch;
                self.stack.push(Level::TryArm);
                return Ok(true);
            }
            if parser.parse::<Option<kw::catch_all>>()?.is_some() {
                self.instrs.push(Instruction::CatchAll);
                *i = Try::End;
                self.stack.push(Level::TryArm);
                return Ok(true);
            }
            return Err(parser.error("unexpected items after `catch`"));
        }

        // Remaining states are `Try::Delegate` and `Try::End`: the `try` is
        // complete, so another s-expression inside it is an error.
        Err(parser.error("unexpected token: too many payloads inside of `(try)`"))
    }
}

// Generates the `Instruction` enum and its companion impls from one table.
//
// Each table row has the shape
//
//     Name(Payload)? : [opcode bytes] : "mnemonic" | "deprecated mnemonic"?,
//
// and from the full table this macro emits:
//
//   * `pub enum Instruction<'a>` with one variant per row;
//   * `Parse for Instruction`, which reads one keyword, selects the matching
//     per-variant parse function, and then runs it to parse the payload;
//   * `Encode for Instruction`, which writes the row's opcode bytes followed
//     by the encoded payload, if any;
//   * `Instruction::memarg_mut`, which exposes the `MemArg` of rows whose
//     payload is `MemArg<N>` or `LoadOrStoreLane<N>`.
//
// The `@`-prefixed arms after the main arm are internal helpers invoked by the
// main arm; arms are tried in order, so the specific ones must stay ahead of
// their catch-all siblings.
macro_rules! instructions {
    (pub enum Instruction<'a> {
        $(
            $(#[$doc:meta])*
            $name:ident $(($($arg:tt)*))? : [$($binary:tt)*] : $instr:tt $( | $deprecated:tt )?,
        )*
    }) => (
        /// A listing of all WebAssembly instructions that can be in a module
        /// that this crate currently parses.
        #[derive(Debug)]
        #[allow(missing_docs)]
        pub enum Instruction<'a> {
            $(
                $(#[$doc])*
                $name $(( instructions!(@ty $($arg)*) ))?,
            )*
        }

        #[allow(non_snake_case)]
        impl<'a> Parse<'a> for Instruction<'a> {
            fn parse(parser: Parser<'a>) -> Result<Self> {
                // One nested function per variant, named after the variant
                // (hence `non_snake_case`), that parses just the payload.
                $(
                    fn $name<'a>(_parser: Parser<'a>) -> Result<Instruction<'a>> {
                        Ok(Instruction::$name $((
                            instructions!(@parse _parser $($arg)*)?
                        ))?)
                    }
                )*
                // Consume the mnemonic keyword and map it (or its deprecated
                // spelling) to the payload parser selected above.
                let parse_remainder = parser.step(|c| {
                    let (kw, rest) = match c.keyword() {
                        Some(pair) => pair,
                        None => return Err(c.error("expected an instruction")),
                    };
                    match kw {
                        $($instr $( | $deprecated )?=> Ok(($name as fn(_) -> _, rest)),)*
                        _ => return Err(c.error("unknown operator or unexpected token")),
                    }
                })?;
                parse_remainder(parser)
            }
        }

        impl Encode for Instruction<'_> {
            #[allow(non_snake_case)]
            fn encode(&self, v: &mut Vec<u8>) {
                match self {
                    $(
                        // `@first x ...` expands to the binding `x` only for
                        // rows that have a payload.
                        Instruction::$name $((instructions!(@first x $($arg)*)))? => {
                            // Opcode bytes first, then the payload (if any).
                            fn encode<'a>($(arg: &instructions!(@ty $($arg)*),)? v: &mut Vec<u8>) {
                                instructions!(@encode v $($binary)*);
                                $(<instructions!(@ty $($arg)*) as Encode>::encode(arg, v);)?
                            }
                            encode($( instructions!(@first x $($arg)*), )? v)
                        }
                    )*
                }
            }
        }

        impl<'a> Instruction<'a> {
            /// Returns the associated [`MemArg`] if one is available for this
            /// instruction.
            #[allow(unused_variables, non_snake_case)]
            pub fn memarg_mut(&mut self) -> Option<&mut MemArg<'a>> {
                match self {
                    $(
                        Instruction::$name $((instructions!(@memarg_binding a $($arg)*)))? => {
                            instructions!(@get_memarg a $($($arg)*)?)
                        }
                    )*
                }
            }
        }
    );

    // `@ty`: payload type as stored in the enum. The `<N>` written in the
    // table for `MemArg`/`LoadOrStoreLane` is not part of the stored type; it
    // is only forwarded to the payload parser by `@parse` below.
    (@ty MemArg<$amt:tt>) => (MemArg<'a>);
    (@ty LoadOrStoreLane<$amt:tt>) => (LoadOrStoreLane<'a>);
    (@ty $other:ty) => ($other);

    // `@first`: yields the leading identifier and discards the rest.
    (@first $first:ident $($t:tt)*) => ($first);

    // `@parse`: expression that parses a payload. `MemArg`/`LoadOrStoreLane`
    // receive the table's `<N>` as a second argument and must not be written
    // without it.
    // NOTE(review): the compile_error text calls `N` a "default" -- presumably
    // the default alignment; confirm against `MemArg::parse`.
    (@parse $parser:ident MemArg<$amt:tt>) => (MemArg::parse($parser, $amt));
    (@parse $parser:ident MemArg) => (compile_error!("must specify `MemArg` default"));
    (@parse $parser:ident LoadOrStoreLane<$amt:tt>) => (LoadOrStoreLane::parse($parser, $amt));
    (@parse $parser:ident LoadOrStoreLane) => (compile_error!("must specify `LoadOrStoreLane` default"));
    (@parse $parser:ident $other:ty) => ($parser.parse::<$other>());

    // `@encode`: writes the opcode bytes.
    // simd opcodes prefixed with `0xfd` get a varuint32 encoding for their payload
    (@encode $dst:ident 0xfd, $simd:tt) => ({
        $dst.push(0xfd);
        <u32 as Encode>::encode(&$simd, $dst);
    });
    // Every other opcode is emitted as the literal byte sequence from the table.
    (@encode $dst:ident $($bytes:tt)*) => ($dst.extend_from_slice(&[$($bytes)*]););

    // `@get_memarg`: body of a `memarg_mut` match arm.
    (@get_memarg $name:ident MemArg<$amt:tt>) => (Some($name));
    (@get_memarg $name:ident LoadOrStoreLane<$amt:tt>) => (Some(&mut $name.memarg));
    (@get_memarg $($other:tt)*) => (None);

    // `@memarg_binding`: pattern of a `memarg_mut` match arm; the payload is
    // bound only when it carries a `MemArg`, otherwise it is ignored with `_`.
    (@memarg_binding $name:ident MemArg<$amt:tt>) => ($name);
    (@memarg_binding $name:ident LoadOrStoreLane<$amt:tt>) => ($name);
    (@memarg_binding $name:ident $other:ty) => (_);
}

instructions!
{ + pub enum Instruction<'a> { + Block(BlockType<'a>) : [0x02] : "block", + If(BlockType<'a>) : [0x04] : "if", + Else(Option<Id<'a>>) : [0x05] : "else", + Loop(BlockType<'a>) : [0x03] : "loop", + End(Option<Id<'a>>) : [0x0b] : "end", + + Unreachable : [0x00] : "unreachable", + Nop : [0x01] : "nop", + Br(Index<'a>) : [0x0c] : "br", + BrIf(Index<'a>) : [0x0d] : "br_if", + BrTable(BrTableIndices<'a>) : [0x0e] : "br_table", + Return : [0x0f] : "return", + Call(Index<'a>) : [0x10] : "call", + CallIndirect(CallIndirect<'a>) : [0x11] : "call_indirect", + + // tail-call proposal + ReturnCall(Index<'a>) : [0x12] : "return_call", + ReturnCallIndirect(CallIndirect<'a>) : [0x13] : "return_call_indirect", + + // function-references proposal + CallRef(HeapType<'a>) : [0x14] : "call_ref", + ReturnCallRef(HeapType<'a>) : [0x15] : "return_call_ref", + FuncBind(FuncBindType<'a>) : [0x16] : "func.bind", + Let(LetType<'a>) : [0x17] : "let", + + Drop : [0x1a] : "drop", + Select(SelectTypes<'a>) : [] : "select", + LocalGet(Index<'a>) : [0x20] : "local.get" | "get_local", + LocalSet(Index<'a>) : [0x21] : "local.set" | "set_local", + LocalTee(Index<'a>) : [0x22] : "local.tee" | "tee_local", + GlobalGet(Index<'a>) : [0x23] : "global.get" | "get_global", + GlobalSet(Index<'a>) : [0x24] : "global.set" | "set_global", + + TableGet(TableArg<'a>) : [0x25] : "table.get", + TableSet(TableArg<'a>) : [0x26] : "table.set", + + I32Load(MemArg<4>) : [0x28] : "i32.load", + I64Load(MemArg<8>) : [0x29] : "i64.load", + F32Load(MemArg<4>) : [0x2a] : "f32.load", + F64Load(MemArg<8>) : [0x2b] : "f64.load", + I32Load8s(MemArg<1>) : [0x2c] : "i32.load8_s", + I32Load8u(MemArg<1>) : [0x2d] : "i32.load8_u", + I32Load16s(MemArg<2>) : [0x2e] : "i32.load16_s", + I32Load16u(MemArg<2>) : [0x2f] : "i32.load16_u", + I64Load8s(MemArg<1>) : [0x30] : "i64.load8_s", + I64Load8u(MemArg<1>) : [0x31] : "i64.load8_u", + I64Load16s(MemArg<2>) : [0x32] : "i64.load16_s", + I64Load16u(MemArg<2>) : [0x33] : "i64.load16_u", + 
I64Load32s(MemArg<4>) : [0x34] : "i64.load32_s", + I64Load32u(MemArg<4>) : [0x35] : "i64.load32_u", + I32Store(MemArg<4>) : [0x36] : "i32.store", + I64Store(MemArg<8>) : [0x37] : "i64.store", + F32Store(MemArg<4>) : [0x38] : "f32.store", + F64Store(MemArg<8>) : [0x39] : "f64.store", + I32Store8(MemArg<1>) : [0x3a] : "i32.store8", + I32Store16(MemArg<2>) : [0x3b] : "i32.store16", + I64Store8(MemArg<1>) : [0x3c] : "i64.store8", + I64Store16(MemArg<2>) : [0x3d] : "i64.store16", + I64Store32(MemArg<4>) : [0x3e] : "i64.store32", + + // Lots of bulk memory proposal here as well + MemorySize(MemoryArg<'a>) : [0x3f] : "memory.size" | "current_memory", + MemoryGrow(MemoryArg<'a>) : [0x40] : "memory.grow" | "grow_memory", + MemoryInit(MemoryInit<'a>) : [0xfc, 0x08] : "memory.init", + MemoryCopy(MemoryCopy<'a>) : [0xfc, 0x0a] : "memory.copy", + MemoryFill(MemoryArg<'a>) : [0xfc, 0x0b] : "memory.fill", + MemoryDiscard(MemoryArg<'a>) : [0xfc, 0x12] : "memory.discard", + DataDrop(Index<'a>) : [0xfc, 0x09] : "data.drop", + ElemDrop(Index<'a>) : [0xfc, 0x0d] : "elem.drop", + TableInit(TableInit<'a>) : [0xfc, 0x0c] : "table.init", + TableCopy(TableCopy<'a>) : [0xfc, 0x0e] : "table.copy", + TableFill(TableArg<'a>) : [0xfc, 0x11] : "table.fill", + TableSize(TableArg<'a>) : [0xfc, 0x10] : "table.size", + TableGrow(TableArg<'a>) : [0xfc, 0x0f] : "table.grow", + + RefNull(HeapType<'a>) : [0xd0] : "ref.null", + RefIsNull : [0xd1] : "ref.is_null", + RefFunc(Index<'a>) : [0xd2] : "ref.func", + + // function-references proposal + RefAsNonNull : [0xd3] : "ref.as_non_null", + BrOnNull(Index<'a>) : [0xd4] : "br_on_null", + BrOnNonNull(Index<'a>) : [0xd6] : "br_on_non_null", + + // gc proposal: eqref + RefEq : [0xd5] : "ref.eq", + + // gc proposal: struct + StructNew(Index<'a>) : [0xfb, 0x07] : "struct.new", + StructNewDefault(Index<'a>) : [0xfb, 0x08] : "struct.new_default", + StructGet(StructAccess<'a>) : [0xfb, 0x03] : "struct.get", + StructGetS(StructAccess<'a>) : [0xfb, 0x04] : 
"struct.get_s", + StructGetU(StructAccess<'a>) : [0xfb, 0x05] : "struct.get_u", + StructSet(StructAccess<'a>) : [0xfb, 0x06] : "struct.set", + + // gc proposal: array + ArrayNew(Index<'a>) : [0xfb, 0x1b] : "array.new", + ArrayNewDefault(Index<'a>) : [0xfb, 0x1c] : "array.new_default", + ArrayNewFixed(ArrayNewFixed<'a>) : [0xfb, 0x1a] : "array.new_fixed", + ArrayNewData(ArrayNewData<'a>) : [0xfb, 0x1d] : "array.new_data", + ArrayNewElem(ArrayNewElem<'a>) : [0xfb, 0x1f] : "array.new_elem", + ArrayGet(Index<'a>) : [0xfb, 0x13] : "array.get", + ArrayGetS(Index<'a>) : [0xfb, 0x14] : "array.get_s", + ArrayGetU(Index<'a>) : [0xfb, 0x15] : "array.get_u", + ArraySet(Index<'a>) : [0xfb, 0x16] : "array.set", + ArrayLen : [0xfb, 0x19] : "array.len", + ArrayFill(ArrayFill<'a>) : [0xfb, 0x0f] : "array.fill", + ArrayCopy(ArrayCopy<'a>) : [0xfb, 0x18] : "array.copy", + ArrayInitData(ArrayInit<'a>) : [0xfb, 0x54] : "array.init_data", + ArrayInitElem(ArrayInit<'a>) : [0xfb, 0x55] : "array.init_elem", + + // gc proposal, i31 + I31New : [0xfb, 0x20] : "i31.new", + I31GetS : [0xfb, 0x21] : "i31.get_s", + I31GetU : [0xfb, 0x22] : "i31.get_u", + + // gc proposal, concrete casting + RefTest(RefTest<'a>) : [] : "ref.test", + RefCast(RefCast<'a>) : [] : "ref.cast", + BrOnCast(BrOnCast<'a>) : [] : "br_on_cast", + BrOnCastFail(BrOnCastFail<'a>) : [] : "br_on_cast_fail", + + // gc proposal extern/any coercion operations + ExternInternalize : [0xfb, 0x70] : "extern.internalize", + ExternExternalize : [0xfb, 0x71] : "extern.externalize", + + I32Const(i32) : [0x41] : "i32.const", + I64Const(i64) : [0x42] : "i64.const", + F32Const(Float32) : [0x43] : "f32.const", + F64Const(Float64) : [0x44] : "f64.const", + + I32Clz : [0x67] : "i32.clz", + I32Ctz : [0x68] : "i32.ctz", + I32Popcnt : [0x69] : "i32.popcnt", + I32Add : [0x6a] : "i32.add", + I32Sub : [0x6b] : "i32.sub", + I32Mul : [0x6c] : "i32.mul", + I32DivS : [0x6d] : "i32.div_s", + I32DivU : [0x6e] : "i32.div_u", + I32RemS : [0x6f] : "i32.rem_s", 
+ I32RemU : [0x70] : "i32.rem_u", + I32And : [0x71] : "i32.and", + I32Or : [0x72] : "i32.or", + I32Xor : [0x73] : "i32.xor", + I32Shl : [0x74] : "i32.shl", + I32ShrS : [0x75] : "i32.shr_s", + I32ShrU : [0x76] : "i32.shr_u", + I32Rotl : [0x77] : "i32.rotl", + I32Rotr : [0x78] : "i32.rotr", + + I64Clz : [0x79] : "i64.clz", + I64Ctz : [0x7a] : "i64.ctz", + I64Popcnt : [0x7b] : "i64.popcnt", + I64Add : [0x7c] : "i64.add", + I64Sub : [0x7d] : "i64.sub", + I64Mul : [0x7e] : "i64.mul", + I64DivS : [0x7f] : "i64.div_s", + I64DivU : [0x80] : "i64.div_u", + I64RemS : [0x81] : "i64.rem_s", + I64RemU : [0x82] : "i64.rem_u", + I64And : [0x83] : "i64.and", + I64Or : [0x84] : "i64.or", + I64Xor : [0x85] : "i64.xor", + I64Shl : [0x86] : "i64.shl", + I64ShrS : [0x87] : "i64.shr_s", + I64ShrU : [0x88] : "i64.shr_u", + I64Rotl : [0x89] : "i64.rotl", + I64Rotr : [0x8a] : "i64.rotr", + + F32Abs : [0x8b] : "f32.abs", + F32Neg : [0x8c] : "f32.neg", + F32Ceil : [0x8d] : "f32.ceil", + F32Floor : [0x8e] : "f32.floor", + F32Trunc : [0x8f] : "f32.trunc", + F32Nearest : [0x90] : "f32.nearest", + F32Sqrt : [0x91] : "f32.sqrt", + F32Add : [0x92] : "f32.add", + F32Sub : [0x93] : "f32.sub", + F32Mul : [0x94] : "f32.mul", + F32Div : [0x95] : "f32.div", + F32Min : [0x96] : "f32.min", + F32Max : [0x97] : "f32.max", + F32Copysign : [0x98] : "f32.copysign", + + F64Abs : [0x99] : "f64.abs", + F64Neg : [0x9a] : "f64.neg", + F64Ceil : [0x9b] : "f64.ceil", + F64Floor : [0x9c] : "f64.floor", + F64Trunc : [0x9d] : "f64.trunc", + F64Nearest : [0x9e] : "f64.nearest", + F64Sqrt : [0x9f] : "f64.sqrt", + F64Add : [0xa0] : "f64.add", + F64Sub : [0xa1] : "f64.sub", + F64Mul : [0xa2] : "f64.mul", + F64Div : [0xa3] : "f64.div", + F64Min : [0xa4] : "f64.min", + F64Max : [0xa5] : "f64.max", + F64Copysign : [0xa6] : "f64.copysign", + + I32Eqz : [0x45] : "i32.eqz", + I32Eq : [0x46] : "i32.eq", + I32Ne : [0x47] : "i32.ne", + I32LtS : [0x48] : "i32.lt_s", + I32LtU : [0x49] : "i32.lt_u", + I32GtS : [0x4a] : "i32.gt_s", + 
I32GtU : [0x4b] : "i32.gt_u", + I32LeS : [0x4c] : "i32.le_s", + I32LeU : [0x4d] : "i32.le_u", + I32GeS : [0x4e] : "i32.ge_s", + I32GeU : [0x4f] : "i32.ge_u", + + I64Eqz : [0x50] : "i64.eqz", + I64Eq : [0x51] : "i64.eq", + I64Ne : [0x52] : "i64.ne", + I64LtS : [0x53] : "i64.lt_s", + I64LtU : [0x54] : "i64.lt_u", + I64GtS : [0x55] : "i64.gt_s", + I64GtU : [0x56] : "i64.gt_u", + I64LeS : [0x57] : "i64.le_s", + I64LeU : [0x58] : "i64.le_u", + I64GeS : [0x59] : "i64.ge_s", + I64GeU : [0x5a] : "i64.ge_u", + + F32Eq : [0x5b] : "f32.eq", + F32Ne : [0x5c] : "f32.ne", + F32Lt : [0x5d] : "f32.lt", + F32Gt : [0x5e] : "f32.gt", + F32Le : [0x5f] : "f32.le", + F32Ge : [0x60] : "f32.ge", + + F64Eq : [0x61] : "f64.eq", + F64Ne : [0x62] : "f64.ne", + F64Lt : [0x63] : "f64.lt", + F64Gt : [0x64] : "f64.gt", + F64Le : [0x65] : "f64.le", + F64Ge : [0x66] : "f64.ge", + + I32WrapI64 : [0xa7] : "i32.wrap_i64" | "i32.wrap/i64", + I32TruncF32S : [0xa8] : "i32.trunc_f32_s" | "i32.trunc_s/f32", + I32TruncF32U : [0xa9] : "i32.trunc_f32_u" | "i32.trunc_u/f32", + I32TruncF64S : [0xaa] : "i32.trunc_f64_s" | "i32.trunc_s/f64", + I32TruncF64U : [0xab] : "i32.trunc_f64_u" | "i32.trunc_u/f64", + I64ExtendI32S : [0xac] : "i64.extend_i32_s" | "i64.extend_s/i32", + I64ExtendI32U : [0xad] : "i64.extend_i32_u" | "i64.extend_u/i32", + I64TruncF32S : [0xae] : "i64.trunc_f32_s" | "i64.trunc_s/f32", + I64TruncF32U : [0xaf] : "i64.trunc_f32_u" | "i64.trunc_u/f32", + I64TruncF64S : [0xb0] : "i64.trunc_f64_s" | "i64.trunc_s/f64", + I64TruncF64U : [0xb1] : "i64.trunc_f64_u" | "i64.trunc_u/f64", + F32ConvertI32S : [0xb2] : "f32.convert_i32_s" | "f32.convert_s/i32", + F32ConvertI32U : [0xb3] : "f32.convert_i32_u" | "f32.convert_u/i32", + F32ConvertI64S : [0xb4] : "f32.convert_i64_s" | "f32.convert_s/i64", + F32ConvertI64U : [0xb5] : "f32.convert_i64_u" | "f32.convert_u/i64", + F32DemoteF64 : [0xb6] : "f32.demote_f64" | "f32.demote/f64", + F64ConvertI32S : [0xb7] : "f64.convert_i32_s" | "f64.convert_s/i32", + 
F64ConvertI32U : [0xb8] : "f64.convert_i32_u" | "f64.convert_u/i32", + F64ConvertI64S : [0xb9] : "f64.convert_i64_s" | "f64.convert_s/i64", + F64ConvertI64U : [0xba] : "f64.convert_i64_u" | "f64.convert_u/i64", + F64PromoteF32 : [0xbb] : "f64.promote_f32" | "f64.promote/f32", + I32ReinterpretF32 : [0xbc] : "i32.reinterpret_f32" | "i32.reinterpret/f32", + I64ReinterpretF64 : [0xbd] : "i64.reinterpret_f64" | "i64.reinterpret/f64", + F32ReinterpretI32 : [0xbe] : "f32.reinterpret_i32" | "f32.reinterpret/i32", + F64ReinterpretI64 : [0xbf] : "f64.reinterpret_i64" | "f64.reinterpret/i64", + + // non-trapping float to int + I32TruncSatF32S : [0xfc, 0x00] : "i32.trunc_sat_f32_s" | "i32.trunc_s:sat/f32", + I32TruncSatF32U : [0xfc, 0x01] : "i32.trunc_sat_f32_u" | "i32.trunc_u:sat/f32", + I32TruncSatF64S : [0xfc, 0x02] : "i32.trunc_sat_f64_s" | "i32.trunc_s:sat/f64", + I32TruncSatF64U : [0xfc, 0x03] : "i32.trunc_sat_f64_u" | "i32.trunc_u:sat/f64", + I64TruncSatF32S : [0xfc, 0x04] : "i64.trunc_sat_f32_s" | "i64.trunc_s:sat/f32", + I64TruncSatF32U : [0xfc, 0x05] : "i64.trunc_sat_f32_u" | "i64.trunc_u:sat/f32", + I64TruncSatF64S : [0xfc, 0x06] : "i64.trunc_sat_f64_s" | "i64.trunc_s:sat/f64", + I64TruncSatF64U : [0xfc, 0x07] : "i64.trunc_sat_f64_u" | "i64.trunc_u:sat/f64", + + // sign extension proposal + I32Extend8S : [0xc0] : "i32.extend8_s", + I32Extend16S : [0xc1] : "i32.extend16_s", + I64Extend8S : [0xc2] : "i64.extend8_s", + I64Extend16S : [0xc3] : "i64.extend16_s", + I64Extend32S : [0xc4] : "i64.extend32_s", + + // atomics proposal + MemoryAtomicNotify(MemArg<4>) : [0xfe, 0x00] : "memory.atomic.notify" | "atomic.notify", + MemoryAtomicWait32(MemArg<4>) : [0xfe, 0x01] : "memory.atomic.wait32" | "i32.atomic.wait", + MemoryAtomicWait64(MemArg<8>) : [0xfe, 0x02] : "memory.atomic.wait64" | "i64.atomic.wait", + AtomicFence : [0xfe, 0x03, 0x00] : "atomic.fence", + + I32AtomicLoad(MemArg<4>) : [0xfe, 0x10] : "i32.atomic.load", + I64AtomicLoad(MemArg<8>) : [0xfe, 0x11] : 
"i64.atomic.load", + I32AtomicLoad8u(MemArg<1>) : [0xfe, 0x12] : "i32.atomic.load8_u", + I32AtomicLoad16u(MemArg<2>) : [0xfe, 0x13] : "i32.atomic.load16_u", + I64AtomicLoad8u(MemArg<1>) : [0xfe, 0x14] : "i64.atomic.load8_u", + I64AtomicLoad16u(MemArg<2>) : [0xfe, 0x15] : "i64.atomic.load16_u", + I64AtomicLoad32u(MemArg<4>) : [0xfe, 0x16] : "i64.atomic.load32_u", + I32AtomicStore(MemArg<4>) : [0xfe, 0x17] : "i32.atomic.store", + I64AtomicStore(MemArg<8>) : [0xfe, 0x18] : "i64.atomic.store", + I32AtomicStore8(MemArg<1>) : [0xfe, 0x19] : "i32.atomic.store8", + I32AtomicStore16(MemArg<2>) : [0xfe, 0x1a] : "i32.atomic.store16", + I64AtomicStore8(MemArg<1>) : [0xfe, 0x1b] : "i64.atomic.store8", + I64AtomicStore16(MemArg<2>) : [0xfe, 0x1c] : "i64.atomic.store16", + I64AtomicStore32(MemArg<4>) : [0xfe, 0x1d] : "i64.atomic.store32", + + I32AtomicRmwAdd(MemArg<4>) : [0xfe, 0x1e] : "i32.atomic.rmw.add", + I64AtomicRmwAdd(MemArg<8>) : [0xfe, 0x1f] : "i64.atomic.rmw.add", + I32AtomicRmw8AddU(MemArg<1>) : [0xfe, 0x20] : "i32.atomic.rmw8.add_u", + I32AtomicRmw16AddU(MemArg<2>) : [0xfe, 0x21] : "i32.atomic.rmw16.add_u", + I64AtomicRmw8AddU(MemArg<1>) : [0xfe, 0x22] : "i64.atomic.rmw8.add_u", + I64AtomicRmw16AddU(MemArg<2>) : [0xfe, 0x23] : "i64.atomic.rmw16.add_u", + I64AtomicRmw32AddU(MemArg<4>) : [0xfe, 0x24] : "i64.atomic.rmw32.add_u", + + I32AtomicRmwSub(MemArg<4>) : [0xfe, 0x25] : "i32.atomic.rmw.sub", + I64AtomicRmwSub(MemArg<8>) : [0xfe, 0x26] : "i64.atomic.rmw.sub", + I32AtomicRmw8SubU(MemArg<1>) : [0xfe, 0x27] : "i32.atomic.rmw8.sub_u", + I32AtomicRmw16SubU(MemArg<2>) : [0xfe, 0x28] : "i32.atomic.rmw16.sub_u", + I64AtomicRmw8SubU(MemArg<1>) : [0xfe, 0x29] : "i64.atomic.rmw8.sub_u", + I64AtomicRmw16SubU(MemArg<2>) : [0xfe, 0x2a] : "i64.atomic.rmw16.sub_u", + I64AtomicRmw32SubU(MemArg<4>) : [0xfe, 0x2b] : "i64.atomic.rmw32.sub_u", + + I32AtomicRmwAnd(MemArg<4>) : [0xfe, 0x2c] : "i32.atomic.rmw.and", + I64AtomicRmwAnd(MemArg<8>) : [0xfe, 0x2d] : "i64.atomic.rmw.and", + 
I32AtomicRmw8AndU(MemArg<1>) : [0xfe, 0x2e] : "i32.atomic.rmw8.and_u", + I32AtomicRmw16AndU(MemArg<2>) : [0xfe, 0x2f] : "i32.atomic.rmw16.and_u", + I64AtomicRmw8AndU(MemArg<1>) : [0xfe, 0x30] : "i64.atomic.rmw8.and_u", + I64AtomicRmw16AndU(MemArg<2>) : [0xfe, 0x31] : "i64.atomic.rmw16.and_u", + I64AtomicRmw32AndU(MemArg<4>) : [0xfe, 0x32] : "i64.atomic.rmw32.and_u", + + I32AtomicRmwOr(MemArg<4>) : [0xfe, 0x33] : "i32.atomic.rmw.or", + I64AtomicRmwOr(MemArg<8>) : [0xfe, 0x34] : "i64.atomic.rmw.or", + I32AtomicRmw8OrU(MemArg<1>) : [0xfe, 0x35] : "i32.atomic.rmw8.or_u", + I32AtomicRmw16OrU(MemArg<2>) : [0xfe, 0x36] : "i32.atomic.rmw16.or_u", + I64AtomicRmw8OrU(MemArg<1>) : [0xfe, 0x37] : "i64.atomic.rmw8.or_u", + I64AtomicRmw16OrU(MemArg<2>) : [0xfe, 0x38] : "i64.atomic.rmw16.or_u", + I64AtomicRmw32OrU(MemArg<4>) : [0xfe, 0x39] : "i64.atomic.rmw32.or_u", + + I32AtomicRmwXor(MemArg<4>) : [0xfe, 0x3a] : "i32.atomic.rmw.xor", + I64AtomicRmwXor(MemArg<8>) : [0xfe, 0x3b] : "i64.atomic.rmw.xor", + I32AtomicRmw8XorU(MemArg<1>) : [0xfe, 0x3c] : "i32.atomic.rmw8.xor_u", + I32AtomicRmw16XorU(MemArg<2>) : [0xfe, 0x3d] : "i32.atomic.rmw16.xor_u", + I64AtomicRmw8XorU(MemArg<1>) : [0xfe, 0x3e] : "i64.atomic.rmw8.xor_u", + I64AtomicRmw16XorU(MemArg<2>) : [0xfe, 0x3f] : "i64.atomic.rmw16.xor_u", + I64AtomicRmw32XorU(MemArg<4>) : [0xfe, 0x40] : "i64.atomic.rmw32.xor_u", + + I32AtomicRmwXchg(MemArg<4>) : [0xfe, 0x41] : "i32.atomic.rmw.xchg", + I64AtomicRmwXchg(MemArg<8>) : [0xfe, 0x42] : "i64.atomic.rmw.xchg", + I32AtomicRmw8XchgU(MemArg<1>) : [0xfe, 0x43] : "i32.atomic.rmw8.xchg_u", + I32AtomicRmw16XchgU(MemArg<2>) : [0xfe, 0x44] : "i32.atomic.rmw16.xchg_u", + I64AtomicRmw8XchgU(MemArg<1>) : [0xfe, 0x45] : "i64.atomic.rmw8.xchg_u", + I64AtomicRmw16XchgU(MemArg<2>) : [0xfe, 0x46] : "i64.atomic.rmw16.xchg_u", + I64AtomicRmw32XchgU(MemArg<4>) : [0xfe, 0x47] : "i64.atomic.rmw32.xchg_u", + + I32AtomicRmwCmpxchg(MemArg<4>) : [0xfe, 0x48] : "i32.atomic.rmw.cmpxchg", + 
I64AtomicRmwCmpxchg(MemArg<8>) : [0xfe, 0x49] : "i64.atomic.rmw.cmpxchg", + I32AtomicRmw8CmpxchgU(MemArg<1>) : [0xfe, 0x4a] : "i32.atomic.rmw8.cmpxchg_u", + I32AtomicRmw16CmpxchgU(MemArg<2>) : [0xfe, 0x4b] : "i32.atomic.rmw16.cmpxchg_u", + I64AtomicRmw8CmpxchgU(MemArg<1>) : [0xfe, 0x4c] : "i64.atomic.rmw8.cmpxchg_u", + I64AtomicRmw16CmpxchgU(MemArg<2>) : [0xfe, 0x4d] : "i64.atomic.rmw16.cmpxchg_u", + I64AtomicRmw32CmpxchgU(MemArg<4>) : [0xfe, 0x4e] : "i64.atomic.rmw32.cmpxchg_u", + + // proposal: simd + // + // https://webassembly.github.io/simd/core/binary/instructions.html + V128Load(MemArg<16>) : [0xfd, 0] : "v128.load", + V128Load8x8S(MemArg<8>) : [0xfd, 1] : "v128.load8x8_s", + V128Load8x8U(MemArg<8>) : [0xfd, 2] : "v128.load8x8_u", + V128Load16x4S(MemArg<8>) : [0xfd, 3] : "v128.load16x4_s", + V128Load16x4U(MemArg<8>) : [0xfd, 4] : "v128.load16x4_u", + V128Load32x2S(MemArg<8>) : [0xfd, 5] : "v128.load32x2_s", + V128Load32x2U(MemArg<8>) : [0xfd, 6] : "v128.load32x2_u", + V128Load8Splat(MemArg<1>) : [0xfd, 7] : "v128.load8_splat", + V128Load16Splat(MemArg<2>) : [0xfd, 8] : "v128.load16_splat", + V128Load32Splat(MemArg<4>) : [0xfd, 9] : "v128.load32_splat", + V128Load64Splat(MemArg<8>) : [0xfd, 10] : "v128.load64_splat", + V128Load32Zero(MemArg<4>) : [0xfd, 92] : "v128.load32_zero", + V128Load64Zero(MemArg<8>) : [0xfd, 93] : "v128.load64_zero", + V128Store(MemArg<16>) : [0xfd, 11] : "v128.store", + + V128Load8Lane(LoadOrStoreLane<1>) : [0xfd, 84] : "v128.load8_lane", + V128Load16Lane(LoadOrStoreLane<2>) : [0xfd, 85] : "v128.load16_lane", + V128Load32Lane(LoadOrStoreLane<4>) : [0xfd, 86] : "v128.load32_lane", + V128Load64Lane(LoadOrStoreLane<8>): [0xfd, 87] : "v128.load64_lane", + V128Store8Lane(LoadOrStoreLane<1>) : [0xfd, 88] : "v128.store8_lane", + V128Store16Lane(LoadOrStoreLane<2>) : [0xfd, 89] : "v128.store16_lane", + V128Store32Lane(LoadOrStoreLane<4>) : [0xfd, 90] : "v128.store32_lane", + V128Store64Lane(LoadOrStoreLane<8>) : [0xfd, 91] : 
"v128.store64_lane", + + V128Const(V128Const) : [0xfd, 12] : "v128.const", + I8x16Shuffle(I8x16Shuffle) : [0xfd, 13] : "i8x16.shuffle", + + I8x16ExtractLaneS(LaneArg) : [0xfd, 21] : "i8x16.extract_lane_s", + I8x16ExtractLaneU(LaneArg) : [0xfd, 22] : "i8x16.extract_lane_u", + I8x16ReplaceLane(LaneArg) : [0xfd, 23] : "i8x16.replace_lane", + I16x8ExtractLaneS(LaneArg) : [0xfd, 24] : "i16x8.extract_lane_s", + I16x8ExtractLaneU(LaneArg) : [0xfd, 25] : "i16x8.extract_lane_u", + I16x8ReplaceLane(LaneArg) : [0xfd, 26] : "i16x8.replace_lane", + I32x4ExtractLane(LaneArg) : [0xfd, 27] : "i32x4.extract_lane", + I32x4ReplaceLane(LaneArg) : [0xfd, 28] : "i32x4.replace_lane", + I64x2ExtractLane(LaneArg) : [0xfd, 29] : "i64x2.extract_lane", + I64x2ReplaceLane(LaneArg) : [0xfd, 30] : "i64x2.replace_lane", + F32x4ExtractLane(LaneArg) : [0xfd, 31] : "f32x4.extract_lane", + F32x4ReplaceLane(LaneArg) : [0xfd, 32] : "f32x4.replace_lane", + F64x2ExtractLane(LaneArg) : [0xfd, 33] : "f64x2.extract_lane", + F64x2ReplaceLane(LaneArg) : [0xfd, 34] : "f64x2.replace_lane", + + I8x16Swizzle : [0xfd, 14] : "i8x16.swizzle", + I8x16Splat : [0xfd, 15] : "i8x16.splat", + I16x8Splat : [0xfd, 16] : "i16x8.splat", + I32x4Splat : [0xfd, 17] : "i32x4.splat", + I64x2Splat : [0xfd, 18] : "i64x2.splat", + F32x4Splat : [0xfd, 19] : "f32x4.splat", + F64x2Splat : [0xfd, 20] : "f64x2.splat", + + I8x16Eq : [0xfd, 35] : "i8x16.eq", + I8x16Ne : [0xfd, 36] : "i8x16.ne", + I8x16LtS : [0xfd, 37] : "i8x16.lt_s", + I8x16LtU : [0xfd, 38] : "i8x16.lt_u", + I8x16GtS : [0xfd, 39] : "i8x16.gt_s", + I8x16GtU : [0xfd, 40] : "i8x16.gt_u", + I8x16LeS : [0xfd, 41] : "i8x16.le_s", + I8x16LeU : [0xfd, 42] : "i8x16.le_u", + I8x16GeS : [0xfd, 43] : "i8x16.ge_s", + I8x16GeU : [0xfd, 44] : "i8x16.ge_u", + + I16x8Eq : [0xfd, 45] : "i16x8.eq", + I16x8Ne : [0xfd, 46] : "i16x8.ne", + I16x8LtS : [0xfd, 47] : "i16x8.lt_s", + I16x8LtU : [0xfd, 48] : "i16x8.lt_u", + I16x8GtS : [0xfd, 49] : "i16x8.gt_s", + I16x8GtU : [0xfd, 50] : "i16x8.gt_u", 
+ I16x8LeS : [0xfd, 51] : "i16x8.le_s", + I16x8LeU : [0xfd, 52] : "i16x8.le_u", + I16x8GeS : [0xfd, 53] : "i16x8.ge_s", + I16x8GeU : [0xfd, 54] : "i16x8.ge_u", + + I32x4Eq : [0xfd, 55] : "i32x4.eq", + I32x4Ne : [0xfd, 56] : "i32x4.ne", + I32x4LtS : [0xfd, 57] : "i32x4.lt_s", + I32x4LtU : [0xfd, 58] : "i32x4.lt_u", + I32x4GtS : [0xfd, 59] : "i32x4.gt_s", + I32x4GtU : [0xfd, 60] : "i32x4.gt_u", + I32x4LeS : [0xfd, 61] : "i32x4.le_s", + I32x4LeU : [0xfd, 62] : "i32x4.le_u", + I32x4GeS : [0xfd, 63] : "i32x4.ge_s", + I32x4GeU : [0xfd, 64] : "i32x4.ge_u", + + I64x2Eq : [0xfd, 214] : "i64x2.eq", + I64x2Ne : [0xfd, 215] : "i64x2.ne", + I64x2LtS : [0xfd, 216] : "i64x2.lt_s", + I64x2GtS : [0xfd, 217] : "i64x2.gt_s", + I64x2LeS : [0xfd, 218] : "i64x2.le_s", + I64x2GeS : [0xfd, 219] : "i64x2.ge_s", + + F32x4Eq : [0xfd, 65] : "f32x4.eq", + F32x4Ne : [0xfd, 66] : "f32x4.ne", + F32x4Lt : [0xfd, 67] : "f32x4.lt", + F32x4Gt : [0xfd, 68] : "f32x4.gt", + F32x4Le : [0xfd, 69] : "f32x4.le", + F32x4Ge : [0xfd, 70] : "f32x4.ge", + + F64x2Eq : [0xfd, 71] : "f64x2.eq", + F64x2Ne : [0xfd, 72] : "f64x2.ne", + F64x2Lt : [0xfd, 73] : "f64x2.lt", + F64x2Gt : [0xfd, 74] : "f64x2.gt", + F64x2Le : [0xfd, 75] : "f64x2.le", + F64x2Ge : [0xfd, 76] : "f64x2.ge", + + V128Not : [0xfd, 77] : "v128.not", + V128And : [0xfd, 78] : "v128.and", + V128Andnot : [0xfd, 79] : "v128.andnot", + V128Or : [0xfd, 80] : "v128.or", + V128Xor : [0xfd, 81] : "v128.xor", + V128Bitselect : [0xfd, 82] : "v128.bitselect", + V128AnyTrue : [0xfd, 83] : "v128.any_true", + + I8x16Abs : [0xfd, 96] : "i8x16.abs", + I8x16Neg : [0xfd, 97] : "i8x16.neg", + I8x16Popcnt : [0xfd, 98] : "i8x16.popcnt", + I8x16AllTrue : [0xfd, 99] : "i8x16.all_true", + I8x16Bitmask : [0xfd, 100] : "i8x16.bitmask", + I8x16NarrowI16x8S : [0xfd, 101] : "i8x16.narrow_i16x8_s", + I8x16NarrowI16x8U : [0xfd, 102] : "i8x16.narrow_i16x8_u", + I8x16Shl : [0xfd, 107] : "i8x16.shl", + I8x16ShrS : [0xfd, 108] : "i8x16.shr_s", + I8x16ShrU : [0xfd, 109] : "i8x16.shr_u", 
+ I8x16Add : [0xfd, 110] : "i8x16.add", + I8x16AddSatS : [0xfd, 111] : "i8x16.add_sat_s", + I8x16AddSatU : [0xfd, 112] : "i8x16.add_sat_u", + I8x16Sub : [0xfd, 113] : "i8x16.sub", + I8x16SubSatS : [0xfd, 114] : "i8x16.sub_sat_s", + I8x16SubSatU : [0xfd, 115] : "i8x16.sub_sat_u", + I8x16MinS : [0xfd, 118] : "i8x16.min_s", + I8x16MinU : [0xfd, 119] : "i8x16.min_u", + I8x16MaxS : [0xfd, 120] : "i8x16.max_s", + I8x16MaxU : [0xfd, 121] : "i8x16.max_u", + I8x16AvgrU : [0xfd, 123] : "i8x16.avgr_u", + + I16x8ExtAddPairwiseI8x16S : [0xfd, 124] : "i16x8.extadd_pairwise_i8x16_s", + I16x8ExtAddPairwiseI8x16U : [0xfd, 125] : "i16x8.extadd_pairwise_i8x16_u", + I16x8Abs : [0xfd, 128] : "i16x8.abs", + I16x8Neg : [0xfd, 129] : "i16x8.neg", + I16x8Q15MulrSatS : [0xfd, 130] : "i16x8.q15mulr_sat_s", + I16x8AllTrue : [0xfd, 131] : "i16x8.all_true", + I16x8Bitmask : [0xfd, 132] : "i16x8.bitmask", + I16x8NarrowI32x4S : [0xfd, 133] : "i16x8.narrow_i32x4_s", + I16x8NarrowI32x4U : [0xfd, 134] : "i16x8.narrow_i32x4_u", + I16x8ExtendLowI8x16S : [0xfd, 135] : "i16x8.extend_low_i8x16_s", + I16x8ExtendHighI8x16S : [0xfd, 136] : "i16x8.extend_high_i8x16_s", + I16x8ExtendLowI8x16U : [0xfd, 137] : "i16x8.extend_low_i8x16_u", + I16x8ExtendHighI8x16u : [0xfd, 138] : "i16x8.extend_high_i8x16_u", + I16x8Shl : [0xfd, 139] : "i16x8.shl", + I16x8ShrS : [0xfd, 140] : "i16x8.shr_s", + I16x8ShrU : [0xfd, 141] : "i16x8.shr_u", + I16x8Add : [0xfd, 142] : "i16x8.add", + I16x8AddSatS : [0xfd, 143] : "i16x8.add_sat_s", + I16x8AddSatU : [0xfd, 144] : "i16x8.add_sat_u", + I16x8Sub : [0xfd, 145] : "i16x8.sub", + I16x8SubSatS : [0xfd, 146] : "i16x8.sub_sat_s", + I16x8SubSatU : [0xfd, 147] : "i16x8.sub_sat_u", + I16x8Mul : [0xfd, 149] : "i16x8.mul", + I16x8MinS : [0xfd, 150] : "i16x8.min_s", + I16x8MinU : [0xfd, 151] : "i16x8.min_u", + I16x8MaxS : [0xfd, 152] : "i16x8.max_s", + I16x8MaxU : [0xfd, 153] : "i16x8.max_u", + I16x8AvgrU : [0xfd, 155] : "i16x8.avgr_u", + I16x8ExtMulLowI8x16S : [0xfd, 156] : 
"i16x8.extmul_low_i8x16_s", + I16x8ExtMulHighI8x16S : [0xfd, 157] : "i16x8.extmul_high_i8x16_s", + I16x8ExtMulLowI8x16U : [0xfd, 158] : "i16x8.extmul_low_i8x16_u", + I16x8ExtMulHighI8x16U : [0xfd, 159] : "i16x8.extmul_high_i8x16_u", + + I32x4ExtAddPairwiseI16x8S : [0xfd, 126] : "i32x4.extadd_pairwise_i16x8_s", + I32x4ExtAddPairwiseI16x8U : [0xfd, 127] : "i32x4.extadd_pairwise_i16x8_u", + I32x4Abs : [0xfd, 160] : "i32x4.abs", + I32x4Neg : [0xfd, 161] : "i32x4.neg", + I32x4AllTrue : [0xfd, 163] : "i32x4.all_true", + I32x4Bitmask : [0xfd, 164] : "i32x4.bitmask", + I32x4ExtendLowI16x8S : [0xfd, 167] : "i32x4.extend_low_i16x8_s", + I32x4ExtendHighI16x8S : [0xfd, 168] : "i32x4.extend_high_i16x8_s", + I32x4ExtendLowI16x8U : [0xfd, 169] : "i32x4.extend_low_i16x8_u", + I32x4ExtendHighI16x8U : [0xfd, 170] : "i32x4.extend_high_i16x8_u", + I32x4Shl : [0xfd, 171] : "i32x4.shl", + I32x4ShrS : [0xfd, 172] : "i32x4.shr_s", + I32x4ShrU : [0xfd, 173] : "i32x4.shr_u", + I32x4Add : [0xfd, 174] : "i32x4.add", + I32x4Sub : [0xfd, 177] : "i32x4.sub", + I32x4Mul : [0xfd, 181] : "i32x4.mul", + I32x4MinS : [0xfd, 182] : "i32x4.min_s", + I32x4MinU : [0xfd, 183] : "i32x4.min_u", + I32x4MaxS : [0xfd, 184] : "i32x4.max_s", + I32x4MaxU : [0xfd, 185] : "i32x4.max_u", + I32x4DotI16x8S : [0xfd, 186] : "i32x4.dot_i16x8_s", + I32x4ExtMulLowI16x8S : [0xfd, 188] : "i32x4.extmul_low_i16x8_s", + I32x4ExtMulHighI16x8S : [0xfd, 189] : "i32x4.extmul_high_i16x8_s", + I32x4ExtMulLowI16x8U : [0xfd, 190] : "i32x4.extmul_low_i16x8_u", + I32x4ExtMulHighI16x8U : [0xfd, 191] : "i32x4.extmul_high_i16x8_u", + + I64x2Abs : [0xfd, 192] : "i64x2.abs", + I64x2Neg : [0xfd, 193] : "i64x2.neg", + I64x2AllTrue : [0xfd, 195] : "i64x2.all_true", + I64x2Bitmask : [0xfd, 196] : "i64x2.bitmask", + I64x2ExtendLowI32x4S : [0xfd, 199] : "i64x2.extend_low_i32x4_s", + I64x2ExtendHighI32x4S : [0xfd, 200] : "i64x2.extend_high_i32x4_s", + I64x2ExtendLowI32x4U : [0xfd, 201] : "i64x2.extend_low_i32x4_u", + I64x2ExtendHighI32x4U : [0xfd, 
202] : "i64x2.extend_high_i32x4_u", + I64x2Shl : [0xfd, 203] : "i64x2.shl", + I64x2ShrS : [0xfd, 204] : "i64x2.shr_s", + I64x2ShrU : [0xfd, 205] : "i64x2.shr_u", + I64x2Add : [0xfd, 206] : "i64x2.add", + I64x2Sub : [0xfd, 209] : "i64x2.sub", + I64x2Mul : [0xfd, 213] : "i64x2.mul", + I64x2ExtMulLowI32x4S : [0xfd, 220] : "i64x2.extmul_low_i32x4_s", + I64x2ExtMulHighI32x4S : [0xfd, 221] : "i64x2.extmul_high_i32x4_s", + I64x2ExtMulLowI32x4U : [0xfd, 222] : "i64x2.extmul_low_i32x4_u", + I64x2ExtMulHighI32x4U : [0xfd, 223] : "i64x2.extmul_high_i32x4_u", + + F32x4Ceil : [0xfd, 103] : "f32x4.ceil", + F32x4Floor : [0xfd, 104] : "f32x4.floor", + F32x4Trunc : [0xfd, 105] : "f32x4.trunc", + F32x4Nearest : [0xfd, 106] : "f32x4.nearest", + F32x4Abs : [0xfd, 224] : "f32x4.abs", + F32x4Neg : [0xfd, 225] : "f32x4.neg", + F32x4Sqrt : [0xfd, 227] : "f32x4.sqrt", + F32x4Add : [0xfd, 228] : "f32x4.add", + F32x4Sub : [0xfd, 229] : "f32x4.sub", + F32x4Mul : [0xfd, 230] : "f32x4.mul", + F32x4Div : [0xfd, 231] : "f32x4.div", + F32x4Min : [0xfd, 232] : "f32x4.min", + F32x4Max : [0xfd, 233] : "f32x4.max", + F32x4PMin : [0xfd, 234] : "f32x4.pmin", + F32x4PMax : [0xfd, 235] : "f32x4.pmax", + + F64x2Ceil : [0xfd, 116] : "f64x2.ceil", + F64x2Floor : [0xfd, 117] : "f64x2.floor", + F64x2Trunc : [0xfd, 122] : "f64x2.trunc", + F64x2Nearest : [0xfd, 148] : "f64x2.nearest", + F64x2Abs : [0xfd, 236] : "f64x2.abs", + F64x2Neg : [0xfd, 237] : "f64x2.neg", + F64x2Sqrt : [0xfd, 239] : "f64x2.sqrt", + F64x2Add : [0xfd, 240] : "f64x2.add", + F64x2Sub : [0xfd, 241] : "f64x2.sub", + F64x2Mul : [0xfd, 242] : "f64x2.mul", + F64x2Div : [0xfd, 243] : "f64x2.div", + F64x2Min : [0xfd, 244] : "f64x2.min", + F64x2Max : [0xfd, 245] : "f64x2.max", + F64x2PMin : [0xfd, 246] : "f64x2.pmin", + F64x2PMax : [0xfd, 247] : "f64x2.pmax", + + I32x4TruncSatF32x4S : [0xfd, 248] : "i32x4.trunc_sat_f32x4_s", + I32x4TruncSatF32x4U : [0xfd, 249] : "i32x4.trunc_sat_f32x4_u", + F32x4ConvertI32x4S : [0xfd, 250] : "f32x4.convert_i32x4_s", 
+ F32x4ConvertI32x4U : [0xfd, 251] : "f32x4.convert_i32x4_u", + I32x4TruncSatF64x2SZero : [0xfd, 252] : "i32x4.trunc_sat_f64x2_s_zero", + I32x4TruncSatF64x2UZero : [0xfd, 253] : "i32x4.trunc_sat_f64x2_u_zero", + F64x2ConvertLowI32x4S : [0xfd, 254] : "f64x2.convert_low_i32x4_s", + F64x2ConvertLowI32x4U : [0xfd, 255] : "f64x2.convert_low_i32x4_u", + F32x4DemoteF64x2Zero : [0xfd, 94] : "f32x4.demote_f64x2_zero", + F64x2PromoteLowF32x4 : [0xfd, 95] : "f64x2.promote_low_f32x4", + + // Exception handling proposal + Try(BlockType<'a>) : [0x06] : "try", + Catch(Index<'a>) : [0x07] : "catch", + Throw(Index<'a>) : [0x08] : "throw", + Rethrow(Index<'a>) : [0x09] : "rethrow", + Delegate(Index<'a>) : [0x18] : "delegate", + CatchAll : [0x19] : "catch_all", + + // Relaxed SIMD proposal + I8x16RelaxedSwizzle : [0xfd, 0x100]: "i8x16.relaxed_swizzle", + I32x4RelaxedTruncF32x4S : [0xfd, 0x101]: "i32x4.relaxed_trunc_f32x4_s", + I32x4RelaxedTruncF32x4U : [0xfd, 0x102]: "i32x4.relaxed_trunc_f32x4_u", + I32x4RelaxedTruncF64x2SZero : [0xfd, 0x103]: "i32x4.relaxed_trunc_f64x2_s_zero", + I32x4RelaxedTruncF64x2UZero : [0xfd, 0x104]: "i32x4.relaxed_trunc_f64x2_u_zero", + F32x4RelaxedMadd : [0xfd, 0x105]: "f32x4.relaxed_madd", + F32x4RelaxedNmadd : [0xfd, 0x106]: "f32x4.relaxed_nmadd", + F64x2RelaxedMadd : [0xfd, 0x107]: "f64x2.relaxed_madd", + F64x2RelaxedNmadd : [0xfd, 0x108]: "f64x2.relaxed_nmadd", + I8x16RelaxedLaneselect : [0xfd, 0x109]: "i8x16.relaxed_laneselect", + I16x8RelaxedLaneselect : [0xfd, 0x10A]: "i16x8.relaxed_laneselect", + I32x4RelaxedLaneselect : [0xfd, 0x10B]: "i32x4.relaxed_laneselect", + I64x2RelaxedLaneselect : [0xfd, 0x10C]: "i64x2.relaxed_laneselect", + F32x4RelaxedMin : [0xfd, 0x10D]: "f32x4.relaxed_min", + F32x4RelaxedMax : [0xfd, 0x10E]: "f32x4.relaxed_max", + F64x2RelaxedMin : [0xfd, 0x10F]: "f64x2.relaxed_min", + F64x2RelaxedMax : [0xfd, 0x110]: "f64x2.relaxed_max", + I16x8RelaxedQ15mulrS: [0xfd, 0x111]: "i16x8.relaxed_q15mulr_s", + I16x8RelaxedDotI8x16I7x16S: 
[0xfd, 0x112]: "i16x8.relaxed_dot_i8x16_i7x16_s", + I32x4RelaxedDotI8x16I7x16AddS: [0xfd, 0x113]: "i32x4.relaxed_dot_i8x16_i7x16_add_s", + } +} + +impl<'a> Instruction<'a> { + pub(crate) fn needs_data_count(&self) -> bool { + match self { + Instruction::MemoryInit(_) + | Instruction::DataDrop(_) + | Instruction::ArrayNewData(_) => true, + _ => false, + } + } +} + +/// Extra information associated with block-related instructions. +/// +/// This is used to label blocks and also annotate what types are expected for +/// the block. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct BlockType<'a> { + pub label: Option<Id<'a>>, + pub label_name: Option<NameAnnotation<'a>>, + pub ty: TypeUse<'a, FunctionType<'a>>, +} + +impl<'a> Parse<'a> for BlockType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(BlockType { + label: parser.parse()?, + label_name: parser.parse()?, + ty: parser + .parse::<TypeUse<'a, FunctionTypeNoNames<'a>>>()? + .into(), + }) + } +} + +/// Extra information associated with the func.bind instruction. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct FuncBindType<'a> { + pub ty: TypeUse<'a, FunctionType<'a>>, +} + +impl<'a> Parse<'a> for FuncBindType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(FuncBindType { + ty: parser + .parse::<TypeUse<'a, FunctionTypeNoNames<'a>>>()? + .into(), + }) + } +} + +/// Extra information associated with the let instruction. +#[derive(Debug)] +#[allow(missing_docs)] +pub struct LetType<'a> { + pub block: BlockType<'a>, + pub locals: Vec<Local<'a>>, +} + +impl<'a> Parse<'a> for LetType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(LetType { + block: parser.parse()?, + locals: Local::parse_remainder(parser)?, + }) + } +} + +/// Extra information associated with the `br_table` instruction. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub struct BrTableIndices<'a> { + pub labels: Vec<Index<'a>>, + pub default: Index<'a>, +} + +impl<'a> Parse<'a> for BrTableIndices<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut labels = vec![parser.parse()?]; + while parser.peek::<Index>() { + labels.push(parser.parse()?); + } + let default = labels.pop().unwrap(); + Ok(BrTableIndices { labels, default }) + } +} + +/// Payload for lane-related instructions. Unsigned with no + prefix. +#[derive(Debug)] +pub struct LaneArg { + /// The lane argument. + pub lane: u8, +} + +impl<'a> Parse<'a> for LaneArg { + fn parse(parser: Parser<'a>) -> Result<Self> { + let lane = parser.step(|c| { + if let Some((i, rest)) = c.integer() { + if i.sign() == None { + let (src, radix) = i.val(); + let val = u8::from_str_radix(src, radix) + .map_err(|_| c.error("malformed lane index"))?; + Ok((val, rest)) + } else { + Err(c.error("unexpected token")) + } + } else { + Err(c.error("expected a lane index")) + } + })?; + Ok(LaneArg { lane }) + } +} + +/// Payload for memory-related instructions indicating offset/alignment of +/// memory accesses. +#[derive(Debug)] +pub struct MemArg<'a> { + /// The alignment of this access. + /// + /// This is not stored as a log, this is the actual alignment (e.g. 1, 2, 4, + /// 8, etc). + pub align: u32, + /// The offset, in bytes of this access. 
+ pub offset: u64, + /// The memory index we're accessing + pub memory: Index<'a>, +} + +impl<'a> MemArg<'a> { + fn parse(parser: Parser<'a>, default_align: u32) -> Result<Self> { + fn parse_field<T>( + name: &str, + parser: Parser<'_>, + f: impl FnOnce(Cursor<'_>, &str, u32) -> Result<T>, + ) -> Result<Option<T>> { + parser.step(|c| { + let (kw, rest) = match c.keyword() { + Some(p) => p, + None => return Ok((None, c)), + }; + if !kw.starts_with(name) { + return Ok((None, c)); + } + let kw = &kw[name.len()..]; + if !kw.starts_with('=') { + return Ok((None, c)); + } + let num = &kw[1..]; + let num = if let Some(stripped) = num.strip_prefix("0x") { + f(c, stripped, 16)? + } else { + f(c, num, 10)? + }; + + Ok((Some(num), rest)) + }) + } + + fn parse_u32(name: &str, parser: Parser<'_>) -> Result<Option<u32>> { + parse_field(name, parser, |c, num, radix| { + u32::from_str_radix(num, radix).map_err(|_| c.error("i32 constant out of range")) + }) + } + + fn parse_u64(name: &str, parser: Parser<'_>) -> Result<Option<u64>> { + parse_field(name, parser, |c, num, radix| { + u64::from_str_radix(num, radix).map_err(|_| c.error("i64 constant out of range")) + }) + } + + let memory = parser + .parse::<Option<_>>()? + .unwrap_or_else(|| Index::Num(0, parser.prev_span())); + let offset = parse_u64("offset", parser)?.unwrap_or(0); + let align = match parse_u32("align", parser)? { + Some(n) if !n.is_power_of_two() => { + return Err(parser.error("alignment must be a power of two")) + } + n => n.unwrap_or(default_align), + }; + + Ok(MemArg { + offset, + align, + memory, + }) + } +} + +/// Extra data associated with the `loadN_lane` and `storeN_lane` instructions. +#[derive(Debug)] +pub struct LoadOrStoreLane<'a> { + /// The memory argument for this instruction. + pub memarg: MemArg<'a>, + /// The lane argument for this instruction. + pub lane: LaneArg, +} + +impl<'a> LoadOrStoreLane<'a> { + fn parse(parser: Parser<'a>, default_align: u32) -> Result<Self> { + // This is sort of funky. 
The first integer we see could be the lane + // index, but it could also be the memory index. To determine what it is + // then if we see a second integer we need to look further. + let has_memarg = parser.step(|c| match c.integer() { + Some((_, after_int)) => { + // Two integers in a row? That means that the first one is the + // memory index and the second must be the lane index. + if after_int.integer().is_some() { + return Ok((true, c)); + } + + // If the first integer is trailed by `offset=...` or + // `align=...` then this is definitely a memarg. + if let Some((kw, _)) = after_int.keyword() { + if kw.starts_with("offset=") || kw.starts_with("align=") { + return Ok((true, c)); + } + } + + // Otherwise the first integer was trailed by something that + // didn't look like a memarg, so this must be the lane index. + Ok((false, c)) + } + + // Not an integer here? That must mean that this must be the memarg + // first followed by the trailing index. + None => Ok((true, c)), + })?; + Ok(LoadOrStoreLane { + memarg: if has_memarg { + MemArg::parse(parser, default_align)? + } else { + MemArg { + align: default_align, + offset: 0, + memory: Index::Num(0, parser.prev_span()), + } + }, + lane: LaneArg::parse(parser)?, + }) + } +} + +/// Extra data associated with the `call_indirect` instruction. +#[derive(Debug)] +pub struct CallIndirect<'a> { + /// The table that this call is going to be indexing. + pub table: Index<'a>, + /// The type signature that this `call_indirect` instruction is using. 
+ pub ty: TypeUse<'a, FunctionType<'a>>, +} + +impl<'a> Parse<'a> for CallIndirect<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let prev_span = parser.prev_span(); + let table: Option<_> = parser.parse()?; + let ty = parser.parse::<TypeUse<'a, FunctionTypeNoNames<'a>>>()?; + Ok(CallIndirect { + table: table.unwrap_or(Index::Num(0, prev_span)), + ty: ty.into(), + }) + } +} + +/// Extra data associated with the `table.init` instruction +#[derive(Debug)] +pub struct TableInit<'a> { + /// The index of the table we're copying into. + pub table: Index<'a>, + /// The index of the element segment we're copying into a table. + pub elem: Index<'a>, +} + +impl<'a> Parse<'a> for TableInit<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let prev_span = parser.prev_span(); + let (elem, table) = if parser.peek2::<Index>() { + let table = parser.parse()?; + (parser.parse()?, table) + } else { + (parser.parse()?, Index::Num(0, prev_span)) + }; + Ok(TableInit { table, elem }) + } +} + +/// Extra data associated with the `table.copy` instruction. +#[derive(Debug)] +pub struct TableCopy<'a> { + /// The index of the destination table to copy into. + pub dst: Index<'a>, + /// The index of the source table to copy from. + pub src: Index<'a>, +} + +impl<'a> Parse<'a> for TableCopy<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let (dst, src) = match parser.parse::<Option<_>>()? { + Some(dst) => (dst, parser.parse()?), + None => ( + Index::Num(0, parser.prev_span()), + Index::Num(0, parser.prev_span()), + ), + }; + Ok(TableCopy { dst, src }) + } +} + +/// Extra data associated with unary table instructions. +#[derive(Debug)] +pub struct TableArg<'a> { + /// The index of the table argument. + pub dst: Index<'a>, +} + +impl<'a> Parse<'a> for TableArg<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let dst = if let Some(dst) = parser.parse()? 
{ + dst + } else { + Index::Num(0, parser.prev_span()) + }; + Ok(TableArg { dst }) + } +} + +/// Extra data associated with unary memory instructions. +#[derive(Debug)] +pub struct MemoryArg<'a> { + /// The index of the memory space. + pub mem: Index<'a>, +} + +impl<'a> Parse<'a> for MemoryArg<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mem = if let Some(mem) = parser.parse()? { + mem + } else { + Index::Num(0, parser.prev_span()) + }; + Ok(MemoryArg { mem }) + } +} + +/// Extra data associated with the `memory.init` instruction +#[derive(Debug)] +pub struct MemoryInit<'a> { + /// The index of the data segment we're copying into memory. + pub data: Index<'a>, + /// The index of the memory we're copying into, + pub mem: Index<'a>, +} + +impl<'a> Parse<'a> for MemoryInit<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let prev_span = parser.prev_span(); + let (data, mem) = if parser.peek2::<Index>() { + let memory = parser.parse()?; + (parser.parse()?, memory) + } else { + (parser.parse()?, Index::Num(0, prev_span)) + }; + Ok(MemoryInit { data, mem }) + } +} + +/// Extra data associated with the `memory.copy` instruction +#[derive(Debug)] +pub struct MemoryCopy<'a> { + /// The index of the memory we're copying from. + pub src: Index<'a>, + /// The index of the memory we're copying to. + pub dst: Index<'a>, +} + +impl<'a> Parse<'a> for MemoryCopy<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let (src, dst) = match parser.parse()? { + Some(dst) => (parser.parse()?, dst), + None => ( + Index::Num(0, parser.prev_span()), + Index::Num(0, parser.prev_span()), + ), + }; + Ok(MemoryCopy { src, dst }) + } +} + +/// Extra data associated with the `struct.get/set` instructions +#[derive(Debug)] +pub struct StructAccess<'a> { + /// The index of the struct type we're accessing. 
+ pub r#struct: Index<'a>, + /// The index of the field of the struct we're accessing + pub field: Index<'a>, +} + +impl<'a> Parse<'a> for StructAccess<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(StructAccess { + r#struct: parser.parse()?, + field: parser.parse()?, + }) + } +} + +/// Extra data associated with the `array.fill` instruction +#[derive(Debug)] +pub struct ArrayFill<'a> { + /// The index of the array type we're filling. + pub array: Index<'a>, +} + +impl<'a> Parse<'a> for ArrayFill<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ArrayFill { + array: parser.parse()?, + }) + } +} + +/// Extra data associated with the `array.copy` instruction +#[derive(Debug)] +pub struct ArrayCopy<'a> { + /// The index of the array type we're copying to. + pub dest_array: Index<'a>, + /// The index of the array type we're copying from. + pub src_array: Index<'a>, +} + +impl<'a> Parse<'a> for ArrayCopy<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ArrayCopy { + dest_array: parser.parse()?, + src_array: parser.parse()?, + }) + } +} + +/// Extra data associated with the `array.init_[data/elem]` instruction +#[derive(Debug)] +pub struct ArrayInit<'a> { + /// The index of the array type we're initializing. + pub array: Index<'a>, + /// The index of the data or elem segment we're reading from. + pub segment: Index<'a>, +} + +impl<'a> Parse<'a> for ArrayInit<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ArrayInit { + array: parser.parse()?, + segment: parser.parse()?, + }) + } +} + +/// Extra data associated with the `array.new_fixed` instruction +#[derive(Debug)] +pub struct ArrayNewFixed<'a> { + /// The index of the array type we're accessing. + pub array: Index<'a>, + /// The amount of values to initialize the array with. 
+ pub length: u32, +} + +impl<'a> Parse<'a> for ArrayNewFixed<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ArrayNewFixed { + array: parser.parse()?, + length: parser.parse()?, + }) + } +} + +/// Extra data associated with the `array.new_data` instruction +#[derive(Debug)] +pub struct ArrayNewData<'a> { + /// The index of the array type we're accessing. + pub array: Index<'a>, + /// The data segment to initialize from. + pub data_idx: Index<'a>, +} + +impl<'a> Parse<'a> for ArrayNewData<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ArrayNewData { + array: parser.parse()?, + data_idx: parser.parse()?, + }) + } +} + +/// Extra data associated with the `array.new_elem` instruction +#[derive(Debug)] +pub struct ArrayNewElem<'a> { + /// The index of the array type we're accessing. + pub array: Index<'a>, + /// The elem segment to initialize from. + pub elem_idx: Index<'a>, +} + +impl<'a> Parse<'a> for ArrayNewElem<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(ArrayNewElem { + array: parser.parse()?, + elem_idx: parser.parse()?, + }) + } +} + +/// Extra data associated with the `ref.cast` instruction +#[derive(Debug)] +pub struct RefCast<'a> { + /// The type to cast to. + pub r#type: RefType<'a>, +} + +impl<'a> Parse<'a> for RefCast<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(RefCast { + r#type: parser.parse()?, + }) + } +} + +/// Extra data associated with the `ref.test` instruction +#[derive(Debug)] +pub struct RefTest<'a> { + /// The type to test for. + pub r#type: RefType<'a>, +} + +impl<'a> Parse<'a> for RefTest<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(RefTest { + r#type: parser.parse()?, + }) + } +} + +/// Extra data associated with the `br_on_cast` instruction +#[derive(Debug)] +pub struct BrOnCast<'a> { + /// The label to branch to. + pub label: Index<'a>, + /// The type we're casting from. + pub from_type: RefType<'a>, + /// The type we're casting to. 
+ pub to_type: RefType<'a>, +} + +impl<'a> Parse<'a> for BrOnCast<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(BrOnCast { + label: parser.parse()?, + from_type: parser.parse()?, + to_type: parser.parse()?, + }) + } +} + +/// Extra data associated with the `br_on_cast_fail` instruction +#[derive(Debug)] +pub struct BrOnCastFail<'a> { + /// The label to branch to. + pub label: Index<'a>, + /// The type we're casting from. + pub from_type: RefType<'a>, + /// The type we're casting to. + pub to_type: RefType<'a>, +} + +impl<'a> Parse<'a> for BrOnCastFail<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(BrOnCastFail { + label: parser.parse()?, + from_type: parser.parse()?, + to_type: parser.parse()?, + }) + } +} + +/// Different ways to specify a `v128.const` instruction +#[derive(Debug)] +#[allow(missing_docs)] +pub enum V128Const { + I8x16([i8; 16]), + I16x8([i16; 8]), + I32x4([i32; 4]), + I64x2([i64; 2]), + F32x4([Float32; 4]), + F64x2([Float64; 2]), +} + +impl V128Const { + /// Returns the raw little-endian byte sequence used to represent this + /// `v128` constant. + /// + /// This is typically suitable for encoding as the payload of the + /// `v128.const` instruction. 
+ #[rustfmt::skip] + pub fn to_le_bytes(&self) -> [u8; 16] { + match self { + V128Const::I8x16(arr) => [ + arr[0] as u8, + arr[1] as u8, + arr[2] as u8, + arr[3] as u8, + arr[4] as u8, + arr[5] as u8, + arr[6] as u8, + arr[7] as u8, + arr[8] as u8, + arr[9] as u8, + arr[10] as u8, + arr[11] as u8, + arr[12] as u8, + arr[13] as u8, + arr[14] as u8, + arr[15] as u8, + ], + V128Const::I16x8(arr) => { + let a1 = arr[0].to_le_bytes(); + let a2 = arr[1].to_le_bytes(); + let a3 = arr[2].to_le_bytes(); + let a4 = arr[3].to_le_bytes(); + let a5 = arr[4].to_le_bytes(); + let a6 = arr[5].to_le_bytes(); + let a7 = arr[6].to_le_bytes(); + let a8 = arr[7].to_le_bytes(); + [ + a1[0], a1[1], + a2[0], a2[1], + a3[0], a3[1], + a4[0], a4[1], + a5[0], a5[1], + a6[0], a6[1], + a7[0], a7[1], + a8[0], a8[1], + ] + } + V128Const::I32x4(arr) => { + let a1 = arr[0].to_le_bytes(); + let a2 = arr[1].to_le_bytes(); + let a3 = arr[2].to_le_bytes(); + let a4 = arr[3].to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], + a2[0], a2[1], a2[2], a2[3], + a3[0], a3[1], a3[2], a3[3], + a4[0], a4[1], a4[2], a4[3], + ] + } + V128Const::I64x2(arr) => { + let a1 = arr[0].to_le_bytes(); + let a2 = arr[1].to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], a1[4], a1[5], a1[6], a1[7], + a2[0], a2[1], a2[2], a2[3], a2[4], a2[5], a2[6], a2[7], + ] + } + V128Const::F32x4(arr) => { + let a1 = arr[0].bits.to_le_bytes(); + let a2 = arr[1].bits.to_le_bytes(); + let a3 = arr[2].bits.to_le_bytes(); + let a4 = arr[3].bits.to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], + a2[0], a2[1], a2[2], a2[3], + a3[0], a3[1], a3[2], a3[3], + a4[0], a4[1], a4[2], a4[3], + ] + } + V128Const::F64x2(arr) => { + let a1 = arr[0].bits.to_le_bytes(); + let a2 = arr[1].bits.to_le_bytes(); + [ + a1[0], a1[1], a1[2], a1[3], a1[4], a1[5], a1[6], a1[7], + a2[0], a2[1], a2[2], a2[3], a2[4], a2[5], a2[6], a2[7], + ] + } + } + } +} + +impl<'a> Parse<'a> for V128Const { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); 
+ if l.peek::<kw::i8x16>() { + parser.parse::<kw::i8x16>()?; + Ok(V128Const::I8x16([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i16x8>() { + parser.parse::<kw::i16x8>()?; + Ok(V128Const::I16x8([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i32x4>() { + parser.parse::<kw::i32x4>()?; + Ok(V128Const::I32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i64x2>() { + parser.parse::<kw::i64x2>()?; + Ok(V128Const::I64x2([parser.parse()?, parser.parse()?])) + } else if l.peek::<kw::f32x4>() { + parser.parse::<kw::f32x4>()?; + Ok(V128Const::F32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::f64x2>() { + parser.parse::<kw::f64x2>()?; + Ok(V128Const::F64x2([parser.parse()?, parser.parse()?])) + } else { + Err(l.error()) + } + } +} + +/// Lanes being shuffled in the `i8x16.shuffle` instruction +#[derive(Debug)] +pub struct I8x16Shuffle { + #[allow(missing_docs)] + pub lanes: [u8; 16], +} + +impl<'a> Parse<'a> for I8x16Shuffle { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(I8x16Shuffle { + lanes: [ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ], + }) + } +} + +/// Payload of the `select` instructions +#[derive(Debug)] +pub struct SelectTypes<'a> { + 
#[allow(missing_docs)] + pub tys: Option<Vec<ValType<'a>>>, +} + +impl<'a> Parse<'a> for SelectTypes<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut found = false; + let mut list = Vec::new(); + while parser.peek2::<kw::result>() { + found = true; + parser.parens(|p| { + p.parse::<kw::result>()?; + while !p.is_empty() { + list.push(p.parse()?); + } + Ok(()) + })?; + } + Ok(SelectTypes { + tys: if found { Some(list) } else { None }, + }) + } +} diff --git a/third_party/rust/wast/src/core/func.rs b/third_party/rust/wast/src/core/func.rs new file mode 100644 index 0000000000..84abdf8578 --- /dev/null +++ b/third_party/rust/wast/src/core/func.rs @@ -0,0 +1,121 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, NameAnnotation, Span}; + +/// A WebAssembly function to be inserted into a module. +/// +/// This is a member of both the function and code sections. +#[derive(Debug)] +pub struct Func<'a> { + /// Where this `func` was defined. + pub span: Span, + /// An identifier that this function is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// What kind of function this is, be it an inline-defined or imported + /// function. + pub kind: FuncKind<'a>, + /// The type that this function will have. + pub ty: TypeUse<'a, FunctionType<'a>>, +} + +/// Possible ways to define a function in the text format. +#[derive(Debug)] +pub enum FuncKind<'a> { + /// A function which is actually defined as an import, such as: + /// + /// ```text + /// (func (type 3) (import "foo" "bar")) + /// ``` + Import(InlineImport<'a>), + + /// Almost all functions, those defined inline in a wasm module. 
+ Inline { + /// The list of locals, if any, for this function. + locals: Vec<Local<'a>>, + + /// The instructions of the function. + expression: Expression<'a>, + }, +} + +impl<'a> Parse<'a> for Func<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::func>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + let (ty, kind) = if let Some(import) = parser.parse()? { + (parser.parse()?, FuncKind::Import(import)) + } else { + let ty = parser.parse()?; + let locals = Local::parse_remainder(parser)?; + ( + ty, + FuncKind::Inline { + locals, + expression: parser.parse()?, + }, + ) + }; + + Ok(Func { + span, + id, + name, + exports, + ty, + kind, + }) + } +} + +/// A local for a `func` or `let` instruction. +/// +/// Each local has an optional identifier for name resolution, an optional name +/// for the custom `name` section, and a value type. +#[derive(Debug)] +pub struct Local<'a> { + /// An identifier that this local is resolved with (optionally) for name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this local stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// The value type of this local. 
+ pub ty: ValType<'a>, +} + +impl<'a> Local<'a> { + pub(crate) fn parse_remainder(parser: Parser<'a>) -> Result<Vec<Local<'a>>> { + let mut locals = Vec::new(); + while parser.peek2::<kw::local>() { + parser.parens(|p| { + p.parse::<kw::local>()?; + if p.is_empty() { + return Ok(()); + } + let id: Option<_> = p.parse()?; + let name: Option<_> = p.parse()?; + let ty = p.parse()?; + let parse_more = id.is_none() && name.is_none(); + locals.push(Local { id, name, ty }); + while parse_more && !p.is_empty() { + locals.push(Local { + id: None, + name: None, + ty: p.parse()?, + }); + } + Ok(()) + })?; + } + Ok(locals) + } +} diff --git a/third_party/rust/wast/src/core/global.rs b/third_party/rust/wast/src/core/global.rs new file mode 100644 index 0000000000..b8ce287fd8 --- /dev/null +++ b/third_party/rust/wast/src/core/global.rs @@ -0,0 +1,59 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, NameAnnotation, Span}; + +/// A WebAssembly global in a module +#[derive(Debug)] +pub struct Global<'a> { + /// Where this `global` was defined. + pub span: Span, + /// An optional name to reference this global by + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// The type of this global, both its value type and whether it's mutable. + pub ty: GlobalType<'a>, + /// What kind of global this defined as. + pub kind: GlobalKind<'a>, +} + +/// Different kinds of globals that can be defined in a module. 
+#[derive(Debug)] +pub enum GlobalKind<'a> { + /// A global which is actually defined as an import, such as: + /// + /// ```text + /// (global i32 (import "foo" "bar")) + /// ``` + Import(InlineImport<'a>), + + /// A global defined inline in the module itself + Inline(Expression<'a>), +} + +impl<'a> Parse<'a> for Global<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::global>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + let (ty, kind) = if let Some(import) = parser.parse()? { + (parser.parse()?, GlobalKind::Import(import)) + } else { + (parser.parse()?, GlobalKind::Inline(parser.parse()?)) + }; + Ok(Global { + span, + id, + name, + exports, + ty, + kind, + }) + } +} diff --git a/third_party/rust/wast/src/core/import.rs b/third_party/rust/wast/src/core/import.rs new file mode 100644 index 0000000000..e44057f72f --- /dev/null +++ b/third_party/rust/wast/src/core/import.rs @@ -0,0 +1,158 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::{Id, NameAnnotation, Span}; + +/// An `import` statement and entry in a WebAssembly module. +#[derive(Debug, Clone)] +pub struct Import<'a> { + /// Where this `import` was defined + pub span: Span, + /// The module that this statement is importing from + pub module: &'a str, + /// The name of the field in the module this statement imports from. + pub field: &'a str, + /// The item that's being imported. + pub item: ItemSig<'a>, +} + +impl<'a> Parse<'a> for Import<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::import>()?.0; + let module = parser.parse()?; + let field = parser.parse()?; + let item = parser.parens(|p| p.parse())?; + Ok(Import { + span, + module, + field, + item, + }) + } +} + +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub struct ItemSig<'a> { + /// Where this item is defined in the source. 
+ pub span: Span, + /// An optional identifier used during name resolution to refer to this item + /// from the rest of the module. + pub id: Option<Id<'a>>, + /// An optional name which, for functions, will be stored in the + /// custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// What kind of item this is. + pub kind: ItemKind<'a>, +} + +#[derive(Debug, Clone)] +#[allow(missing_docs)] +pub enum ItemKind<'a> { + Func(TypeUse<'a, FunctionType<'a>>), + Table(TableType<'a>), + Memory(MemoryType), + Global(GlobalType<'a>), + Tag(TagType<'a>), +} + +impl<'a> Parse<'a> for ItemSig<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + let span = parser.parse::<kw::func>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: parser.parse()?, + kind: ItemKind::Func(parser.parse()?), + }) + } else if l.peek::<kw::table>() { + let span = parser.parse::<kw::table>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Table(parser.parse()?), + }) + } else if l.peek::<kw::memory>() { + let span = parser.parse::<kw::memory>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Memory(parser.parse()?), + }) + } else if l.peek::<kw::global>() { + let span = parser.parse::<kw::global>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Global(parser.parse()?), + }) + } else if l.peek::<kw::tag>() { + let span = parser.parse::<kw::tag>()?.0; + Ok(ItemSig { + span, + id: parser.parse()?, + name: None, + kind: ItemKind::Tag(parser.parse()?), + }) + } else { + Err(l.error()) + } + } +} + +/// A listing of a inline `(import "foo")` statement. +/// +/// Note that when parsing this type it is somewhat unconventional that it +/// parses its own surrounding parentheses. 
This is typically an optional type, +/// so it's so far been a bit nicer to have the optionality handled through +/// `Peek` rather than `Option<T>`. +#[derive(Debug, Copy, Clone)] +#[allow(missing_docs)] +pub struct InlineImport<'a> { + pub module: &'a str, + pub field: &'a str, +} + +impl<'a> Parse<'a> for InlineImport<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|p| { + p.parse::<kw::import>()?; + Ok(InlineImport { + module: p.parse()?, + field: p.parse()?, + }) + }) + } +} + +impl Peek for InlineImport<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let cursor = match cursor.lparen() { + Some(cursor) => cursor, + None => return false, + }; + let cursor = match cursor.keyword() { + Some(("import", cursor)) => cursor, + _ => return false, + }; + let cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => return false, + }; + let cursor = match cursor.string() { + Some((_, cursor)) => cursor, + None => return false, + }; + + cursor.rparen().is_some() + } + + fn display() -> &'static str { + "inline import" + } +} diff --git a/third_party/rust/wast/src/core/memory.rs b/third_party/rust/wast/src/core/memory.rs new file mode 100644 index 0000000000..ed845e055d --- /dev/null +++ b/third_party/rust/wast/src/core/memory.rs @@ -0,0 +1,279 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Lookahead1, Parse, Parser, Peek, Result}; +use crate::token::*; + +/// A defined WebAssembly memory instance inside of a module. +#[derive(Debug)] +pub struct Memory<'a> { + /// Where this `memory` was defined + pub span: Span, + /// An optional name to refer to this memory by. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// How this memory is defined in the module. 
+ pub kind: MemoryKind<'a>, +} + +/// Different syntactical ways a memory can be defined in a module. +#[derive(Debug)] +pub enum MemoryKind<'a> { + /// This memory is actually an inlined import definition. + #[allow(missing_docs)] + Import { + import: InlineImport<'a>, + ty: MemoryType, + }, + + /// A typical memory definition which simply says the limits of the memory + Normal(MemoryType), + + /// The data of this memory, starting from 0, explicitly listed + Inline { + /// Whether or not this will be creating a 32-bit memory + is_32: bool, + /// The inline data specified for this memory + data: Vec<DataVal<'a>>, + }, +} + +impl<'a> Parse<'a> for Memory<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::memory>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + // Afterwards figure out which style this is, either: + // + // * `(import "a" "b") limits` + // * `(data ...)` + // * `limits` + let mut l = parser.lookahead1(); + let kind = if let Some(import) = parser.parse()? { + MemoryKind::Import { + import, + ty: parser.parse()?, + } + } else if l.peek::<LParen>() || parser.peek2::<LParen>() { + let is_32 = if parser.parse::<Option<kw::i32>>()?.is_some() { + true + } else { + parser.parse::<Option<kw::i64>>()?.is_none() + }; + let data = parser.parens(|parser| { + parser.parse::<kw::data>()?; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + Ok(data) + })?; + MemoryKind::Inline { data, is_32 } + } else if l.peek::<u32>() || l.peek::<kw::i32>() || l.peek::<kw::i64>() { + MemoryKind::Normal(parser.parse()?) + } else { + return Err(l.error()); + }; + Ok(Memory { + span, + id, + name, + exports, + kind, + }) + } +} + +/// A `data` directive in a WebAssembly module. 
+#[derive(Debug)] +pub struct Data<'a> { + /// Where this `data` was defined + pub span: Span, + + /// The optional name of this data segment + pub id: Option<Id<'a>>, + + /// An optional name for this data stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + + /// Whether this data segment is passive or active + pub kind: DataKind<'a>, + + /// Bytes for this `Data` segment, viewed as the concatenation of all the + /// contained slices. + pub data: Vec<DataVal<'a>>, +} + +/// Different kinds of data segments, either passive or active. +#[derive(Debug)] +pub enum DataKind<'a> { + /// A passive data segment which isn't associated with a memory and is + /// referenced from various instructions. + Passive, + + /// An active data segment which is associated and loaded into a particular + /// memory on module instantiation. + Active { + /// The memory that this `Data` will be associated with. + memory: Index<'a>, + + /// Initial offset to load this data segment at + offset: Expression<'a>, + }, +} + +impl<'a> Parse<'a> for Data<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::data>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + + let kind = if parser.peek::<&[u8]>() { + DataKind::Passive + + // ... and otherwise we must be attached to a particular memory as well + // as having an initialization offset. + } else { + let memory = if parser.peek::<u32>() { + // FIXME: this is only here to accomodate + // proposals/threads/imports.wast at this current moment in + // time, this probably should get removed when the threads + // proposal is rebased on the current spec. + Index::Num(parser.parse()?, span) + } else if parser.peek2::<kw::memory>() { + parser.parens(|p| { + p.parse::<kw::memory>()?; + p.parse() + })? 
/// Different ways the value of a data segment can be defined.
#[derive(Debug)]
pub enum DataVal<'a> {
    /// Bytes borrowed directly from a string literal in the source text.
    String(&'a [u8]),
    /// Owned bytes, e.g. the little-endian encoding of a list of numeric
    /// values written as `(i32 1 2 3)`.
    Integral(Vec<u8>),
}

impl DataVal<'_> {
    /// Returns the bytes of this value regardless of how they are stored.
    fn bytes(&self) -> &[u8] {
        match self {
            DataVal::String(s) => s,
            DataVal::Integral(s) => s,
        }
    }

    /// Returns the length, in bytes, of the memory used to represent this data
    /// value.
    pub fn len(&self) -> usize {
        self.bytes().len()
    }

    /// Returns `true` if this data value contributes no bytes at all.
    pub fn is_empty(&self) -> bool {
        self.bytes().is_empty()
    }

    /// Pushes the value of this data value onto the provided list of bytes.
    pub fn push_onto(&self, dst: &mut Vec<u8>) {
        dst.extend_from_slice(self.bytes());
    }
}
+#[derive(Debug)] +pub struct Module<'a> { + /// Where this `module` was defined + pub span: Span, + /// An optional identifier this module is known by + pub id: Option<Id<'a>>, + /// An optional `@name` annotation for this module + pub name: Option<NameAnnotation<'a>>, + /// What kind of module this was parsed as. + pub kind: ModuleKind<'a>, +} + +/// The different kinds of ways to define a module. +#[derive(Debug)] +pub enum ModuleKind<'a> { + /// A module defined in the textual s-expression format. + Text(Vec<ModuleField<'a>>), + /// A module that had its raw binary bytes defined via the `binary` + /// directive. + Binary(Vec<&'a [u8]>), +} + +impl<'a> Module<'a> { + /// Performs a name resolution pass on this [`Module`], resolving all + /// symbolic names to indices. + /// + /// The WAT format contains a number of shorthands to make it easier to + /// write, such as inline exports, inline imports, inline type definitions, + /// etc. Additionally it allows using symbolic names such as `$foo` instead + /// of using indices. This module will postprocess an AST to remove all of + /// this syntactic sugar, preparing the AST for binary emission. This is + /// where expansion and name resolution happens. + /// + /// This function will mutate the AST of this [`Module`] and replace all + /// [`Index`](crate::token::Index) arguments with `Index::Num`. This will + /// also expand inline exports/imports listed on fields and handle various + /// other shorthands of the text format. + /// + /// If successful the AST was modified to be ready for binary encoding. A + /// [`Names`] structure is also returned so if you'd like to do your own + /// name lookups on the result you can do so as well. + /// + /// # Errors + /// + /// If an error happens during resolution, such a name resolution error or + /// items are found in the wrong order, then an error is returned. 
+ pub fn resolve(&mut self) -> std::result::Result<Names<'a>, crate::Error> { + let names = match &mut self.kind { + ModuleKind::Text(fields) => crate::core::resolve::resolve(fields)?, + ModuleKind::Binary(_blobs) => Default::default(), + }; + Ok(names) + } + + /// Encodes this [`Module`] to its binary form. + /// + /// This function will take the textual representation in [`Module`] and + /// perform all steps necessary to convert it to a binary WebAssembly + /// module, suitable for writing to a `*.wasm` file. This function may + /// internally modify the [`Module`], for example: + /// + /// * Name resolution is performed to ensure that `Index::Id` isn't present + /// anywhere in the AST. + /// + /// * Inline shorthands such as imports/exports/types are all expanded to be + /// dedicated fields of the module. + /// + /// * Module fields may be shuffled around to preserve index ordering from + /// expansions. + /// + /// After all of this expansion has happened the module will be converted to + /// its binary form and returned as a `Vec<u8>`. This is then suitable to + /// hand off to other wasm runtimes and such. + /// + /// # Errors + /// + /// This function can return an error for name resolution errors and other + /// expansion-related errors. 
+ pub fn encode(&mut self) -> std::result::Result<Vec<u8>, crate::Error> { + self.resolve()?; + Ok(match &self.kind { + ModuleKind::Text(fields) => crate::core::binary::encode(&self.id, &self.name, fields), + ModuleKind::Binary(blobs) => blobs.iter().flat_map(|b| b.iter().cloned()).collect(), + }) + } + + pub(crate) fn validate(&self, parser: Parser<'_>) -> Result<()> { + let mut starts = 0; + if let ModuleKind::Text(fields) = &self.kind { + for item in fields.iter() { + if let ModuleField::Start(_) = item { + starts += 1; + } + } + } + if starts > 1 { + return Err(parser.error("multiple start sections found")); + } + Ok(()) + } +} + +impl<'a> Parse<'a> for Module<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let _r = parser.register_annotation("custom"); + let span = parser.parse::<kw::module>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + + let kind = if parser.peek::<kw::binary>() { + parser.parse::<kw::binary>()?; + let mut data = Vec::new(); + while !parser.is_empty() { + data.push(parser.parse()?); + } + ModuleKind::Binary(data) + } else { + ModuleKind::Text(ModuleField::parse_remaining(parser)?) + }; + Ok(Module { + span, + id, + name, + kind, + }) + } +} + +/// A listing of all possible fields that can make up a WebAssembly module. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub enum ModuleField<'a> { + Type(Type<'a>), + Rec(Rec<'a>), + Import(Import<'a>), + Func(Func<'a>), + Table(Table<'a>), + Memory(Memory<'a>), + Global(Global<'a>), + Export(Export<'a>), + Start(Index<'a>), + Elem(Elem<'a>), + Data(Data<'a>), + Tag(Tag<'a>), + Custom(Custom<'a>), +} + +impl<'a> ModuleField<'a> { + pub(crate) fn parse_remaining(parser: Parser<'a>) -> Result<Vec<ModuleField>> { + let mut fields = Vec::new(); + while !parser.is_empty() { + fields.push(parser.parens(ModuleField::parse)?); + } + Ok(fields) + } +} + +impl<'a> Parse<'a> for ModuleField<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<Type<'a>>() { + return Ok(ModuleField::Type(parser.parse()?)); + } + if parser.peek::<kw::rec>() { + return Ok(ModuleField::Rec(parser.parse()?)); + } + if parser.peek::<kw::import>() { + return Ok(ModuleField::Import(parser.parse()?)); + } + if parser.peek::<kw::func>() { + return Ok(ModuleField::Func(parser.parse()?)); + } + if parser.peek::<kw::table>() { + return Ok(ModuleField::Table(parser.parse()?)); + } + if parser.peek::<kw::memory>() { + return Ok(ModuleField::Memory(parser.parse()?)); + } + if parser.peek::<kw::global>() { + return Ok(ModuleField::Global(parser.parse()?)); + } + if parser.peek::<kw::export>() { + return Ok(ModuleField::Export(parser.parse()?)); + } + if parser.peek::<kw::start>() { + parser.parse::<kw::start>()?; + return Ok(ModuleField::Start(parser.parse()?)); + } + if parser.peek::<kw::elem>() { + return Ok(ModuleField::Elem(parser.parse()?)); + } + if parser.peek::<kw::data>() { + return Ok(ModuleField::Data(parser.parse()?)); + } + if parser.peek::<kw::tag>() { + return Ok(ModuleField::Tag(parser.parse()?)); + } + if parser.peek::<annotation::custom>() { + return Ok(ModuleField::Custom(parser.parse()?)); + } + Err(parser.error("expected valid module field")) + } +} diff --git a/third_party/rust/wast/src/core/resolve/deinline_import_export.rs 
b/third_party/rust/wast/src/core/resolve/deinline_import_export.rs new file mode 100644 index 0000000000..c338407182 --- /dev/null +++ b/third_party/rust/wast/src/core/resolve/deinline_import_export.rs @@ -0,0 +1,231 @@ +use crate::core::*; +use crate::gensym; +use crate::token::{Id, Index, Span}; +use std::mem; + +pub fn run(fields: &mut Vec<ModuleField>) { + for mut item in mem::take(fields) { + match &mut item { + ModuleField::Func(f) => { + for name in f.exports.names.drain(..) { + fields.push(export(f.span, name, ExportKind::Func, &mut f.id)); + } + match f.kind { + FuncKind::Import(import) => { + item = ModuleField::Import(Import { + span: f.span, + module: import.module, + field: import.field, + item: ItemSig { + span: f.span, + id: f.id, + name: f.name, + kind: ItemKind::Func(f.ty.clone()), + }, + }); + } + FuncKind::Inline { .. } => {} + } + } + + ModuleField::Memory(m) => { + for name in m.exports.names.drain(..) { + fields.push(export(m.span, name, ExportKind::Memory, &mut m.id)); + } + match m.kind { + MemoryKind::Import { import, ty } => { + item = ModuleField::Import(Import { + span: m.span, + module: import.module, + field: import.field, + item: ItemSig { + span: m.span, + id: m.id, + name: None, + kind: ItemKind::Memory(ty), + }, + }); + } + // If data is defined inline insert an explicit `data` module + // field here instead, switching this to a `Normal` memory. + MemoryKind::Inline { is_32, ref data } => { + let len = data.iter().map(|l| l.len()).sum::<usize>() as u32; + let pages = (len + page_size() - 1) / page_size(); + let kind = MemoryKind::Normal(if is_32 { + MemoryType::B32 { + limits: Limits { + min: pages, + max: Some(pages), + }, + shared: false, + } + } else { + MemoryType::B64 { + limits: Limits64 { + min: u64::from(pages), + max: Some(u64::from(pages)), + }, + shared: false, + } + }); + let data = match mem::replace(&mut m.kind, kind) { + MemoryKind::Inline { data, .. 
} => data, + _ => unreachable!(), + }; + let id = gensym::fill(m.span, &mut m.id); + fields.push(ModuleField::Data(Data { + span: m.span, + id: None, + name: None, + kind: DataKind::Active { + memory: Index::Id(id), + offset: Expression { + instrs: Box::new([if is_32 { + Instruction::I32Const(0) + } else { + Instruction::I64Const(0) + }]), + }, + }, + data, + })); + } + + MemoryKind::Normal(_) => {} + } + } + + ModuleField::Table(t) => { + for name in t.exports.names.drain(..) { + fields.push(export(t.span, name, ExportKind::Table, &mut t.id)); + } + match &mut t.kind { + TableKind::Import { import, ty } => { + item = ModuleField::Import(Import { + span: t.span, + module: import.module, + field: import.field, + item: ItemSig { + span: t.span, + id: t.id, + name: None, + kind: ItemKind::Table(*ty), + }, + }); + } + // If data is defined inline insert an explicit `data` module + // field here instead, switching this to a `Normal` memory. + TableKind::Inline { payload, elem } => { + let len = match payload { + ElemPayload::Indices(v) => v.len(), + ElemPayload::Exprs { exprs, .. } => exprs.len(), + }; + let kind = TableKind::Normal { + ty: TableType { + limits: Limits { + min: len as u32, + max: Some(len as u32), + }, + elem: *elem, + }, + init_expr: None, + }; + let payload = match mem::replace(&mut t.kind, kind) { + TableKind::Inline { payload, .. } => payload, + _ => unreachable!(), + }; + let id = gensym::fill(t.span, &mut t.id); + fields.push(ModuleField::Elem(Elem { + span: t.span, + id: None, + name: None, + kind: ElemKind::Active { + table: Index::Id(id), + offset: Expression { + instrs: Box::new([Instruction::I32Const(0)]), + }, + }, + payload, + })); + } + + TableKind::Normal { .. } => {} + } + } + + ModuleField::Global(g) => { + for name in g.exports.names.drain(..) 
{ + fields.push(export(g.span, name, ExportKind::Global, &mut g.id)); + } + match g.kind { + GlobalKind::Import(import) => { + item = ModuleField::Import(Import { + span: g.span, + module: import.module, + field: import.field, + item: ItemSig { + span: g.span, + id: g.id, + name: None, + kind: ItemKind::Global(g.ty), + }, + }); + } + GlobalKind::Inline { .. } => {} + } + } + + ModuleField::Tag(e) => { + for name in e.exports.names.drain(..) { + fields.push(export(e.span, name, ExportKind::Tag, &mut e.id)); + } + match e.kind { + TagKind::Import(import) => { + item = ModuleField::Import(Import { + span: e.span, + module: import.module, + field: import.field, + item: ItemSig { + span: e.span, + id: e.id, + name: None, + kind: ItemKind::Tag(e.ty.clone()), + }, + }); + } + TagKind::Inline { .. } => {} + } + } + + ModuleField::Import(_) + | ModuleField::Type(_) + | ModuleField::Rec(_) + | ModuleField::Export(_) + | ModuleField::Start(_) + | ModuleField::Elem(_) + | ModuleField::Data(_) + | ModuleField::Custom(_) => {} + } + + fields.push(item); + } + + fn page_size() -> u32 { + 1 << 16 + } +} + +fn export<'a>( + span: Span, + name: &'a str, + kind: ExportKind, + id: &mut Option<Id<'a>>, +) -> ModuleField<'a> { + let id = gensym::fill(span, id); + ModuleField::Export(Export { + span, + name, + kind, + item: Index::Id(id), + }) +} diff --git a/third_party/rust/wast/src/core/resolve/mod.rs b/third_party/rust/wast/src/core/resolve/mod.rs new file mode 100644 index 0000000000..7b3ba2b1e3 --- /dev/null +++ b/third_party/rust/wast/src/core/resolve/mod.rs @@ -0,0 +1,109 @@ +use crate::core::*; +use crate::token::Index; +use crate::{gensym, Error}; + +mod deinline_import_export; +mod names; +pub(crate) mod types; + +#[derive(PartialEq, Eq, Hash, Copy, Clone, Debug)] +pub enum Ns { + Func, + Table, + Global, + Memory, + Tag, + Type, +} + +pub fn resolve<'a>(fields: &mut Vec<ModuleField<'a>>) -> Result<Names<'a>, Error> { + // Ensure that each resolution of a module is 
deterministic in the names + // that it generates by resetting our thread-local symbol generator. + gensym::reset(); + + // First up, de-inline import/export annotations. + // + // This ensures we only have to deal with inline definitions and to + // calculate exports we only have to look for a particular kind of module + // field. + deinline_import_export::run(fields); + + // With a canonical form of imports make sure that imports are all listed + // first. + let mut last = None; + for field in fields.iter() { + match field { + ModuleField::Import(i) => { + if let Some(name) = last { + return Err(Error::new(i.span, format!("import after {}", name))); + } + } + ModuleField::Memory(_) => last = Some("memory"), + ModuleField::Func(_) => last = Some("function"), + ModuleField::Table(_) => last = Some("table"), + ModuleField::Global(_) => last = Some("global"), + _ => continue, + } + } + + // Expand all `TypeUse` annotations so all necessary `type` nodes are + // present in the AST. + types::expand(fields); + + // Perform name resolution over all `Index` items to resolve them all to + // indices instead of symbolic names. + let resolver = names::resolve(fields)?; + Ok(Names { resolver }) +} + +/// Representation of the results of name resolution for a module. +/// +/// This structure is returned from the +/// [`Module::resolve`](crate::core::Module::resolve) function and can be used +/// to resolve your own name arguments if you have any. +#[derive(Default)] +pub struct Names<'a> { + resolver: names::Resolver<'a>, +} + +impl<'a> Names<'a> { + /// Resolves `idx` within the function namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the function namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. 
+ pub fn resolve_func(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Func)?; + Ok(()) + } + + /// Resolves `idx` within the memory namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the memory namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. + pub fn resolve_memory(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Memory)?; + Ok(()) + } + + /// Resolves `idx` within the table namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the table namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. + pub fn resolve_table(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Table)?; + Ok(()) + } + + /// Resolves `idx` within the global namespace. + /// + /// If `idx` is a `Num`, it is ignored, but if it's an `Id` then it will be + /// looked up in the global namespace and converted to a `Num`. If the + /// `Id` is not defined then an error will be returned. + pub fn resolve_global(&self, idx: &mut Index<'a>) -> Result<(), Error> { + self.resolver.resolve(idx, Ns::Global)?; + Ok(()) + } +} diff --git a/third_party/rust/wast/src/core/resolve/names.rs b/third_party/rust/wast/src/core/resolve/names.rs new file mode 100644 index 0000000000..5ff2e8042a --- /dev/null +++ b/third_party/rust/wast/src/core/resolve/names.rs @@ -0,0 +1,737 @@ +use crate::core::resolve::Ns; +use crate::core::*; +use crate::names::{resolve_error, Namespace}; +use crate::token::{Id, Index}; +use crate::Error; + +pub fn resolve<'a>(fields: &mut Vec<ModuleField<'a>>) -> Result<Resolver<'a>, Error> { + let mut resolver = Resolver::default(); + resolver.process(fields)?; + Ok(resolver) +} + +/// Context structure used to perform name resolution. 
+#[derive(Default)] +pub struct Resolver<'a> { + // Namespaces within each module. Note that each namespace carries with it + // information about the signature of the item in that namespace. The + // signature is later used to synthesize the type of a module and inject + // type annotations if necessary. + funcs: Namespace<'a>, + globals: Namespace<'a>, + tables: Namespace<'a>, + memories: Namespace<'a>, + types: Namespace<'a>, + tags: Namespace<'a>, + datas: Namespace<'a>, + elems: Namespace<'a>, + fields: Namespace<'a>, + type_info: Vec<TypeInfo<'a>>, +} + +impl<'a> Resolver<'a> { + fn process(&mut self, fields: &mut Vec<ModuleField<'a>>) -> Result<(), Error> { + // Number everything in the module, recording what names correspond to + // what indices. + for field in fields.iter_mut() { + self.register(field)?; + } + + // Then we can replace all our `Index::Id` instances with `Index::Num` + // in the AST. Note that this also recurses into nested modules. + for field in fields.iter_mut() { + self.resolve_field(field)?; + } + Ok(()) + } + + fn register_type(&mut self, ty: &Type<'a>) -> Result<(), Error> { + match &ty.def { + // For GC structure types we need to be sure to populate the + // field namespace here as well. + // + // The field namespace is global, but the resolved indices + // are relative to the struct they are defined in + TypeDef::Struct(r#struct) => { + for (i, field) in r#struct.fields.iter().enumerate() { + if let Some(id) = field.id { + self.fields.register_specific(id, i as u32, "field")?; + } + } + } + + TypeDef::Array(_) | TypeDef::Func(_) => {} + } + + // Record function signatures as we see them to so we can + // generate errors for mismatches in references such as + // `call_indirect`. 
+ match &ty.def { + TypeDef::Func(f) => { + let params = f.params.iter().map(|p| p.2).collect(); + let results = f.results.clone(); + self.type_info.push(TypeInfo::Func { params, results }); + } + _ => self.type_info.push(TypeInfo::Other), + } + + self.types.register(ty.id, "type")?; + Ok(()) + } + + fn register(&mut self, item: &ModuleField<'a>) -> Result<(), Error> { + match item { + ModuleField::Import(i) => match &i.item.kind { + ItemKind::Func(_) => self.funcs.register(i.item.id, "func")?, + ItemKind::Memory(_) => self.memories.register(i.item.id, "memory")?, + ItemKind::Table(_) => self.tables.register(i.item.id, "table")?, + ItemKind::Global(_) => self.globals.register(i.item.id, "global")?, + ItemKind::Tag(_) => self.tags.register(i.item.id, "tag")?, + }, + ModuleField::Global(i) => self.globals.register(i.id, "global")?, + ModuleField::Memory(i) => self.memories.register(i.id, "memory")?, + ModuleField::Func(i) => self.funcs.register(i.id, "func")?, + ModuleField::Table(i) => self.tables.register(i.id, "table")?, + + ModuleField::Type(i) => { + return self.register_type(i); + } + ModuleField::Rec(i) => { + for ty in &i.types { + self.register_type(ty)?; + } + return Ok(()); + } + ModuleField::Elem(e) => self.elems.register(e.id, "elem")?, + ModuleField::Data(d) => self.datas.register(d.id, "data")?, + ModuleField::Tag(t) => self.tags.register(t.id, "tag")?, + + // These fields don't define any items in any index space. 
+ ModuleField::Export(_) | ModuleField::Start(_) | ModuleField::Custom(_) => { + return Ok(()) + } + }; + + Ok(()) + } + + fn resolve_type(&self, ty: &mut Type<'a>) -> Result<(), Error> { + match &mut ty.def { + TypeDef::Func(func) => func.resolve(self)?, + TypeDef::Struct(struct_) => { + for field in &mut struct_.fields { + self.resolve_storagetype(&mut field.ty)?; + } + } + TypeDef::Array(array) => self.resolve_storagetype(&mut array.ty)?, + } + if let Some(parent) = &mut ty.parent { + self.resolve(parent, Ns::Type)?; + } + Ok(()) + } + + fn resolve_field(&self, field: &mut ModuleField<'a>) -> Result<(), Error> { + match field { + ModuleField::Import(i) => { + self.resolve_item_sig(&mut i.item)?; + Ok(()) + } + + ModuleField::Type(ty) => self.resolve_type(ty), + ModuleField::Rec(rec) => { + for ty in &mut rec.types { + self.resolve_type(ty)?; + } + Ok(()) + } + + ModuleField::Func(f) => { + let (idx, inline) = self.resolve_type_use(&mut f.ty)?; + let n = match idx { + Index::Num(n, _) => *n, + Index::Id(_) => panic!("expected `Num`"), + }; + if let FuncKind::Inline { locals, expression } = &mut f.kind { + // Resolve (ref T) in locals + for local in locals.iter_mut() { + self.resolve_valtype(&mut local.ty)?; + } + + // Build a scope with a local namespace for the function + // body + let mut scope = Namespace::default(); + + // Parameters come first in the scope... + if let Some(inline) = &inline { + for (id, _, _) in inline.params.iter() { + scope.register(*id, "local")?; + } + } else if let Some(TypeInfo::Func { params, .. }) = + self.type_info.get(n as usize) + { + for _ in 0..params.len() { + scope.register(None, "local")?; + } + } + + // .. followed by locals themselves + for local in locals { + scope.register(local.id, "local")?; + } + + // Initialize the expression resolver with this scope + let mut resolver = ExprResolver::new(self, scope); + + // and then we can resolve the expression! 
+ resolver.resolve(expression)?; + + // specifically save the original `sig`, if it was present, + // because that's what we're using for local names. + f.ty.inline = inline; + } + Ok(()) + } + + ModuleField::Elem(e) => { + match &mut e.kind { + ElemKind::Active { table, offset } => { + self.resolve(table, Ns::Table)?; + self.resolve_expr(offset)?; + } + ElemKind::Passive { .. } | ElemKind::Declared { .. } => {} + } + match &mut e.payload { + ElemPayload::Indices(elems) => { + for idx in elems { + self.resolve(idx, Ns::Func)?; + } + } + ElemPayload::Exprs { exprs, ty } => { + for expr in exprs { + self.resolve_expr(expr)?; + } + self.resolve_heaptype(&mut ty.heap)?; + } + } + Ok(()) + } + + ModuleField::Data(d) => { + if let DataKind::Active { memory, offset } = &mut d.kind { + self.resolve(memory, Ns::Memory)?; + self.resolve_expr(offset)?; + } + Ok(()) + } + + ModuleField::Start(i) => { + self.resolve(i, Ns::Func)?; + Ok(()) + } + + ModuleField::Export(e) => { + self.resolve( + &mut e.item, + match e.kind { + ExportKind::Func => Ns::Func, + ExportKind::Table => Ns::Table, + ExportKind::Memory => Ns::Memory, + ExportKind::Global => Ns::Global, + ExportKind::Tag => Ns::Tag, + }, + )?; + Ok(()) + } + + ModuleField::Global(g) => { + self.resolve_valtype(&mut g.ty.ty)?; + if let GlobalKind::Inline(expr) = &mut g.kind { + self.resolve_expr(expr)?; + } + Ok(()) + } + + ModuleField::Tag(t) => { + match &mut t.ty { + TagType::Exception(ty) => { + self.resolve_type_use(ty)?; + } + } + Ok(()) + } + + ModuleField::Table(t) => { + if let TableKind::Normal { ty, init_expr } = &mut t.kind { + self.resolve_heaptype(&mut ty.elem.heap)?; + if let Some(init_expr) = init_expr { + self.resolve_expr(init_expr)?; + } + } + Ok(()) + } + + ModuleField::Memory(_) | ModuleField::Custom(_) => Ok(()), + } + } + + fn resolve_valtype(&self, ty: &mut ValType<'a>) -> Result<(), Error> { + match ty { + ValType::Ref(ty) => self.resolve_heaptype(&mut ty.heap)?, + _ => {} + } + Ok(()) + } + + fn 
resolve_reftype(&self, ty: &mut RefType<'a>) -> Result<(), Error> { + self.resolve_heaptype(&mut ty.heap) + } + + fn resolve_heaptype(&self, ty: &mut HeapType<'a>) -> Result<(), Error> { + match ty { + HeapType::Index(i) => { + self.resolve(i, Ns::Type)?; + } + _ => {} + } + Ok(()) + } + + fn resolve_storagetype(&self, ty: &mut StorageType<'a>) -> Result<(), Error> { + match ty { + StorageType::Val(ty) => self.resolve_valtype(ty)?, + _ => {} + } + Ok(()) + } + + fn resolve_item_sig(&self, item: &mut ItemSig<'a>) -> Result<(), Error> { + match &mut item.kind { + ItemKind::Func(t) | ItemKind::Tag(TagType::Exception(t)) => { + self.resolve_type_use(t)?; + } + ItemKind::Global(t) => self.resolve_valtype(&mut t.ty)?, + ItemKind::Table(t) => { + self.resolve_heaptype(&mut t.elem.heap)?; + } + ItemKind::Memory(_) => {} + } + Ok(()) + } + + fn resolve_type_use<'b, T>( + &self, + ty: &'b mut TypeUse<'a, T>, + ) -> Result<(&'b Index<'a>, Option<T>), Error> + where + T: TypeReference<'a>, + { + let idx = ty.index.as_mut().unwrap(); + self.resolve(idx, Ns::Type)?; + + // If the type was listed inline *and* it was specified via a type index + // we need to assert they're the same. + // + // Note that we resolve the type first to transform all names to + // indices to ensure that all the indices line up. 
+ if let Some(inline) = &mut ty.inline { + inline.resolve(self)?; + inline.check_matches(idx, self)?; + } + + Ok((idx, ty.inline.take())) + } + + fn resolve_expr(&self, expr: &mut Expression<'a>) -> Result<(), Error> { + ExprResolver::new(self, Namespace::default()).resolve(expr) + } + + pub fn resolve(&self, idx: &mut Index<'a>, ns: Ns) -> Result<u32, Error> { + match ns { + Ns::Func => self.funcs.resolve(idx, "func"), + Ns::Table => self.tables.resolve(idx, "table"), + Ns::Global => self.globals.resolve(idx, "global"), + Ns::Memory => self.memories.resolve(idx, "memory"), + Ns::Tag => self.tags.resolve(idx, "tag"), + Ns::Type => self.types.resolve(idx, "type"), + } + } +} + +#[derive(Debug, Clone)] +struct ExprBlock<'a> { + // The label of the block + label: Option<Id<'a>>, + // Whether this block pushed a new scope for resolving locals + pushed_scope: bool, +} + +struct ExprResolver<'a, 'b> { + resolver: &'b Resolver<'a>, + // Scopes tracks the local namespace and dynamically grows as we enter/exit + // `let` blocks + scopes: Vec<Namespace<'a>>, + blocks: Vec<ExprBlock<'a>>, +} + +impl<'a, 'b> ExprResolver<'a, 'b> { + fn new(resolver: &'b Resolver<'a>, initial_scope: Namespace<'a>) -> ExprResolver<'a, 'b> { + ExprResolver { + resolver, + scopes: vec![initial_scope], + blocks: Vec::new(), + } + } + + fn resolve(&mut self, expr: &mut Expression<'a>) -> Result<(), Error> { + for instr in expr.instrs.iter_mut() { + self.resolve_instr(instr)?; + } + Ok(()) + } + + fn resolve_block_type(&mut self, bt: &mut BlockType<'a>) -> Result<(), Error> { + // If the index is specified on this block type then that's the source + // of resolution and the resolver step here will verify the inline type + // matches. Note that indexes may come from the source text itself but + // may also come from being injected as part of the type expansion phase + // of resolution. + // + // If no type is present then that means that the inline type is not + // present or has 0-1 results. 
In that case the nested value types are + // resolved, if they're there, to get encoded later on. + if bt.ty.index.is_some() { + self.resolver.resolve_type_use(&mut bt.ty)?; + } else if let Some(inline) = &mut bt.ty.inline { + inline.resolve(self.resolver)?; + } + + Ok(()) + } + + fn resolve_instr(&mut self, instr: &mut Instruction<'a>) -> Result<(), Error> { + use Instruction::*; + + if let Some(m) = instr.memarg_mut() { + self.resolver.resolve(&mut m.memory, Ns::Memory)?; + } + + match instr { + MemorySize(i) | MemoryGrow(i) | MemoryFill(i) | MemoryDiscard(i) => { + self.resolver.resolve(&mut i.mem, Ns::Memory)?; + } + MemoryInit(i) => { + self.resolver.datas.resolve(&mut i.data, "data")?; + self.resolver.resolve(&mut i.mem, Ns::Memory)?; + } + MemoryCopy(i) => { + self.resolver.resolve(&mut i.src, Ns::Memory)?; + self.resolver.resolve(&mut i.dst, Ns::Memory)?; + } + DataDrop(i) => { + self.resolver.datas.resolve(i, "data")?; + } + + TableInit(i) => { + self.resolver.elems.resolve(&mut i.elem, "elem")?; + self.resolver.resolve(&mut i.table, Ns::Table)?; + } + ElemDrop(i) => { + self.resolver.elems.resolve(i, "elem")?; + } + + TableCopy(i) => { + self.resolver.resolve(&mut i.dst, Ns::Table)?; + self.resolver.resolve(&mut i.src, Ns::Table)?; + } + + TableFill(i) | TableSet(i) | TableGet(i) | TableSize(i) | TableGrow(i) => { + self.resolver.resolve(&mut i.dst, Ns::Table)?; + } + + GlobalSet(i) | GlobalGet(i) => { + self.resolver.resolve(i, Ns::Global)?; + } + + LocalSet(i) | LocalGet(i) | LocalTee(i) => { + assert!(self.scopes.len() > 0); + // Resolve a local by iterating over scopes from most recent + // to less recent. This allows locals added by `let` blocks to + // shadow less recent locals. 
+ for (depth, scope) in self.scopes.iter().enumerate().rev() { + if let Err(e) = scope.resolve(i, "local") { + if depth == 0 { + // There are no more scopes left, report this as + // the result + return Err(e); + } + } else { + break; + } + } + // We must have taken the `break` and resolved the local + assert!(i.is_resolved()); + } + + Call(i) | RefFunc(i) | ReturnCall(i) => { + self.resolver.resolve(i, Ns::Func)?; + } + + CallIndirect(c) | ReturnCallIndirect(c) => { + self.resolver.resolve(&mut c.table, Ns::Table)?; + self.resolver.resolve_type_use(&mut c.ty)?; + } + + FuncBind(b) => { + self.resolver.resolve_type_use(&mut b.ty)?; + } + + Let(t) => { + // Resolve (ref T) in locals + for local in &mut t.locals { + self.resolver.resolve_valtype(&mut local.ty)?; + } + + // Register all locals defined in this let + let mut scope = Namespace::default(); + for local in &t.locals { + scope.register(local.id, "local")?; + } + self.scopes.push(scope); + self.blocks.push(ExprBlock { + label: t.block.label, + pushed_scope: true, + }); + + self.resolve_block_type(&mut t.block)?; + } + + Block(bt) | If(bt) | Loop(bt) | Try(bt) => { + self.blocks.push(ExprBlock { + label: bt.label, + pushed_scope: false, + }); + self.resolve_block_type(bt)?; + } + + // On `End` instructions we pop a label from the stack, and for both + // `End` and `Else` instructions if they have labels listed we + // verify that they match the label at the beginning of the block. 
+ Else(_) | End(_) => { + let (matching_block, label) = match &instr { + Else(label) => (self.blocks.last().cloned(), label), + End(label) => (self.blocks.pop(), label), + _ => unreachable!(), + }; + let matching_block = match matching_block { + Some(l) => l, + None => return Ok(()), + }; + + // Reset the local scopes to before this block was entered + if matching_block.pushed_scope { + if let End(_) = instr { + self.scopes.pop(); + } + } + + let label = match label { + Some(l) => l, + None => return Ok(()), + }; + if Some(*label) == matching_block.label { + return Ok(()); + } + return Err(Error::new( + label.span(), + "mismatching labels between end and block".to_string(), + )); + } + + Br(i) | BrIf(i) | BrOnNull(i) | BrOnNonNull(i) => { + self.resolve_label(i)?; + } + + BrTable(i) => { + for label in i.labels.iter_mut() { + self.resolve_label(label)?; + } + self.resolve_label(&mut i.default)?; + } + + Throw(i) => { + self.resolver.resolve(i, Ns::Tag)?; + } + Rethrow(i) => { + self.resolve_label(i)?; + } + Catch(i) => { + self.resolver.resolve(i, Ns::Tag)?; + } + Delegate(i) => { + // Since a delegate starts counting one layer out from the + // current try-delegate block, we pop before we resolve labels. 
+ self.blocks.pop(); + self.resolve_label(i)?; + } + + Select(s) => { + if let Some(list) = &mut s.tys { + for ty in list { + self.resolver.resolve_valtype(ty)?; + } + } + } + + RefTest(i) => { + self.resolver.resolve_reftype(&mut i.r#type)?; + } + RefCast(i) => { + self.resolver.resolve_reftype(&mut i.r#type)?; + } + BrOnCast(i) => { + self.resolve_label(&mut i.label)?; + self.resolver.resolve_reftype(&mut i.to_type)?; + self.resolver.resolve_reftype(&mut i.from_type)?; + } + BrOnCastFail(i) => { + self.resolve_label(&mut i.label)?; + self.resolver.resolve_reftype(&mut i.to_type)?; + self.resolver.resolve_reftype(&mut i.from_type)?; + } + + StructNew(i) | StructNewDefault(i) | ArrayNew(i) + | ArrayNewDefault(i) | ArrayGet(i) | ArrayGetS(i) | ArrayGetU(i) | ArraySet(i) => { + self.resolver.resolve(i, Ns::Type)?; + } + + StructSet(s) | StructGet(s) | StructGetS(s) | StructGetU(s) => { + self.resolver.resolve(&mut s.r#struct, Ns::Type)?; + self.resolver.fields.resolve(&mut s.field, "field")?; + } + + ArrayNewFixed(a) => { + self.resolver.resolve(&mut a.array, Ns::Type)?; + } + ArrayNewData(a) => { + self.resolver.resolve(&mut a.array, Ns::Type)?; + self.resolver.datas.resolve(&mut a.data_idx, "data")?; + } + ArrayNewElem(a) => { + self.resolver.resolve(&mut a.array, Ns::Type)?; + self.resolver.elems.resolve(&mut a.elem_idx, "elem")?; + } + ArrayFill(a) => { + self.resolver.resolve(&mut a.array, Ns::Type)?; + } + ArrayCopy(a) => { + self.resolver.resolve(&mut a.dest_array, Ns::Type)?; + self.resolver.resolve(&mut a.src_array, Ns::Type)?; + } + ArrayInitData(a) => { + self.resolver.resolve(&mut a.array, Ns::Type)?; + self.resolver.datas.resolve(&mut a.segment, "data")?; + } + ArrayInitElem(a) => { + self.resolver.resolve(&mut a.array, Ns::Type)?; + self.resolver.elems.resolve(&mut a.segment, "elem")?; + } + + RefNull(ty) | CallRef(ty) | ReturnCallRef(ty) => self.resolver.resolve_heaptype(ty)?, + + _ => {} + } + Ok(()) + } + + fn resolve_label(&self, label: &mut 
Index<'a>) -> Result<(), Error> { + let id = match label { + Index::Num(..) => return Ok(()), + Index::Id(id) => *id, + }; + let idx = self + .blocks + .iter() + .rev() + .enumerate() + .filter_map(|(i, b)| b.label.map(|l| (i, l))) + .find(|(_, l)| *l == id); + match idx { + Some((idx, _)) => { + *label = Index::Num(idx as u32, id.span()); + Ok(()) + } + None => Err(resolve_error(id, "label")), + } + } +} + +enum TypeInfo<'a> { + Func { + params: Box<[ValType<'a>]>, + results: Box<[ValType<'a>]>, + }, + Other, +} + +trait TypeReference<'a> { + fn check_matches(&mut self, idx: &Index<'a>, cx: &Resolver<'a>) -> Result<(), Error>; + fn resolve(&mut self, cx: &Resolver<'a>) -> Result<(), Error>; +} + +impl<'a> TypeReference<'a> for FunctionType<'a> { + fn check_matches(&mut self, idx: &Index<'a>, cx: &Resolver<'a>) -> Result<(), Error> { + let n = match idx { + Index::Num(n, _) => *n, + Index::Id(_) => panic!("expected `Num`"), + }; + let (params, results) = match cx.type_info.get(n as usize) { + Some(TypeInfo::Func { params, results }) => (params, results), + _ => return Ok(()), + }; + + // Here we need to check that the inline type listed (ourselves) matches + // what was listed in the module itself (the `params` and `results` + // above). The listed values in `types` are not resolved yet, although + // we should be resolved. In any case we do name resolution + // opportunistically here to see if the values are equal. 
+ + let types_not_equal = |a: &ValType, b: &ValType| { + let mut a = a.clone(); + let mut b = b.clone(); + drop(cx.resolve_valtype(&mut a)); + drop(cx.resolve_valtype(&mut b)); + a != b + }; + + let not_equal = params.len() != self.params.len() + || results.len() != self.results.len() + || params + .iter() + .zip(self.params.iter()) + .any(|(a, (_, _, b))| types_not_equal(a, b)) + || results + .iter() + .zip(self.results.iter()) + .any(|(a, b)| types_not_equal(a, b)); + if not_equal { + return Err(Error::new( + idx.span(), + format!("inline function type doesn't match type reference"), + )); + } + + Ok(()) + } + + fn resolve(&mut self, cx: &Resolver<'a>) -> Result<(), Error> { + // Resolve the (ref T) value types in the final function type + for param in self.params.iter_mut() { + cx.resolve_valtype(&mut param.2)?; + } + for result in self.results.iter_mut() { + cx.resolve_valtype(result)?; + } + Ok(()) + } +} diff --git a/third_party/rust/wast/src/core/resolve/types.rs b/third_party/rust/wast/src/core/resolve/types.rs new file mode 100644 index 0000000000..b3cc76d1f9 --- /dev/null +++ b/third_party/rust/wast/src/core/resolve/types.rs @@ -0,0 +1,269 @@ +use crate::core::*; +use crate::gensym; +use crate::token::{Index, Span}; +use std::collections::HashMap; + +pub fn expand<'a>(fields: &mut Vec<ModuleField<'a>>) { + let mut expander = Expander::default(); + expander.process(fields); +} + +#[derive(Default)] +pub(crate) struct Expander<'a> { + // Maps used to "intern" types. These maps are populated as type annotations + // are seen and inline type annotations use previously defined ones if + // there's a match. + func_type_to_idx: HashMap<FuncKey<'a>, Index<'a>>, + + /// Fields, during processing, which should be prepended to the + /// currently-being-processed field. This should always be empty after + /// processing is complete. 
+ to_prepend: Vec<ModuleField<'a>>, +} + +impl<'a> Expander<'a> { + fn process(&mut self, fields: &mut Vec<ModuleField<'a>>) { + // Next we expand "header" fields which are those like types and + // imports. In this context "header" is defined by the previous + // `process_imports_early` annotation. + let mut cur = 0; + while cur < fields.len() { + self.expand_header(&mut fields[cur]); + for item in self.to_prepend.drain(..) { + fields.insert(cur, item); + cur += 1; + } + cur += 1; + } + + // Next after we've done that we expand remaining fields. Note that + // after this we actually append instead of prepend. This is because + // injected types are intended to come at the end of the type section + // and types will be sorted before all other items processed here in the + // final module anyway. + for field in fields.iter_mut() { + self.expand(field); + } + fields.append(&mut self.to_prepend); + } + + fn expand_header(&mut self, item: &mut ModuleField<'a>) { + match item { + ModuleField::Type(ty) => { + let id = gensym::fill(ty.span, &mut ty.id); + match &mut ty.def { + TypeDef::Func(f) => { + f.key().insert(self, Index::Id(id)); + } + TypeDef::Array(_) | TypeDef::Struct(_) => {} + } + } + _ => {} + } + } + + fn expand(&mut self, item: &mut ModuleField<'a>) { + match item { + // This is pre-expanded above + ModuleField::Type(_) => {} + ModuleField::Rec(_) => {} + + ModuleField::Import(i) => { + self.expand_item_sig(&mut i.item); + } + ModuleField::Func(f) => { + self.expand_type_use(&mut f.ty); + if let FuncKind::Inline { expression, .. } = &mut f.kind { + self.expand_expression(expression); + } + } + ModuleField::Global(g) => { + if let GlobalKind::Inline(expr) = &mut g.kind { + self.expand_expression(expr); + } + } + ModuleField::Data(d) => { + if let DataKind::Active { offset, .. } = &mut d.kind { + self.expand_expression(offset); + } + } + ModuleField::Elem(e) => { + if let ElemKind::Active { offset, .. 
} = &mut e.kind { + self.expand_expression(offset); + } + if let ElemPayload::Exprs { exprs, .. } = &mut e.payload { + for expr in exprs { + self.expand_expression(expr); + } + } + } + ModuleField::Tag(t) => match &mut t.ty { + TagType::Exception(ty) => { + self.expand_type_use(ty); + } + }, + + ModuleField::Table(t) => match &mut t.kind { + TableKind::Normal { init_expr, .. } => { + if let Some(expr) = init_expr { + self.expand_expression(expr); + } + } + TableKind::Import { .. } | TableKind::Inline { .. } => {} + }, + + ModuleField::Memory(_) + | ModuleField::Start(_) + | ModuleField::Export(_) + | ModuleField::Custom(_) => {} + } + } + + fn expand_item_sig(&mut self, item: &mut ItemSig<'a>) { + match &mut item.kind { + ItemKind::Func(t) | ItemKind::Tag(TagType::Exception(t)) => { + self.expand_type_use(t); + } + ItemKind::Global(_) | ItemKind::Table(_) | ItemKind::Memory(_) => {} + } + } + + fn expand_expression(&mut self, expr: &mut Expression<'a>) { + for instr in expr.instrs.iter_mut() { + self.expand_instr(instr); + } + } + + fn expand_instr(&mut self, instr: &mut Instruction<'a>) { + match instr { + Instruction::Block(bt) + | Instruction::If(bt) + | Instruction::Loop(bt) + | Instruction::Let(LetType { block: bt, .. }) + | Instruction::Try(bt) => { + // No expansion necessary, a type reference is already here. + // We'll verify that it's the same as the inline type, if any, + // later. + if bt.ty.index.is_some() { + return; + } + + match &bt.ty.inline { + // Only actually expand `TypeUse` with an index which appends a + // type if it looks like we need one. This way if the + // multi-value proposal isn't enabled and/or used we won't + // encode it. + Some(inline) => { + if inline.params.len() == 0 && inline.results.len() <= 1 { + return; + } + } + + // If we didn't have either an index or an inline type + // listed then assume our block has no inputs/outputs, so + // fill in the inline type here. 
+ // + // Do not fall through to expanding the `TypeUse` because + // this doesn't force an empty function type to go into the + // type section. + None => { + bt.ty.inline = Some(FunctionType::default()); + return; + } + } + self.expand_type_use(&mut bt.ty); + } + Instruction::FuncBind(b) => { + self.expand_type_use(&mut b.ty); + } + Instruction::CallIndirect(c) | Instruction::ReturnCallIndirect(c) => { + self.expand_type_use(&mut c.ty); + } + _ => {} + } + } + + fn expand_type_use<T>(&mut self, item: &mut TypeUse<'a, T>) -> Index<'a> + where + T: TypeReference<'a>, + { + if let Some(idx) = &item.index { + return *idx; + } + let key = match item.inline.as_mut() { + Some(ty) => { + ty.expand(self); + ty.key() + } + None => T::default().key(), + }; + let span = Span::from_offset(0); // FIXME(#613): don't manufacture + let idx = self.key_to_idx(span, key); + item.index = Some(idx); + idx + } + + fn key_to_idx(&mut self, span: Span, key: impl TypeKey<'a>) -> Index<'a> { + // First see if this `key` already exists in the type definitions we've + // seen so far... + if let Some(idx) = key.lookup(self) { + return idx; + } + + // ... and failing that we insert a new type definition. 
+ let id = gensym::gen(span); + self.to_prepend.push(ModuleField::Type(Type { + span, + id: Some(id), + name: None, + def: key.to_def(span), + parent: None, + })); + let idx = Index::Id(id); + key.insert(self, idx); + idx + } +} + +pub(crate) trait TypeReference<'a>: Default { + type Key: TypeKey<'a>; + fn key(&self) -> Self::Key; + fn expand(&mut self, cx: &mut Expander<'a>); +} + +pub(crate) trait TypeKey<'a> { + fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>>; + fn to_def(&self, span: Span) -> TypeDef<'a>; + fn insert(&self, cx: &mut Expander<'a>, id: Index<'a>); +} + +pub(crate) type FuncKey<'a> = (Box<[ValType<'a>]>, Box<[ValType<'a>]>); + +impl<'a> TypeReference<'a> for FunctionType<'a> { + type Key = FuncKey<'a>; + + fn key(&self) -> Self::Key { + let params = self.params.iter().map(|p| p.2).collect(); + let results = self.results.clone(); + (params, results) + } + + fn expand(&mut self, _cx: &mut Expander<'a>) {} +} + +impl<'a> TypeKey<'a> for FuncKey<'a> { + fn lookup(&self, cx: &Expander<'a>) -> Option<Index<'a>> { + cx.func_type_to_idx.get(self).cloned() + } + + fn to_def(&self, _span: Span) -> TypeDef<'a> { + TypeDef::Func(FunctionType { + params: self.0.iter().map(|t| (None, None, *t)).collect(), + results: self.1.clone(), + }) + } + + fn insert(&self, cx: &mut Expander<'a>, idx: Index<'a>) { + cx.func_type_to_idx.entry(self.clone()).or_insert(idx); + } +} diff --git a/third_party/rust/wast/src/core/table.rs b/third_party/rust/wast/src/core/table.rs new file mode 100644 index 0000000000..f279e174b9 --- /dev/null +++ b/third_party/rust/wast/src/core/table.rs @@ -0,0 +1,241 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, Index, LParen, NameAnnotation, Span}; + +/// A WebAssembly `table` directive in a module. +#[derive(Debug)] +pub struct Table<'a> { + /// Where this table was defined. + pub span: Span, + /// An optional name to refer to this table by. 
+ pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// If present, inline export annotations which indicate names this + /// definition should be exported under. + pub exports: InlineExport<'a>, + /// How this table is textually defined in the module. + pub kind: TableKind<'a>, +} + +/// Different ways to textually define a table. +#[derive(Debug)] +pub enum TableKind<'a> { + /// This table is actually an inlined import definition. + #[allow(missing_docs)] + Import { + import: InlineImport<'a>, + ty: TableType<'a>, + }, + + /// A typical memory definition which simply says the limits of the table + Normal { + /// Table type. + ty: TableType<'a>, + /// Optional items initializer expression. + init_expr: Option<Expression<'a>>, + }, + + /// The elem segments of this table, starting from 0, explicitly listed + Inline { + /// The element type of this table. + elem: RefType<'a>, + /// The element table entries to have, and the length of this list is + /// the limits of the table as well. + payload: ElemPayload<'a>, + }, +} + +impl<'a> Parse<'a> for Table<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::table>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + + // Afterwards figure out which style this is, either: + // + // * `elemtype (elem ...)` + // * `(import "a" "b") limits` + // * `limits` + let mut l = parser.lookahead1(); + let kind = if l.peek::<RefType>() { + let elem = parser.parse()?; + let payload = parser.parens(|p| { + p.parse::<kw::elem>()?; + let ty = if parser.peek::<LParen>() { + Some(elem) + } else { + None + }; + ElemPayload::parse_tail(parser, ty) + })?; + TableKind::Inline { elem, payload } + } else if l.peek::<u32>() { + TableKind::Normal { + ty: parser.parse()?, + init_expr: if !parser.is_empty() { + Some(parser.parse::<Expression>()?) 
+ } else { + None + }, + } + } else if let Some(import) = parser.parse()? { + TableKind::Import { + import, + ty: parser.parse()?, + } + } else { + return Err(l.error()); + }; + Ok(Table { + span, + id, + name, + exports, + kind, + }) + } +} + +/// An `elem` segment in a WebAssembly module. +#[derive(Debug)] +pub struct Elem<'a> { + /// Where this `elem` was defined. + pub span: Span, + /// An optional name by which to refer to this segment. + pub id: Option<Id<'a>>, + /// An optional name for this element stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// The way this segment was defined in the module. + pub kind: ElemKind<'a>, + /// The payload of this element segment, typically a list of functions. + pub payload: ElemPayload<'a>, +} + +/// Different ways to define an element segment in an mdoule. +#[derive(Debug)] +pub enum ElemKind<'a> { + /// A passive segment that isn't associated with a table and can be used in + /// various bulk-memory instructions. + Passive, + + /// A declared element segment that is purely used to declare function + /// references. + Declared, + + /// An active segment associated with a table. + Active { + /// The table this `elem` is initializing. + table: Index<'a>, + /// The offset within `table` that we'll initialize at. + offset: Expression<'a>, + }, +} + +/// Different ways to define the element segment payload in a module. +#[derive(Debug)] +pub enum ElemPayload<'a> { + /// This element segment has a contiguous list of function indices + Indices(Vec<Index<'a>>), + + /// This element segment has a list of optional function indices, + /// represented as expressions using `ref.func` and `ref.null`. + Exprs { + /// The desired type of each expression below. + ty: RefType<'a>, + /// The expressions in this segment. 
+ exprs: Vec<Expression<'a>>, + }, +} + +impl<'a> Parse<'a> for Elem<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::elem>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + + let kind = if parser.peek::<kw::declare>() { + parser.parse::<kw::declare>()?; + ElemKind::Declared + } else if parser.peek::<u32>() || (parser.peek::<LParen>() && !parser.peek::<RefType>()) { + let table = if parser.peek::<u32>() { + // FIXME: this is only here to accomodate + // proposals/threads/imports.wast at this current moment in + // time, this probably should get removed when the threads + // proposal is rebased on the current spec. + Index::Num(parser.parse()?, span) + } else if parser.peek2::<kw::table>() { + parser.parens(|p| { + p.parse::<kw::table>()?; + p.parse() + })? + } else { + Index::Num(0, span) + }; + let offset = parser.parens(|parser| { + if parser.peek::<kw::offset>() { + parser.parse::<kw::offset>()?; + } + parser.parse() + })?; + ElemKind::Active { table, offset } + } else { + ElemKind::Passive + }; + let payload = parser.parse()?; + Ok(Elem { + span, + id, + name, + kind, + payload, + }) + } +} + +impl<'a> Parse<'a> for ElemPayload<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + ElemPayload::parse_tail(parser, parser.parse()?) 
+ } +} + +impl<'a> ElemPayload<'a> { + fn parse_tail(parser: Parser<'a>, ty: Option<RefType<'a>>) -> Result<Self> { + let (must_use_indices, ty) = match ty { + None => { + parser.parse::<Option<kw::func>>()?; + (true, RefType::func()) + } + Some(ty) => (false, ty), + }; + if let HeapType::Func = ty.heap { + if must_use_indices || parser.peek::<Index<'_>>() { + let mut elems = Vec::new(); + while !parser.is_empty() { + elems.push(parser.parse()?); + } + return Ok(ElemPayload::Indices(elems)); + } + } + let mut exprs = Vec::new(); + while !parser.is_empty() { + let expr = parser.parens(|parser| { + if parser.peek::<kw::item>() { + parser.parse::<kw::item>()?; + parser.parse() + } else { + // Without `item` this is "sugar" for a single-instruction + // expression. + let insn = parser.parse()?; + Ok(Expression { + instrs: [insn].into(), + }) + } + })?; + exprs.push(expr); + } + Ok(ElemPayload::Exprs { exprs, ty }) + } +} diff --git a/third_party/rust/wast/src/core/tag.rs b/third_party/rust/wast/src/core/tag.rs new file mode 100644 index 0000000000..233b5e4cd0 --- /dev/null +++ b/third_party/rust/wast/src/core/tag.rs @@ -0,0 +1,71 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::{Id, NameAnnotation, Span}; + +/// A WebAssembly tag directive, part of the exception handling proposal. +#[derive(Debug)] +pub struct Tag<'a> { + /// Where this tag was defined + pub span: Span, + /// An optional name by which to refer to this tag in name resolution. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// Optional export directives for this tag. + pub exports: InlineExport<'a>, + /// The type of tag that is defined. + pub ty: TagType<'a>, + /// What kind of tag this is defined as. + pub kind: TagKind<'a>, +} + +/// Listing of various types of tags that can be defined in a wasm module. 
+#[derive(Clone, Debug)] +pub enum TagType<'a> { + /// An exception tag, where the payload is the type signature of the tag + /// (constructor parameters, etc). + Exception(TypeUse<'a, FunctionType<'a>>), +} + +/// Different kinds of tags that can be defined in a module. +#[derive(Debug)] +pub enum TagKind<'a> { + /// An tag which is actually defined as an import, such as: + /// + /// ```text + /// (tag (type 0) (import "foo" "bar")) + /// ``` + Import(InlineImport<'a>), + + /// A tag defined inline in the module itself + Inline(), +} + +impl<'a> Parse<'a> for Tag<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::tag>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + let exports = parser.parse()?; + let (ty, kind) = if let Some(import) = parser.parse()? { + (parser.parse()?, TagKind::Import(import)) + } else { + (parser.parse()?, TagKind::Inline()) + }; + Ok(Tag { + span, + id, + name, + exports, + ty, + kind, + }) + } +} + +impl<'a> Parse<'a> for TagType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(TagType::Exception(parser.parse()?)) + } +} diff --git a/third_party/rust/wast/src/core/types.rs b/third_party/rust/wast/src/core/types.rs new file mode 100644 index 0000000000..9abb906806 --- /dev/null +++ b/third_party/rust/wast/src/core/types.rs @@ -0,0 +1,832 @@ +use crate::core::*; +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::{Id, Index, LParen, NameAnnotation, Span}; +use std::mem; + +/// The value types for a wasm module. 
+#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub enum ValType<'a> { + I32, + I64, + F32, + F64, + V128, + Ref(RefType<'a>), +} + +impl<'a> Parse<'a> for ValType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::i32>() { + parser.parse::<kw::i32>()?; + Ok(ValType::I32) + } else if l.peek::<kw::i64>() { + parser.parse::<kw::i64>()?; + Ok(ValType::I64) + } else if l.peek::<kw::f32>() { + parser.parse::<kw::f32>()?; + Ok(ValType::F32) + } else if l.peek::<kw::f64>() { + parser.parse::<kw::f64>()?; + Ok(ValType::F64) + } else if l.peek::<kw::v128>() { + parser.parse::<kw::v128>()?; + Ok(ValType::V128) + } else if l.peek::<RefType>() { + Ok(ValType::Ref(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +impl<'a> Peek for ValType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::i32::peek(cursor) + || kw::i64::peek(cursor) + || kw::f32::peek(cursor) + || kw::f64::peek(cursor) + || kw::v128::peek(cursor) + || RefType::peek(cursor) + } + fn display() -> &'static str { + "valtype" + } +} + +/// A heap type for a reference type +#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub enum HeapType<'a> { + /// An untyped function reference: funcref. This is part of the reference + /// types proposal. + Func, + /// A reference to any host value: externref. This is part of the reference + /// types proposal. + Extern, + /// A reference to any reference value: anyref. This is part of the GC + /// proposal. + Any, + /// A reference that has an identity that can be compared: eqref. This is + /// part of the GC proposal. + Eq, + /// A reference to a GC struct. This is part of the GC proposal. + Struct, + /// A reference to a GC array. This is part of the GC proposal. + Array, + /// An unboxed 31-bit integer: i31ref. Part of the GC proposal. + I31, + /// The bottom type of the funcref hierarchy. Part of the GC proposal. 
+ NoFunc, + /// The bottom type of the externref hierarchy. Part of the GC proposal. + NoExtern, + /// The bottom type of the anyref hierarchy. Part of the GC proposal. + None, + /// A reference to a function, struct, or array: ref T. This is part of the + /// GC proposal. + Index(Index<'a>), +} + +impl<'a> Parse<'a> for HeapType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(HeapType::Func) + } else if l.peek::<kw::r#extern>() { + parser.parse::<kw::r#extern>()?; + Ok(HeapType::Extern) + } else if l.peek::<kw::r#any>() { + parser.parse::<kw::r#any>()?; + Ok(HeapType::Any) + } else if l.peek::<kw::eq>() { + parser.parse::<kw::eq>()?; + Ok(HeapType::Eq) + } else if l.peek::<kw::r#struct>() { + parser.parse::<kw::r#struct>()?; + Ok(HeapType::Struct) + } else if l.peek::<kw::array>() { + parser.parse::<kw::array>()?; + Ok(HeapType::Array) + } else if l.peek::<kw::i31>() { + parser.parse::<kw::i31>()?; + Ok(HeapType::I31) + } else if l.peek::<kw::nofunc>() { + parser.parse::<kw::nofunc>()?; + Ok(HeapType::NoFunc) + } else if l.peek::<kw::noextern>() { + parser.parse::<kw::noextern>()?; + Ok(HeapType::NoExtern) + } else if l.peek::<kw::none>() { + parser.parse::<kw::none>()?; + Ok(HeapType::None) + } else if l.peek::<Index>() { + Ok(HeapType::Index(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +impl<'a> Peek for HeapType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::func::peek(cursor) + || kw::r#extern::peek(cursor) + || kw::any::peek(cursor) + || kw::eq::peek(cursor) + || kw::r#struct::peek(cursor) + || kw::array::peek(cursor) + || kw::i31::peek(cursor) + || kw::nofunc::peek(cursor) + || kw::noextern::peek(cursor) + || kw::none::peek(cursor) + || (LParen::peek(cursor) && kw::r#type::peek2(cursor)) + } + fn display() -> &'static str { + "heaptype" + } +} + +/// A reference type in a wasm module. 
+#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub struct RefType<'a> { + pub nullable: bool, + pub heap: HeapType<'a>, +} + +impl<'a> RefType<'a> { + /// A `funcref` as an abbreviation for `(ref null func)`. + pub fn func() -> Self { + RefType { + nullable: true, + heap: HeapType::Func, + } + } + + /// An `externref` as an abbreviation for `(ref null extern)`. + pub fn r#extern() -> Self { + RefType { + nullable: true, + heap: HeapType::Extern, + } + } + + /// An `anyref` as an abbreviation for `(ref null any)`. + pub fn any() -> Self { + RefType { + nullable: true, + heap: HeapType::Any, + } + } + + /// An `eqref` as an abbreviation for `(ref null eq)`. + pub fn eq() -> Self { + RefType { + nullable: true, + heap: HeapType::Eq, + } + } + + /// An `structref` as an abbreviation for `(ref null struct)`. + pub fn r#struct() -> Self { + RefType { + nullable: true, + heap: HeapType::Struct, + } + } + + /// An `arrayref` as an abbreviation for `(ref null array)`. + pub fn array() -> Self { + RefType { + nullable: true, + heap: HeapType::Array, + } + } + + /// An `i31ref` as an abbreviation for `(ref null i31)`. + pub fn i31() -> Self { + RefType { + nullable: true, + heap: HeapType::I31, + } + } + + /// A `nullfuncref` as an abbreviation for `(ref null nofunc)`. + pub fn nullfuncref() -> Self { + RefType { + nullable: true, + heap: HeapType::NoFunc, + } + } + + /// A `nullexternref` as an abbreviation for `(ref null noextern)`. + pub fn nullexternref() -> Self { + RefType { + nullable: true, + heap: HeapType::NoExtern, + } + } + + /// A `nullref` as an abbreviation for `(ref null none)`. 
+ pub fn nullref() -> Self { + RefType { + nullable: true, + heap: HeapType::None, + } + } +} + +impl<'a> Parse<'a> for RefType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::funcref>() { + parser.parse::<kw::funcref>()?; + Ok(RefType::func()) + } else if l.peek::<kw::anyfunc>() { + parser.parse::<kw::anyfunc>()?; + Ok(RefType::func()) + } else if l.peek::<kw::externref>() { + parser.parse::<kw::externref>()?; + Ok(RefType::r#extern()) + } else if l.peek::<kw::anyref>() { + parser.parse::<kw::anyref>()?; + Ok(RefType::any()) + } else if l.peek::<kw::eqref>() { + parser.parse::<kw::eqref>()?; + Ok(RefType::eq()) + } else if l.peek::<kw::structref>() { + parser.parse::<kw::structref>()?; + Ok(RefType::r#struct()) + } else if l.peek::<kw::arrayref>() { + parser.parse::<kw::arrayref>()?; + Ok(RefType::array()) + } else if l.peek::<kw::i31ref>() { + parser.parse::<kw::i31ref>()?; + Ok(RefType::i31()) + } else if l.peek::<kw::nullfuncref>() { + parser.parse::<kw::nullfuncref>()?; + Ok(RefType::nullfuncref()) + } else if l.peek::<kw::nullexternref>() { + parser.parse::<kw::nullexternref>()?; + Ok(RefType::nullexternref()) + } else if l.peek::<kw::nullref>() { + parser.parse::<kw::nullref>()?; + Ok(RefType::nullref()) + } else if l.peek::<LParen>() { + parser.parens(|p| { + let mut l = parser.lookahead1(); + if l.peek::<kw::r#ref>() { + p.parse::<kw::r#ref>()?; + + let mut nullable = false; + if parser.peek::<kw::null>() { + parser.parse::<kw::null>()?; + nullable = true; + } + + Ok(RefType { + nullable, + heap: parser.parse()?, + }) + } else { + Err(l.error()) + } + }) + } else { + Err(l.error()) + } + } +} + +impl<'a> Peek for RefType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::funcref::peek(cursor) + || /* legacy */ kw::anyfunc::peek(cursor) + || kw::externref::peek(cursor) + || kw::anyref::peek(cursor) + || kw::eqref::peek(cursor) + || kw::structref::peek(cursor) + || kw::arrayref::peek(cursor) + || 
kw::i31ref::peek(cursor) + || kw::nullfuncref::peek(cursor) + || kw::nullexternref::peek(cursor) + || kw::nullref::peek(cursor) + || (LParen::peek(cursor) && kw::r#ref::peek2(cursor)) + } + fn display() -> &'static str { + "reftype" + } +} + +/// The types of values that may be used in a struct or array. +#[allow(missing_docs)] +#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)] +pub enum StorageType<'a> { + I8, + I16, + Val(ValType<'a>), +} + +impl<'a> Parse<'a> for StorageType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::i8>() { + parser.parse::<kw::i8>()?; + Ok(StorageType::I8) + } else if l.peek::<kw::i16>() { + parser.parse::<kw::i16>()?; + Ok(StorageType::I16) + } else if l.peek::<ValType>() { + Ok(StorageType::Val(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +/// Type for a `global` in a wasm module +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct GlobalType<'a> { + /// The element type of this `global` + pub ty: ValType<'a>, + /// Whether or not the global is mutable or not. + pub mutable: bool, +} + +impl<'a> Parse<'a> for GlobalType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek2::<kw::r#mut>() { + parser.parens(|p| { + p.parse::<kw::r#mut>()?; + Ok(GlobalType { + ty: parser.parse()?, + mutable: true, + }) + }) + } else { + Ok(GlobalType { + ty: parser.parse()?, + mutable: false, + }) + } + } +} + +/// Min/max limits used for tables/memories. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct Limits { + /// The minimum number of units for this type. + pub min: u32, + /// An optional maximum number of units for this type. + pub max: Option<u32>, +} + +impl<'a> Parse<'a> for Limits { + fn parse(parser: Parser<'a>) -> Result<Self> { + let min = parser.parse()?; + let max = if parser.peek::<u32>() { + Some(parser.parse()?) 
+ } else { + None + }; + Ok(Limits { min, max }) + } +} + +/// Min/max limits used for 64-bit memories +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct Limits64 { + /// The minimum number of units for this type. + pub min: u64, + /// An optional maximum number of units for this type. + pub max: Option<u64>, +} + +impl<'a> Parse<'a> for Limits64 { + fn parse(parser: Parser<'a>) -> Result<Self> { + let min = parser.parse()?; + let max = if parser.peek::<u64>() { + Some(parser.parse()?) + } else { + None + }; + Ok(Limits64 { min, max }) + } +} + +/// Configuration for a table of a wasm mdoule +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub struct TableType<'a> { + /// Limits on the element sizes of this table + pub limits: Limits, + /// The type of element stored in this table + pub elem: RefType<'a>, +} + +impl<'a> Parse<'a> for TableType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(TableType { + limits: parser.parse()?, + elem: parser.parse()?, + }) + } +} + +/// Configuration for a memory of a wasm module +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum MemoryType { + /// A 32-bit memory + B32 { + /// Limits on the page sizes of this memory + limits: Limits, + /// Whether or not this is a shared (atomic) memory type + shared: bool, + }, + /// A 64-bit memory + B64 { + /// Limits on the page sizes of this memory + limits: Limits64, + /// Whether or not this is a shared (atomic) memory type + shared: bool, + }, +} + +impl<'a> Parse<'a> for MemoryType { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::i64>() { + parser.parse::<kw::i64>()?; + let limits = parser.parse()?; + let shared = parser.parse::<Option<kw::shared>>()?.is_some(); + Ok(MemoryType::B64 { limits, shared }) + } else { + parser.parse::<Option<kw::i32>>()?; + let limits = parser.parse()?; + let shared = parser.parse::<Option<kw::shared>>()?.is_some(); + Ok(MemoryType::B32 { limits, shared }) + } + } +} + +/// A function type with 
parameters and results. +#[derive(Clone, Debug, Default)] +pub struct FunctionType<'a> { + /// The parameters of a function, optionally each having an identifier for + /// name resolution and a name for the custom `name` section. + pub params: Box<[(Option<Id<'a>>, Option<NameAnnotation<'a>>, ValType<'a>)]>, + /// The results types of a function. + pub results: Box<[ValType<'a>]>, +} + +impl<'a> FunctionType<'a> { + fn finish_parse(&mut self, allow_names: bool, parser: Parser<'a>) -> Result<()> { + let mut params = Vec::from(mem::take(&mut self.params)); + let mut results = Vec::from(mem::take(&mut self.results)); + while parser.peek2::<kw::param>() || parser.peek2::<kw::result>() { + parser.parens(|p| { + let mut l = p.lookahead1(); + if l.peek::<kw::param>() { + if results.len() > 0 { + return Err(p.error( + "result before parameter (or unexpected token): \ + cannot list params after results", + )); + } + p.parse::<kw::param>()?; + if p.is_empty() { + return Ok(()); + } + let (id, name) = if allow_names { + (p.parse::<Option<_>>()?, p.parse::<Option<_>>()?) 
+ } else { + (None, None) + }; + let parse_more = id.is_none() && name.is_none(); + let ty = p.parse()?; + params.push((id, name, ty)); + while parse_more && !p.is_empty() { + params.push((None, None, p.parse()?)); + } + } else if l.peek::<kw::result>() { + p.parse::<kw::result>()?; + while !p.is_empty() { + results.push(p.parse()?); + } + } else { + return Err(l.error()); + } + Ok(()) + })?; + } + self.params = params.into(); + self.results = results.into(); + Ok(()) + } +} + +impl<'a> Parse<'a> for FunctionType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut ret = FunctionType { + params: Box::new([]), + results: Box::new([]), + }; + ret.finish_parse(true, parser)?; + Ok(ret) + } +} + +impl<'a> Peek for FunctionType<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + if let Some(next) = cursor.lparen() { + match next.keyword() { + Some(("param", _)) | Some(("result", _)) => return true, + _ => {} + } + } + + false + } + + fn display() -> &'static str { + "function type" + } +} + +/// A function type with parameters and results. +#[derive(Clone, Debug, Default)] +pub struct FunctionTypeNoNames<'a>(pub FunctionType<'a>); + +impl<'a> Parse<'a> for FunctionTypeNoNames<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut ret = FunctionType { + params: Box::new([]), + results: Box::new([]), + }; + ret.finish_parse(false, parser)?; + Ok(FunctionTypeNoNames(ret)) + } +} + +impl<'a> Peek for FunctionTypeNoNames<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + FunctionType::peek(cursor) + } + + fn display() -> &'static str { + FunctionType::display() + } +} + +impl<'a> From<FunctionTypeNoNames<'a>> for FunctionType<'a> { + fn from(ty: FunctionTypeNoNames<'a>) -> FunctionType<'a> { + ty.0 + } +} + +/// A struct type with fields. 
+#[derive(Clone, Debug)] +pub struct StructType<'a> { + /// The fields of the struct + pub fields: Vec<StructField<'a>>, +} + +impl<'a> Parse<'a> for StructType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut ret = StructType { fields: Vec::new() }; + while !parser.is_empty() { + let field = if parser.peek2::<kw::field>() { + parser.parens(|parser| { + parser.parse::<kw::field>()?; + StructField::parse(parser, true) + }) + } else { + StructField::parse(parser, false) + }; + ret.fields.push(field?); + } + Ok(ret) + } +} + +/// A field of a struct type. +#[derive(Clone, Debug)] +pub struct StructField<'a> { + /// An optional identifier for name resolution. + pub id: Option<Id<'a>>, + /// Whether this field may be mutated or not. + pub mutable: bool, + /// The storage type stored in this field. + pub ty: StorageType<'a>, +} + +impl<'a> StructField<'a> { + fn parse(parser: Parser<'a>, with_id: bool) -> Result<Self> { + let id = if with_id { parser.parse()? } else { None }; + let (ty, mutable) = if parser.peek2::<kw::r#mut>() { + let ty = parser.parens(|parser| { + parser.parse::<kw::r#mut>()?; + parser.parse() + })?; + (ty, true) + } else { + (parser.parse::<StorageType<'a>>()?, false) + }; + Ok(StructField { id, mutable, ty }) + } +} + +/// An array type with fields. +#[derive(Clone, Debug)] +pub struct ArrayType<'a> { + /// Whether this field may be mutated or not. + pub mutable: bool, + /// The storage type stored in this field. + pub ty: StorageType<'a>, +} + +impl<'a> Parse<'a> for ArrayType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let (ty, mutable) = if parser.peek2::<kw::r#mut>() { + let ty = parser.parens(|parser| { + parser.parse::<kw::r#mut>()?; + parser.parse() + })?; + (ty, true) + } else { + (parser.parse::<StorageType<'a>>()?, false) + }; + Ok(ArrayType { mutable, ty }) + } +} + +/// The type of an exported item from a module or instance. 
+#[derive(Debug, Clone)] +pub struct ExportType<'a> { + /// Where this export was defined. + pub span: Span, + /// The name of this export. + pub name: &'a str, + /// The signature of the item that's exported. + pub item: ItemSig<'a>, +} + +impl<'a> Parse<'a> for ExportType<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::export>()?.0; + let name = parser.parse()?; + let item = parser.parens(|p| p.parse())?; + Ok(ExportType { span, name, item }) + } +} + +/// A definition of a type. +#[derive(Debug)] +pub enum TypeDef<'a> { + /// A function type definition. + Func(FunctionType<'a>), + /// A struct type definition. + Struct(StructType<'a>), + /// An array type definition. + Array(ArrayType<'a>), +} + +impl<'a> Parse<'a> for TypeDef<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::func>() { + parser.parse::<kw::func>()?; + Ok(TypeDef::Func(parser.parse()?)) + } else if l.peek::<kw::r#struct>() { + parser.parse::<kw::r#struct>()?; + Ok(TypeDef::Struct(parser.parse()?)) + } else if l.peek::<kw::array>() { + parser.parse::<kw::array>()?; + Ok(TypeDef::Array(parser.parse()?)) + } else { + Err(l.error()) + } + } +} + +/// A type declaration in a module +#[derive(Debug)] +pub struct Type<'a> { + /// Where this type was defined. + pub span: Span, + /// An optional identifier to refer to this `type` by as part of name + /// resolution. + pub id: Option<Id<'a>>, + /// An optional name for this function stored in the custom `name` section. + pub name: Option<NameAnnotation<'a>>, + /// The type that we're declaring. + pub def: TypeDef<'a>, + /// The declared parent type of this definition. 
+ pub parent: Option<Index<'a>>, +} + +impl<'a> Peek for Type<'a> { + fn peek(cursor: Cursor<'_>) -> bool { + kw::r#type::peek(cursor) + } + fn display() -> &'static str { + "type" + } +} + +impl<'a> Parse<'a> for Type<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::r#type>()?.0; + let id = parser.parse()?; + let name = parser.parse()?; + + let (parent, def) = if parser.peek2::<kw::sub>() { + parser.parens(|parser| { + parser.parse::<kw::sub>()?; + let parent = if parser.peek::<Index<'a>>() { + parser.parse()? + } else { + None + }; + let def = parser.parens(|parser| parser.parse())?; + Ok((parent, def)) + })? + } else { + (None, parser.parens(|parser| parser.parse())?) + }; + + Ok(Type { + span, + id, + name, + def, + parent, + }) + } +} + +/// A recursion group declaration in a module +#[derive(Debug)] +pub struct Rec<'a> { + /// Where this recursion group was defined. + pub span: Span, + /// The types that we're defining in this group. + pub types: Vec<Type<'a>>, +} + +impl<'a> Parse<'a> for Rec<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::r#rec>()?.0; + let mut types = Vec::new(); + while parser.peek2::<Type<'a>>() { + types.push(parser.parens(|p| p.parse())?); + } + Ok(Rec { span, types }) + } +} + +/// A reference to a type defined in this module. +#[derive(Clone, Debug)] +pub struct TypeUse<'a, T> { + /// The type that we're referencing, if it was present. + pub index: Option<Index<'a>>, + /// The inline type, if present. + pub inline: Option<T>, +} + +impl<'a, T> TypeUse<'a, T> { + /// Constructs a new instance of `TypeUse` without an inline definition but + /// with an index specified. 
+ pub fn new_with_index(idx: Index<'a>) -> TypeUse<'a, T> { + TypeUse { + index: Some(idx), + inline: None, + } + } +} + +impl<'a, T: Peek + Parse<'a>> Parse<'a> for TypeUse<'a, T> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let index = if parser.peek2::<kw::r#type>() { + Some(parser.parens(|p| { + p.parse::<kw::r#type>()?; + p.parse() + })?) + } else { + None + }; + let inline = parser.parse()?; + + Ok(TypeUse { index, inline }) + } +} + +impl<'a> From<TypeUse<'a, FunctionTypeNoNames<'a>>> for TypeUse<'a, FunctionType<'a>> { + fn from(src: TypeUse<'a, FunctionTypeNoNames<'a>>) -> TypeUse<'a, FunctionType<'a>> { + TypeUse { + index: src.index, + inline: src.inline.map(|x| x.into()), + } + } +} diff --git a/third_party/rust/wast/src/core/wast.rs b/third_party/rust/wast/src/core/wast.rs new file mode 100644 index 0000000000..41437e02d8 --- /dev/null +++ b/third_party/rust/wast/src/core/wast.rs @@ -0,0 +1,236 @@ +use crate::core::{HeapType, V128Const}; +use crate::kw; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use crate::token::{Float32, Float64, Index}; + +/// Expression that can be used inside of `invoke` expressions for core wasm +/// functions. 
+#[derive(Debug)] +#[allow(missing_docs)] +pub enum WastArgCore<'a> { + I32(i32), + I64(i64), + F32(Float32), + F64(Float64), + V128(V128Const), + RefNull(HeapType<'a>), + RefExtern(u32), +} + +static ARGS: &[(&str, fn(Parser<'_>) -> Result<WastArgCore<'_>>)] = { + use WastArgCore::*; + &[ + ("i32.const", |p| Ok(I32(p.parse()?))), + ("i64.const", |p| Ok(I64(p.parse()?))), + ("f32.const", |p| Ok(F32(p.parse()?))), + ("f64.const", |p| Ok(F64(p.parse()?))), + ("v128.const", |p| Ok(V128(p.parse()?))), + ("ref.null", |p| Ok(RefNull(p.parse()?))), + ("ref.extern", |p| Ok(RefExtern(p.parse()?))), + ] +}; + +impl<'a> Parse<'a> for WastArgCore<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let parse = parser.step(|c| { + if let Some((kw, rest)) = c.keyword() { + if let Some(i) = ARGS.iter().position(|(name, _)| *name == kw) { + return Ok((ARGS[i].1, rest)); + } + } + Err(c.error("expected a [type].const expression")) + })?; + parse(parser) + } +} + +impl Peek for WastArgCore<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let kw = match cursor.keyword() { + Some((kw, _)) => kw, + None => return false, + }; + ARGS.iter().find(|(name, _)| *name == kw).is_some() + } + + fn display() -> &'static str { + "core wasm argument" + } +} + +/// Expressions that can be used inside of `assert_return` to validate the +/// return value of a core wasm function. +#[derive(Debug)] +#[allow(missing_docs)] +pub enum WastRetCore<'a> { + I32(i32), + I64(i64), + F32(NanPattern<Float32>), + F64(NanPattern<Float64>), + V128(V128Pattern), + + /// A null reference is expected, optionally with a specified type. + RefNull(Option<HeapType<'a>>), + /// A non-null externref is expected which should contain the specified + /// value. + RefExtern(u32), + /// A non-null funcref is expected. 
+ RefFunc(Option<Index<'a>>), + + Either(Vec<WastRetCore<'a>>), +} + +static RETS: &[(&str, fn(Parser<'_>) -> Result<WastRetCore<'_>>)] = { + use WastRetCore::*; + &[ + ("i32.const", |p| Ok(I32(p.parse()?))), + ("i64.const", |p| Ok(I64(p.parse()?))), + ("f32.const", |p| Ok(F32(p.parse()?))), + ("f64.const", |p| Ok(F64(p.parse()?))), + ("v128.const", |p| Ok(V128(p.parse()?))), + ("ref.null", |p| Ok(RefNull(p.parse()?))), + ("ref.extern", |p| Ok(RefExtern(p.parse()?))), + ("ref.func", |p| Ok(RefFunc(p.parse()?))), + ("either", |p| { + p.depth_check()?; + let mut cases = Vec::new(); + while !p.is_empty() { + cases.push(p.parens(|p| p.parse())?); + } + Ok(Either(cases)) + }), + ] +}; + +impl<'a> Parse<'a> for WastRetCore<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let parse = parser.step(|c| { + if let Some((kw, rest)) = c.keyword() { + if let Some(i) = RETS.iter().position(|(name, _)| *name == kw) { + return Ok((RETS[i].1, rest)); + } + } + Err(c.error("expected a [type].const expression")) + })?; + parse(parser) + } +} + +impl Peek for WastRetCore<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + let kw = match cursor.keyword() { + Some((kw, _)) => kw, + None => return false, + }; + RETS.iter().find(|(name, _)| *name == kw).is_some() + } + + fn display() -> &'static str { + "core wasm return value" + } +} + +/// Either a NaN pattern (`nan:canonical`, `nan:arithmetic`) or a value of type `T`. 
+#[derive(Debug, PartialEq)] +#[allow(missing_docs)] +pub enum NanPattern<T> { + CanonicalNan, + ArithmeticNan, + Value(T), +} + +impl<'a, T> Parse<'a> for NanPattern<T> +where + T: Parse<'a>, +{ + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<kw::nan_canonical>() { + parser.parse::<kw::nan_canonical>()?; + Ok(NanPattern::CanonicalNan) + } else if parser.peek::<kw::nan_arithmetic>() { + parser.parse::<kw::nan_arithmetic>()?; + Ok(NanPattern::ArithmeticNan) + } else { + let val = parser.parse()?; + Ok(NanPattern::Value(val)) + } + } +} + +/// A version of `V128Const` that allows `NanPattern`s. +/// +/// This implementation is necessary because only float types can include NaN patterns; otherwise +/// it is largely similar to the implementation of `V128Const`. +#[derive(Debug)] +#[allow(missing_docs)] +pub enum V128Pattern { + I8x16([i8; 16]), + I16x8([i16; 8]), + I32x4([i32; 4]), + I64x2([i64; 2]), + F32x4([NanPattern<Float32>; 4]), + F64x2([NanPattern<Float64>; 2]), +} + +impl<'a> Parse<'a> for V128Pattern { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::i8x16>() { + parser.parse::<kw::i8x16>()?; + Ok(V128Pattern::I8x16([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i16x8>() { + parser.parse::<kw::i16x8>()?; + Ok(V128Pattern::I16x8([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i32x4>() { + parser.parse::<kw::i32x4>()?; + Ok(V128Pattern::I32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::i64x2>() { + 
parser.parse::<kw::i64x2>()?; + Ok(V128Pattern::I64x2([parser.parse()?, parser.parse()?])) + } else if l.peek::<kw::f32x4>() { + parser.parse::<kw::f32x4>()?; + Ok(V128Pattern::F32x4([ + parser.parse()?, + parser.parse()?, + parser.parse()?, + parser.parse()?, + ])) + } else if l.peek::<kw::f64x2>() { + parser.parse::<kw::f64x2>()?; + Ok(V128Pattern::F64x2([parser.parse()?, parser.parse()?])) + } else { + Err(l.error()) + } + } +} diff --git a/third_party/rust/wast/src/encode.rs b/third_party/rust/wast/src/encode.rs new file mode 100644 index 0000000000..3fc932690e --- /dev/null +++ b/third_party/rust/wast/src/encode.rs @@ -0,0 +1,75 @@ +pub(crate) trait Encode { + fn encode(&self, e: &mut Vec<u8>); +} + +impl<T: Encode + ?Sized> Encode for &'_ T { + fn encode(&self, e: &mut Vec<u8>) { + T::encode(self, e) + } +} + +impl<T: Encode> Encode for [T] { + fn encode(&self, e: &mut Vec<u8>) { + self.len().encode(e); + for item in self { + item.encode(e); + } + } +} + +impl<T: Encode> Encode for Vec<T> { + fn encode(&self, e: &mut Vec<u8>) { + <[T]>::encode(self, e) + } +} + +impl Encode for str { + fn encode(&self, e: &mut Vec<u8>) { + self.len().encode(e); + e.extend_from_slice(self.as_bytes()); + } +} + +impl Encode for usize { + fn encode(&self, e: &mut Vec<u8>) { + assert!(*self <= u32::max_value() as usize); + (*self as u32).encode(e) + } +} + +impl Encode for u8 { + fn encode(&self, e: &mut Vec<u8>) { + e.push(*self); + } +} + +impl Encode for u32 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::unsigned(e, (*self).into()).unwrap(); + } +} + +impl Encode for i32 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::signed(e, (*self).into()).unwrap(); + } +} + +impl Encode for u64 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::unsigned(e, *self).unwrap(); + } +} + +impl Encode for i64 { + fn encode(&self, e: &mut Vec<u8>) { + leb128::write::signed(e, *self).unwrap(); + } +} + +impl<T: Encode, U: Encode> Encode for (T, U) { + fn encode(&self, e: 
&mut Vec<u8>) { + self.0.encode(e); + self.1.encode(e); + } +} diff --git a/third_party/rust/wast/src/error.rs b/third_party/rust/wast/src/error.rs new file mode 100644 index 0000000000..214e678338 --- /dev/null +++ b/third_party/rust/wast/src/error.rs @@ -0,0 +1,196 @@ +use crate::lexer::LexError; +use crate::token::Span; +use std::fmt; +use std::path::{Path, PathBuf}; +use unicode_width::UnicodeWidthStr; + +/// A convenience error type to tie together all the detailed errors produced by +/// this crate. +/// +/// This type can be created from a [`LexError`]. This also contains +/// storage for file/text information so a nice error can be rendered along the +/// same lines of rustc's own error messages (minus the color). +/// +/// This type is typically suitable for use in public APIs for consumers of this +/// crate. +#[derive(Debug)] +pub struct Error { + inner: Box<ErrorInner>, +} + +#[derive(Debug)] +struct ErrorInner { + text: Option<Text>, + file: Option<PathBuf>, + span: Span, + kind: ErrorKind, +} + +#[derive(Debug)] +struct Text { + line: usize, + col: usize, + snippet: String, +} + +#[derive(Debug)] +enum ErrorKind { + Lex(LexError), + Custom(String), +} + +impl Error { + pub(crate) fn lex(span: Span, content: &str, kind: LexError) -> Error { + let mut ret = Error { + inner: Box::new(ErrorInner { + text: None, + file: None, + span, + kind: ErrorKind::Lex(kind), + }), + }; + ret.set_text(content); + ret + } + + pub(crate) fn parse(span: Span, content: &str, message: String) -> Error { + let mut ret = Error { + inner: Box::new(ErrorInner { + text: None, + file: None, + span, + kind: ErrorKind::Custom(message), + }), + }; + ret.set_text(content); + ret + } + + /// Creates a new error with the given `message` which is targeted at the + /// given `span` + /// + /// Note that you'll want to ensure that `set_text` or `set_path` is called + /// on the resulting error to improve the rendering of the error message. 
+ pub fn new(span: Span, message: String) -> Error { + Error { + inner: Box::new(ErrorInner { + text: None, + file: None, + span, + kind: ErrorKind::Custom(message), + }), + } + } + + /// Return the `Span` for this error. + pub fn span(&self) -> Span { + self.inner.span + } + + /// To provide a more useful error this function can be used to extract + /// relevant textual information about this error into the error itself. + /// + /// The `contents` here should be the full text of the original file being + /// parsed, and this will extract a sub-slice as necessary to render in the + /// `Display` implementation later on. + pub fn set_text(&mut self, contents: &str) { + if self.inner.text.is_some() { + return; + } + self.inner.text = Some(Text::new(contents, self.inner.span)); + } + + /// To provide a more useful error this function can be used to set + /// the file name that this error is associated with. + /// + /// The `path` here will be stored in this error and later rendered in the + /// `Display` implementation. + pub fn set_path(&mut self, path: &Path) { + if self.inner.file.is_some() { + return; + } + self.inner.file = Some(path.to_path_buf()); + } + + /// Returns the underlying `LexError`, if any, that describes this error. + pub fn lex_error(&self) -> Option<&LexError> { + match &self.inner.kind { + ErrorKind::Lex(e) => Some(e), + _ => None, + } + } + + /// Returns the underlying message, if any, that describes this error. 
+ pub fn message(&self) -> String { + match &self.inner.kind { + ErrorKind::Lex(e) => e.to_string(), + ErrorKind::Custom(e) => e.clone(), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let err = match &self.inner.kind { + ErrorKind::Lex(e) => e as &dyn fmt::Display, + ErrorKind::Custom(e) => e as &dyn fmt::Display, + }; + let text = match &self.inner.text { + Some(text) => text, + None => { + return write!(f, "{} at byte offset {}", err, self.inner.span.offset); + } + }; + let file = self + .inner + .file + .as_ref() + .and_then(|p| p.to_str()) + .unwrap_or("<anon>"); + write!( + f, + "\ +{err} + --> {file}:{line}:{col} + | + {line:4} | {text} + | {marker:>0$}", + text.col + 1, + file = file, + line = text.line + 1, + col = text.col + 1, + err = err, + text = text.snippet, + marker = "^", + ) + } +} + +impl std::error::Error for Error {} + +impl Text { + fn new(content: &str, span: Span) -> Text { + let (line, col) = span.linecol_in(content); + let contents = content.lines().nth(line).unwrap_or(""); + let mut snippet = String::new(); + for ch in contents.chars() { + match ch { + // Replace tabs with spaces to render consistently + '\t' => { + snippet.push_str(" "); + } + // these codepoints change how text is rendered so for clarity + // in error messages they're dropped. + '\u{202a}' | '\u{202b}' | '\u{202d}' | '\u{202e}' | '\u{2066}' | '\u{2067}' + | '\u{2068}' | '\u{206c}' | '\u{2069}' => {} + + c => snippet.push(c), + } + } + // Use the `unicode-width` crate to figure out how wide the snippet, up + // to our "column", actually is. 
That'll tell us how many spaces to + // place before the `^` character that points at the problem + let col = snippet.get(..col).map(|s| s.width()).unwrap_or(col); + Text { line, col, snippet } + } +} diff --git a/third_party/rust/wast/src/gensym.rs b/third_party/rust/wast/src/gensym.rs new file mode 100644 index 0000000000..9f718f06d9 --- /dev/null +++ b/third_party/rust/wast/src/gensym.rs @@ -0,0 +1,20 @@ +use crate::token::{Id, Span}; +use std::cell::Cell; + +thread_local!(static NEXT: Cell<u32> = Cell::new(0)); + +pub fn reset() { + NEXT.with(|c| c.set(0)); +} + +pub fn gen(span: Span) -> Id<'static> { + NEXT.with(|next| { + let gen = next.get() + 1; + next.set(gen); + Id::gensym(span, gen) + }) +} + +pub fn fill<'a>(span: Span, slot: &mut Option<Id<'a>>) -> Id<'a> { + *slot.get_or_insert_with(|| gen(span)) +} diff --git a/third_party/rust/wast/src/lexer.rs b/third_party/rust/wast/src/lexer.rs new file mode 100644 index 0000000000..a4f8f128c7 --- /dev/null +++ b/third_party/rust/wast/src/lexer.rs @@ -0,0 +1,1334 @@ +//! Definition of a lexer for the WebAssembly text format. +//! +//! This module provides a [`Lexer`][] type which is an iterate over the raw +//! tokens of a WebAssembly text file. A [`Lexer`][] accounts for every single +//! byte in a WebAssembly text field, returning tokens even for comments and +//! whitespace. Typically you'll ignore comments and whitespace, however. +//! +//! If you'd like to iterate over the tokens in a file you can do so via: +//! +//! ``` +//! # fn foo() -> Result<(), wast::Error> { +//! use wast::lexer::Lexer; +//! +//! let wat = "(module (func $foo))"; +//! for token in Lexer::new(wat) { +//! println!("{:?}", token?); +//! } +//! # Ok(()) +//! # } +//! ``` +//! +//! Note that you'll typically not use this module but will rather use +//! [`ParseBuffer`](crate::parser::ParseBuffer) instead. +//! +//! 
//! [`Lexer`]: crate::lexer::Lexer

use crate::token::Span;
use crate::Error;
use std::borrow::Cow;
use std::char;
use std::fmt;
use std::str;

/// A structure used to lex the s-expression syntax of WAT files.
///
/// This structure is used to generate [`Token`] items, which should account for
/// every single byte of the input as we iterate over it. A [`LexError`] is
/// returned for any non-lexable text.
#[derive(Clone)]
pub struct Lexer<'a> {
    // The not-yet-lexed tail of `input`.
    remaining: &'a str,
    // The complete original source text.
    input: &'a str,
    // When `false` (the default set by `new`), "confusing" unicode characters
    // are reported as lex errors.
    allow_confusing_unicode: bool,
}

/// A fragment of source lex'd from an input string.
///
/// This enumeration contains all kinds of fragments, including comments and
/// whitespace. For most cases you'll probably ignore these and simply look at
/// tokens.
#[derive(Debug, PartialEq)]
pub enum Token<'a> {
    /// A line comment, preceded with `;;`
    LineComment(&'a str),

    /// A block comment, surrounded by `(;` and `;)`. Note that these can be
    /// nested.
    BlockComment(&'a str),

    /// A fragment of source that represents whitespace.
    Whitespace(&'a str),

    /// A left-parenthesis, including the source text for where it comes from.
    LParen(&'a str),
    /// A right-parenthesis, including the source text for where it comes from.
    RParen(&'a str),

    /// A string literal, which is actually a list of bytes.
    String(WasmString<'a>),

    /// An identifier (like `$foo`).
    ///
    /// All identifiers start with `$` and the payload here is the original
    /// source text.
    Id(&'a str),

    /// A keyword, or something that starts with an alphabetic character.
    ///
    /// The payload here is the original source text.
    Keyword(&'a str),

    /// A reserved series of `idchar` symbols. Unknown what this is meant to be
    /// used for, you'll probably generate an error about an unexpected token.
    Reserved(&'a str),

    /// An integer.
    Integer(Integer<'a>),

    /// A float.
    Float(Float<'a>),
}

// Classification of what a run of adjacent idchars/strings consisted of.
enum ReservedKind<'a> {
    // Exactly one string literal and nothing else; carries its decoded bytes.
    String(Cow<'a, [u8]>),
    // Only idchars, no string literals.
    Idchars,
    // Any other mix of idchars and string literals.
    Reserved,
}

/// Errors that can be generated while lexing.
///
/// All lexing errors have line/column/position information as well as a
/// `LexError` indicating what kind of error happened while lexing.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum LexError {
    /// A dangling block comment was found with an unbalanced `(;` which was
    /// never terminated in the file.
    DanglingBlockComment,

    /// An unexpected character was encountered when generally parsing and
    /// looking for something else.
    Unexpected(char),

    /// An invalid `char` in a string literal was found.
    InvalidStringElement(char),

    /// An invalid string escape letter was found (the thing after the `\` in
    /// string literals)
    InvalidStringEscape(char),

    /// An invalid hexadecimal digit was found.
    InvalidHexDigit(char),

    /// An invalid base-10 digit was found.
    InvalidDigit(char),

    /// Parsing expected `wanted` but ended up finding `found` instead where the
    /// two characters aren't the same.
    Expected {
        /// The character that was expected to be found
        wanted: char,
        /// The character that was actually found
        found: char,
    },

    /// We needed to parse more but EOF (or end of the string) was encountered.
    UnexpectedEof,

    /// A number failed to parse because it was too big to fit within the target
    /// type.
    NumberTooBig,

    /// An invalid unicode value was found in a `\u{...}` escape in a string,
    /// only valid unicode scalars can be escaped that way.
    InvalidUnicodeValue(u32),

    /// A lone underscore was found when parsing a number, since underscores
    /// should always be preceded and succeeded with a digit of some form.
    LoneUnderscore,

    /// A "confusing" unicode character is present in a comment or a string
    /// literal, such as a character that changes the direction text is
    /// typically displayed in editors. This could cause the human-read
    /// version to behave differently than the compiler-visible version, so
    /// these are simply rejected for now.
    ConfusingUnicode(char),
}

/// A sign token for an integer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum SignToken {
    /// Plus sign: "+",
    Plus,
    /// Minus sign: "-",
    Minus,
}

/// A parsed integer, signed or unsigned.
///
/// Methods can be used to access the value of the integer.
#[derive(Debug, PartialEq)]
pub struct Integer<'a>(Box<IntegerInner<'a>>);

#[derive(Debug, PartialEq)]
struct IntegerInner<'a> {
    // Explicit `+`/`-` prefix, if one was written.
    sign: Option<SignToken>,
    // Original source text of the literal.
    src: &'a str,
    // Digits with underscores removed (and a leading `-` when negative).
    val: Cow<'a, str>,
    // Whether `val` is in base 16 rather than base 10.
    hex: bool,
}

/// A parsed float.
///
/// Methods can be used to access the value of the float.
#[derive(Debug, PartialEq)]
pub struct Float<'a>(Box<FloatInner<'a>>);

#[derive(Debug, PartialEq)]
struct FloatInner<'a> {
    // Original source text of the literal.
    src: &'a str,
    // The literal split into its components.
    val: FloatVal<'a>,
}

/// A parsed string.
#[derive(Debug, PartialEq)]
pub struct WasmString<'a>(Box<WasmStringInner<'a>>);

#[derive(Debug, PartialEq)]
struct WasmStringInner<'a> {
    // Original source text of the literal.
    src: &'a str,
    // Decoded bytes of the string, with escapes resolved.
    val: Cow<'a, [u8]>,
}

/// Possible parsed float values
#[derive(Debug, PartialEq, Eq)]
pub enum FloatVal<'a> {
    /// A float `NaN` representation
    Nan {
        /// The specific bits to encode for this float, optionally
        val: Option<u64>,
        /// Whether or not this is a negative `NaN` or not.
        negative: bool,
    },
    /// A float infinity representation.
    Inf {
        #[allow(missing_docs)]
        negative: bool,
    },
    /// A parsed and separated floating point value
    Val {
        /// Whether or not the `integral` and `decimal` are specified in hex
        hex: bool,
        /// The float parts before the `.`
        integral: Cow<'a, str>,
        /// The float parts after the `.`
        decimal: Option<Cow<'a, str>>,
        /// The exponent to multiply this `integral.decimal` portion of the
        /// float by. If `hex` is true this is `2^exponent` and otherwise it's
        /// `10^exponent`
        exponent: Option<Cow<'a, str>>,
    },
}

// Byte pattern matching a single `idchar`.
//
// https://webassembly.github.io/spec/core/text/values.html#text-idchar
macro_rules! idchars {
    () => {
        b'0'..=b'9'
        | b'A'..=b'Z'
        | b'a'..=b'z'
        | b'!'
        | b'#'
        | b'$'
        | b'%'
        | b'&'
        | b'\''
        | b'*'
        | b'+'
        | b'-'
        | b'.'
        | b'/'
        | b':'
        | b'<'
        | b'='
        | b'>'
        | b'?'
        | b'@'
        | b'\\'
        | b'^'
        | b'_'
        | b'`'
        | b'|'
        | b'~'
    }
}

impl<'a> Lexer<'a> {
    /// Creates a new lexer which will lex the `input` source string.
    pub fn new(input: &str) -> Lexer<'_> {
        Lexer {
            remaining: input,
            input,
            allow_confusing_unicode: false,
        }
    }

    /// Returns the original source input that we're lexing.
    pub fn input(&self) -> &'a str {
        self.input
    }

    /// Configures whether "confusing" unicode characters are allowed while
    /// lexing.
    ///
    /// If allowed then no error will happen if these characters are found, but
    /// otherwise if disallowed a lex error will be produced when these
    /// characters are found. Confusing characters are denied by default.
    ///
    /// For now "confusing characters" are primarily related to the "trojan
    /// source" problem where it refers to characters which cause humans to read
    /// text differently than this lexer, such as characters that alter the
    /// left-to-right display of the source code.
    pub fn allow_confusing_unicode(&mut self, allow: bool) -> &mut Self {
        self.allow_confusing_unicode = allow;
        self
    }

    /// Lexes the next token in the input.
    ///
    /// Returns `Some` if a token is found or `None` if we're at EOF.
    ///
    /// # Errors
    ///
    /// Returns an error if the input is malformed.
+ pub fn parse(&mut self) -> Result<Option<Token<'a>>, Error> { + let pos = self.cur(); + // This `match` generally parses the grammar specified at + // + // https://webassembly.github.io/spec/core/text/lexical.html#text-token + let byte = match self.remaining.as_bytes().first() { + Some(b) => b, + None => return Ok(None), + }; + + match byte { + // Open-parens check the next character to see if this is the start + // of a block comment, otherwise it's just a bland left-paren + // token. + b'(' => match self.remaining.as_bytes().get(1) { + Some(b';') => { + let mut level = 1; + // Note that we're doing a byte-level search here for the + // close-delimiter of `;)`. The actual source text is utf-8 + // encode in `self.remaining` but due to how utf-8 works we + // can safely search for an ASCII byte since it'll never + // otherwise appear in the middle of a codepoint and if we + // find it then it's guaranteed to be the right byte. + // + // Mainly we're avoiding the overhead of decoding utf-8 + // characters into a Rust `char` since it's otherwise + // unnecessary work. 
+ let mut iter = self.remaining.as_bytes()[2..].iter(); + while let Some(ch) = iter.next() { + match ch { + b'(' => { + if let Some(b';') = iter.as_slice().first() { + level += 1; + iter.next(); + } + } + b';' => { + if let Some(b')') = iter.as_slice().first() { + level -= 1; + iter.next(); + if level == 0 { + let len = self.remaining.len() - iter.as_slice().len(); + let (comment, remaining) = self.remaining.split_at(len); + self.remaining = remaining; + self.check_confusing_comment(comment)?; + return Ok(Some(Token::BlockComment(comment))); + } + } + } + _ => {} + } + } + Err(self.error(pos, LexError::DanglingBlockComment)) + } + _ => Ok(Some(Token::LParen(self.split_first_byte()))), + }, + + b')' => Ok(Some(Token::RParen(self.split_first_byte()))), + + // https://webassembly.github.io/spec/core/text/lexical.html#white-space + b' ' | b'\n' | b'\r' | b'\t' => Ok(Some(Token::Whitespace(self.split_ws()))), + + c @ (idchars!() | b'"') => { + let (kind, src) = self.split_reserved()?; + match kind { + // If the reserved token was simply a single string then + // that is converted to a standalone string token + ReservedKind::String(val) => { + return Ok(Some(Token::String(WasmString(Box::new(WasmStringInner { + val, + src, + }))))); + } + + // If only idchars were consumed then this could be a + // specific kind of standalone token we're interested in. + ReservedKind::Idchars => { + // https://webassembly.github.io/spec/core/text/values.html#integers + if let Some(number) = self.number(src) { + return Ok(Some(number)); + // https://webassembly.github.io/spec/core/text/values.html#text-id + } else if *c == b'$' && src.len() > 1 { + return Ok(Some(Token::Id(src))); + // https://webassembly.github.io/spec/core/text/lexical.html#text-keyword + } else if b'a' <= *c && *c <= b'z' { + return Ok(Some(Token::Keyword(src))); + } + } + + // ... 
otherwise this was a conglomeration of idchars, + // strings, or just idchars that don't match a prior rule, + // meaning this falls through to the fallback `Reserved` + // token. + ReservedKind::Reserved => {} + } + + Ok(Some(Token::Reserved(src))) + } + + // This could be a line comment, otherwise `;` is a reserved token. + // The second byte is checked to see if it's a `;;` line comment + // + // Note that this character being considered as part of a + // `reserved` token is part of the annotations proposal. + b';' => match self.remaining.as_bytes().get(1) { + Some(b';') => { + let comment = self.split_until(b'\n'); + self.check_confusing_comment(comment)?; + Ok(Some(Token::LineComment(comment))) + } + _ => Ok(Some(Token::Reserved(self.split_first_byte()))), + }, + + // Other known reserved tokens other than `;` + // + // Note that these characters being considered as part of a + // `reserved` token is part of the annotations proposal. + b',' | b'[' | b']' | b'{' | b'}' => Ok(Some(Token::Reserved(self.split_first_byte()))), + + _ => { + let ch = self.remaining.chars().next().unwrap(); + Err(self.error(pos, LexError::Unexpected(ch))) + } + } + } + + fn split_first_byte(&mut self) -> &'a str { + let (token, remaining) = self.remaining.split_at(1); + self.remaining = remaining; + token + } + + fn split_until(&mut self, byte: u8) -> &'a str { + let pos = memchr::memchr(byte, self.remaining.as_bytes()).unwrap_or(self.remaining.len()); + let (ret, remaining) = self.remaining.split_at(pos); + self.remaining = remaining; + ret + } + + fn split_ws(&mut self) -> &'a str { + // This table is a byte lookup table to determine whether a byte is a + // whitespace byte. There are only 4 whitespace bytes for the `*.wat` + // format right now which are ' ', '\t', '\r', and '\n'. These 4 bytes + // have a '1' in the table below. 
+ // + // Due to how utf-8 works (our input is guaranteed to be utf-8) it is + // known that if these bytes are found they're guaranteed to be the + // whitespace byte, so they can be safely skipped and we don't have to + // do full utf-8 decoding. This means that the goal of this function is + // to find the first non-whitespace byte in `self.remaining`. + // + // For now this lookup table seems to be the fastest, but projects like + // https://github.com/lemire/despacer show other simd algorithms which + // can possibly accelerate this even more. Note that `*.wat` files often + // have a lot of whitespace so this function is typically quite hot when + // parsing inputs. + #[rustfmt::skip] + const WS: [u8; 256] = [ + // \t \n \r + /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, + /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + // ' ' + /* 0x20 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + /* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ]; + let pos = self + .remaining + .as_bytes() + .iter() + .position(|b| WS[*b as usize] != 1) + .unwrap_or(self.remaining.len()); + let (ret, remaining) = self.remaining.split_at(pos); + self.remaining = remaining; + ret + } + + /// Splits off a "reserved" token which is 
then further processed later on + /// to figure out which kind of token it is `depending on `ReservedKind`. + /// + /// For more information on this method see the clarification at + /// https://github.com/WebAssembly/spec/pull/1499 but the general gist is + /// that this is parsing the grammar: + /// + /// ```text + /// reserved := (idchar | string)+ + /// ``` + /// + /// which means that it is eating any number of adjacent string/idchar + /// tokens (e.g. `a"b"c`) and returning the classification of what was + /// eaten. The classification assists in determining what the actual token + /// here eaten looks like. + fn split_reserved(&mut self) -> Result<(ReservedKind<'a>, &'a str), Error> { + let mut idchars = false; + let mut strings = 0u32; + let mut last_string_val = None; + let mut pos = 0; + while let Some(byte) = self.remaining.as_bytes().get(pos) { + match byte { + // Normal `idchars` production which appends to the reserved + // token that's being produced. + idchars!() => { + idchars = true; + pos += 1; + } + + // https://webassembly.github.io/spec/core/text/values.html#text-string + b'"' => { + strings += 1; + pos += 1; + let mut it = self.remaining[pos..].chars(); + let result = Lexer::parse_str(&mut it, self.allow_confusing_unicode); + pos = self.remaining.len() - it.as_str().len(); + match result { + Ok(s) => last_string_val = Some(s), + Err(e) => { + let start = self.input.len() - self.remaining.len(); + self.remaining = &self.remaining[pos..]; + let err_pos = match &e { + LexError::UnexpectedEof => self.input.len(), + _ => { + self.input[..start + pos] + .char_indices() + .next_back() + .unwrap() + .0 + } + }; + return Err(self.error(err_pos, e)); + } + } + } + + // Nothing else is considered part of a reserved token + _ => break, + } + } + let (ret, remaining) = self.remaining.split_at(pos); + self.remaining = remaining; + Ok(match (idchars, strings) { + (false, 0) => unreachable!(), + (false, 1) => (ReservedKind::String(last_string_val.unwrap()), 
ret), + (true, 0) => (ReservedKind::Idchars, ret), + _ => (ReservedKind::Reserved, ret), + }) + } + + fn number(&self, src: &'a str) -> Option<Token<'a>> { + let (sign, num) = if let Some(stripped) = src.strip_prefix('+') { + (Some(SignToken::Plus), stripped) + } else if let Some(stripped) = src.strip_prefix('-') { + (Some(SignToken::Minus), stripped) + } else { + (None, src) + }; + + let negative = sign == Some(SignToken::Minus); + + // Handle `inf` and `nan` which are special numbers here + if num == "inf" { + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Inf { negative }, + })))); + } else if num == "nan" { + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Nan { + val: None, + negative, + }, + })))); + } else if let Some(stripped) = num.strip_prefix("nan:0x") { + let mut it = stripped.chars(); + let to_parse = skip_undescores(&mut it, false, char::is_ascii_hexdigit)?; + if it.next().is_some() { + return None; + } + let n = u64::from_str_radix(&to_parse, 16).ok()?; + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Nan { + val: Some(n), + negative, + }, + })))); + } + + // Figure out if we're a hex number or not + let (mut it, hex, test_valid) = if let Some(stripped) = num.strip_prefix("0x") { + ( + stripped.chars(), + true, + char::is_ascii_hexdigit as fn(&char) -> bool, + ) + } else { + ( + num.chars(), + false, + char::is_ascii_digit as fn(&char) -> bool, + ) + }; + + // Evaluate the first part, moving out all underscores + let val = skip_undescores(&mut it, negative, test_valid)?; + + match it.clone().next() { + // If we're followed by something this may be a float so keep going. + Some(_) => {} + + // Otherwise this is a valid integer literal! + None => { + return Some(Token::Integer(Integer(Box::new(IntegerInner { + sign, + src, + val, + hex, + })))) + } + } + + // A number can optionally be after the decimal so only actually try to + // parse one if it's there. 
+ let decimal = if it.clone().next() == Some('.') { + it.next(); + match it.clone().next() { + Some(c) if test_valid(&c) => Some(skip_undescores(&mut it, false, test_valid)?), + Some(_) | None => None, + } + } else { + None + }; + + // Figure out if there's an exponential part here to make a float, and + // if so parse it but defer its actual calculation until later. + let exponent = match (hex, it.next()) { + (true, Some('p')) | (true, Some('P')) | (false, Some('e')) | (false, Some('E')) => { + let negative = match it.clone().next() { + Some('-') => { + it.next(); + true + } + Some('+') => { + it.next(); + false + } + _ => false, + }; + Some(skip_undescores(&mut it, negative, char::is_ascii_digit)?) + } + (_, None) => None, + _ => return None, + }; + + // We should have eaten everything by now, if not then this is surely + // not a float or integer literal. + if it.next().is_some() { + return None; + } + + return Some(Token::Float(Float(Box::new(FloatInner { + src, + val: FloatVal::Val { + hex, + integral: val, + exponent, + decimal, + }, + })))); + + fn skip_undescores<'a>( + it: &mut str::Chars<'a>, + negative: bool, + good: fn(&char) -> bool, + ) -> Option<Cow<'a, str>> { + enum State { + Raw, + Collecting(String), + } + let mut last_underscore = false; + let mut state = if negative { + State::Collecting("-".to_string()) + } else { + State::Raw + }; + let input = it.as_str(); + let first = it.next()?; + if !good(&first) { + return None; + } + if let State::Collecting(s) = &mut state { + s.push(first); + } + let mut last = 1; + while let Some(c) = it.clone().next() { + if c == '_' && !last_underscore { + if let State::Raw = state { + state = State::Collecting(input[..last].to_string()); + } + it.next(); + last_underscore = true; + continue; + } + if !good(&c) { + break; + } + if let State::Collecting(s) = &mut state { + s.push(c); + } + last_underscore = false; + it.next(); + last += 1; + } + if last_underscore { + return None; + } + Some(match state { + 
State::Raw => input[..last].into(), + State::Collecting(s) => s.into(), + }) + } + } + + /// Verifies that `comment`, which is about to be returned, has a "confusing + /// unicode character" in it and should instead be transformed into an + /// error. + fn check_confusing_comment(&self, comment: &str) -> Result<(), Error> { + if self.allow_confusing_unicode { + return Ok(()); + } + + // In an effort to avoid utf-8 decoding the entire `comment` the search + // here is a bit more optimized. This checks for the `0xe2` byte because + // in the utf-8 encoding that's the leading encoding byte for all + // "confusing characters". Each instance of 0xe2 is checked to see if it + // starts a confusing character, and if so that's returned. + // + // Also note that 0xe2 will never be found in the middle of a codepoint, + // it's always the start of a codepoint. This means that if our special + // characters show up they're guaranteed to start with 0xe2 bytes. + let bytes = comment.as_bytes(); + for pos in memchr::Memchr::new(0xe2, bytes) { + if let Some(c) = comment[pos..].chars().next() { + if is_confusing_unicode(c) { + // Note that `self.cur()` accounts for already having + // parsed `comment`, so we move backwards to where + // `comment` started and then add the index within + // `comment`. + let pos = self.cur() - comment.len() + pos; + return Err(self.error(pos, LexError::ConfusingUnicode(c))); + } + } + } + + Ok(()) + } + + fn parse_str( + it: &mut str::Chars<'a>, + allow_confusing_unicode: bool, + ) -> Result<Cow<'a, [u8]>, LexError> { + enum State { + Start, + String(Vec<u8>), + } + let orig = it.as_str(); + let mut state = State::Start; + loop { + match it.next().ok_or(LexError::UnexpectedEof)? 
{ + '"' => break, + '\\' => { + match state { + State::String(_) => {} + State::Start => { + let pos = orig.len() - it.as_str().len() - 1; + state = State::String(orig[..pos].as_bytes().to_vec()); + } + } + let buf = match &mut state { + State::String(b) => b, + State::Start => unreachable!(), + }; + match it.next().ok_or(LexError::UnexpectedEof)? { + '"' => buf.push(b'"'), + '\'' => buf.push(b'\''), + 't' => buf.push(b'\t'), + 'n' => buf.push(b'\n'), + 'r' => buf.push(b'\r'), + '\\' => buf.push(b'\\'), + 'u' => { + Lexer::must_eat_char(it, '{')?; + let n = Lexer::hexnum(it)?; + let c = char::from_u32(n).ok_or(LexError::InvalidUnicodeValue(n))?; + buf.extend(c.encode_utf8(&mut [0; 4]).as_bytes()); + Lexer::must_eat_char(it, '}')?; + } + c1 if c1.is_ascii_hexdigit() => { + let c2 = Lexer::hexdigit(it)?; + buf.push(to_hex(c1) * 16 + c2); + } + c => return Err(LexError::InvalidStringEscape(c)), + } + } + c if (c as u32) < 0x20 || c as u32 == 0x7f => { + return Err(LexError::InvalidStringElement(c)) + } + c if !allow_confusing_unicode && is_confusing_unicode(c) => { + return Err(LexError::ConfusingUnicode(c)) + } + c => match &mut state { + State::Start => {} + State::String(v) => { + v.extend(c.encode_utf8(&mut [0; 4]).as_bytes()); + } + }, + } + } + match state { + State::Start => Ok(orig[..orig.len() - it.as_str().len() - 1].as_bytes().into()), + State::String(s) => Ok(s.into()), + } + } + + fn hexnum(it: &mut str::Chars<'_>) -> Result<u32, LexError> { + let n = Lexer::hexdigit(it)?; + let mut last_underscore = false; + let mut n = n as u32; + while let Some(c) = it.clone().next() { + if c == '_' { + it.next(); + last_underscore = true; + continue; + } + if !c.is_ascii_hexdigit() { + break; + } + last_underscore = false; + it.next(); + n = n + .checked_mul(16) + .and_then(|n| n.checked_add(to_hex(c) as u32)) + .ok_or(LexError::NumberTooBig)?; + } + if last_underscore { + return Err(LexError::LoneUnderscore); + } + Ok(n) + } + + /// Reads a hexidecimal digit from the 
input stream, returning where it's + /// defined and the hex value. Returns an error on EOF or an invalid hex + /// digit. + fn hexdigit(it: &mut str::Chars<'_>) -> Result<u8, LexError> { + let ch = Lexer::must_char(it)?; + if ch.is_ascii_hexdigit() { + Ok(to_hex(ch)) + } else { + Err(LexError::InvalidHexDigit(ch)) + } + } + + /// Reads the next character from the input string and where it's located, + /// returning an error if the input stream is empty. + fn must_char(it: &mut str::Chars<'_>) -> Result<char, LexError> { + it.next().ok_or(LexError::UnexpectedEof) + } + + /// Expects that a specific character must be read next + fn must_eat_char(it: &mut str::Chars<'_>, wanted: char) -> Result<(), LexError> { + let found = Lexer::must_char(it)?; + if wanted == found { + Ok(()) + } else { + Err(LexError::Expected { wanted, found }) + } + } + + /// Returns the current position of our iterator through the input string + fn cur(&self) -> usize { + self.input.len() - self.remaining.len() + } + + /// Creates an error at `pos` with the specified `kind` + fn error(&self, pos: usize, kind: LexError) -> Error { + Error::lex(Span { offset: pos }, self.input, kind) + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Result<Token<'a>, Error>; + + fn next(&mut self) -> Option<Self::Item> { + self.parse().transpose() + } +} + +impl<'a> Token<'a> { + /// Returns the original source text for this token. + pub fn src(&self) -> &'a str { + match self { + Token::Whitespace(s) => s, + Token::BlockComment(s) => s, + Token::LineComment(s) => s, + Token::LParen(s) => s, + Token::RParen(s) => s, + Token::String(s) => s.src(), + Token::Id(s) => s, + Token::Keyword(s) => s, + Token::Reserved(s) => s, + Token::Integer(i) => i.src(), + Token::Float(f) => f.src(), + } + } +} + +impl<'a> Integer<'a> { + /// Returns the sign token for this integer. + pub fn sign(&self) -> Option<SignToken> { + self.0.sign + } + + /// Returns the original source text for this integer. 
+ pub fn src(&self) -> &'a str { + self.0.src + } + + /// Returns the value string that can be parsed for this integer, as well as + /// the base that it should be parsed in + pub fn val(&self) -> (&str, u32) { + (&self.0.val, if self.0.hex { 16 } else { 10 }) + } +} + +impl<'a> Float<'a> { + /// Returns the original source text for this integer. + pub fn src(&self) -> &'a str { + self.0.src + } + + /// Returns a parsed value of this float with all of the components still + /// listed as strings. + pub fn val(&self) -> &FloatVal<'a> { + &self.0.val + } +} + +impl<'a> WasmString<'a> { + /// Returns the original source text for this string. + pub fn src(&self) -> &'a str { + self.0.src + } + + /// Returns a parsed value, as a list of bytes, for this string. + pub fn val(&self) -> &[u8] { + &self.0.val + } +} + +fn to_hex(c: char) -> u8 { + match c { + 'a'..='f' => c as u8 - b'a' + 10, + 'A'..='F' => c as u8 - b'A' + 10, + _ => c as u8 - b'0', + } +} + +impl fmt::Display for LexError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use LexError::*; + match self { + DanglingBlockComment => f.write_str("unterminated block comment")?, + Unexpected(c) => write!(f, "unexpected character '{}'", escape_char(*c))?, + InvalidStringElement(c) => { + write!(f, "invalid character in string '{}'", escape_char(*c))? 
+ } + InvalidStringEscape(c) => write!(f, "invalid string escape '{}'", escape_char(*c))?, + InvalidHexDigit(c) => write!(f, "invalid hex digit '{}'", escape_char(*c))?, + InvalidDigit(c) => write!(f, "invalid decimal digit '{}'", escape_char(*c))?, + Expected { wanted, found } => write!( + f, + "expected '{}' but found '{}'", + escape_char(*wanted), + escape_char(*found) + )?, + UnexpectedEof => write!(f, "unexpected end-of-file")?, + NumberTooBig => f.write_str("number is too big to parse")?, + InvalidUnicodeValue(c) => write!(f, "invalid unicode scalar value 0x{:x}", c)?, + LoneUnderscore => write!(f, "bare underscore in numeric literal")?, + ConfusingUnicode(c) => write!(f, "likely-confusing unicode character found {:?}", c)?, + } + Ok(()) + } +} + +fn escape_char(c: char) -> String { + match c { + '\t' => String::from("\\t"), + '\r' => String::from("\\r"), + '\n' => String::from("\\n"), + '\\' => String::from("\\\\"), + '\'' => String::from("\\\'"), + '\"' => String::from("\""), + '\x20'..='\x7e' => String::from(c), + _ => c.escape_unicode().to_string(), + } +} + +/// This is an attempt to protect agains the "trojan source" [1] problem where +/// unicode characters can cause editors to render source code differently +/// for humans than the compiler itself sees. +/// +/// To mitigate this issue, and because it's relatively rare in practice, +/// this simply rejects characters of that form. 
+/// +/// [1]: https://www.trojansource.codes/ +fn is_confusing_unicode(ch: char) -> bool { + matches!( + ch, + '\u{202a}' + | '\u{202b}' + | '\u{202d}' + | '\u{202e}' + | '\u{2066}' + | '\u{2067}' + | '\u{2068}' + | '\u{206c}' + | '\u{2069}' + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ws_smoke() { + fn get_whitespace(input: &str) -> &str { + match Lexer::new(input).parse().expect("no first token") { + Some(Token::Whitespace(s)) => s, + other => panic!("unexpected {:?}", other), + } + } + assert_eq!(get_whitespace(" "), " "); + assert_eq!(get_whitespace(" "), " "); + assert_eq!(get_whitespace(" \n "), " \n "); + assert_eq!(get_whitespace(" x"), " "); + assert_eq!(get_whitespace(" ;"), " "); + } + + #[test] + fn line_comment_smoke() { + fn get_line_comment(input: &str) -> &str { + match Lexer::new(input).parse().expect("no first token") { + Some(Token::LineComment(s)) => s, + other => panic!("unexpected {:?}", other), + } + } + assert_eq!(get_line_comment(";;"), ";;"); + assert_eq!(get_line_comment(";; xyz"), ";; xyz"); + assert_eq!(get_line_comment(";; xyz\nabc"), ";; xyz"); + assert_eq!(get_line_comment(";;\nabc"), ";;"); + assert_eq!(get_line_comment(";; \nabc"), ";; "); + } + + #[test] + fn block_comment_smoke() { + fn get_block_comment(input: &str) -> &str { + match Lexer::new(input).parse().expect("no first token") { + Some(Token::BlockComment(s)) => s, + other => panic!("unexpected {:?}", other), + } + } + assert_eq!(get_block_comment("(;;)"), "(;;)"); + assert_eq!(get_block_comment("(; ;)"), "(; ;)"); + assert_eq!(get_block_comment("(; (;;) ;)"), "(; (;;) ;)"); + } + + fn get_token(input: &str) -> Token<'_> { + Lexer::new(input) + .parse() + .expect("no first token") + .expect("no token") + } + + #[test] + fn lparen() { + assert_eq!(get_token("(("), Token::LParen("(")); + } + + #[test] + fn rparen() { + assert_eq!(get_token(")("), Token::RParen(")")); + } + + #[test] + fn strings() { + fn get_string(input: &str) -> Vec<u8> { + match 
get_token(input) { + Token::String(s) => { + assert_eq!(input, s.src()); + s.val().to_vec() + } + other => panic!("not string {:?}", other), + } + } + assert_eq!(&*get_string("\"\""), b""); + assert_eq!(&*get_string("\"a\""), b"a"); + assert_eq!(&*get_string("\"a b c d\""), b"a b c d"); + assert_eq!(&*get_string("\"\\\"\""), b"\""); + assert_eq!(&*get_string("\"\\'\""), b"'"); + assert_eq!(&*get_string("\"\\n\""), b"\n"); + assert_eq!(&*get_string("\"\\t\""), b"\t"); + assert_eq!(&*get_string("\"\\r\""), b"\r"); + assert_eq!(&*get_string("\"\\\\\""), b"\\"); + assert_eq!(&*get_string("\"\\01\""), &[1]); + assert_eq!(&*get_string("\"\\u{1}\""), &[1]); + assert_eq!( + &*get_string("\"\\u{0f3}\""), + '\u{0f3}'.encode_utf8(&mut [0; 4]).as_bytes() + ); + assert_eq!( + &*get_string("\"\\u{0_f_3}\""), + '\u{0f3}'.encode_utf8(&mut [0; 4]).as_bytes() + ); + + for i in 0..=255i32 { + let s = format!("\"\\{:02x}\"", i); + assert_eq!(&*get_string(&s), &[i as u8]); + } + } + + #[test] + fn id() { + fn get_id(input: &str) -> &str { + match get_token(input) { + Token::Id(s) => s, + other => panic!("not id {:?}", other), + } + } + assert_eq!(get_id("$x"), "$x"); + assert_eq!(get_id("$xyz"), "$xyz"); + assert_eq!(get_id("$x_z"), "$x_z"); + assert_eq!(get_id("$0^"), "$0^"); + assert_eq!(get_id("$0^;;"), "$0^"); + assert_eq!(get_id("$0^ ;;"), "$0^"); + } + + #[test] + fn keyword() { + fn get_keyword(input: &str) -> &str { + match get_token(input) { + Token::Keyword(s) => s, + other => panic!("not id {:?}", other), + } + } + assert_eq!(get_keyword("x"), "x"); + assert_eq!(get_keyword("xyz"), "xyz"); + assert_eq!(get_keyword("x_z"), "x_z"); + assert_eq!(get_keyword("x_z "), "x_z"); + assert_eq!(get_keyword("x_z "), "x_z"); + } + + #[test] + fn reserved() { + fn get_reserved(input: &str) -> &str { + match get_token(input) { + Token::Reserved(s) => s, + other => panic!("not reserved {:?}", other), + } + } + assert_eq!(get_reserved("$ "), "$"); + assert_eq!(get_reserved("^_x "), "^_x"); + 
} + + #[test] + fn integer() { + fn get_integer(input: &str) -> String { + match get_token(input) { + Token::Integer(i) => { + assert_eq!(input, i.src()); + i.val().0.to_string() + } + other => panic!("not integer {:?}", other), + } + } + assert_eq!(get_integer("1"), "1"); + assert_eq!(get_integer("0"), "0"); + assert_eq!(get_integer("-1"), "-1"); + assert_eq!(get_integer("+1"), "1"); + assert_eq!(get_integer("+1_000"), "1000"); + assert_eq!(get_integer("+1_0_0_0"), "1000"); + assert_eq!(get_integer("+0x10"), "10"); + assert_eq!(get_integer("-0x10"), "-10"); + assert_eq!(get_integer("0x10"), "10"); + } + + #[test] + fn float() { + fn get_float(input: &str) -> FloatVal<'_> { + match get_token(input) { + Token::Float(i) => { + assert_eq!(input, i.src()); + i.0.val + } + other => panic!("not reserved {:?}", other), + } + } + assert_eq!( + get_float("nan"), + FloatVal::Nan { + val: None, + negative: false + }, + ); + assert_eq!( + get_float("-nan"), + FloatVal::Nan { + val: None, + negative: true, + }, + ); + assert_eq!( + get_float("+nan"), + FloatVal::Nan { + val: None, + negative: false, + }, + ); + assert_eq!( + get_float("+nan:0x1"), + FloatVal::Nan { + val: Some(1), + negative: false, + }, + ); + assert_eq!( + get_float("nan:0x7f_ffff"), + FloatVal::Nan { + val: Some(0x7fffff), + negative: false, + }, + ); + assert_eq!(get_float("inf"), FloatVal::Inf { negative: false }); + assert_eq!(get_float("-inf"), FloatVal::Inf { negative: true }); + assert_eq!(get_float("+inf"), FloatVal::Inf { negative: false }); + + assert_eq!( + get_float("1.2"), + FloatVal::Val { + integral: "1".into(), + decimal: Some("2".into()), + exponent: None, + hex: false, + }, + ); + assert_eq!( + get_float("1.2e3"), + FloatVal::Val { + integral: "1".into(), + decimal: Some("2".into()), + exponent: Some("3".into()), + hex: false, + }, + ); + assert_eq!( + get_float("-1_2.1_1E+0_1"), + FloatVal::Val { + integral: "-12".into(), + decimal: Some("11".into()), + exponent: Some("01".into()), + hex: 
false, + }, + ); + assert_eq!( + get_float("+1_2.1_1E-0_1"), + FloatVal::Val { + integral: "12".into(), + decimal: Some("11".into()), + exponent: Some("-01".into()), + hex: false, + }, + ); + assert_eq!( + get_float("0x1_2.3_4p5_6"), + FloatVal::Val { + integral: "12".into(), + decimal: Some("34".into()), + exponent: Some("56".into()), + hex: true, + }, + ); + assert_eq!( + get_float("+0x1_2.3_4P-5_6"), + FloatVal::Val { + integral: "12".into(), + decimal: Some("34".into()), + exponent: Some("-56".into()), + hex: true, + }, + ); + assert_eq!( + get_float("1."), + FloatVal::Val { + integral: "1".into(), + decimal: None, + exponent: None, + hex: false, + }, + ); + assert_eq!( + get_float("0x1p-24"), + FloatVal::Val { + integral: "1".into(), + decimal: None, + exponent: Some("-24".into()), + hex: true, + }, + ); + } +} diff --git a/third_party/rust/wast/src/lib.rs b/third_party/rust/wast/src/lib.rs new file mode 100644 index 0000000000..d5d04c07e4 --- /dev/null +++ b/third_party/rust/wast/src/lib.rs @@ -0,0 +1,518 @@ +//! A crate for low-level parsing of the WebAssembly text formats: WAT and WAST. +//! +//! This crate is intended to be a low-level detail of the `wat` crate, +//! providing a low-level parsing API for parsing WebAssembly text format +//! structures. The API provided by this crate is very similar to +//! [`syn`](https://docs.rs/syn) and provides the ability to write customized +//! parsers which may be an extension to the core WebAssembly text format. For +//! more documentation see the [`parser`] module. +//! +//! # High-level Overview +//! +//! This crate provides a few major pieces of functionality +//! +//! * [`lexer`] - this is a raw lexer for the wasm text format. This is not +//! customizable, but if you'd like to iterate over raw tokens this is the +//! module for you. You likely won't use this much. +//! +//! * [`parser`] - this is the workhorse of this crate. The [`parser`] module +//! provides the [`Parse`][] trait primarily and utilities +//! 
around working with a [`Parser`](`parser::Parser`) to parse streams of +//! tokens. +//! +//! * [`Module`](crate::core::Module) - this contains an Abstract Syntax Tree +//! (AST) of the WebAssembly Text format (WAT) as well as the unofficial WAST +//! format. This also has a [`Module::encode`](crate::core::Module::encode) +//! method to emit a module in its binary form. +//! +//! # Stability and WebAssembly Features +//! +//! This crate provides support for many in-progress WebAssembly features such +//! as reference types, multi-value, etc. Be sure to check out the documentation +//! of the [`wast` crate](https://docs.rs/wast) for policy information on crate +//! stability vs WebAssembly Features. The tl;dr; version is that this crate +//! will issue semver-non-breaking releases which will break the parsing of the +//! text format. This crate, unlike `wast`, is expected to have numerous Rust +//! public API changes, all of which will be accompanied with a semver-breaking +//! release. +//! +//! # Compile-time Cargo features +//! +//! This crate has a `wasm-module` feature which is turned on by default which +//! includes all necessary support to parse full WebAssembly modules. If you +//! don't need this (for example you're parsing your own s-expression format) +//! then this feature can be disabled. +//! +//! [`Parse`]: parser::Parse +//! [`LexError`]: lexer::LexError + +#![deny(missing_docs, rustdoc::broken_intra_doc_links)] + +/// A macro to create a custom keyword parser. 
+/// +/// This macro is invoked in one of two forms: +/// +/// ``` +/// // keyword derived from the Rust identifier: +/// wast::custom_keyword!(foo); +/// +/// // or an explicitly specified string representation of the keyword: +/// wast::custom_keyword!(my_keyword = "the-wasm-keyword"); +/// ``` +/// +/// This can then be used to parse custom keyword for custom items, such as: +/// +/// ``` +/// use wast::parser::{Parser, Result, Parse}; +/// +/// struct InlineModule<'a> { +/// inline_text: &'a str, +/// } +/// +/// mod kw { +/// wast::custom_keyword!(inline); +/// } +/// +/// // Parse an inline string module of the form: +/// // +/// // (inline "(module (func))") +/// impl<'a> Parse<'a> for InlineModule<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// parser.parse::<kw::inline>()?; +/// Ok(InlineModule { +/// inline_text: parser.parse()?, +/// }) +/// } +/// } +/// ``` +/// +/// Note that the keyword name can only start with a lower-case letter, i.e. 'a'..'z'. +#[macro_export] +macro_rules! custom_keyword { + ($name:ident) => { + $crate::custom_keyword!($name = stringify!($name)); + }; + ($name:ident = $kw:expr) => { + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + #[derive(Debug, Copy, Clone)] + pub struct $name(pub $crate::token::Span); + + impl<'a> $crate::parser::Parse<'a> for $name { + fn parse(parser: $crate::parser::Parser<'a>) -> $crate::parser::Result<Self> { + parser.step(|c| { + if let Some((kw, rest)) = c.keyword() { + if kw == $kw { + return Ok(($name(c.cur_span()), rest)); + } + } + Err(c.error(concat!("expected keyword `", $kw, "`"))) + }) + } + } + + impl $crate::parser::Peek for $name { + fn peek(cursor: $crate::parser::Cursor<'_>) -> bool { + if let Some((kw, _rest)) = cursor.keyword() { + kw == $kw + } else { + false + } + } + + fn display() -> &'static str { + concat!("`", $kw, "`") + } + } + }; +} + +/// A macro for defining custom reserved symbols. 
+/// +/// This is like `custom_keyword!` but for reserved symbols (`Token::Reserved`) +/// instead of keywords (`Token::Keyword`). +/// +/// ``` +/// use wast::parser::{Parser, Result, Parse}; +/// +/// // Define a custom reserved symbol, the "spaceship" operator: `<=>`. +/// wast::custom_reserved!(spaceship = "<=>"); +/// +/// /// A "three-way comparison" like `(<=> a b)` that returns -1 if `a` is less +/// /// than `b`, 0 if they're equal, and 1 if `a` is greater than `b`. +/// struct ThreeWayComparison<'a> { +/// lhs: wast::core::Expression<'a>, +/// rhs: wast::core::Expression<'a>, +/// } +/// +/// impl<'a> Parse<'a> for ThreeWayComparison<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// parser.parse::<spaceship>()?; +/// let lhs = parser.parse()?; +/// let rhs = parser.parse()?; +/// Ok(ThreeWayComparison { lhs, rhs }) +/// } +/// } +/// ``` +#[macro_export] +macro_rules! custom_reserved { + ($name:ident) => { + $crate::custom_reserved!($name = stringify!($name)); + }; + ($name:ident = $rsv:expr) => { + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + #[derive(Debug)] + pub struct $name(pub $crate::token::Span); + + impl<'a> $crate::parser::Parse<'a> for $name { + fn parse(parser: $crate::parser::Parser<'a>) -> $crate::parser::Result<Self> { + parser.step(|c| { + if let Some((rsv, rest)) = c.reserved() { + if rsv == $rsv { + return Ok(($name(c.cur_span()), rest)); + } + } + Err(c.error(concat!("expected reserved symbol `", $rsv, "`"))) + }) + } + } + + impl $crate::parser::Peek for $name { + fn peek(cursor: $crate::parser::Cursor<'_>) -> bool { + if let Some((rsv, _rest)) = cursor.reserved() { + rsv == $rsv + } else { + false + } + } + + fn display() -> &'static str { + concat!("`", $rsv, "`") + } + } + }; +} + +/// A macro, like [`custom_keyword`], to create a type which can be used to +/// parse/peek annotation directives. 
+/// +/// Note that when you're parsing custom annotations it can be somewhat tricky +/// because most of them are skipped. You'll want to be sure to +/// consult the documentation of [`Parser::register_annotation`][register] when +/// using this macro. +/// +/// # Examples +/// +/// To see an example of how to use this macro, let's invent our own syntax for +/// the [producers section][section] which looks like: +/// +/// ```wat +/// (@producer "wat" "1.0.2") +/// ``` +/// +/// Here, for simplicity, we'll assume everything is a `processed-by` directive, +/// but you could get much more fancy with this as well. +/// +/// ``` +/// # use wast::*; +/// # use wast::parser::*; +/// +/// // First we define the custom annotation keyword we're using, and by +/// // convention we define it in an `annotation` module. +/// mod annotation { +/// wast::annotation!(producer); +/// } +/// +/// struct Producer<'a> { +/// name: &'a str, +/// version: &'a str, +/// } +/// +/// impl<'a> Parse<'a> for Producer<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // Remember that parsers conventionally parse the *interior* of an +/// // s-expression, so we parse our `@producer` annotation and then we +/// // parse the payload of our annotation. +/// parser.parse::<annotation::producer>()?; +/// Ok(Producer { +/// name: parser.parse()?, +/// version: parser.parse()?, +/// }) +/// } +/// } +/// ``` +/// +/// Note though that this is only half of the parser for annotations. The other +/// half is calling the [`register_annotation`][register] method at the right +/// time to ensure the parser doesn't automatically skip our `@producer` +/// directive. Note that we *can't* call it inside the `Parse for Producer` +/// definition because that's too late and the annotation would already have +/// been skipped.
+/// +/// Instead we'll need to call it from a higher level-parser before the +/// parenthesis have been parsed, like so: +/// +/// ``` +/// # use wast::*; +/// # use wast::parser::*; +/// struct Module<'a> { +/// fields: Vec<ModuleField<'a>>, +/// } +/// +/// impl<'a> Parse<'a> for Module<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // .. parse module header here ... +/// +/// // register our custom `@producer` annotation before we start +/// // parsing the parentheses of each field +/// let _r = parser.register_annotation("producer"); +/// +/// let mut fields = Vec::new(); +/// while !parser.is_empty() { +/// fields.push(parser.parens(|p| p.parse())?); +/// } +/// Ok(Module { fields }) +/// } +/// } +/// +/// enum ModuleField<'a> { +/// Producer(Producer<'a>), +/// // ... +/// } +/// # struct Producer<'a>(&'a str); +/// # impl<'a> Parse<'a> for Producer<'a> { +/// # fn parse(parser: Parser<'a>) -> Result<Self> { Ok(Producer(parser.parse()?)) } +/// # } +/// # mod annotation { wast::annotation!(producer); } +/// +/// impl<'a> Parse<'a> for ModuleField<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // and here `peek` works and our delegated parsing works because the +/// // annotation has been registered. +/// if parser.peek::<annotation::producer>() { +/// return Ok(ModuleField::Producer(parser.parse()?)); +/// } +/// +/// // .. typically we'd parse other module fields here... +/// +/// Err(parser.error("unknown module field")) +/// } +/// } +/// ``` +/// +/// [register]: crate::parser::Parser::register_annotation +/// [section]: https://github.com/WebAssembly/tool-conventions/blob/master/ProducersSection.md +#[macro_export] +macro_rules! 
annotation { + ($name:ident) => { + $crate::annotation!($name = stringify!($name)); + }; + ($name:ident = $annotation:expr) => { + #[allow(non_camel_case_types)] + #[allow(missing_docs)] + #[derive(Debug)] + pub struct $name(pub $crate::token::Span); + + impl<'a> $crate::parser::Parse<'a> for $name { + fn parse(parser: $crate::parser::Parser<'a>) -> $crate::parser::Result<Self> { + parser.step(|c| { + if let Some((a, rest)) = c.annotation() { + if a == $annotation { + return Ok(($name(c.cur_span()), rest)); + } + } + Err(c.error(concat!("expected annotation `@", $annotation, "`"))) + }) + } + } + + impl $crate::parser::Peek for $name { + fn peek(cursor: $crate::parser::Cursor<'_>) -> bool { + if let Some((a, _rest)) = cursor.annotation() { + a == $annotation + } else { + false + } + } + + fn display() -> &'static str { + concat!("`@", $annotation, "`") + } + } + }; +} + +pub mod lexer; +pub mod parser; +pub mod token; + +mod encode; +mod error; +mod gensym; +mod names; +pub use self::error::*; + +macro_rules! id { + ($($t:tt)*) => ($($t)*) +} + +#[cfg(feature = "wasm-module")] +id! { + mod wast; + mod wat; + pub use self::wast::*; + pub use self::wat::*; + + // Support for core wasm parsing + pub mod core; + + // Support for component model parsing + pub mod component; +} + +/// Common keyword used to parse WebAssembly text files. 
+pub mod kw { + custom_keyword!(after); + custom_keyword!(alias); + custom_keyword!(any); + custom_keyword!(anyfunc); + custom_keyword!(anyref); + custom_keyword!(arg); + custom_keyword!(array); + custom_keyword!(arrayref); + custom_keyword!(assert_exception); + custom_keyword!(assert_exhaustion); + custom_keyword!(assert_invalid); + custom_keyword!(assert_malformed); + custom_keyword!(assert_return); + custom_keyword!(assert_trap); + custom_keyword!(assert_unlinkable); + custom_keyword!(before); + custom_keyword!(binary); + custom_keyword!(block); + custom_keyword!(catch); + custom_keyword!(catch_all); + custom_keyword!(code); + custom_keyword!(component); + custom_keyword!(data); + custom_keyword!(declare); + custom_keyword!(delegate); + custom_keyword!(r#do = "do"); + custom_keyword!(elem); + custom_keyword!(end); + custom_keyword!(tag); + custom_keyword!(export); + custom_keyword!(r#extern = "extern"); + custom_keyword!(externref); + custom_keyword!(eq); + custom_keyword!(eqref); + custom_keyword!(f32); + custom_keyword!(f32x4); + custom_keyword!(f64); + custom_keyword!(f64x2); + custom_keyword!(field); + custom_keyword!(first); + custom_keyword!(func); + custom_keyword!(funcref); + custom_keyword!(get); + custom_keyword!(global); + custom_keyword!(i16); + custom_keyword!(i16x8); + custom_keyword!(i31); + custom_keyword!(i31ref); + custom_keyword!(i32); + custom_keyword!(i32x4); + custom_keyword!(i64); + custom_keyword!(i64x2); + custom_keyword!(i8); + custom_keyword!(i8x16); + custom_keyword!(import); + custom_keyword!(instance); + custom_keyword!(instantiate); + custom_keyword!(invoke); + custom_keyword!(item); + custom_keyword!(last); + custom_keyword!(local); + custom_keyword!(memory); + custom_keyword!(module); + custom_keyword!(modulecode); + custom_keyword!(nan_arithmetic = "nan:arithmetic"); + custom_keyword!(nan_canonical = "nan:canonical"); + custom_keyword!(nofunc); + custom_keyword!(noextern); + custom_keyword!(none); + custom_keyword!(null); + 
custom_keyword!(nullfuncref); + custom_keyword!(nullexternref); + custom_keyword!(nullref); + custom_keyword!(offset); + custom_keyword!(outer); + custom_keyword!(param); + custom_keyword!(parent); + custom_keyword!(passive); + custom_keyword!(quote); + custom_keyword!(r#else = "else"); + custom_keyword!(r#if = "if"); + custom_keyword!(r#loop = "loop"); + custom_keyword!(r#mut = "mut"); + custom_keyword!(r#type = "type"); + custom_keyword!(r#ref = "ref"); + custom_keyword!(ref_func = "ref.func"); + custom_keyword!(ref_null = "ref.null"); + custom_keyword!(register); + custom_keyword!(rec); + custom_keyword!(result); + custom_keyword!(shared); + custom_keyword!(start); + custom_keyword!(sub); + custom_keyword!(table); + custom_keyword!(then); + custom_keyword!(r#try = "try"); + custom_keyword!(v128); + custom_keyword!(value); + custom_keyword!(s8); + custom_keyword!(s16); + custom_keyword!(s32); + custom_keyword!(s64); + custom_keyword!(u8); + custom_keyword!(u16); + custom_keyword!(u32); + custom_keyword!(u64); + custom_keyword!(char); + custom_keyword!(case); + custom_keyword!(refines); + custom_keyword!(record); + custom_keyword!(string); + custom_keyword!(bool_ = "bool"); + custom_keyword!(float32); + custom_keyword!(float64); + custom_keyword!(variant); + custom_keyword!(flags); + custom_keyword!(option); + custom_keyword!(tuple); + custom_keyword!(list); + custom_keyword!(error); + custom_keyword!(union); + custom_keyword!(canon); + custom_keyword!(lift); + custom_keyword!(lower); + custom_keyword!(enum_ = "enum"); + custom_keyword!(string_utf8 = "string-encoding=utf8"); + custom_keyword!(string_utf16 = "string-encoding=utf16"); + custom_keyword!(string_latin1_utf16 = "string-encoding=latin1+utf16"); + custom_keyword!(r#struct = "struct"); + custom_keyword!(structref); + custom_keyword!(realloc); + custom_keyword!(post_return = "post-return"); + custom_keyword!(with); + custom_keyword!(core); + custom_keyword!(true_ = "true"); + custom_keyword!(false_ = 
"false"); +} + +/// Common annotations used to parse WebAssembly text files. +pub mod annotation { + annotation!(custom); + annotation!(name); +} diff --git a/third_party/rust/wast/src/names.rs b/third_party/rust/wast/src/names.rs new file mode 100644 index 0000000000..7cbfc5d9ca --- /dev/null +++ b/third_party/rust/wast/src/names.rs @@ -0,0 +1,86 @@ +use crate::token::{Id, Index}; +use crate::Error; +use std::collections::HashMap; + +#[derive(Default)] +pub struct Namespace<'a> { + names: HashMap<Id<'a>, u32>, + count: u32, +} + +impl<'a> Namespace<'a> { + pub fn register(&mut self, name: Option<Id<'a>>, desc: &str) -> Result<u32, Error> { + let index = self.alloc(); + if let Some(name) = name { + if let Some(_prev) = self.names.insert(name, index) { + // FIXME: temporarily allow duplicately-named data and element + // segments. This is a sort of dumb hack to get the spec test + // suite working (ironically). + // + // So as background, the text format disallows duplicate + // identifiers, causing a parse error if they're found. There + // are two tests currently upstream, however, data.wast and + // elem.wast, which *look* like they have duplicately named + // element and data segments. These tests, however, are using + // pre-bulk-memory syntax where a bare identifier was the + // table/memory being initialized. In post-bulk-memory this + // identifier is the name of the segment. Since we implement + // post-bulk-memory features that means that we're parsing the + // memory/table-to-initialize as the name of the segment. + // + // This is technically incorrect behavior but no one is + // hopefully relying on this too much. To get the spec tests + // passing we ignore errors for elem/data segments. Once the + // spec tests get updated enough we can remove this condition + // and return errors for them. 
+ if desc != "elem" && desc != "data" { + return Err(Error::new( + name.span(), + format!("duplicate {} identifier", desc), + )); + } + } + } + Ok(index) + } + + pub fn alloc(&mut self) -> u32 { + let index = self.count; + self.count += 1; + index + } + + pub fn register_specific(&mut self, name: Id<'a>, index: u32, desc: &str) -> Result<(), Error> { + if let Some(_prev) = self.names.insert(name, index) { + return Err(Error::new( + name.span(), + format!("duplicate identifier for {}", desc), + )); + } + Ok(()) + } + + pub fn resolve(&self, idx: &mut Index<'a>, desc: &str) -> Result<u32, Error> { + let id = match idx { + Index::Num(n, _) => return Ok(*n), + Index::Id(id) => id, + }; + if let Some(&n) = self.names.get(id) { + *idx = Index::Num(n, id.span()); + return Ok(n); + } + Err(resolve_error(*id, desc)) + } +} + +pub fn resolve_error(id: Id<'_>, ns: &str) -> Error { + assert!( + !id.is_gensym(), + "symbol generated by `wast` itself cannot be resolved {:?}", + id + ); + Error::new( + id.span(), + format!("unknown {ns}: failed to find name `${}`", id.name()), + ) +} diff --git a/third_party/rust/wast/src/parser.rs b/third_party/rust/wast/src/parser.rs new file mode 100644 index 0000000000..6b1a9debf6 --- /dev/null +++ b/third_party/rust/wast/src/parser.rs @@ -0,0 +1,1315 @@ +//! Traits for parsing the WebAssembly Text format +//! +//! This module contains the traits, abstractions, and utilities needed to +//! define custom parsers for WebAssembly text format items. This module exposes +//! a recursive descent parsing strategy and centers around the +//! [`Parse`](crate::parser::Parse) trait for defining new fragments of +//! WebAssembly text syntax. +//! +//! The top-level [`parse`](crate::parser::parse) function can be used to fully parse AST fragments: +//! +//! ``` +//! use wast::Wat; +//! use wast::parser::{self, ParseBuffer}; +//! +//! # fn foo() -> Result<(), wast::Error> { +//! let wat = "(module (func))"; +//! let buf = ParseBuffer::new(wat)?; +//! 
let module = parser::parse::<Wat>(&buf)?; +//! # Ok(()) +//! # } +//! ``` +//! +//! and you can also define your own new syntax with the +//! [`Parse`](crate::parser::Parse) trait: +//! +//! ``` +//! use wast::kw; +//! use wast::core::{Import, Func}; +//! use wast::parser::{Parser, Parse, Result}; +//! +//! // Fields of a WebAssembly which only allow imports and functions, and all +//! // imports must come before all the functions +//! struct OnlyImportsAndFunctions<'a> { +//! imports: Vec<Import<'a>>, +//! functions: Vec<Func<'a>>, +//! } +//! +//! impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> { +//! fn parse(parser: Parser<'a>) -> Result<Self> { +//! // While the second token is `import` (the first is `(`, so we care +//! // about the second) we parse an `ast::ModuleImport` inside of +//! // parentheses. The `parens` function here ensures that what we +//! // parse inside of it is surrounded by `(` and `)`. +//! let mut imports = Vec::new(); +//! while parser.peek2::<kw::import>() { +//! let import = parser.parens(|p| p.parse())?; +//! imports.push(import); +//! } +//! +//! // Afterwards we assume everything else is a function. Note that +//! // `parse` here is a generic function and type inference figures out +//! // that we're parsing functions here and imports above. +//! let mut functions = Vec::new(); +//! while !parser.is_empty() { +//! let func = parser.parens(|p| p.parse())?; +//! functions.push(func); +//! } +//! +//! Ok(OnlyImportsAndFunctions { imports, functions }) +//! } +//! } +//! ``` +//! +//! This module is heavily inspired by [`syn`](https://docs.rs/syn) so you can +//! likely also draw inspiration from the excellent examples in the `syn` crate. + +use crate::lexer::{Float, Integer, Lexer, Token}; +use crate::token::Span; +use crate::Error; +use std::cell::{Cell, RefCell}; +use std::collections::HashMap; +use std::fmt; +use std::usize; + +/// The maximum recursive depth of parens to parse. 
+/// +/// This is sort of a fundamental limitation of the way this crate is +/// designed. Everything is done through recursive descent parsing which +/// means, well, that we're recursively going down the stack as we parse +/// nested data structures. While we can handle this for wasm expressions +/// since that's a pretty local decision, handling this for nested +/// modules/components would be far trickier. For now we just say that when +/// the parser goes too deep we return an error saying there are too many +/// nested items. It would be great to not return an error here, though! +pub(crate) const MAX_PARENS_DEPTH: usize = 100; + +/// A top-level convenience parsing function that parses a `T` from `buf` and +/// requires that all tokens in `buf` are consumed. +/// +/// This generic parsing function can be used to parse any `T` implementing the +/// [`Parse`] trait. It is not used from [`Parse`] trait implementations. +/// +/// # Examples +/// +/// ``` +/// use wast::Wat; +/// use wast::parser::{self, ParseBuffer}; +/// +/// # fn foo() -> Result<(), wast::Error> { +/// let wat = "(module (func))"; +/// let buf = ParseBuffer::new(wat)?; +/// let module = parser::parse::<Wat>(&buf)?; +/// # Ok(()) +/// # } +/// ``` +/// +/// or parsing simply a fragment +/// +/// ``` +/// use wast::parser::{self, ParseBuffer}; +/// +/// # fn foo() -> Result<(), wast::Error> { +/// let wat = "12"; +/// let buf = ParseBuffer::new(wat)?; +/// let val = parser::parse::<u32>(&buf)?; +/// assert_eq!(val, 12); +/// # Ok(()) +/// # } +/// ``` +pub fn parse<'a, T: Parse<'a>>(buf: &'a ParseBuffer<'a>) -> Result<T> { + let parser = buf.parser(); + let result = parser.parse()?; + if parser.cursor().advance_token().is_none() { + Ok(result) + } else { + Err(parser.error("extra tokens remaining after parse")) + } +} + +/// A trait for parsing a fragment of syntax in a recursive descent fashion.
+/// +/// The [`Parse`] trait is the main abstraction you'll be working with when defining +/// custom parsers or custom syntax for your WebAssembly text format (or when +/// using the official format items). Almost all items in the +/// [`core`](crate::core) module implement the [`Parse`] trait, and you'll +/// commonly use this with: +/// +/// * The top-level [`parse`] function to parse an entire input. +/// * The intermediate [`Parser::parse`] function to parse an item out of an +/// input stream and then parse remaining items. +/// +/// Implementations of [`Parse`] take a [`Parser`] as input and will mutate the +/// parser as they parse syntax. Once a token is consumed it cannot be +/// "un-consumed". Utilities such as [`Parser::peek`] and [`Parser::lookahead1`] +/// can be used to determine what to parse next. +/// +/// ## When to parse `(` and `)`? +/// +/// Conventionally types are not responsible for parsing their own `(` and `)` +/// tokens which surround the type. For example WebAssembly imports look like: +/// +/// ```text +/// (import "foo" "bar" (func (type 0))) +/// ``` +/// +/// but the [`Import`](crate::core::Import) type parser looks like: +/// +/// ``` +/// # use wast::kw; +/// # use wast::parser::{Parser, Parse, Result}; +/// # struct Import<'a>(&'a str); +/// impl<'a> Parse<'a> for Import<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// parser.parse::<kw::import>()?; +/// // ... +/// # panic!() +/// } +/// } +/// ``` +/// +/// It is assumed here that the `(` and `)` tokens which surround an `import` +/// statement in the WebAssembly text format are parsed by the parent item +/// parsing `Import`. +/// +/// Note that this is just a convention, so it's not necessarily required for +/// all types. It's recommended that your types stick to this convention where +/// possible to avoid nested calls to [`Parser::parens`] or accidentally trying +/// to parse too many parentheses.
+/// +/// # Examples +/// +/// Let's say you want to define your own WebAssembly text format which only +/// contains imports and functions. You also require all imports to be listed +/// before all functions. An example [`Parse`] implementation might look like: +/// +/// ``` +/// use wast::core::{Import, Func}; +/// use wast::kw; +/// use wast::parser::{Parser, Parse, Result}; +/// +/// // Fields of a WebAssembly which only allow imports and functions, and all +/// // imports must come before all the functions +/// struct OnlyImportsAndFunctions<'a> { +/// imports: Vec<Import<'a>>, +/// functions: Vec<Func<'a>>, +/// } +/// +/// impl<'a> Parse<'a> for OnlyImportsAndFunctions<'a> { +/// fn parse(parser: Parser<'a>) -> Result<Self> { +/// // While the second token is `import` (the first is `(`, so we care +/// // about the second) we parse an `ast::ModuleImport` inside of +/// // parentheses. The `parens` function here ensures that what we +/// // parse inside of it is surrounded by `(` and `)`. +/// let mut imports = Vec::new(); +/// while parser.peek2::<kw::import>() { +/// let import = parser.parens(|p| p.parse())?; +/// imports.push(import); +/// } +/// +/// // Afterwards we assume everything else is a function. Note that +/// // `parse` here is a generic function and type inference figures out +/// // that we're parsing functions here and imports above. +/// let mut functions = Vec::new(); +/// while !parser.is_empty() { +/// let func = parser.parens(|p| p.parse())?; +/// functions.push(func); +/// } +/// +/// Ok(OnlyImportsAndFunctions { imports, functions }) +/// } +/// } +/// ``` +pub trait Parse<'a>: Sized { + /// Attempts to parse `Self` from `parser`, returning an error if it could + /// not be parsed. + /// + /// This method will mutate the state of `parser` after attempting to parse + /// an instance of `Self`. If an error happens then it is likely fatal and + /// there is no guarantee of how many tokens have been consumed from + /// `parser`. 
+ /// + /// As recommended in the documentation of [`Parse`], implementations of + /// this function should not start out by parsing `(` and `)` tokens, but + /// rather parents calling recursive parsers should parse the `(` and `)` + /// tokens for their child item that's being parsed. + /// + /// # Errors + /// + /// This function will return an error if `Self` could not be parsed. Note + /// that creating an [`Error`] is not exactly a cheap operation, so + /// [`Error`] is typically fatal and propagated all the way back to the top + /// parse call site. + fn parse(parser: Parser<'a>) -> Result<Self>; +} + +/// A trait for types which can be used to "peek" to see if they're the next +/// token in an input stream of [`Parser`]. +/// +/// Often when implementing [`Parse`] you'll need to query what the next token +/// in the stream is to figure out what to parse next. This [`Peek`] trait +/// defines the set of types that can be tested whether they're the next token +/// in the input stream. +/// +/// Implementations of [`Peek`] should only be present on types that consume +/// exactly one token (not zero, not more, exactly one). Types implementing +/// [`Peek`] should also typically implement [`Parse`]. +/// +/// See the documentation of [`Parser::peek`] for example usage. +pub trait Peek { + /// Tests to see whether this token is the first token within the [`Cursor`] + /// specified. + /// + /// Returns `true` if [`Parse`] for this type is highly likely to succeed, + /// barring other error conditions (like an integer literal being too + /// big). + fn peek(cursor: Cursor<'_>) -> bool; + + /// The same as `peek`, except it checks the token immediately following + /// the current token. 
+ fn peek2(mut cursor: Cursor<'_>) -> bool { + if cursor.advance_token().is_some() { + Self::peek(cursor) + } else { + false + } + } + + /// Returns a human-readable name of this token to display when generating + /// errors about this token missing. + fn display() -> &'static str; +} + +/// A convenience type definition for `Result` where the error type defaults to +/// [`Error`]. +pub type Result<T, E = Error> = std::result::Result<T, E>; + +/// A low-level buffer of tokens which represents a completely lexed file. +/// +/// A `ParseBuffer` will immediately lex an entire file and then store all +/// tokens internally. A `ParseBuffer` is only used to pass to the top-level +/// [`parse`] function. +pub struct ParseBuffer<'a> { + // List of tokens from the tokenized source (including whitespace and + // comments). The second element of each pair records how to skip past this + // token, if it can be skipped. + tokens: Box<[(Token<'a>, Cell<NextTokenAt>)]>, + // The source text that `tokens` was lexed from. + input: &'a str, + // Index into `tokens` of the current parse position. + cur: Cell<usize>, + // Annotation names mapped to the number of currently active registrations. + known_annotations: RefCell<HashMap<String, usize>>, + // Current nesting depth of parenthesized items being parsed. + depth: Cell<usize>, +} + +#[derive(Copy, Clone, Debug)] +enum NextTokenAt { + /// Haven't computed where the next token is yet. + Unknown, + /// Previously computed the index of the next token. + Index(usize), + /// There is no next token, this is the last token. + Eof, +} + +/// An in-progress parser for the tokens of a WebAssembly text file. +/// +/// A `Parser` is the argument to the [`Parse`] trait and is how the input +/// stream is interacted with to parse new items. Cloning or copying a +/// [`Parser`] refers to the same stream of tokens to parse; you cannot clone a +/// [`Parser`] and use the copies to parse independently of each other. +/// +/// For more information about a [`Parser`] see its methods. +#[derive(Copy, Clone)] +pub struct Parser<'a> { + buf: &'a ParseBuffer<'a>, +} + +/// A helpful structure to perform a lookahead of one token to determine what to +/// parse. +/// +/// For more information see the [`Parser::lookahead1`] method. 
+pub struct Lookahead1<'a> { + parser: Parser<'a>, + attempts: Vec<&'static str>, +} + +/// An immutable cursor into a list of tokens. +/// +/// This cursor cannot be mutated but can be used to parse more tokens in a list +/// of tokens. Cursors are created from the [`Parser::step`] method. This is a +/// very low-level parsing structure and you likely won't use it much. +#[derive(Copy, Clone)] +pub struct Cursor<'a> { + parser: Parser<'a>, + cur: usize, +} + +impl ParseBuffer<'_> { + /// Creates a new [`ParseBuffer`] by lexing the given `input` completely. + /// + /// # Errors + /// + /// Returns an error if `input` fails to lex, or if it contains an annotation + /// whose parentheses are never closed. + pub fn new(input: &str) -> Result<ParseBuffer<'_>> { + ParseBuffer::new_with_lexer(Lexer::new(input)) + } + + /// Creates a new [`ParseBuffer`] by running the given `lexer` to completion + /// and storing every token it produces. + /// + /// # Errors + /// + /// Returns an error if the lexer reports an error for any token, or if the + /// input contains an annotation whose parentheses are never closed. + pub fn new_with_lexer(lexer: Lexer<'_>) -> Result<ParseBuffer<'_>> { + let mut tokens = Vec::new(); + let input = lexer.input(); + for token in lexer { + tokens.push((token?, Cell::new(NextTokenAt::Unknown))); + } + let ret = ParseBuffer { + tokens: tokens.into_boxed_slice(), + cur: Cell::new(0), + depth: Cell::new(0), + input, + known_annotations: Default::default(), + }; + ret.validate_annotations()?; + Ok(ret) + } + + /// Creates a [`Parser`] that reads from this buffer. + fn parser(&self) -> Parser<'_> { + Parser { buf: self } + } + + // Validates that all annotations properly parse in that they have balanced + // delimiters. This is required since while parsing we generally skip + // annotations and there's no real opportunity to return a parse error. + fn validate_annotations(&self) -> Result<()> { + use crate::lexer::Token::*; + enum State { + None, + LParen, + Annotation { depth: usize, span: Span }, + } + let mut state = State::None; + for token in self.tokens.iter() { + state = match (&token.0, state) { + // From nothing, a `(` starts the search for an annotation + (LParen(_), State::None) => State::LParen, + // ... 
otherwise in nothing we always preserve that state. + (_, State::None) => State::None, + + // If the previous state was an `LParen`, we may have an + // annotation if the next keyword is reserved. + // NOTE(review): `!s.is_empty()` is always true once + // `starts_with('@')` holds, so a lone `@` is accepted here while + // `annotation_start` requires `len() > 1` -- confirm intent. + (Reserved(s), State::LParen) if s.starts_with('@') && !s.is_empty() => { + let offset = self.input_pos(s); + State::Annotation { + span: Span { offset }, + depth: 1, + } + } + // ... otherwise anything after an `LParen` kills the lparen + // state. + (_, State::LParen) => State::None, + + // Once we're in an annotation we need to balance parentheses, + // so handle the depth changes. + (LParen(_), State::Annotation { span, depth }) => State::Annotation { + span, + depth: depth + 1, + }, + (RParen(_), State::Annotation { depth: 1, .. }) => State::None, + (RParen(_), State::Annotation { span, depth }) => State::Annotation { + span, + depth: depth - 1, + }, + // ... and otherwise all tokens are allowed in annotations. + (_, s @ State::Annotation { .. }) => s, + }; + } + // Still being inside an annotation after the last token means its + // opening `(` was never matched. + if let State::Annotation { span, .. } = state { + return Err(Error::new(span, "unclosed annotation".to_string())); + } + Ok(()) + } + + /// Returns the byte offset of `src` from the start of `self.input`, computed + /// by subtracting the two string pointers. + /// + /// NOTE(review): presumably `src` must be a slice borrowed from + /// `self.input`; verify against callers. + fn input_pos(&self, src: &str) -> usize { + src.as_ptr() as usize - self.input.as_ptr() as usize + } +} + +impl<'a> Parser<'a> { + /// Returns whether there are no more `Token` tokens to parse from this + /// [`Parser`]. + /// + /// This indicates that either we've reached the end of the input, or we're + /// a sub-[`Parser`] inside of a parenthesized expression and we've hit the + /// `)` token. + /// + /// Note that if `false` is returned there *may* be more comments. Comments + /// and whitespace are not considered for whether this parser is empty. + pub fn is_empty(self) -> bool { + match self.cursor().advance_token() { + Some(Token::RParen(_)) | None => true, + Some(_) => false, // more tokens to parse! 
+ } + } + + pub(crate) fn has_meaningful_tokens(self) -> bool { + self.buf.tokens[self.cursor().cur..].iter().any(|(t, _)| { + !matches!( + t, + Token::Whitespace(_) | Token::LineComment(_) | Token::BlockComment(_) + ) + }) + } + + /// Parses a `T` from this [`Parser`]. + /// + /// This method has a trivial definition (it simply calls + /// [`T::parse`](Parse::parse)) but is here for syntactic purposes. This is + /// what you'll call 99% of the time in a [`Parse`] implementation in order + /// to parse sub-items. + /// + /// Typically you always want to use `?` with the result of this method, you + /// should not handle errors and decide what else to parse. To handle + /// branches in parsing, use [`Parser::peek`]. + /// + /// # Examples + /// + /// A good example of using `parse` is to see how the [`TableType`] type is + /// parsed in this crate. A [`TableType`] is defined in the official + /// specification as [`tabletype`][spec] and is defined as: + /// + /// [spec]: https://webassembly.github.io/spec/core/text/types.html#table-types + /// + /// ```text + /// tabletype ::= lim:limits et:reftype + /// ``` + /// + /// so to parse a [`TableType`] we recursively need to parse a [`Limits`] + /// and a [`RefType`] + /// + /// ``` + /// # use wast::core::*; + /// # use wast::parser::*; + /// struct TableType<'a> { + /// limits: Limits, + /// elem: RefType<'a>, + /// } + /// + /// impl<'a> Parse<'a> for TableType<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // parse the `lim` then `et` in sequence + /// Ok(TableType { + /// limits: parser.parse()?, + /// elem: parser.parse()?, + /// }) + /// } + /// } + /// ``` + /// + /// [`Limits`]: crate::core::Limits + /// [`TableType`]: crate::core::TableType + /// [`RefType`]: crate::core::RefType + pub fn parse<T: Parse<'a>>(self) -> Result<T> { + T::parse(self) + } + + /// Performs a cheap test to see whether the current token in this stream is + /// `T`. 
+ /// + /// This method can be used to efficiently determine what next to parse. The + /// [`Peek`] trait is defined for types which can be used to test if they're + /// the next item in the input stream. + /// + /// Nothing is actually parsed in this method, nor does this mutate the + /// state of this [`Parser`]. Instead, this simply performs a check. + /// + /// This method is frequently combined with the [`Parser::lookahead1`] + /// method to automatically produce nice error messages if some tokens + /// aren't found. + /// + /// # Examples + /// + /// For an example of using the `peek` method let's take a look at parsing + /// the [`Limits`] type. This is [defined in the official spec][spec] as: + /// + /// ```text + /// limits ::= n:u32 + /// | n:u32 m:u32 + /// ``` + /// + /// which means that it's either one `u32` token or two, so we need to know + /// whether to consume two tokens or one: + /// + /// ``` + /// # use wast::parser::*; + /// struct Limits { + /// min: u32, + /// max: Option<u32>, + /// } + /// + /// impl<'a> Parse<'a> for Limits { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Always parse the first number... + /// let min = parser.parse()?; + /// + /// // ... and then test if there's a second number before parsing + /// let max = if parser.peek::<u32>() { + /// Some(parser.parse()?) + /// } else { + /// None + /// }; + /// + /// Ok(Limits { min, max }) + /// } + /// } + /// ``` + /// + /// [spec]: https://webassembly.github.io/spec/core/text/types.html#limits + /// [`Limits`]: crate::core::Limits + pub fn peek<T: Peek>(self) -> bool { + T::peek(self.cursor()) + } + + /// Same as the [`Parser::peek`] method, except checks the next token, not + /// the current token. + pub fn peek2<T: Peek>(self) -> bool { + let mut cursor = self.cursor(); + if cursor.advance_token().is_some() { + T::peek(cursor) + } else { + false + } + } + + /// Same as the [`Parser::peek2`] method, except checks the next next token, + /// not the next token. 
+ pub fn peek3<T: Peek>(self) -> bool { + let mut cursor = self.cursor(); + if cursor.advance_token().is_some() && cursor.advance_token().is_some() { + T::peek(cursor) + } else { + false + } + } + + /// A helper structure to perform a sequence of `peek` operations and if + /// they all fail produce a nice error message. + /// + /// This method purely exists for conveniently producing error messages and + /// provides no functionality that [`Parser::peek`] doesn't already give. + /// The [`Lookahead1`] structure has one main method [`Lookahead1::peek`], + /// which is the same method as [`Parser::peek`]. The difference is that the + /// [`Lookahead1::error`] method needs no arguments. + /// + /// # Examples + /// + /// Let's look at the parsing of [`Index`]. This type is either a `u32` or + /// an [`Id`] and is used in name resolution primarily. The [official + /// grammar for an index][spec] is: + /// + /// ```text + /// idx ::= x:u32 + /// | v:id + /// ``` + /// + /// Which is to say that an index is either a `u32` or an [`Id`]. When + /// parsing an [`Index`] we can do: + /// + /// ``` + /// # use wast::token::*; + /// # use wast::parser::*; + /// enum Index<'a> { + /// Num(u32), + /// Id(Id<'a>), + /// } + /// + /// impl<'a> Parse<'a> for Index<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// let mut l = parser.lookahead1(); + /// if l.peek::<Id>() { + /// Ok(Index::Id(parser.parse()?)) + /// } else if l.peek::<u32>() { + /// Ok(Index::Num(parser.parse()?)) + /// } else { + /// // produces error message of `expected identifier or u32` + /// Err(l.error()) + /// } + /// } + /// } + /// ``` + /// + /// [spec]: https://webassembly.github.io/spec/core/text/modules.html#indices + /// [`Index`]: crate::token::Index + /// [`Id`]: crate::token::Id + pub fn lookahead1(self) -> Lookahead1<'a> { + Lookahead1 { + attempts: Vec::new(), + parser: self, + } + } + + /// Parse an item surrounded by parentheses. 
+ /// + /// WebAssembly's text format is all based on s-expressions, so naturally + /// you're going to want to parse a lot of parenthesized things! As noted in + /// the documentation of [`Parse`] you typically don't parse your own + /// surrounding `(` and `)` tokens, but the parser above you parsed them for + /// you. This is the method the parser above you uses. + /// + /// This method will parse a `(` token, and then call `f` on a sub-parser + /// which when finished asserts that a `)` token is the next token. This + /// requires that `f` consumes all tokens leading up to the paired `)`. + /// + /// Usage will often simply be `parser.parens(|p| p.parse())?` to + /// automatically parse a type within parentheses, but you can, as always, + /// go crazy and do whatever you'd like too. + /// + /// # Examples + /// + /// A good example of this is to see how a `Module` is parsed. This isn't + /// the exact definition, but it's close enough! + /// + /// ``` + /// # use wast::kw; + /// # use wast::core::*; + /// # use wast::parser::*; + /// struct Module<'a> { + /// fields: Vec<ModuleField<'a>>, + /// } + /// + /// impl<'a> Parse<'a> for Module<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Modules start out with a `module` keyword + /// parser.parse::<kw::module>()?; + /// + /// // And then everything else is `(field ...)`, so while we've got + /// // items left we continuously parse parenthesized items. 
+ /// let mut fields = Vec::new(); + /// while !parser.is_empty() { + /// fields.push(parser.parens(|p| p.parse())?); + /// } + /// Ok(Module { fields }) + /// } + /// } + /// ``` + pub fn parens<T>(self, f: impl FnOnce(Parser<'a>) -> Result<T>) -> Result<T> { + self.buf.depth.set(self.buf.depth.get() + 1); + let before = self.buf.cur.get(); + let res = self.step(|cursor| { + let mut cursor = match cursor.lparen() { + Some(rest) => rest, + None => return Err(cursor.error("expected `(`")), + }; + cursor.parser.buf.cur.set(cursor.cur); + let result = f(cursor.parser)?; + cursor.cur = cursor.parser.buf.cur.get(); + match cursor.rparen() { + Some(rest) => Ok((result, rest)), + None => Err(cursor.error("expected `)`")), + } + }); + self.buf.depth.set(self.buf.depth.get() - 1); + if res.is_err() { + self.buf.cur.set(before); + } + res + } + + /// Return the depth of nested parens we've parsed so far. + /// + /// This is a low-level method that is only useful for implementing + /// recursion limits in custom parsers. + pub fn parens_depth(&self) -> usize { + self.buf.depth.get() + } + + /// Checks that the parser parens depth hasn't exceeded the maximum depth. + pub(crate) fn depth_check(&self) -> Result<()> { + if self.parens_depth() > MAX_PARENS_DEPTH { + Err(self.error("item nesting too deep")) + } else { + Ok(()) + } + } + + fn cursor(self) -> Cursor<'a> { + Cursor { + parser: self, + cur: self.buf.cur.get(), + } + } + + /// A low-level parsing method you probably won't use. + /// + /// This is used to implement parsing of the most primitive types in the + /// [`core`](crate::core) module. You probably don't want to use this, but + /// probably want to use something like [`Parser::parse`] or + /// [`Parser::parens`]. 
+ pub fn step<F, T>(self, f: F) -> Result<T> + where + F: FnOnce(Cursor<'a>) -> Result<(T, Cursor<'a>)>, + { + let (result, cursor) = f(self.cursor())?; + self.buf.cur.set(cursor.cur); + Ok(result) + } + + /// Creates an error whose line/column information is pointing at the + /// current token. + /// + /// This is used to produce human-readable error messages which point to the + /// right location in the input stream, and the `msg` here is arbitrary text + /// used to associate with the error and indicate why it was generated. + pub fn error(self, msg: impl fmt::Display) -> Error { + self.error_at(self.cursor().cur_span(), msg) + } + + /// Creates an error whose line/column information is pointing at the + /// given span. + pub fn error_at(self, span: Span, msg: impl fmt::Display) -> Error { + Error::parse(span, self.buf.input, msg.to_string()) + } + + /// Returns the span of the current token. + pub fn cur_span(&self) -> Span { + self.cursor().cur_span() + } + + /// Returns the span of the previous token, or a span at offset 0 if there + /// is no previous token. + pub fn prev_span(&self) -> Span { + self.cursor() + .prev_span() + .unwrap_or_else(|| Span::from_offset(0)) + } + + /// Registers a new known annotation with this parser to allow parsing + /// annotations with this name. + /// + /// [WebAssembly annotations][annotation] are a proposal for the text format + /// which allows decorating the text format with custom structured + /// information. By default all annotations are ignored when parsing, but + /// the whole purpose of them is to sometimes parse them! + /// + /// To support parsing text annotations this method is used to allow + /// annotations and their tokens to *not* be skipped. Once an annotation is + /// registered with this method, then while the return value has not been + /// dropped (e.g. the scope of where this function is called) annotations + /// with the name `annotation` will be parsed out of the token stream and + /// not implicitly skipped. 
+ /// + /// # Skipping annotations + /// + /// The behavior of skipping unknown/unregistered annotations can be + /// somewhat subtle and surprising, so if you're interested in parsing + /// annotations it's important to point out the importance of this method + /// and where to call it. + /// + /// Generally when parsing tokens you'll be bottoming out in various + /// `Cursor` methods. These are all documented as advancing the stream as + /// much as possible to the next token, skipping "irrelevant stuff" like + /// comments, whitespace, etc. The `Cursor` methods will also skip unknown + /// annotations. This means that if you parse *any* token, it will skip over + /// any number of annotations that are unknown at all times. + /// + /// To parse an annotation you must, before parsing any token of the + /// annotation, register the annotation via this method. This includes the + /// beginning `(` token, which is otherwise skipped if the annotation isn't + /// marked as registered. Typically parsers parse the *contents* of an + /// s-expression, so this means that the outer parser of an s-expression + /// must register the custom annotation name, rather than the inner parser. + /// + /// # Return + /// + /// This function returns an RAII guard which, when dropped, will unregister + /// the `annotation` given. Parsing `annotation` is only supported while the + /// returned value is still alive, and once dropped the parser will go back + /// to skipping annotations with the name `annotation`. 
+ /// + /// # Example + /// + /// Let's see an example of how the `@name` annotation is parsed for modules + /// to get an idea of how this works: + /// + /// ``` + /// # use wast::kw; + /// # use wast::token::NameAnnotation; + /// # use wast::parser::*; + /// struct Module<'a> { + /// name: Option<NameAnnotation<'a>>, + /// } + /// + /// impl<'a> Parse<'a> for Module<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Modules start out with a `module` keyword + /// parser.parse::<kw::module>()?; + /// + /// // Next may be `(@name "foo")`. Typically this annotation would + /// // be skipped, but we don't want it skipped, so we register it. + /// // Note that the parse implementation of + /// // `Option<NameAnnotation>` is the one that consumes the + /// // parentheses here. + /// let _r = parser.register_annotation("name"); + /// let name = parser.parse()?; + /// + /// // ... and normally you'd otherwise parse module fields here ... + /// + /// Ok(Module { name }) + /// } + /// } + /// ``` + /// + /// Another example is how we parse the `@custom` annotation. Note that this + /// is parsed as part of `ModuleField`, so note how the annotation is + /// registered *before* we parse the parentheses of the annotation. + /// + /// ``` + /// # use wast::{kw, annotation}; + /// # use wast::core::Custom; + /// # use wast::parser::*; + /// struct Module<'a> { + /// fields: Vec<ModuleField<'a>>, + /// } + /// + /// impl<'a> Parse<'a> for Module<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Modules start out with a `module` keyword + /// parser.parse::<kw::module>()?; + /// + /// // register the `@custom` annotation *first* before we start + /// // parsing fields, because each field is contained in + /// // parentheses and to parse the parentheses of an annotation we + /// // have to know not to skip it. 
+ /// let _r = parser.register_annotation("custom"); + /// + /// let mut fields = Vec::new(); + /// while !parser.is_empty() { + /// fields.push(parser.parens(|p| p.parse())?); + /// } + /// Ok(Module { fields }) + /// } + /// } + /// + /// enum ModuleField<'a> { + /// Custom(Custom<'a>), + /// // ... + /// } + /// + /// impl<'a> Parse<'a> for ModuleField<'a> { + /// fn parse(parser: Parser<'a>) -> Result<Self> { + /// // Note that because we have previously registered the `@custom` + /// // annotation with the parser we know that `peek` methods like + /// // this, working on the annotation token, are able to return + /// // `true`. + /// if parser.peek::<annotation::custom>() { + /// return Ok(ModuleField::Custom(parser.parse()?)); + /// } + /// + /// // .. typically we'd parse other module fields here... + /// + /// Err(parser.error("unknown module field")) + /// } + /// } + /// ``` + /// + /// [annotation]: https://github.com/WebAssembly/annotations + pub fn register_annotation<'b>(self, annotation: &'b str) -> impl Drop + 'b + where + 'a: 'b, + { + // Registrations are counted per name, so registering the same name + // more than once simply bumps its count. + let mut annotations = self.buf.known_annotations.borrow_mut(); + if !annotations.contains_key(annotation) { + annotations.insert(annotation.to_string(), 0); + } + *annotations.get_mut(annotation).unwrap() += 1; + + return RemoveOnDrop(self, annotation); + + struct RemoveOnDrop<'a>(Parser<'a>, &'a str); + + impl Drop for RemoveOnDrop<'_> { + fn drop(&mut self) { + // Release one registration; the entry stays in the map with its + // decremented count. + let mut annotations = self.0.buf.known_annotations.borrow_mut(); + let slot = annotations.get_mut(self.1).unwrap(); + *slot -= 1; + } + } + } +} + +impl<'a> Cursor<'a> { + /// Returns the span of the next `Token` token, or a span at the end of the + /// input if there are no more tokens. + /// + /// Does not take into account whitespace or comments. + pub fn cur_span(&self) -> Span { + let offset = match self.clone().advance_token() { + Some(t) => self.parser.buf.input_pos(t.src()), + None => self.parser.buf.input.len(), + }; + Span { offset } + } + + /// Returns the span of the previous `Token` token. 
+ /// + /// Does not take into account whitespace or comments. + pub(crate) fn prev_span(&self) -> Option<Span> { + let (token, _) = self.parser.buf.tokens.get(self.cur.checked_sub(1)?)?; + Some(Span { + offset: self.parser.buf.input_pos(token.src()), + }) + } + + /// Same as [`Parser::error`], but works with the current token in this + /// [`Cursor`] instead. + pub fn error(&self, msg: impl fmt::Display) -> Error { + self.parser.error_at(self.cur_span(), msg) + } + + /// Attempts to advance this cursor if the current token is a `(`. + /// + /// If the current token is `(`, returns a new [`Cursor`] pointing at the + /// rest of the tokens in the stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn lparen(mut self) -> Option<Self> { + match self.advance_token()? { + Token::LParen(_) => Some(self), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a `)`. + /// + /// If the current token is `)`, returns a new [`Cursor`] pointing at the + /// rest of the tokens in the stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn rparen(mut self) -> Option<Self> { + match self.advance_token()? { + Token::RParen(_) => Some(self), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Id`](crate::lexer::Token) + /// + /// If the current token is `Id`, returns the identifier minus the leading + /// `$` character as well as a new [`Cursor`] pointing at the rest of the + /// tokens in the stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn id(mut self) -> Option<(&'a str, Self)> { + match self.advance_token()? 
{ + Token::Id(id) => Some((&id[1..], self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Keyword`](crate::lexer::Token) + /// + /// If the current token is `Keyword`, returns the keyword as well as a new + /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise + /// returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn keyword(mut self) -> Option<(&'a str, Self)> { + match self.advance_token()? { + Token::Keyword(id) => Some((id, self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Reserved`](crate::lexer::Token) + /// + /// If the current token is `Reserved`, returns the reserved token as well + /// as a new [`Cursor`] pointing at the rest of the tokens in the stream. + /// Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn reserved(mut self) -> Option<(&'a str, Self)> { + match self.advance_token()? { + Token::Reserved(id) => Some((id, self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Integer`](crate::lexer::Token) + /// + /// If the current token is `Integer`, returns the integer as well as a new + /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise + /// returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn integer(mut self) -> Option<(&'a Integer<'a>, Self)> { + match self.advance_token()? 
{ + Token::Integer(i) => Some((i, self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Float`](crate::lexer::Token) + /// + /// If the current token is `Float`, returns the float as well as a new + /// [`Cursor`] pointing at the rest of the tokens in the stream. Otherwise + /// returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn float(mut self) -> Option<(&'a Float<'a>, Self)> { + match self.advance_token()? { + Token::Float(f) => Some((f, self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::String`](crate::lexer::Token) + /// + /// If the current token is `String`, returns the byte value of the string + /// as well as a new [`Cursor`] pointing at the rest of the tokens in the + /// stream. Otherwise returns `None`. + /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + pub fn string(mut self) -> Option<(&'a [u8], Self)> { + match self.advance_token()? { + Token::String(s) => Some((s.val(), self)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::Reserved`](crate::lexer::Token) and looks like the start of an + /// annotation. + /// + /// [Annotations][annotation] are a WebAssembly proposal for the text format + /// which allows placing structured text inside of a text file, for example + /// to specify the name section or other custom sections. + /// + /// This function will attempt to see if the current token is the `@foo` + /// part of the annotation. This requires the previous token to be `(` and + /// the current token is `Reserved` which starts with `@` and has a nonzero + /// length for the following name. + /// + /// Note that this will skip *unknown* annotations. Only pre-registered + /// annotations will be returned here. 
+ /// + /// This function will automatically skip over any comments, whitespace, or + /// unknown annotations. + /// + /// [annotation]: https://github.com/WebAssembly/annotations + pub fn annotation(self) -> Option<(&'a str, Self)> { + let (token, cursor) = self.reserved()?; + if !token.starts_with('@') || token.len() <= 1 { + return None; + } + match &self.parser.buf.tokens.get(self.cur.wrapping_sub(1))?.0 { + Token::LParen(_) => Some((&token[1..], cursor)), + _ => None, + } + } + + /// Attempts to advance this cursor if the current token is a + /// [`Token::LineComment`](crate::lexer::Token) or a + /// [`Token::BlockComment`](crate::lexer::Token) + /// + /// This function will only skip whitespace, no other tokens. + pub fn comment(mut self) -> Option<(&'a str, Self)> { + let comment = loop { + match &self.parser.buf.tokens.get(self.cur)?.0 { + Token::LineComment(c) | Token::BlockComment(c) => { + self.cur += 1; + break c; + } + Token::Whitespace(_) => { + self.cur += 1; + } + _ => return None, + } + }; + Some((comment, self)) + } + + /// Moves this cursor to just past the next token that is not whitespace, a + /// comment, or part of an unregistered annotation, and returns that token. + /// + /// Returns `None` if no such token remains. + fn advance_token(&mut self) -> Option<&'a Token<'a>> { + let known_annotations = self.parser.buf.known_annotations.borrow(); + // A name whose registration count is 0 is treated the same as a name + // that was never registered. + let is_known_annotation = |name: &str| match known_annotations.get(name) { + Some(0) | None => false, + Some(_) => true, + }; + + loop { + let (token, next) = self.parser.buf.tokens.get(self.cur)?; + + // If we're currently pointing at a token, and it's not the start + // of an annotation, then we return that token and advance + // ourselves to just after that token. + match token { + Token::Whitespace(_) | Token::LineComment(_) | Token::BlockComment(_) => {} + _ => match self.annotation_start() { + Some(n) if !is_known_annotation(n) => {} + _ => { + self.cur += 1; + return Some(token); + } + }, + } + + // ... otherwise we need to skip the current token, and possibly + // more. Here we're skipping whitespace, comments, annotations, etc. 
+ // Basically stuff that's intended to not be that relevant to the + // text format. This is a pretty common operation, though, and we + // may do it multiple times through peeks and such. As a result + // this is somewhat cached. + // + // The `next` field, if "unknown", means we haven't calculated the + // next token. Otherwise it's an index of where to resume searching + // for the next token. + // + // Note that this entire operation happens in a loop (hence the + // "somewhat cached") because the set of known annotations is + // dynamic and we can't cache which annotations are skipped. What we + // can do though is cache the number of tokens in the annotation so + // we know how to skip ahead of it. + match next.get() { + NextTokenAt::Unknown => match self.find_next() { + Some(i) => { + next.set(NextTokenAt::Index(i)); + self.cur = i; + } + None => { + next.set(NextTokenAt::Eof); + return None; + } + }, + NextTokenAt::Eof => return None, + NextTokenAt::Index(i) => self.cur = i, + } + } + } + + /// If the token at the cursor is `(` and the token immediately after it is + /// a reserved token of the form `@name` with a non-empty `name`, returns + /// `name`; otherwise returns `None`. + /// + /// This looks at the raw token list directly and does not skip whitespace + /// or comments. + fn annotation_start(&self) -> Option<&'a str> { + match self.parser.buf.tokens.get(self.cur).map(|p| &p.0) { + Some(Token::LParen(_)) => {} + _ => return None, + } + let reserved = match self.parser.buf.tokens.get(self.cur + 1).map(|p| &p.0) { + Some(Token::Reserved(n)) => n, + _ => return None, + }; + if reserved.starts_with('@') && reserved.len() > 1 { + Some(&reserved[1..]) + } else { + None + } + } + + /// Finds the next "real" token from the current position onwards. + /// + /// This is a somewhat expensive operation to call quite a lot, so it's + /// cached in the token list. See the comment above in `advance_token` for + /// how this works. + /// + /// Returns the index of the next relevant token to parse. + fn find_next(mut self) -> Option<usize> { + // If we're pointing to the start of annotation we need to skip it + // in its entirety, so match the parentheses and figure out where + // the annotation ends. 
+ if self.annotation_start().is_some() { + let mut depth = 1; + self.cur += 1; + while depth > 0 { + match &self.parser.buf.tokens.get(self.cur)?.0 { + Token::LParen(_) => depth += 1, + Token::RParen(_) => depth -= 1, + _ => {} + } + self.cur += 1; + } + return Some(self.cur); + } + + // ... otherwise we're pointing at whitespace/comments, so we need to + // figure out how many of them we can skip. + loop { + let (token, _) = self.parser.buf.tokens.get(self.cur)?; + // and otherwise we skip all comments/whitespace and only + // get interested once a normal `Token` pops up. + match token { + Token::Whitespace(_) | Token::LineComment(_) | Token::BlockComment(_) => { + self.cur += 1 + } + _ => return Some(self.cur), + } + } + } +} + +impl Lookahead1<'_> { + /// Attempts to see if `T` is the next token in the [`Parser`] this + /// [`Lookahead1`] references. + /// + /// For more information see [`Parser::lookahead1`] and [`Parser::peek`] + pub fn peek<T: Peek>(&mut self) -> bool { + if self.parser.peek::<T>() { + true + } else { + self.attempts.push(T::display()); + false + } + } + + /// Generates an error message saying that one of the tokens passed to + /// [`Lookahead1::peek`] method was expected. + /// + /// Before calling this method you should call [`Lookahead1::peek`] for all + /// possible tokens you'd like to parse. 
+ pub fn error(self) -> Error { + match self.attempts.len() { + 0 => { + if self.parser.is_empty() { + self.parser.error("unexpected end of input") + } else { + self.parser.error("unexpected token") + } + } + 1 => { + let message = format!("unexpected token, expected {}", self.attempts[0]); + self.parser.error(&message) + } + 2 => { + let message = format!( + "unexpected token, expected {} or {}", + self.attempts[0], self.attempts[1] + ); + self.parser.error(&message) + } + _ => { + let join = self.attempts.join(", "); + let message = format!("unexpected token, expected one of: {}", join); + self.parser.error(&message) + } + } + } +} + +impl<'a, T: Peek + Parse<'a>> Parse<'a> for Option<T> { + fn parse(parser: Parser<'a>) -> Result<Option<T>> { + if parser.peek::<T>() { + Ok(Some(parser.parse()?)) + } else { + Ok(None) + } + } +} diff --git a/third_party/rust/wast/src/token.rs b/third_party/rust/wast/src/token.rs new file mode 100644 index 0000000000..a2bcfed5bd --- /dev/null +++ b/third_party/rust/wast/src/token.rs @@ -0,0 +1,695 @@ +//! Common tokens that implement the [`Parse`] trait which are otherwise not +//! associated specifically with the wasm text format per se (useful in other +//! contexts too perhaps). + +use crate::annotation; +use crate::lexer::FloatVal; +use crate::parser::{Cursor, Parse, Parser, Peek, Result}; +use std::fmt; +use std::hash::{Hash, Hasher}; +use std::str; + +/// A position in the original source stream, used to render errors. +#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] +pub struct Span { + pub(crate) offset: usize, +} + +impl Span { + /// Construct a `Span` from a byte offset in the source file. + pub fn from_offset(offset: usize) -> Self { + Span { offset } + } + + /// Returns the line/column information of this span within `text`. + /// Line and column numbers are 0-indexed. User presentation is typically + /// 1-indexed, but 0-indexing is appropriate for internal use with + /// iterators and slices. 
+ pub fn linecol_in(&self, text: &str) -> (usize, usize) { + let mut cur = 0; + // Use split_terminator instead of lines so that if there is a `\r`, + // it is included in the offset calculation. The `+1` values below + // account for the `\n`. + for (i, line) in text.split_terminator('\n').enumerate() { + if cur + line.len() + 1 > self.offset { + return (i, self.offset - cur); + } + cur += line.len() + 1; + } + (text.lines().count(), 0) + } + + /// Returns the byte offset of this span. + pub fn offset(&self) -> usize { + self.offset + } +} + +/// An identifier in a WebAssembly module, prefixed by `$` in the textual +/// format. +/// +/// An identifier is used to symbolically refer to items in a wasm module, +/// typically via the [`Index`] type. +#[derive(Copy, Clone)] +pub struct Id<'a> { + name: &'a str, + gen: u32, + span: Span, +} + +impl<'a> Id<'a> { + fn new(name: &'a str, span: Span) -> Id<'a> { + Id { name, gen: 0, span } + } + + pub(crate) fn gensym(span: Span, gen: u32) -> Id<'a> { + Id { + name: "gensym", + gen, + span, + } + } + + /// Returns the underlying name of this identifier. + /// + /// The name returned does not contain the leading `$`. 
+ pub fn name(&self) -> &'a str { + self.name + } + + /// Returns span of this identifier in the original source + pub fn span(&self) -> Span { + self.span + } + + pub(crate) fn is_gensym(&self) -> bool { + self.gen != 0 + } +} + +impl<'a> Hash for Id<'a> { + fn hash<H: Hasher>(&self, hasher: &mut H) { + self.name.hash(hasher); + self.gen.hash(hasher); + } +} + +impl<'a> PartialEq for Id<'a> { + fn eq(&self, other: &Id<'a>) -> bool { + self.name == other.name && self.gen == other.gen + } +} + +impl<'a> Eq for Id<'a> {} + +impl<'a> Parse<'a> for Id<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + if let Some((name, rest)) = c.id() { + return Ok((Id::new(name, c.cur_span()), rest)); + } + Err(c.error("expected an identifier")) + }) + } +} + +impl fmt::Debug for Id<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if self.gen != 0 { + f.debug_struct("Id").field("gen", &self.gen).finish() + } else { + self.name.fmt(f) + } + } +} + +impl Peek for Id<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.id().is_some() + } + + fn display() -> &'static str { + "an identifier" + } +} + +/// A reference to another item in a wasm module. +/// +/// This type is used for items referring to other items (such as `call $foo` +/// referencing function `$foo`). References can be either an index (u32) or an +/// [`Id`] in the textual format. +/// +/// The emission phase of a module will ensure that `Index::Id` is never used +/// and switch them all to `Index::Num`. +#[derive(Copy, Clone, Debug)] +pub enum Index<'a> { + /// A numerical index that this references. The index space this is + /// referencing is implicit based on where this [`Index`] is stored. + Num(u32, Span), + /// A human-readable identifier this references. Like `Num`, the namespace + /// this references is based on where this is stored. + Id(Id<'a>), +} + +impl Index<'_> { + /// Returns the source location where this `Index` was defined. 
+ pub fn span(&self) -> Span { + match self { + Index::Num(_, span) => *span, + Index::Id(id) => id.span(), + } + } + + pub(crate) fn is_resolved(&self) -> bool { + matches!(self, Index::Num(..)) + } +} + +impl<'a> Parse<'a> for Index<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<Id>() { + Ok(Index::Id(parser.parse()?)) + } else if l.peek::<u32>() { + let (val, span) = parser.parse()?; + Ok(Index::Num(val, span)) + } else { + Err(l.error()) + } + } +} + +impl Peek for Index<'_> { + fn peek(cursor: Cursor<'_>) -> bool { + u32::peek(cursor) || Id::peek(cursor) + } + + fn display() -> &'static str { + "an index" + } +} + +impl<'a> From<Id<'a>> for Index<'a> { + fn from(id: Id<'a>) -> Index<'a> { + Index::Id(id) + } +} + +impl PartialEq for Index<'_> { + fn eq(&self, other: &Index<'_>) -> bool { + match (self, other) { + (Index::Num(a, _), Index::Num(b, _)) => a == b, + (Index::Id(a), Index::Id(b)) => a == b, + _ => false, + } + } +} + +impl Eq for Index<'_> {} + +impl Hash for Index<'_> { + fn hash<H: Hasher>(&self, hasher: &mut H) { + match self { + Index::Num(a, _) => { + 0u8.hash(hasher); + a.hash(hasher); + } + Index::Id(a) => { + 1u8.hash(hasher); + a.hash(hasher); + } + } + } +} + +/// Parses `(func $foo)` +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub struct ItemRef<'a, K> { + pub kind: K, + pub idx: Index<'a>, +} + +impl<'a, K: Parse<'a>> Parse<'a> for ItemRef<'a, K> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parens(|parser| { + let kind = parser.parse::<K>()?; + let idx = parser.parse()?; + Ok(ItemRef { kind, idx }) + }) + } +} + +impl<'a, K: Peek> Peek for ItemRef<'a, K> { + fn peek(cursor: Cursor<'_>) -> bool { + match cursor.lparen() { + Some(remaining) => K::peek(remaining), + None => false, + } + } + + fn display() -> &'static str { + "an item reference" + } +} + +/// An `@name` annotation in source, currently of the form `@name "foo"` +#[derive(Copy, Clone, PartialEq, Eq, 
Debug)] +pub struct NameAnnotation<'a> { + /// The name specified for the item + pub name: &'a str, +} + +impl<'a> Parse<'a> for NameAnnotation<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.parse::<annotation::name>()?; + let name = parser.parse()?; + Ok(NameAnnotation { name }) + } +} + +impl<'a> Parse<'a> for Option<NameAnnotation<'a>> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let _r = parser.register_annotation("name"); + Ok(if parser.peek2::<annotation::name>() { + Some(parser.parens(|p| p.parse())?) + } else { + None + }) + } +} + +macro_rules! integers { + ($($i:ident($u:ident))*) => ($( + impl<'a> Parse<'a> for $i { + fn parse(parser: Parser<'a>) -> Result<Self> { + Ok(parser.parse::<($i, Span)>()?.0) + } + } + + impl<'a> Parse<'a> for ($i, Span) { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + if let Some((i, rest)) = c.integer() { + let (s, base) = i.val(); + let val = $i::from_str_radix(s, base) + .or_else(|_| { + $u::from_str_radix(s, base).map(|i| i as $i) + }); + return match val { + Ok(n) => Ok(((n, c.cur_span()), rest)), + Err(_) => Err(c.error(concat!( + "invalid ", + stringify!($i), + " number: constant out of range", + ))), + }; + } + Err(c.error(concat!("expected a ", stringify!($i)))) + }) + } + } + + impl Peek for $i { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.integer().is_some() + } + + fn display() -> &'static str { + stringify!($i) + } + } + )*) +} + +integers! 
{ + u8(u8) u16(u16) u32(u32) u64(u64) + i8(u8) i16(u16) i32(u32) i64(u64) +} + +impl<'a> Parse<'a> for &'a [u8] { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + if let Some((i, rest)) = c.string() { + return Ok((i, rest)); + } + Err(c.error("expected a string")) + }) + } +} + +impl Peek for &'_ [u8] { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.string().is_some() + } + + fn display() -> &'static str { + "string" + } +} + +impl<'a> Parse<'a> for &'a str { + fn parse(parser: Parser<'a>) -> Result<Self> { + str::from_utf8(parser.parse()?) + .map_err(|_| parser.error_at(parser.prev_span(), "malformed UTF-8 encoding")) + } +} + +impl Parse<'_> for String { + fn parse(parser: Parser<'_>) -> Result<Self> { + Ok(<&str>::parse(parser)?.to_string()) + } +} + +impl Peek for &'_ str { + fn peek(cursor: Cursor<'_>) -> bool { + <&[u8]>::peek(cursor) + } + + fn display() -> &'static str { + <&[u8]>::display() + } +} + +macro_rules! float { + ($($name:ident => { + bits: $int:ident, + float: $float:ident, + exponent_bits: $exp_bits:tt, + name: $parse:ident, + })*) => ($( + /// A parsed floating-point type + #[derive(Debug, Copy, Clone)] + pub struct $name { + /// The raw bits that this floating point number represents. 
+ pub bits: $int, + } + + impl<'a> Parse<'a> for $name { + fn parse(parser: Parser<'a>) -> Result<Self> { + parser.step(|c| { + let (val, rest) = if let Some((f, rest)) = c.float() { + ($parse(f.val()), rest) + } else if let Some((i, rest)) = c.integer() { + let (s, base) = i.val(); + ( + $parse(&FloatVal::Val { + hex: base == 16, + integral: s.into(), + decimal: None, + exponent: None, + }), + rest, + ) + } else { + return Err(c.error("expected a float")); + }; + match val { + Some(bits) => Ok(($name { bits }, rest)), + None => Err(c.error("invalid float value: constant out of range")), + } + }) + } + } + + fn $parse(val: &FloatVal<'_>) -> Option<$int> { + // Compute a few well-known constants about the float representation + // given the parameters to the macro here. + let width = std::mem::size_of::<$int>() * 8; + let neg_offset = width - 1; + let exp_offset = neg_offset - $exp_bits; + let signif_bits = width - 1 - $exp_bits; + let signif_mask = (1 << exp_offset) - 1; + let bias = (1 << ($exp_bits - 1)) - 1; + + let (hex, integral, decimal, exponent_str) = match val { + // Infinity is when the exponent bits are all set and + // the significand is zero. + FloatVal::Inf { negative } => { + let exp_bits = (1 << $exp_bits) - 1; + let neg_bit = *negative as $int; + return Some( + (neg_bit << neg_offset) | + (exp_bits << exp_offset) + ); + } + + // NaN is when the exponent bits are all set and + // the significand is nonzero. The default of NaN is + // when only the highest bit of the significand is set. + FloatVal::Nan { negative, val } => { + let exp_bits = (1 << $exp_bits) - 1; + let neg_bit = *negative as $int; + let signif = val.unwrap_or(1 << (signif_bits - 1)) as $int; + // If the significand is zero then this is actually infinity + // so we fail to parse it. 
+ if signif & signif_mask == 0 { + return None; + } + return Some( + (neg_bit << neg_offset) | + (exp_bits << exp_offset) | + (signif & signif_mask) + ); + } + + // This is trickier, handle this below + FloatVal::Val { hex, integral, decimal, exponent } => { + (hex, integral, decimal, exponent) + } + }; + + // Rely on Rust's standard library to parse base 10 floats + // correctly. + if !*hex { + let mut s = integral.to_string(); + if let Some(decimal) = decimal { + s.push_str("."); + s.push_str(&decimal); + } + if let Some(exponent) = exponent_str { + s.push_str("e"); + s.push_str(&exponent); + } + let float = s.parse::<$float>().ok()?; + // looks like the `*.wat` format considers infinite overflow to + // be invalid. + if float.is_infinite() { + return None; + } + return Some(float.to_bits()); + } + + // Parsing hex floats is... hard! I don't really know what most of + // this below does. It was copied from Gecko's implementation in + // `WasmTextToBinary.cpp`. Would love comments on this if you have + // them! + let decimal = decimal.as_ref().map(|s| &**s).unwrap_or(""); + let negative = integral.starts_with('-'); + let integral = integral.trim_start_matches('-').trim_start_matches('0'); + + // Do a bunch of work up front to locate the first non-zero digit + // to determine the initial exponent. There's a number of + // adjustments depending on where the digit was found, but the + // general idea here is that I'm not really sure why things are + // calculated the way they are but it should match Gecko. + let decimal_no_leading = decimal.trim_start_matches('0'); + let decimal_iter = if integral.is_empty() { + decimal_no_leading.chars() + } else { + decimal.chars() + }; + let mut digits = integral.chars() + .map(|c| (to_hex(c) as $int, false)) + .chain(decimal_iter.map(|c| (to_hex(c) as $int, true))); + let lead_nonzero_digit = match digits.next() { + Some((c, _)) => c, + // No digits? Must be `+0` or `-0`, being careful to handle the + // sign encoding here. 
+ None if negative => return Some(1 << (width - 1)), + None => return Some(0), + }; + let mut significand = 0 as $int; + let mut exponent = if !integral.is_empty() { + 1 + } else { + -((decimal.len() - decimal_no_leading.len() + 1) as i32) + 1 + }; + let lz = (lead_nonzero_digit as u8).leading_zeros() as i32 - 4; + exponent = exponent.checked_mul(4)?.checked_sub(lz + 1)?; + let mut significand_pos = (width - (4 - (lz as usize))) as isize; + assert!(significand_pos >= 0); + significand |= lead_nonzero_digit << significand_pos; + + // Now that we've got an anchor in the string we parse the remaining + // digits. Again, not entirely sure why everything is the way it is + // here! This is copied from Gecko. + let mut discarded_extra_nonzero = false; + for (digit, decimal) in digits { + if !decimal { + exponent += 4; + } + if significand_pos > -4 { + significand_pos -= 4; + } + + if significand_pos >= 0 { + significand |= digit << significand_pos; + } else if significand_pos > -4 { + significand |= digit >> (4 - significand_pos); + discarded_extra_nonzero = (digit & !((!0) >> (4 - significand_pos))) != 0; + } else if digit != 0 { + discarded_extra_nonzero = true; + } + } + + exponent = exponent.checked_add(match exponent_str { + Some(s) => s.parse::<i32>().ok()?, + None => 0, + })?; + debug_assert!(significand != 0); + + let (encoded_exponent, encoded_significand, discarded_significand) = + if exponent <= -bias { + // Underflow to subnormal or zero. + let shift = exp_offset as i32 + exponent + bias; + if shift == 0 { + (0, 0, significand) + } else if shift < 0 || shift >= width as i32 { + (0, 0, 0) + } else { + ( + 0, + significand >> (width as i32 - shift), + significand << shift, + ) + } + } else if exponent <= bias { + // Normal (non-zero). The significand's leading 1 is encoded + // implicitly. 
+ ( + ((exponent + bias) as $int) << exp_offset, + (significand >> (width - exp_offset - 1)) & signif_mask, + significand << (exp_offset + 1), + ) + } else { + // Overflow to infinity. + ( + ((1 << $exp_bits) - 1) << exp_offset, + 0, + 0, + ) + }; + + let bits = encoded_exponent | encoded_significand; + + // Apply rounding. If this overflows the significand, it carries + // into the exponent bit according to the magic of the IEEE 754 + // encoding. + // + // Or rather, the comment above is what Gecko says so it's copied + // here too. + let msb = 1 << (width - 1); + let bits = bits + + (((discarded_significand & msb != 0) + && ((discarded_significand & !msb != 0) || + discarded_extra_nonzero || + // ties to even + (encoded_significand & 1 != 0))) as $int); + + // Just before we return the bits be sure to handle the sign bit we + // found at the beginning. + let bits = if negative { + bits | (1 << (width - 1)) + } else { + bits + }; + // looks like the `*.wat` format considers infinite overflow to + // be invalid. + if $float::from_bits(bits).is_infinite() { + return None; + } + Some(bits) + } + + )*) +} + +float! { + Float32 => { + bits: u32, + float: f32, + exponent_bits: 8, + name: strtof, + } + Float64 => { + bits: u64, + float: f64, + exponent_bits: 11, + name: strtod, + } +} + +fn to_hex(c: char) -> u8 { + match c { + 'a'..='f' => c as u8 - b'a' + 10, + 'A'..='F' => c as u8 - b'A' + 10, + _ => c as u8 - b'0', + } +} + +/// A convenience type to use with [`Parser::peek`](crate::parser::Parser::peek) +/// to see if the next token is an s-expression. +pub struct LParen { + _priv: (), +} + +impl Peek for LParen { + fn peek(cursor: Cursor<'_>) -> bool { + cursor.lparen().is_some() + } + + fn display() -> &'static str { + "left paren" + } +} + +#[cfg(test)] +mod tests { + #[test] + fn hex_strtof() { + macro_rules! f { + ($a:tt) => (f!(@mk $a, None, None)); + ($a:tt p $e:tt) => (f!(@mk $a, None, Some($e.into()))); + ($a:tt . 
$b:tt) => (f!(@mk $a, Some($b.into()), None)); + ($a:tt . $b:tt p $e:tt) => (f!(@mk $a, Some($b.into()), Some($e.into()))); + (@mk $a:tt, $b:expr, $e:expr) => (crate::lexer::FloatVal::Val { + hex: true, + integral: $a.into(), + decimal: $b, + exponent: $e + }); + } + assert_eq!(super::strtof(&f!("0")), Some(0)); + assert_eq!(super::strtof(&f!("0" . "0")), Some(0)); + assert_eq!(super::strtof(&f!("0" . "0" p "2354")), Some(0)); + assert_eq!(super::strtof(&f!("-0")), Some(1 << 31)); + assert_eq!(super::strtof(&f!("f32")), Some(0x45732000)); + assert_eq!(super::strtof(&f!("0" . "f32")), Some(0x3f732000)); + assert_eq!(super::strtof(&f!("1" . "2")), Some(0x3f900000)); + assert_eq!( + super::strtof(&f!("0" . "00000100000000000" p "-126")), + Some(0) + ); + assert_eq!( + super::strtof(&f!("1" . "fffff4" p "-106")), + Some(0x0afffffa) + ); + assert_eq!(super::strtof(&f!("fffff98" p "-133")), Some(0x0afffffa)); + assert_eq!(super::strtof(&f!("0" . "081" p "023")), Some(0x48810000)); + assert_eq!( + super::strtof(&f!("1" . "00000100000000000" p "-50")), + Some(0x26800000) + ); + } +} diff --git a/third_party/rust/wast/src/wast.rs b/third_party/rust/wast/src/wast.rs new file mode 100644 index 0000000000..ec589e59d6 --- /dev/null +++ b/third_party/rust/wast/src/wast.rs @@ -0,0 +1,365 @@ +use crate::component::WastVal; +use crate::core::{WastArgCore, WastRetCore}; +use crate::kw; +use crate::parser::{self, Cursor, Parse, ParseBuffer, Parser, Peek, Result}; +use crate::token::{Id, Span}; +use crate::{Error, Wat}; + +/// A parsed representation of a `*.wast` file. +/// +/// WAST files are not officially specified but are used in the official test +/// suite to write official spec tests for wasm. This type represents a parsed +/// `*.wast` file which parses a list of directives in a file. 
+#[derive(Debug)] +pub struct Wast<'a> { + #[allow(missing_docs)] + pub directives: Vec<WastDirective<'a>>, +} + +impl<'a> Parse<'a> for Wast<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut directives = Vec::new(); + + // If it looks like a directive token is in the stream then we parse a + // bunch of directives, otherwise assume this is an inline module. + if parser.peek2::<WastDirectiveToken>() { + while !parser.is_empty() { + directives.push(parser.parens(|p| p.parse())?); + } + } else { + let module = parser.parse::<Wat>()?; + directives.push(WastDirective::Wat(QuoteWat::Wat(module))); + } + Ok(Wast { directives }) + } +} + +struct WastDirectiveToken; + +impl Peek for WastDirectiveToken { + fn peek(cursor: Cursor<'_>) -> bool { + let kw = match cursor.keyword() { + Some((kw, _)) => kw, + None => return false, + }; + kw.starts_with("assert_") + || kw == "module" + || kw == "component" + || kw == "register" + || kw == "invoke" + } + + fn display() -> &'static str { + unimplemented!() + } +} + +/// The different kinds of directives found in a `*.wast` file. +/// +/// It's not entirely clear to me what all of these are per se, but they're only +/// really interesting to test harnesses mostly. 
+#[allow(missing_docs)] +#[derive(Debug)] +pub enum WastDirective<'a> { + Wat(QuoteWat<'a>), + AssertMalformed { + span: Span, + module: QuoteWat<'a>, + message: &'a str, + }, + AssertInvalid { + span: Span, + module: QuoteWat<'a>, + message: &'a str, + }, + Register { + span: Span, + name: &'a str, + module: Option<Id<'a>>, + }, + Invoke(WastInvoke<'a>), + AssertTrap { + span: Span, + exec: WastExecute<'a>, + message: &'a str, + }, + AssertReturn { + span: Span, + exec: WastExecute<'a>, + results: Vec<WastRet<'a>>, + }, + AssertExhaustion { + span: Span, + call: WastInvoke<'a>, + message: &'a str, + }, + AssertUnlinkable { + span: Span, + module: Wat<'a>, + message: &'a str, + }, + AssertException { + span: Span, + exec: WastExecute<'a>, + }, +} + +impl WastDirective<'_> { + /// Returns the location in the source that this directive was defined at + pub fn span(&self) -> Span { + match self { + WastDirective::Wat(QuoteWat::Wat(Wat::Module(m))) => m.span, + WastDirective::Wat(QuoteWat::Wat(Wat::Component(c))) => c.span, + WastDirective::Wat(QuoteWat::QuoteModule(span, _)) => *span, + WastDirective::Wat(QuoteWat::QuoteComponent(span, _)) => *span, + WastDirective::AssertMalformed { span, .. } + | WastDirective::Register { span, .. } + | WastDirective::AssertTrap { span, .. } + | WastDirective::AssertReturn { span, .. } + | WastDirective::AssertExhaustion { span, .. } + | WastDirective::AssertUnlinkable { span, .. } + | WastDirective::AssertInvalid { span, .. } + | WastDirective::AssertException { span, .. 
} => *span, + WastDirective::Invoke(i) => i.span, + } + } +} + +impl<'a> Parse<'a> for WastDirective<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::module>() || l.peek::<kw::component>() { + Ok(WastDirective::Wat(parser.parse()?)) + } else if l.peek::<kw::assert_malformed>() { + let span = parser.parse::<kw::assert_malformed>()?.0; + Ok(WastDirective::AssertMalformed { + span, + module: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::assert_invalid>() { + let span = parser.parse::<kw::assert_invalid>()?.0; + Ok(WastDirective::AssertInvalid { + span, + module: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::register>() { + let span = parser.parse::<kw::register>()?.0; + Ok(WastDirective::Register { + span, + name: parser.parse()?, + module: parser.parse()?, + }) + } else if l.peek::<kw::invoke>() { + Ok(WastDirective::Invoke(parser.parse()?)) + } else if l.peek::<kw::assert_trap>() { + let span = parser.parse::<kw::assert_trap>()?.0; + Ok(WastDirective::AssertTrap { + span, + exec: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::assert_return>() { + let span = parser.parse::<kw::assert_return>()?.0; + let exec = parser.parens(|p| p.parse())?; + let mut results = Vec::new(); + while !parser.is_empty() { + results.push(parser.parens(|p| p.parse())?); + } + Ok(WastDirective::AssertReturn { + span, + exec, + results, + }) + } else if l.peek::<kw::assert_exhaustion>() { + let span = parser.parse::<kw::assert_exhaustion>()?.0; + Ok(WastDirective::AssertExhaustion { + span, + call: parser.parens(|p| p.parse())?, + message: parser.parse()?, + }) + } else if l.peek::<kw::assert_unlinkable>() { + let span = parser.parse::<kw::assert_unlinkable>()?.0; + Ok(WastDirective::AssertUnlinkable { + span, + module: parser.parens(parse_wat)?, + message: parser.parse()?, + }) + } else if 
l.peek::<kw::assert_exception>() { + let span = parser.parse::<kw::assert_exception>()?.0; + Ok(WastDirective::AssertException { + span, + exec: parser.parens(|p| p.parse())?, + }) + } else { + Err(l.error()) + } + } +} + +#[allow(missing_docs)] +#[derive(Debug)] +pub enum WastExecute<'a> { + Invoke(WastInvoke<'a>), + Wat(Wat<'a>), + Get { + module: Option<Id<'a>>, + global: &'a str, + }, +} + +impl<'a> Parse<'a> for WastExecute<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let mut l = parser.lookahead1(); + if l.peek::<kw::invoke>() { + Ok(WastExecute::Invoke(parser.parse()?)) + } else if l.peek::<kw::module>() || l.peek::<kw::component>() { + Ok(WastExecute::Wat(parse_wat(parser)?)) + } else if l.peek::<kw::get>() { + parser.parse::<kw::get>()?; + Ok(WastExecute::Get { + module: parser.parse()?, + global: parser.parse()?, + }) + } else { + Err(l.error()) + } + } +} + +fn parse_wat(parser: Parser) -> Result<Wat> { + // Note that this doesn't use `Parse for Wat` since the `parser` provided + // has already peeled back the first layer of parentheses while `Parse for + // Wat` expects to be the top layer which means it also tries to peel off + // the parens. Instead we can skip the sugar that `Wat` has for simply a + // list of fields (no `(module ...)` container) and just parse the `Module` + // itself. 
+ if parser.peek::<kw::component>() { + Ok(Wat::Component(parser.parse()?)) + } else { + Ok(Wat::Module(parser.parse()?)) + } +} + +#[allow(missing_docs)] +#[derive(Debug)] +pub struct WastInvoke<'a> { + pub span: Span, + pub module: Option<Id<'a>>, + pub name: &'a str, + pub args: Vec<WastArg<'a>>, +} + +impl<'a> Parse<'a> for WastInvoke<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + let span = parser.parse::<kw::invoke>()?.0; + let module = parser.parse()?; + let name = parser.parse()?; + let mut args = Vec::new(); + while !parser.is_empty() { + args.push(parser.parens(|p| p.parse())?); + } + Ok(WastInvoke { + span, + module, + name, + args, + }) + } +} + +#[allow(missing_docs)] +#[derive(Debug)] +pub enum QuoteWat<'a> { + Wat(Wat<'a>), + QuoteModule(Span, Vec<(Span, &'a [u8])>), + QuoteComponent(Span, Vec<(Span, &'a [u8])>), +} + +impl QuoteWat<'_> { + /// Encodes this module to bytes, either by encoding the module directly or + /// parsing the contents and then encoding it. 
+ pub fn encode(&mut self) -> Result<Vec<u8>, Error> { + let (source, prefix) = match self { + QuoteWat::Wat(m) => return m.encode(), + QuoteWat::QuoteModule(_, source) => (source, None), + QuoteWat::QuoteComponent(_, source) => (source, Some("(component")), + }; + let mut ret = String::new(); + for (span, src) in source { + match std::str::from_utf8(src) { + Ok(s) => ret.push_str(s), + Err(_) => { + return Err(Error::new(*span, "malformed UTF-8 encoding".to_string())); + } + } + ret.push(' '); + } + if let Some(prefix) = prefix { + ret.insert_str(0, prefix); + ret.push(')'); + } + let buf = ParseBuffer::new(&ret)?; + let mut wat = parser::parse::<Wat<'_>>(&buf)?; + wat.encode() + } +} + +impl<'a> Parse<'a> for QuoteWat<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek2::<kw::quote>() { + let ctor = if parser.peek::<kw::component>() { + parser.parse::<kw::component>()?; + QuoteWat::QuoteComponent + } else { + parser.parse::<kw::module>()?; + QuoteWat::QuoteModule + }; + let span = parser.parse::<kw::quote>()?.0; + let mut src = Vec::new(); + while !parser.is_empty() { + let span = parser.cur_span(); + let string = parser.parse()?; + src.push((span, string)); + } + Ok(ctor(span, src)) + } else { + Ok(QuoteWat::Wat(parse_wat(parser)?)) + } + } +} + +#[derive(Debug)] +#[allow(missing_docs)] +pub enum WastArg<'a> { + Core(WastArgCore<'a>), + Component(WastVal<'a>), +} + +impl<'a> Parse<'a> for WastArg<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<WastArgCore<'_>>() { + Ok(WastArg::Core(parser.parse()?)) + } else { + Ok(WastArg::Component(parser.parse()?)) + } + } +} + +#[derive(Debug)] +#[allow(missing_docs)] +pub enum WastRet<'a> { + Core(WastRetCore<'a>), + Component(WastVal<'a>), +} + +impl<'a> Parse<'a> for WastRet<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if parser.peek::<WastRetCore<'_>>() { + Ok(WastRet::Core(parser.parse()?)) + } else { + Ok(WastRet::Component(parser.parse()?)) + } + } +} diff 
--git a/third_party/rust/wast/src/wat.rs b/third_party/rust/wast/src/wat.rs new file mode 100644 index 0000000000..631bc3d0ed --- /dev/null +++ b/third_party/rust/wast/src/wat.rs @@ -0,0 +1,60 @@ +use crate::component::Component; +use crate::core::{Module, ModuleField, ModuleKind}; +use crate::kw; +use crate::parser::{Parse, Parser, Result}; +use crate::token::Span; + +/// A `*.wat` file parser, or a parser for one parenthesized module. +/// +/// This is the top-level type which you'll frequently parse when working with +/// this crate. A `*.wat` file is either one `module` s-expression or a sequence +/// of s-expressions that are module fields. +#[derive(Debug)] +#[allow(missing_docs)] +pub enum Wat<'a> { + Module(Module<'a>), + Component(Component<'a>), +} + +impl Wat<'_> { + fn validate(&self, parser: Parser<'_>) -> Result<()> { + match self { + Wat::Module(m) => m.validate(parser), + Wat::Component(c) => c.validate(parser), + } + } + + /// Encodes this `Wat` to binary form. This calls either [`Module::encode`] + /// or [`Component::encode`]. + pub fn encode(&mut self) -> std::result::Result<Vec<u8>, crate::Error> { + match self { + Wat::Module(m) => m.encode(), + Wat::Component(c) => c.encode(), + } + } +} + +impl<'a> Parse<'a> for Wat<'a> { + fn parse(parser: Parser<'a>) -> Result<Self> { + if !parser.has_meaningful_tokens() { + return Err(parser.error("expected at least one module field")); + } + + let _r = parser.register_annotation("custom"); + let wat = if parser.peek2::<kw::module>() { + Wat::Module(parser.parens(|parser| parser.parse())?) + } else if parser.peek2::<kw::component>() { + Wat::Component(parser.parens(|parser| parser.parse())?) + } else { + let fields = ModuleField::parse_remaining(parser)?; + Wat::Module(Module { + span: Span { offset: 0 }, + id: None, + name: None, + kind: ModuleKind::Text(fields), + }) + }; + wat.validate(parser)?; + Ok(wat) + } +} |