From d8bbc7858622b6d9c278469aab701ca0b609cddf Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 15 May 2024 05:35:49 +0200 Subject: Merging upstream version 126.0. Signed-off-by: Daniel Baumann --- third_party/rust/wast/src/component/component.rs | 1 + third_party/rust/wast/src/core/binary.rs | 124 +++++++++++++++++---- third_party/rust/wast/src/core/expr.rs | 53 ++++++++- third_party/rust/wast/src/core/memory.rs | 2 + third_party/rust/wast/src/core/module.rs | 1 + .../src/core/resolve/deinline_import_export.rs | 2 + third_party/rust/wast/src/core/table.rs | 1 + third_party/rust/wast/src/lib.rs | 1 + third_party/rust/wast/src/parser.rs | 12 +- third_party/rust/wast/src/wat.rs | 1 + 10 files changed, 169 insertions(+), 29 deletions(-) (limited to 'third_party/rust/wast/src') diff --git a/third_party/rust/wast/src/component/component.rs b/third_party/rust/wast/src/component/component.rs index f954935d87..cc9171b505 100644 --- a/third_party/rust/wast/src/component/component.rs +++ b/third_party/rust/wast/src/component/component.rs @@ -111,6 +111,7 @@ impl<'a> Parse<'a> for Component<'a> { let _r = parser.register_annotation("custom"); let _r = parser.register_annotation("producers"); let _r = parser.register_annotation("name"); + let _r = parser.register_annotation("metadata.code.branch_hint"); let span = parser.parse::()?.0; let id = parser.parse()?; diff --git a/third_party/rust/wast/src/core/binary.rs b/third_party/rust/wast/src/core/binary.rs index 68facd6431..da94da0241 100644 --- a/third_party/rust/wast/src/core/binary.rs +++ b/third_party/rust/wast/src/core/binary.rs @@ -69,7 +69,7 @@ pub fn encode( if needs_data_count(&funcs) { e.section(12, &data.len()); } - e.section_list(10, Code, &funcs); + e.code_section(&funcs, &imports); e.section_list(11, Data, &data); let names = find_names(module_id, module_name, fields); @@ -121,6 +121,47 @@ impl Encoder<'_> { } self.custom_sections(CustomPlace::After(anchor)); } + + /// Encodes the code section of a wasm module module while additionally + /// handling the branch hinting proposal. + /// + /// The branch hinting proposal requires to encode the offsets of the + /// instructions relative from the beginning of the function. Here we encode + /// each instruction and we save its offset. If needed, we use this + /// information to build the branch hint section and insert it before the + /// code section. + fn code_section<'a>(&'a mut self, list: &[&'a Func<'_>], imports: &[&Import<'_>]) { + self.custom_sections(CustomPlace::Before(CustomPlaceAnchor::Code)); + + if !list.is_empty() { + let mut branch_hints = Vec::new(); + let mut code_section = Vec::new(); + + list.len().encode(&mut code_section); + let mut func_index = imports + .iter() + .filter(|i| matches!(i.item.kind, ItemKind::Func(..))) + .count() as u32; + for func in list.iter() { + let hints = func.encode(&mut code_section); + if !hints.is_empty() { + branch_hints.push(FunctionBranchHints { func_index, hints }); + } + func_index += 1; + } + + // Branch hints section has to be inserted before the Code section + // Insert the section only if we have some hints + if !branch_hints.is_empty() { + self.section(0, &("metadata.code.branch_hint", branch_hints)); + } + + // Finally, insert the Code section from the tmp buffer + self.wasm.push(10); + code_section.encode(&mut self.wasm); + } + self.custom_sections(CustomPlace::After(CustomPlaceAnchor::Code)); + } } impl Encode for FunctionType<'_> { @@ -475,7 +516,7 @@ impl Encode for Table<'_> { e.push(0x40); e.push(0x00); ty.encode(e); - init_expr.encode(e); + init_expr.encode(e, 0); } _ => panic!("TableKind should be normal during encoding"), } @@ -497,7 +538,9 @@ impl Encode for Global<'_> { assert!(self.exports.names.is_empty()); self.ty.encode(e); match &self.kind { - GlobalKind::Inline(expr) => expr.encode(e), + GlobalKind::Inline(expr) => { + let _hints = expr.encode(e, 0); + } _ => panic!("GlobalKind should be inline during encoding"), } } @@ -534,7 +577,7 @@ impl Encode for Elem<'_> { ElemPayload::Indices(_), ) => { e.push(0x00); - offset.encode(e); + offset.encode(e, 0); } (ElemKind::Passive, ElemPayload::Indices(_)) => { e.push(0x01); // flags @@ -543,7 +586,7 @@ impl Encode for Elem<'_> { (ElemKind::Active { table, offset }, ElemPayload::Indices(_)) => { e.push(0x02); // flags table.encode(e); - offset.encode(e); + offset.encode(e, 0); e.push(0x00); // extern_kind } (ElemKind::Declared, ElemPayload::Indices(_)) => { @@ -565,7 +608,7 @@ impl Encode for Elem<'_> { }, ) => { e.push(0x04); - offset.encode(e); + offset.encode(e, 0); } (ElemKind::Passive, ElemPayload::Exprs { ty, .. }) => { e.push(0x05); @@ -574,7 +617,7 @@ impl Encode for Elem<'_> { (ElemKind::Active { table, offset }, ElemPayload::Exprs { ty, .. }) => { e.push(0x06); table.encode(e); - offset.encode(e); + offset.encode(e, 0); ty.encode(e); } (ElemKind::Declared, ElemPayload::Exprs { ty, .. }) => { @@ -594,7 +637,7 @@ impl Encode for ElemPayload<'_> { ElemPayload::Exprs { exprs, ty: _ } => { exprs.len().encode(e); for expr in exprs { - expr.encode(e); + expr.encode(e, 0); } } } @@ -610,12 +653,12 @@ impl Encode for Data<'_> { offset, } => { e.push(0x00); - offset.encode(e); + offset.encode(e, 0); } DataKind::Active { memory, offset } => { e.push(0x02); memory.encode(e); - offset.encode(e); + offset.encode(e, 0); } } self.data.iter().map(|l| l.len()).sum::().encode(e); @@ -625,20 +668,25 @@ impl Encode for Data<'_> { } } -impl Encode for Func<'_> { - fn encode(&self, e: &mut Vec) { +impl Func<'_> { + /// Encodes the function into `e` while returning all branch hints with + /// known relative offsets after encoding. + fn encode(&self, e: &mut Vec) -> Vec { assert!(self.exports.names.is_empty()); - let mut tmp = Vec::new(); let (expr, locals) = match &self.kind { FuncKind::Inline { expression, locals } => (expression, locals), _ => panic!("should only have inline functions in emission"), }; + // Encode the function into a temporary vector because functions are + // prefixed with their length. The temporary vector, when encoded, + // encodes its length first then the body. + let mut tmp = Vec::new(); locals.encode(&mut tmp); - expr.encode(&mut tmp); + let branch_hints = expr.encode(&mut tmp, 0); + tmp.encode(e); - tmp.len().encode(e); - e.extend_from_slice(&tmp); + branch_hints } } @@ -658,12 +706,25 @@ impl Encode for Box<[Local<'_>]> { } } -impl Encode for Expression<'_> { - fn encode(&self, e: &mut Vec) { - for instr in self.instrs.iter() { +// Encode the expression and store the offset from the beginning +// for each instruction. +impl Expression<'_> { + fn encode(&self, e: &mut Vec, relative_start: usize) -> Vec { + let mut hints = Vec::with_capacity(self.branch_hints.len()); + let mut next_hint = self.branch_hints.iter().peekable(); + + for (i, instr) in self.instrs.iter().enumerate() { + if let Some(hint) = next_hint.next_if(|h| h.instr_index == i) { + hints.push(BranchHint { + branch_func_offset: u32::try_from(e.len() - relative_start).unwrap(), + branch_hint_value: hint.value, + }); + } instr.encode(e); } e.push(0x0b); + + hints } } @@ -1146,6 +1207,31 @@ impl Encode for Dylink0Subsection<'_> { } } +struct FunctionBranchHints { + func_index: u32, + hints: Vec, +} + +struct BranchHint { + branch_func_offset: u32, + branch_hint_value: u32, +} + +impl Encode for FunctionBranchHints { + fn encode(&self, e: &mut Vec) { + self.func_index.encode(e); + self.hints.encode(e); + } +} + +impl Encode for BranchHint { + fn encode(&self, e: &mut Vec) { + self.branch_func_offset.encode(e); + 1u32.encode(e); + self.branch_hint_value.encode(e); + } +} + impl Encode for Tag<'_> { fn encode(&self, e: &mut Vec) { self.ty.encode(e); diff --git a/third_party/rust/wast/src/core/expr.rs b/third_party/rust/wast/src/core/expr.rs index 489ac205af..b45950b896 100644 --- a/third_party/rust/wast/src/core/expr.rs +++ b/third_party/rust/wast/src/core/expr.rs @@ -1,3 +1,4 @@ +use crate::annotation; use crate::core::*; use crate::encode::Encode; use crate::kw; @@ -14,6 +15,20 @@ use std::mem; #[allow(missing_docs)] pub struct Expression<'a> { pub instrs: Box<[Instruction<'a>]>, + pub branch_hints: Vec, +} + +/// A `@metadata.code.branch_hint` in the code, associated with a If or BrIf +/// This instruction is a placeholder and won't produce anything. Its purpose +/// is to store the offset of the following instruction and check that +/// it's followed by `br_if` or `if`. +#[derive(Debug)] +pub struct BranchHint { + /// Index of instructions in `instrs` field of `Expression` that this hint + /// appplies to. + pub instr_index: usize, + /// The value of this branch hint + pub value: u32, } impl<'a> Parse<'a> for Expression<'a> { @@ -22,6 +37,7 @@ impl<'a> Parse<'a> for Expression<'a> { exprs.parse(parser)?; Ok(Expression { instrs: exprs.instrs.into(), + branch_hints: exprs.branch_hints, }) } } @@ -47,6 +63,7 @@ impl<'a> Expression<'a> { exprs.parse_folded_instruction(parser)?; Ok(Expression { instrs: exprs.instrs.into(), + branch_hints: exprs.branch_hints, }) } } @@ -66,6 +83,11 @@ struct ExpressionParser<'a> { /// Descriptor of all our nested s-expr blocks. This only happens when /// instructions themselves are nested. stack: Vec>, + + /// Related to the branch hints proposal. + /// Will be used later to collect the offsets in the final binary. + /// <(index of branch instructions, BranchHintAnnotation)> + branch_hints: Vec, } enum Paren { @@ -89,6 +111,9 @@ enum Level<'a> { /// which don't correspond to terminating instructions, we're just in a /// nested block. IfArm, + + /// This means we are finishing the parsing of a branch hint annotation. + BranchHint, } /// Possible states of "what is currently being parsed?" in an `if` expression. @@ -145,6 +170,14 @@ impl<'a> ExpressionParser<'a> { if self.handle_if_lparen(parser)? { continue; } + + // Handle the case of a branch hint annotation + if parser.peek::()? { + self.parse_branch_hint(parser)?; + self.stack.push(Level::BranchHint); + continue; + } + match parser.parse()? { // If block/loop show up then we just need to be sure to // push an `end` instruction whenever the `)` token is @@ -177,6 +210,7 @@ impl<'a> ExpressionParser<'a> { Paren::Right => match self.stack.pop().unwrap() { Level::EndWith(i) => self.instrs.push(i), Level::IfArm => {} + Level::BranchHint => {} // If an `if` statement hasn't parsed the clause or `then` // block, then that's an error because there weren't enough @@ -191,7 +225,6 @@ impl<'a> ExpressionParser<'a> { }, } } - Ok(()) } @@ -287,6 +320,24 @@ impl<'a> ExpressionParser<'a> { If::Else => Err(parser.error("unexpected token: too many payloads inside of `(if)`")), } } + + fn parse_branch_hint(&mut self, parser: Parser<'a>) -> Result<()> { + parser.parse::()?; + + let hint = parser.parse::()?; + + let value = match hint.as_bytes() { + [0] => 0, + [1] => 1, + _ => return Err(parser.error("invalid value for branch hint")), + }; + + self.branch_hints.push(BranchHint { + instr_index: self.instrs.len(), + value, + }); + Ok(()) + } } // TODO: document this obscenity diff --git a/third_party/rust/wast/src/core/memory.rs b/third_party/rust/wast/src/core/memory.rs index 3bc7345ef2..eb1baa1a95 100644 --- a/third_party/rust/wast/src/core/memory.rs +++ b/third_party/rust/wast/src/core/memory.rs @@ -165,6 +165,7 @@ impl<'a> Parse<'a> for Data<'a> { if parser.is_empty() { return Ok(Expression { instrs: [insn].into(), + branch_hints: Vec::new(), }); } @@ -184,6 +185,7 @@ impl<'a> Parse<'a> for Data<'a> { instrs.push(insn); Ok(Expression { instrs: instrs.into(), + branch_hints: Vec::new(), }) } })?; diff --git a/third_party/rust/wast/src/core/module.rs b/third_party/rust/wast/src/core/module.rs index 569a8884d4..f74ce6b619 100644 --- a/third_party/rust/wast/src/core/module.rs +++ b/third_party/rust/wast/src/core/module.rs @@ -114,6 +114,7 @@ impl<'a> Parse<'a> for Module<'a> { let _r = parser.register_annotation("producers"); let _r = parser.register_annotation("name"); let _r = parser.register_annotation("dylink.0"); + let _r = parser.register_annotation("metadata.code.branch_hint"); let span = parser.parse::()?.0; let id = parser.parse()?; diff --git a/third_party/rust/wast/src/core/resolve/deinline_import_export.rs b/third_party/rust/wast/src/core/resolve/deinline_import_export.rs index c338407182..98e680b58a 100644 --- a/third_party/rust/wast/src/core/resolve/deinline_import_export.rs +++ b/third_party/rust/wast/src/core/resolve/deinline_import_export.rs @@ -85,6 +85,7 @@ pub fn run(fields: &mut Vec) { } else { Instruction::I64Const(0) }]), + branch_hints: Vec::new(), }, }, data, @@ -143,6 +144,7 @@ pub fn run(fields: &mut Vec) { table: Index::Id(id), offset: Expression { instrs: Box::new([Instruction::I32Const(0)]), + branch_hints: Vec::new(), }, }, payload, diff --git a/third_party/rust/wast/src/core/table.rs b/third_party/rust/wast/src/core/table.rs index 280244498f..e7f0b0f974 100644 --- a/third_party/rust/wast/src/core/table.rs +++ b/third_party/rust/wast/src/core/table.rs @@ -253,6 +253,7 @@ impl<'a> ElemPayload<'a> { ElemPayload::Exprs { exprs, .. } => { let expr = Expression { instrs: [Instruction::RefFunc(func)].into(), + branch_hints: Vec::new(), }; exprs.push(expr); } diff --git a/third_party/rust/wast/src/lib.rs b/third_party/rust/wast/src/lib.rs index 7923a343b5..bb16574177 100644 --- a/third_party/rust/wast/src/lib.rs +++ b/third_party/rust/wast/src/lib.rs @@ -538,4 +538,5 @@ pub mod annotation { annotation!(name); annotation!(producers); annotation!(dylink_0 = "dylink.0"); + annotation!(metadata_code_branch_hint = "metadata.code.branch_hint"); } diff --git a/third_party/rust/wast/src/parser.rs b/third_party/rust/wast/src/parser.rs index 0c85923f83..7a20ebe255 100644 --- a/third_party/rust/wast/src/parser.rs +++ b/third_party/rust/wast/src/parser.rs @@ -65,6 +65,7 @@ use crate::lexer::{Float, Integer, Lexer, Token, TokenKind}; use crate::token::Span; use crate::Error; +use bumpalo::Bump; use std::borrow::Cow; use std::cell::{Cell, RefCell}; use std::collections::HashMap; @@ -303,7 +304,7 @@ pub struct ParseBuffer<'a> { cur: Cell, known_annotations: RefCell>, depth: Cell, - strings: RefCell>>, + strings: Bump, } /// The current position within a `Lexer` that we're at. This simultaneously @@ -396,14 +397,7 @@ impl ParseBuffer<'_> { /// This will return a reference to `s`, but one that's safely rooted in the /// `Parser`. fn push_str(&self, s: Vec) -> &[u8] { - let s = Box::from(s); - let ret = &*s as *const [u8]; - self.strings.borrow_mut().push(s); - // This should be safe in that the address of `ret` isn't changing as - // it's on the heap itself. Additionally the lifetime of this return - // value is tied to the lifetime of `self` (nothing is deallocated - // early), so it should be safe to say the two have the same lifetime. - unsafe { &*ret } + self.strings.alloc_slice_copy(&s) } /// Lexes the next "significant" token from the `pos` specified. diff --git a/third_party/rust/wast/src/wat.rs b/third_party/rust/wast/src/wat.rs index f74121187d..6d9a233359 100644 --- a/third_party/rust/wast/src/wat.rs +++ b/third_party/rust/wast/src/wat.rs @@ -43,6 +43,7 @@ impl<'a> Parse<'a> for Wat<'a> { let _r = parser.register_annotation("custom"); let _r = parser.register_annotation("producers"); let _r = parser.register_annotation("name"); + let _r = parser.register_annotation("metadata.code.branch_hint"); let wat = if parser.peek2::()? { Wat::Module(parser.parens(|parser| parser.parse())?) } else if parser.peek2::()? { -- cgit v1.2.3