diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 09:22:09 +0000 |
commit | 43a97878ce14b72f0981164f87f2e35e14151312 (patch) | |
tree | 620249daf56c0258faa40cbdcf9cfba06de2a846 /third_party/rust/bindgen/ir | |
parent | Initial commit. (diff) | |
download | firefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip |
Adding upstream version 110.0.1. [refs: upstream/110.0.1, upstream]
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/bindgen/ir')
27 files changed, 14774 insertions, 0 deletions
diff --git a/third_party/rust/bindgen/ir/analysis/derive.rs b/third_party/rust/bindgen/ir/analysis/derive.rs new file mode 100644 index 0000000000..d888cd558b --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/derive.rs @@ -0,0 +1,732 @@ +//! Determining the types for which we cannot emit `#[derive(Trait)]`. + +use std::fmt; + +use super::{generate_dependencies, ConstrainResult, MonotoneFramework}; +use crate::ir::analysis::has_vtable::HasVtable; +use crate::ir::comp::CompKind; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::derive::CanDerive; +use crate::ir::function::FunctionSig; +use crate::ir::item::{IsOpaque, Item}; +use crate::ir::layout::Layout; +use crate::ir::template::TemplateParameters; +use crate::ir::traversal::{EdgeKind, Trace}; +use crate::ir::ty::RUST_DERIVE_IN_ARRAY_LIMIT; +use crate::ir::ty::{Type, TypeKind}; +use crate::{Entry, HashMap, HashSet}; + +/// Which trait to consider when doing the `CannotDerive` analysis. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +pub enum DeriveTrait { + /// The `Copy` trait. + Copy, + /// The `Debug` trait. + Debug, + /// The `Default` trait. + Default, + /// The `Hash` trait. + Hash, + /// The `PartialEq` and `PartialOrd` traits. + PartialEqOrPartialOrd, +} + +/// An analysis that finds for each IR item whether a trait cannot be derived. +/// +/// We use the monotone constraint function `cannot_derive`, defined as follows +/// for type T: +/// +/// * If T is Opaque and the layout of the type is known, get this layout as an +/// opaque type and check whether it can derive using trivial checks. +/// +/// * If T is Array, a trait cannot be derived if the array is incomplete, +/// if the length of the array is larger than the limit (unless the trait +/// allows it), or the trait cannot be derived for the type of data the array +/// contains. +/// +/// * If T is Vector, a trait cannot be derived if the trait cannot be derived +/// for the type of data the vector contains. 
+/// +/// * If T is a type alias, a templated alias or an indirection to another type, +/// the trait cannot be derived if the trait cannot be derived for the type T +/// refers to. +/// +/// * If T is a compound type, the trait cannot be derived if the trait cannot +/// be derived for any of its base members or fields. +/// +/// * If T is an instantiation of an abstract template definition, the trait +/// cannot be derived if any of the template arguments or template definition +/// cannot derive the trait. +/// +/// * For all other (simple) types, compiler and standard library limitations +/// dictate whether the trait is implemented. +#[derive(Debug, Clone)] +pub struct CannotDerive<'ctx> { + ctx: &'ctx BindgenContext, + + derive_trait: DeriveTrait, + + // The incremental result of this analysis's computation. + // Contains information whether particular item can derive `derive_trait` + can_derive: HashMap<ItemId, CanDerive>, + + // Dependencies saying that if a key ItemId has been inserted into the + // `can_derive` map, then each of the ids + // in Vec<ItemId> need to be considered again. + // + // This is a subset of the natural IR graph with reversed edges, where we + // only include the edges from the IR graph that can affect whether a type + // can derive `derive_trait`. 
+ dependencies: HashMap<ItemId, Vec<ItemId>>, +} + +type EdgePredicate = fn(EdgeKind) -> bool; + +fn consider_edge_default(kind: EdgeKind) -> bool { + match kind { + // These are the only edges that can affect whether a type can derive + EdgeKind::BaseMember | + EdgeKind::Field | + EdgeKind::TypeReference | + EdgeKind::VarType | + EdgeKind::TemplateArgument | + EdgeKind::TemplateDeclaration | + EdgeKind::TemplateParameterDefinition => true, + + EdgeKind::Constructor | + EdgeKind::Destructor | + EdgeKind::FunctionReturn | + EdgeKind::FunctionParameter | + EdgeKind::InnerType | + EdgeKind::InnerVar | + EdgeKind::Method | + EdgeKind::Generic => false, + } +} + +impl<'ctx> CannotDerive<'ctx> { + fn insert<Id: Into<ItemId>>( + &mut self, + id: Id, + can_derive: CanDerive, + ) -> ConstrainResult { + let id = id.into(); + trace!( + "inserting {:?} can_derive<{}>={:?}", + id, + self.derive_trait, + can_derive + ); + + if let CanDerive::Yes = can_derive { + return ConstrainResult::Same; + } + + match self.can_derive.entry(id) { + Entry::Occupied(mut entry) => { + if *entry.get() < can_derive { + entry.insert(can_derive); + ConstrainResult::Changed + } else { + ConstrainResult::Same + } + } + Entry::Vacant(entry) => { + entry.insert(can_derive); + ConstrainResult::Changed + } + } + } + + fn constrain_type(&mut self, item: &Item, ty: &Type) -> CanDerive { + if !self.ctx.allowlisted_items().contains(&item.id()) { + let can_derive = self + .ctx + .blocklisted_type_implements_trait(item, self.derive_trait); + match can_derive { + CanDerive::Yes => trace!( + " blocklisted type explicitly implements {}", + self.derive_trait + ), + CanDerive::Manually => trace!( + " blocklisted type requires manual implementation of {}", + self.derive_trait + ), + CanDerive::No => trace!( + " cannot derive {} for blocklisted type", + self.derive_trait + ), + } + return can_derive; + } + + if self.derive_trait.not_by_name(self.ctx, item) { + trace!( + " cannot derive {} for explicitly excluded 
type", + self.derive_trait + ); + return CanDerive::No; + } + + trace!("ty: {:?}", ty); + if item.is_opaque(self.ctx, &()) { + if !self.derive_trait.can_derive_union() && + ty.is_union() && + self.ctx.options().rust_features().untagged_union + { + trace!( + " cannot derive {} for Rust unions", + self.derive_trait + ); + return CanDerive::No; + } + + let layout_can_derive = + ty.layout(self.ctx).map_or(CanDerive::Yes, |l| { + l.opaque().array_size_within_derive_limit(self.ctx) + }); + + match layout_can_derive { + CanDerive::Yes => { + trace!( + " we can trivially derive {} for the layout", + self.derive_trait + ); + } + _ => { + trace!( + " we cannot derive {} for the layout", + self.derive_trait + ); + } + }; + return layout_can_derive; + } + + match *ty.kind() { + // Handle the simple cases. These can derive traits without further + // information. + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Complex(..) | + TypeKind::Float(..) | + TypeKind::Enum(..) | + TypeKind::TypeParam | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::Reference(..) | + TypeKind::ObjCInterface(..) 
| + TypeKind::ObjCId | + TypeKind::ObjCSel => { + return self.derive_trait.can_derive_simple(ty.kind()); + } + TypeKind::Pointer(inner) => { + let inner_type = + self.ctx.resolve_type(inner).canonical_type(self.ctx); + if let TypeKind::Function(ref sig) = *inner_type.kind() { + self.derive_trait.can_derive_fnptr(sig) + } else { + self.derive_trait.can_derive_pointer() + } + } + TypeKind::Function(ref sig) => { + self.derive_trait.can_derive_fnptr(sig) + } + + // Complex cases need more information + TypeKind::Array(t, len) => { + let inner_type = + self.can_derive.get(&t.into()).cloned().unwrap_or_default(); + if inner_type != CanDerive::Yes { + trace!( + " arrays of T for which we cannot derive {} \ + also cannot derive {}", + self.derive_trait, + self.derive_trait + ); + return CanDerive::No; + } + + if len == 0 && !self.derive_trait.can_derive_incomplete_array() + { + trace!( + " cannot derive {} for incomplete arrays", + self.derive_trait + ); + return CanDerive::No; + } + + if self.derive_trait.can_derive_large_array(self.ctx) { + trace!(" array can derive {}", self.derive_trait); + return CanDerive::Yes; + } + + if len > RUST_DERIVE_IN_ARRAY_LIMIT { + trace!( + " array is too large to derive {}, but it may be implemented", self.derive_trait + ); + return CanDerive::Manually; + } + trace!( + " array is small enough to derive {}", + self.derive_trait + ); + CanDerive::Yes + } + TypeKind::Vector(t, len) => { + let inner_type = + self.can_derive.get(&t.into()).cloned().unwrap_or_default(); + if inner_type != CanDerive::Yes { + trace!( + " vectors of T for which we cannot derive {} \ + also cannot derive {}", + self.derive_trait, + self.derive_trait + ); + return CanDerive::No; + } + assert_ne!(len, 0, "vectors cannot have zero length"); + self.derive_trait.can_derive_vector() + } + + TypeKind::Comp(ref info) => { + assert!( + !info.has_non_type_template_params(), + "The early ty.is_opaque check should have handled this case" + ); + + if 
!self.derive_trait.can_derive_compound_forward_decl() && + info.is_forward_declaration() + { + trace!( + " cannot derive {} for forward decls", + self.derive_trait + ); + return CanDerive::No; + } + + // NOTE: Take into account that while unions in C and C++ are copied by + // default, the may have an explicit destructor in C++, so we can't + // defer this check just for the union case. + if !self.derive_trait.can_derive_compound_with_destructor() && + self.ctx.lookup_has_destructor( + item.id().expect_type_id(self.ctx), + ) + { + trace!( + " comp has destructor which cannot derive {}", + self.derive_trait + ); + return CanDerive::No; + } + + if info.kind() == CompKind::Union { + if self.derive_trait.can_derive_union() { + if self.ctx.options().rust_features().untagged_union && + // https://github.com/rust-lang/rust/issues/36640 + (!info.self_template_params(self.ctx).is_empty() || + !item.all_template_params(self.ctx).is_empty()) + { + trace!( + " cannot derive {} for Rust union because issue 36640", self.derive_trait + ); + return CanDerive::No; + } + // fall through to be same as non-union handling + } else { + if self.ctx.options().rust_features().untagged_union { + trace!( + " cannot derive {} for Rust unions", + self.derive_trait + ); + return CanDerive::No; + } + + let layout_can_derive = + ty.layout(self.ctx).map_or(CanDerive::Yes, |l| { + l.opaque() + .array_size_within_derive_limit(self.ctx) + }); + match layout_can_derive { + CanDerive::Yes => { + trace!( + " union layout can trivially derive {}", + self.derive_trait + ); + } + _ => { + trace!( + " union layout cannot derive {}", + self.derive_trait + ); + } + }; + return layout_can_derive; + } + } + + if !self.derive_trait.can_derive_compound_with_vtable() && + item.has_vtable(self.ctx) + { + trace!( + " cannot derive {} for comp with vtable", + self.derive_trait + ); + return CanDerive::No; + } + + // Bitfield units are always represented as arrays of u8, but + // they're not traced as arrays, so we 
need to check here + // instead. + if !self.derive_trait.can_derive_large_array(self.ctx) && + info.has_too_large_bitfield_unit() && + !item.is_opaque(self.ctx, &()) + { + trace!( + " cannot derive {} for comp with too large bitfield unit", + self.derive_trait + ); + return CanDerive::No; + } + + let pred = self.derive_trait.consider_edge_comp(); + self.constrain_join(item, pred) + } + + TypeKind::ResolvedTypeRef(..) | + TypeKind::TemplateAlias(..) | + TypeKind::Alias(..) | + TypeKind::BlockPointer(..) => { + let pred = self.derive_trait.consider_edge_typeref(); + self.constrain_join(item, pred) + } + + TypeKind::TemplateInstantiation(..) => { + let pred = self.derive_trait.consider_edge_tmpl_inst(); + self.constrain_join(item, pred) + } + + TypeKind::Opaque => unreachable!( + "The early ty.is_opaque check should have handled this case" + ), + } + } + + fn constrain_join( + &mut self, + item: &Item, + consider_edge: EdgePredicate, + ) -> CanDerive { + let mut candidate = None; + + item.trace( + self.ctx, + &mut |sub_id, edge_kind| { + // Ignore ourselves, since union with ourself is a + // no-op. Ignore edges that aren't relevant to the + // analysis. 
+ if sub_id == item.id() || !consider_edge(edge_kind) { + return; + } + + let can_derive = self.can_derive + .get(&sub_id) + .cloned() + .unwrap_or_default(); + + match can_derive { + CanDerive::Yes => trace!(" member {:?} can derive {}", sub_id, self.derive_trait), + CanDerive::Manually => trace!(" member {:?} cannot derive {}, but it may be implemented", sub_id, self.derive_trait), + CanDerive::No => trace!(" member {:?} cannot derive {}", sub_id, self.derive_trait), + } + + *candidate.get_or_insert(CanDerive::Yes) |= can_derive; + }, + &(), + ); + + if candidate.is_none() { + trace!( + " can derive {} because there are no members", + self.derive_trait + ); + } + candidate.unwrap_or_default() + } +} + +impl DeriveTrait { + fn not_by_name(&self, ctx: &BindgenContext, item: &Item) -> bool { + match self { + DeriveTrait::Copy => ctx.no_copy_by_name(item), + DeriveTrait::Debug => ctx.no_debug_by_name(item), + DeriveTrait::Default => ctx.no_default_by_name(item), + DeriveTrait::Hash => ctx.no_hash_by_name(item), + DeriveTrait::PartialEqOrPartialOrd => { + ctx.no_partialeq_by_name(item) + } + } + } + + fn consider_edge_comp(&self) -> EdgePredicate { + match self { + DeriveTrait::PartialEqOrPartialOrd => consider_edge_default, + _ => |kind| matches!(kind, EdgeKind::BaseMember | EdgeKind::Field), + } + } + + fn consider_edge_typeref(&self) -> EdgePredicate { + match self { + DeriveTrait::PartialEqOrPartialOrd => consider_edge_default, + _ => |kind| kind == EdgeKind::TypeReference, + } + } + + fn consider_edge_tmpl_inst(&self) -> EdgePredicate { + match self { + DeriveTrait::PartialEqOrPartialOrd => consider_edge_default, + _ => |kind| { + matches!( + kind, + EdgeKind::TemplateArgument | EdgeKind::TemplateDeclaration + ) + }, + } + } + + fn can_derive_large_array(&self, ctx: &BindgenContext) -> bool { + if ctx.options().rust_features().larger_arrays { + !matches!(self, DeriveTrait::Default) + } else { + matches!(self, DeriveTrait::Copy) + } + } + + fn 
can_derive_union(&self) -> bool { + matches!(self, DeriveTrait::Copy) + } + + fn can_derive_compound_with_destructor(&self) -> bool { + !matches!(self, DeriveTrait::Copy) + } + + fn can_derive_compound_with_vtable(&self) -> bool { + !matches!(self, DeriveTrait::Default) + } + + fn can_derive_compound_forward_decl(&self) -> bool { + matches!(self, DeriveTrait::Copy | DeriveTrait::Debug) + } + + fn can_derive_incomplete_array(&self) -> bool { + !matches!( + self, + DeriveTrait::Copy | + DeriveTrait::Hash | + DeriveTrait::PartialEqOrPartialOrd + ) + } + + fn can_derive_fnptr(&self, f: &FunctionSig) -> CanDerive { + match (self, f.function_pointers_can_derive()) { + (DeriveTrait::Copy, _) | (DeriveTrait::Default, _) | (_, true) => { + trace!(" function pointer can derive {}", self); + CanDerive::Yes + } + (DeriveTrait::Debug, false) => { + trace!(" function pointer cannot derive {}, but it may be implemented", self); + CanDerive::Manually + } + (_, false) => { + trace!(" function pointer cannot derive {}", self); + CanDerive::No + } + } + } + + fn can_derive_vector(&self) -> CanDerive { + match self { + DeriveTrait::PartialEqOrPartialOrd => { + // FIXME: vectors always can derive PartialEq, but they should + // not derive PartialOrd: + // https://github.com/rust-lang-nursery/packed_simd/issues/48 + trace!(" vectors cannot derive PartialOrd"); + CanDerive::No + } + _ => { + trace!(" vector can derive {}", self); + CanDerive::Yes + } + } + } + + fn can_derive_pointer(&self) -> CanDerive { + match self { + DeriveTrait::Default => { + trace!(" pointer cannot derive Default"); + CanDerive::No + } + _ => { + trace!(" pointer can derive {}", self); + CanDerive::Yes + } + } + } + + fn can_derive_simple(&self, kind: &TypeKind) -> CanDerive { + match (self, kind) { + // === Default === + (DeriveTrait::Default, TypeKind::Void) | + (DeriveTrait::Default, TypeKind::NullPtr) | + (DeriveTrait::Default, TypeKind::Enum(..)) | + (DeriveTrait::Default, TypeKind::Reference(..)) | + 
(DeriveTrait::Default, TypeKind::TypeParam) | + (DeriveTrait::Default, TypeKind::ObjCInterface(..)) | + (DeriveTrait::Default, TypeKind::ObjCId) | + (DeriveTrait::Default, TypeKind::ObjCSel) => { + trace!(" types that always cannot derive Default"); + CanDerive::No + } + (DeriveTrait::Default, TypeKind::UnresolvedTypeRef(..)) => { + unreachable!( + "Type with unresolved type ref can't reach derive default" + ) + } + // === Hash === + (DeriveTrait::Hash, TypeKind::Float(..)) | + (DeriveTrait::Hash, TypeKind::Complex(..)) => { + trace!(" float cannot derive Hash"); + CanDerive::No + } + // === others === + _ => { + trace!(" simple type that can always derive {}", self); + CanDerive::Yes + } + } + } +} + +impl fmt::Display for DeriveTrait { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let s = match self { + DeriveTrait::Copy => "Copy", + DeriveTrait::Debug => "Debug", + DeriveTrait::Default => "Default", + DeriveTrait::Hash => "Hash", + DeriveTrait::PartialEqOrPartialOrd => "PartialEq/PartialOrd", + }; + s.fmt(f) + } +} + +impl<'ctx> MonotoneFramework for CannotDerive<'ctx> { + type Node = ItemId; + type Extra = (&'ctx BindgenContext, DeriveTrait); + type Output = HashMap<ItemId, CanDerive>; + + fn new( + (ctx, derive_trait): (&'ctx BindgenContext, DeriveTrait), + ) -> CannotDerive<'ctx> { + let can_derive = HashMap::default(); + let dependencies = generate_dependencies(ctx, consider_edge_default); + + CannotDerive { + ctx, + derive_trait, + can_derive, + dependencies, + } + } + + fn initial_worklist(&self) -> Vec<ItemId> { + // The transitive closure of all allowlisted items, including explicitly + // blocklisted items. 
+ self.ctx + .allowlisted_items() + .iter() + .cloned() + .flat_map(|i| { + let mut reachable = vec![i]; + i.trace( + self.ctx, + &mut |s, _| { + reachable.push(s); + }, + &(), + ); + reachable + }) + .collect() + } + + fn constrain(&mut self, id: ItemId) -> ConstrainResult { + trace!("constrain: {:?}", id); + + if let Some(CanDerive::No) = self.can_derive.get(&id).cloned() { + trace!(" already know it cannot derive {}", self.derive_trait); + return ConstrainResult::Same; + } + + let item = self.ctx.resolve_item(id); + let can_derive = match item.as_type() { + Some(ty) => { + let mut can_derive = self.constrain_type(item, ty); + if let CanDerive::Yes = can_derive { + let is_reached_limit = + |l: Layout| l.align > RUST_DERIVE_IN_ARRAY_LIMIT; + if !self.derive_trait.can_derive_large_array(self.ctx) && + ty.layout(self.ctx).map_or(false, is_reached_limit) + { + // We have to be conservative: the struct *could* have enough + // padding that we emit an array that is longer than + // `RUST_DERIVE_IN_ARRAY_LIMIT`. If we moved padding calculations + // into the IR and computed them before this analysis, then we could + // be precise rather than conservative here. + can_derive = CanDerive::Manually; + } + } + can_derive + } + None => self.constrain_join(item, consider_edge_default), + }; + + self.insert(id, can_derive) + } + + fn each_depending_on<F>(&self, id: ItemId, mut f: F) + where + F: FnMut(ItemId), + { + if let Some(edges) = self.dependencies.get(&id) { + for item in edges { + trace!("enqueue {:?} into worklist", item); + f(*item); + } + } + } +} + +impl<'ctx> From<CannotDerive<'ctx>> for HashMap<ItemId, CanDerive> { + fn from(analysis: CannotDerive<'ctx>) -> Self { + extra_assert!(analysis + .can_derive + .values() + .all(|v| *v != CanDerive::Yes)); + + analysis.can_derive + } +} + +/// Convert a `HashMap<ItemId, CanDerive>` into a `HashSet<ItemId>`. 
+/// +/// Elements that are not `CanDerive::Yes` are kept in the set, so that it +/// represents all items that cannot derive. +pub fn as_cannot_derive_set( + can_derive: HashMap<ItemId, CanDerive>, +) -> HashSet<ItemId> { + can_derive + .into_iter() + .filter_map(|(k, v)| if v != CanDerive::Yes { Some(k) } else { None }) + .collect() +} diff --git a/third_party/rust/bindgen/ir/analysis/has_destructor.rs b/third_party/rust/bindgen/ir/analysis/has_destructor.rs new file mode 100644 index 0000000000..74fd73d14e --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/has_destructor.rs @@ -0,0 +1,176 @@ +//! Determining which types have destructors + +use super::{generate_dependencies, ConstrainResult, MonotoneFramework}; +use crate::ir::comp::{CompKind, Field, FieldMethods}; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::traversal::EdgeKind; +use crate::ir::ty::TypeKind; +use crate::{HashMap, HashSet}; + +/// An analysis that finds for each IR item whether it has a destructor or not +/// +/// We use the monotone function `has destructor`, defined as follows: +/// +/// * If T is a type alias, a templated alias, or an indirection to another type, +/// T has a destructor if the type T refers to has a destructor. +/// * If T is a compound type, T has a destructor if we saw a destructor when parsing it, +/// or if it's a struct, T has a destructor if any of its base members has a destructor, +/// or if any of its fields have a destructor. +/// * If T is an instantiation of an abstract template definition, T has +/// a destructor if its template definition has a destructor, +/// or if any of the template arguments has a destructor. +/// * If T is the type of a field, that field has a destructor if it's not a bitfield, +/// and if T has a destructor. +#[derive(Debug, Clone)] +pub struct HasDestructorAnalysis<'ctx> { + ctx: &'ctx BindgenContext, + + // The incremental result of this analysis's computation. 
Everything in this + // set definitely has a destructor. + have_destructor: HashSet<ItemId>, + + // Dependencies saying that if a key ItemId has been inserted into the + // `have_destructor` set, then each of the ids in Vec<ItemId> need to be + // considered again. + // + // This is a subset of the natural IR graph with reversed edges, where we + // only include the edges from the IR graph that can affect whether a type + // has a destructor or not. + dependencies: HashMap<ItemId, Vec<ItemId>>, +} + +impl<'ctx> HasDestructorAnalysis<'ctx> { + fn consider_edge(kind: EdgeKind) -> bool { + // These are the only edges that can affect whether a type has a + // destructor or not. + matches!( + kind, + EdgeKind::TypeReference | + EdgeKind::BaseMember | + EdgeKind::Field | + EdgeKind::TemplateArgument | + EdgeKind::TemplateDeclaration + ) + } + + fn insert<Id: Into<ItemId>>(&mut self, id: Id) -> ConstrainResult { + let id = id.into(); + let was_not_already_in_set = self.have_destructor.insert(id); + assert!( + was_not_already_in_set, + "We shouldn't try and insert {:?} twice because if it was \ + already in the set, `constrain` should have exited early.", + id + ); + ConstrainResult::Changed + } +} + +impl<'ctx> MonotoneFramework for HasDestructorAnalysis<'ctx> { + type Node = ItemId; + type Extra = &'ctx BindgenContext; + type Output = HashSet<ItemId>; + + fn new(ctx: &'ctx BindgenContext) -> Self { + let have_destructor = HashSet::default(); + let dependencies = generate_dependencies(ctx, Self::consider_edge); + + HasDestructorAnalysis { + ctx, + have_destructor, + dependencies, + } + } + + fn initial_worklist(&self) -> Vec<ItemId> { + self.ctx.allowlisted_items().iter().cloned().collect() + } + + fn constrain(&mut self, id: ItemId) -> ConstrainResult { + if self.have_destructor.contains(&id) { + // We've already computed that this type has a destructor and that can't + // change. 
+ return ConstrainResult::Same; + } + + let item = self.ctx.resolve_item(id); + let ty = match item.as_type() { + None => return ConstrainResult::Same, + Some(ty) => ty, + }; + + match *ty.kind() { + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::ResolvedTypeRef(t) => { + if self.have_destructor.contains(&t.into()) { + self.insert(id) + } else { + ConstrainResult::Same + } + } + + TypeKind::Comp(ref info) => { + if info.has_own_destructor() { + return self.insert(id); + } + + match info.kind() { + CompKind::Union => ConstrainResult::Same, + CompKind::Struct => { + let base_or_field_destructor = + info.base_members().iter().any(|base| { + self.have_destructor.contains(&base.ty.into()) + }) || info.fields().iter().any( + |field| match *field { + Field::DataMember(ref data) => self + .have_destructor + .contains(&data.ty().into()), + Field::Bitfields(_) => false, + }, + ); + if base_or_field_destructor { + self.insert(id) + } else { + ConstrainResult::Same + } + } + } + } + + TypeKind::TemplateInstantiation(ref inst) => { + let definition_or_arg_destructor = self + .have_destructor + .contains(&inst.template_definition().into()) || + inst.template_arguments().iter().any(|arg| { + self.have_destructor.contains(&arg.into()) + }); + if definition_or_arg_destructor { + self.insert(id) + } else { + ConstrainResult::Same + } + } + + _ => ConstrainResult::Same, + } + } + + fn each_depending_on<F>(&self, id: ItemId, mut f: F) + where + F: FnMut(ItemId), + { + if let Some(edges) = self.dependencies.get(&id) { + for item in edges { + trace!("enqueue {:?} into worklist", item); + f(*item); + } + } + } +} + +impl<'ctx> From<HasDestructorAnalysis<'ctx>> for HashSet<ItemId> { + fn from(analysis: HasDestructorAnalysis<'ctx>) -> Self { + analysis.have_destructor + } +} diff --git a/third_party/rust/bindgen/ir/analysis/has_float.rs b/third_party/rust/bindgen/ir/analysis/has_float.rs new file mode 100644 index 0000000000..bbf2126f70 --- /dev/null +++ 
b/third_party/rust/bindgen/ir/analysis/has_float.rs @@ -0,0 +1,252 @@ +//! Determining which types have floats. + +use super::{generate_dependencies, ConstrainResult, MonotoneFramework}; +use crate::ir::comp::Field; +use crate::ir::comp::FieldMethods; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::traversal::EdgeKind; +use crate::ir::ty::TypeKind; +use crate::{HashMap, HashSet}; + +/// An analysis that finds for each IR item whether it has float or not. +/// +/// We use the monotone constraint function `has_float`, +/// defined as follows: +/// +/// * If T is float or complex float, T trivially has. +/// * If T is a type alias, a templated alias or an indirection to another type, +/// it has float if the type T refers to has. +/// * If T is a compound type, it has float if any of its base members or fields +/// has. +/// * If T is an instantiation of an abstract template definition, T has +/// float if any of the template arguments or template definition +/// has. +#[derive(Debug, Clone)] +pub struct HasFloat<'ctx> { + ctx: &'ctx BindgenContext, + + // The incremental result of this analysis's computation. Everything in this + // set has float. + has_float: HashSet<ItemId>, + + // Dependencies saying that if a key ItemId has been inserted into the + // `has_float` set, then each of the ids in Vec<ItemId> need to be + // considered again. + // + // This is a subset of the natural IR graph with reversed edges, where we + // only include the edges from the IR graph that can affect whether a type + // has float or not. 
+ dependencies: HashMap<ItemId, Vec<ItemId>>, +} + +impl<'ctx> HasFloat<'ctx> { + fn consider_edge(kind: EdgeKind) -> bool { + match kind { + EdgeKind::BaseMember | + EdgeKind::Field | + EdgeKind::TypeReference | + EdgeKind::VarType | + EdgeKind::TemplateArgument | + EdgeKind::TemplateDeclaration | + EdgeKind::TemplateParameterDefinition => true, + + EdgeKind::Constructor | + EdgeKind::Destructor | + EdgeKind::FunctionReturn | + EdgeKind::FunctionParameter | + EdgeKind::InnerType | + EdgeKind::InnerVar | + EdgeKind::Method => false, + EdgeKind::Generic => false, + } + } + + fn insert<Id: Into<ItemId>>(&mut self, id: Id) -> ConstrainResult { + let id = id.into(); + trace!("inserting {:?} into the has_float set", id); + + let was_not_already_in_set = self.has_float.insert(id); + assert!( + was_not_already_in_set, + "We shouldn't try and insert {:?} twice because if it was \ + already in the set, `constrain` should have exited early.", + id + ); + + ConstrainResult::Changed + } +} + +impl<'ctx> MonotoneFramework for HasFloat<'ctx> { + type Node = ItemId; + type Extra = &'ctx BindgenContext; + type Output = HashSet<ItemId>; + + fn new(ctx: &'ctx BindgenContext) -> HasFloat<'ctx> { + let has_float = HashSet::default(); + let dependencies = generate_dependencies(ctx, Self::consider_edge); + + HasFloat { + ctx, + has_float, + dependencies, + } + } + + fn initial_worklist(&self) -> Vec<ItemId> { + self.ctx.allowlisted_items().iter().cloned().collect() + } + + fn constrain(&mut self, id: ItemId) -> ConstrainResult { + trace!("constrain: {:?}", id); + + if self.has_float.contains(&id) { + trace!(" already know it has float"); + return ConstrainResult::Same; + } + + let item = self.ctx.resolve_item(id); + let ty = match item.as_type() { + Some(ty) => ty, + None => { + trace!(" not a type; ignoring"); + return ConstrainResult::Same; + } + }; + + match *ty.kind() { + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Function(..) 
| + TypeKind::Enum(..) | + TypeKind::Reference(..) | + TypeKind::TypeParam | + TypeKind::Opaque | + TypeKind::Pointer(..) | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::ObjCInterface(..) | + TypeKind::ObjCId | + TypeKind::ObjCSel => { + trace!(" simple type that do not have float"); + ConstrainResult::Same + } + + TypeKind::Float(..) | TypeKind::Complex(..) => { + trace!(" float type has float"); + self.insert(id) + } + + TypeKind::Array(t, _) => { + if self.has_float.contains(&t.into()) { + trace!( + " Array with type T that has float also has float" + ); + return self.insert(id); + } + trace!(" Array with type T that do not have float also do not have float"); + ConstrainResult::Same + } + TypeKind::Vector(t, _) => { + if self.has_float.contains(&t.into()) { + trace!( + " Vector with type T that has float also has float" + ); + return self.insert(id); + } + trace!(" Vector with type T that do not have float also do not have float"); + ConstrainResult::Same + } + + TypeKind::ResolvedTypeRef(t) | + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) => { + if self.has_float.contains(&t.into()) { + trace!( + " aliases and type refs to T which have float \ + also have float" + ); + self.insert(id) + } else { + trace!(" aliases and type refs to T which do not have float \ + also do not have float"); + ConstrainResult::Same + } + } + + TypeKind::Comp(ref info) => { + let bases_have = info + .base_members() + .iter() + .any(|base| self.has_float.contains(&base.ty.into())); + if bases_have { + trace!(" bases have float, so we also have"); + return self.insert(id); + } + let fields_have = info.fields().iter().any(|f| match *f { + Field::DataMember(ref data) => { + self.has_float.contains(&data.ty().into()) + } + Field::Bitfields(ref bfu) => bfu + .bitfields() + .iter() + .any(|b| self.has_float.contains(&b.ty().into())), + }); + if fields_have { + trace!(" fields have float, so we also have"); + return self.insert(id); + } + + 
trace!(" comp doesn't have float"); + ConstrainResult::Same + } + + TypeKind::TemplateInstantiation(ref template) => { + let args_have = template + .template_arguments() + .iter() + .any(|arg| self.has_float.contains(&arg.into())); + if args_have { + trace!( + " template args have float, so \ + instantiation also has float" + ); + return self.insert(id); + } + + let def_has = self + .has_float + .contains(&template.template_definition().into()); + if def_has { + trace!( + " template definition has float, so \ + instantiation also has" + ); + return self.insert(id); + } + + trace!(" template instantiation do not have float"); + ConstrainResult::Same + } + } + } + + fn each_depending_on<F>(&self, id: ItemId, mut f: F) + where + F: FnMut(ItemId), + { + if let Some(edges) = self.dependencies.get(&id) { + for item in edges { + trace!("enqueue {:?} into worklist", item); + f(*item); + } + } + } +} + +impl<'ctx> From<HasFloat<'ctx>> for HashSet<ItemId> { + fn from(analysis: HasFloat<'ctx>) -> Self { + analysis.has_float + } +} diff --git a/third_party/rust/bindgen/ir/analysis/has_type_param_in_array.rs b/third_party/rust/bindgen/ir/analysis/has_type_param_in_array.rs new file mode 100644 index 0000000000..aa52304758 --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/has_type_param_in_array.rs @@ -0,0 +1,252 @@ +//! Determining which types has typed parameters in array. + +use super::{generate_dependencies, ConstrainResult, MonotoneFramework}; +use crate::ir::comp::Field; +use crate::ir::comp::FieldMethods; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::traversal::EdgeKind; +use crate::ir::ty::TypeKind; +use crate::{HashMap, HashSet}; + +/// An analysis that finds for each IR item whether it has array or not. +/// +/// We use the monotone constraint function `has_type_parameter_in_array`, +/// defined as follows: +/// +/// * If T is Array type with type parameter, T trivially has. 
+/// * If T is a type alias, a templated alias or an indirection to another type, +/// it has a type parameter in an array if the type T refers to has one. +/// * If T is a compound type, it has a type parameter in an array if any of its +/// base members or fields has a type parameter in an array. +/// * If T is an instantiation of an abstract template definition, T has a +/// type parameter in an array if any of the template arguments or the template definition +/// has one. +#[derive(Debug, Clone)] +pub struct HasTypeParameterInArray<'ctx> { + ctx: &'ctx BindgenContext, + + // The incremental result of this analysis's computation. Everything in this + // set has a type parameter in an array. + has_type_parameter_in_array: HashSet<ItemId>, + + // Dependencies saying that if a key ItemId has been inserted into the + // `has_type_parameter_in_array` set, then each of the ids in Vec<ItemId> needs to be + // considered again. + // + // This is a subset of the natural IR graph with reversed edges, where we + // only include the edges from the IR graph that can affect whether a type + // has a type parameter in an array or not. + dependencies: HashMap<ItemId, Vec<ItemId>>, +} + +impl<'ctx> HasTypeParameterInArray<'ctx> { + fn consider_edge(kind: EdgeKind) -> bool { + match kind { + // These are the only edges that can affect whether a type has a type parameter + // in an array or not. 
+ EdgeKind::BaseMember | + EdgeKind::Field | + EdgeKind::TypeReference | + EdgeKind::VarType | + EdgeKind::TemplateArgument | + EdgeKind::TemplateDeclaration | + EdgeKind::TemplateParameterDefinition => true, + + EdgeKind::Constructor | + EdgeKind::Destructor | + EdgeKind::FunctionReturn | + EdgeKind::FunctionParameter | + EdgeKind::InnerType | + EdgeKind::InnerVar | + EdgeKind::Method => false, + EdgeKind::Generic => false, + } + } + + fn insert<Id: Into<ItemId>>(&mut self, id: Id) -> ConstrainResult { + let id = id.into(); + trace!( + "inserting {:?} into the has_type_parameter_in_array set", + id + ); + + let was_not_already_in_set = + self.has_type_parameter_in_array.insert(id); + assert!( + was_not_already_in_set, + "We shouldn't try and insert {:?} twice because if it was \ + already in the set, `constrain` should have exited early.", + id + ); + + ConstrainResult::Changed + } +} + +impl<'ctx> MonotoneFramework for HasTypeParameterInArray<'ctx> { + type Node = ItemId; + type Extra = &'ctx BindgenContext; + type Output = HashSet<ItemId>; + + fn new(ctx: &'ctx BindgenContext) -> HasTypeParameterInArray<'ctx> { + let has_type_parameter_in_array = HashSet::default(); + let dependencies = generate_dependencies(ctx, Self::consider_edge); + + HasTypeParameterInArray { + ctx, + has_type_parameter_in_array, + dependencies, + } + } + + fn initial_worklist(&self) -> Vec<ItemId> { + self.ctx.allowlisted_items().iter().cloned().collect() + } + + fn constrain(&mut self, id: ItemId) -> ConstrainResult { + trace!("constrain: {:?}", id); + + if self.has_type_parameter_in_array.contains(&id) { + trace!(" already know it do not have array"); + return ConstrainResult::Same; + } + + let item = self.ctx.resolve_item(id); + let ty = match item.as_type() { + Some(ty) => ty, + None => { + trace!(" not a type; ignoring"); + return ConstrainResult::Same; + } + }; + + match *ty.kind() { + // Handle the simple cases. 
These cannot have array in type parameter + // without further information. + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Vector(..) | + TypeKind::Complex(..) | + TypeKind::Function(..) | + TypeKind::Enum(..) | + TypeKind::Reference(..) | + TypeKind::TypeParam | + TypeKind::Opaque | + TypeKind::Pointer(..) | + TypeKind::UnresolvedTypeRef(..) | + TypeKind::ObjCInterface(..) | + TypeKind::ObjCId | + TypeKind::ObjCSel => { + trace!(" simple type that do not have array"); + ConstrainResult::Same + } + + TypeKind::Array(t, _) => { + let inner_ty = + self.ctx.resolve_type(t).canonical_type(self.ctx); + match *inner_ty.kind() { + TypeKind::TypeParam => { + trace!(" Array with Named type has type parameter"); + self.insert(id) + } + _ => { + trace!( + " Array without Named type does have type parameter" + ); + ConstrainResult::Same + } + } + } + + TypeKind::ResolvedTypeRef(t) | + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) => { + if self.has_type_parameter_in_array.contains(&t.into()) { + trace!( + " aliases and type refs to T which have array \ + also have array" + ); + self.insert(id) + } else { + trace!( + " aliases and type refs to T which do not have array \ + also do not have array" + ); + ConstrainResult::Same + } + } + + TypeKind::Comp(ref info) => { + let bases_have = info.base_members().iter().any(|base| { + self.has_type_parameter_in_array.contains(&base.ty.into()) + }); + if bases_have { + trace!(" bases have array, so we also have"); + return self.insert(id); + } + let fields_have = info.fields().iter().any(|f| match *f { + Field::DataMember(ref data) => self + .has_type_parameter_in_array + .contains(&data.ty().into()), + Field::Bitfields(..) 
=> false, + }); + if fields_have { + trace!(" fields have array, so we also have"); + return self.insert(id); + } + + trace!(" comp doesn't have array"); + ConstrainResult::Same + } + + TypeKind::TemplateInstantiation(ref template) => { + let args_have = + template.template_arguments().iter().any(|arg| { + self.has_type_parameter_in_array.contains(&arg.into()) + }); + if args_have { + trace!( + " template args have array, so \ + insantiation also has array" + ); + return self.insert(id); + } + + let def_has = self + .has_type_parameter_in_array + .contains(&template.template_definition().into()); + if def_has { + trace!( + " template definition has array, so \ + insantiation also has" + ); + return self.insert(id); + } + + trace!(" template instantiation do not have array"); + ConstrainResult::Same + } + } + } + + fn each_depending_on<F>(&self, id: ItemId, mut f: F) + where + F: FnMut(ItemId), + { + if let Some(edges) = self.dependencies.get(&id) { + for item in edges { + trace!("enqueue {:?} into worklist", item); + f(*item); + } + } + } +} + +impl<'ctx> From<HasTypeParameterInArray<'ctx>> for HashSet<ItemId> { + fn from(analysis: HasTypeParameterInArray<'ctx>) -> Self { + analysis.has_type_parameter_in_array + } +} diff --git a/third_party/rust/bindgen/ir/analysis/has_vtable.rs b/third_party/rust/bindgen/ir/analysis/has_vtable.rs new file mode 100644 index 0000000000..8ac47a65da --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/has_vtable.rs @@ -0,0 +1,240 @@ +//! Determining which types has vtable + +use super::{generate_dependencies, ConstrainResult, MonotoneFramework}; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::traversal::EdgeKind; +use crate::ir::ty::TypeKind; +use crate::{Entry, HashMap}; +use std::cmp; +use std::ops; + +/// The result of the `HasVtableAnalysis` for an individual item. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum HasVtableResult { + /// The item does not have a vtable pointer. 
+ No, + + /// The item has a vtable and the actual vtable pointer is within this item. + SelfHasVtable, + + /// The item has a vtable, but the actual vtable pointer is in a base + /// member. + BaseHasVtable, +} + +impl Default for HasVtableResult { + fn default() -> Self { + HasVtableResult::No + } +} + +impl HasVtableResult { + /// Take the least upper bound of `self` and `rhs`. + pub fn join(self, rhs: Self) -> Self { + cmp::max(self, rhs) + } +} + +impl ops::BitOr for HasVtableResult { + type Output = Self; + + fn bitor(self, rhs: HasVtableResult) -> Self::Output { + self.join(rhs) + } +} + +impl ops::BitOrAssign for HasVtableResult { + fn bitor_assign(&mut self, rhs: HasVtableResult) { + *self = self.join(rhs) + } +} + +/// An analysis that finds for each IR item whether it has a vtable or not. +/// +/// We use the monotone function `has vtable`, defined as follows: +/// +/// * If T is a type alias, a templated alias, an indirection to another type, +/// or a reference to a type, T has a vtable if the type T refers to has a vtable. +/// * If T is a compound type, T has a vtable if we saw a virtual function when +/// parsing it or any of its base members has a vtable. +/// * If T is an instantiation of an abstract template definition, T has a +/// vtable if the template definition has a vtable. +#[derive(Debug, Clone)] +pub struct HasVtableAnalysis<'ctx> { + ctx: &'ctx BindgenContext, + + // The incremental result of this analysis's computation. Everything in this + // set definitely has a vtable. + have_vtable: HashMap<ItemId, HasVtableResult>, + + // Dependencies saying that if a key ItemId has been inserted into the + // `have_vtable` set, then each of the ids in Vec<ItemId> needs to be + // considered again. + // + // This is a subset of the natural IR graph with reversed edges, where we + // only include the edges from the IR graph that can affect whether a type + // has a vtable or not. 
+ dependencies: HashMap<ItemId, Vec<ItemId>>, +} + +impl<'ctx> HasVtableAnalysis<'ctx> { + fn consider_edge(kind: EdgeKind) -> bool { + // These are the only edges that can affect whether a type has a + // vtable or not. + matches!( + kind, + EdgeKind::TypeReference | + EdgeKind::BaseMember | + EdgeKind::TemplateDeclaration + ) + } + + fn insert<Id: Into<ItemId>>( + &mut self, + id: Id, + result: HasVtableResult, + ) -> ConstrainResult { + if let HasVtableResult::No = result { + return ConstrainResult::Same; + } + + let id = id.into(); + match self.have_vtable.entry(id) { + Entry::Occupied(mut entry) => { + if *entry.get() < result { + entry.insert(result); + ConstrainResult::Changed + } else { + ConstrainResult::Same + } + } + Entry::Vacant(entry) => { + entry.insert(result); + ConstrainResult::Changed + } + } + } + + fn forward<Id1, Id2>(&mut self, from: Id1, to: Id2) -> ConstrainResult + where + Id1: Into<ItemId>, + Id2: Into<ItemId>, + { + let from = from.into(); + let to = to.into(); + + match self.have_vtable.get(&from).cloned() { + None => ConstrainResult::Same, + Some(r) => self.insert(to, r), + } + } +} + +impl<'ctx> MonotoneFramework for HasVtableAnalysis<'ctx> { + type Node = ItemId; + type Extra = &'ctx BindgenContext; + type Output = HashMap<ItemId, HasVtableResult>; + + fn new(ctx: &'ctx BindgenContext) -> HasVtableAnalysis<'ctx> { + let have_vtable = HashMap::default(); + let dependencies = generate_dependencies(ctx, Self::consider_edge); + + HasVtableAnalysis { + ctx, + have_vtable, + dependencies, + } + } + + fn initial_worklist(&self) -> Vec<ItemId> { + self.ctx.allowlisted_items().iter().cloned().collect() + } + + fn constrain(&mut self, id: ItemId) -> ConstrainResult { + trace!("constrain {:?}", id); + + let item = self.ctx.resolve_item(id); + let ty = match item.as_type() { + None => return ConstrainResult::Same, + Some(ty) => ty, + }; + + // TODO #851: figure out a way to handle deriving from template type parameters. 
+ match *ty.kind() { + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::ResolvedTypeRef(t) | + TypeKind::Reference(t) => { + trace!( + " aliases and references forward to their inner type" + ); + self.forward(t, id) + } + + TypeKind::Comp(ref info) => { + trace!(" comp considers its own methods and bases"); + let mut result = HasVtableResult::No; + + if info.has_own_virtual_method() { + trace!(" comp has its own virtual method"); + result |= HasVtableResult::SelfHasVtable; + } + + let bases_has_vtable = info.base_members().iter().any(|base| { + trace!(" comp has a base with a vtable: {:?}", base); + self.have_vtable.contains_key(&base.ty.into()) + }); + if bases_has_vtable { + result |= HasVtableResult::BaseHasVtable; + } + + self.insert(id, result) + } + + TypeKind::TemplateInstantiation(ref inst) => { + self.forward(inst.template_definition(), id) + } + + _ => ConstrainResult::Same, + } + } + + fn each_depending_on<F>(&self, id: ItemId, mut f: F) + where + F: FnMut(ItemId), + { + if let Some(edges) = self.dependencies.get(&id) { + for item in edges { + trace!("enqueue {:?} into worklist", item); + f(*item); + } + } + } +} + +impl<'ctx> From<HasVtableAnalysis<'ctx>> for HashMap<ItemId, HasVtableResult> { + fn from(analysis: HasVtableAnalysis<'ctx>) -> Self { + // We let the lack of an entry mean "No" to save space. + extra_assert!(analysis + .have_vtable + .values() + .all(|v| { *v != HasVtableResult::No })); + + analysis.have_vtable + } +} + +/// A convenience trait for the things for which we might wonder if they have a +/// vtable during codegen. +/// +/// This is not for _computing_ whether the thing has a vtable, it is for +/// looking up the results of the HasVtableAnalysis's computations for a +/// specific thing. +pub trait HasVtable { + /// Return `true` if this thing has vtable, `false` otherwise. 
+ fn has_vtable(&self, ctx: &BindgenContext) -> bool; + + /// Return `true` if this thing has an actual vtable pointer in itself, as + /// opposed to transitively in a base member. + fn has_vtable_ptr(&self, ctx: &BindgenContext) -> bool; +} diff --git a/third_party/rust/bindgen/ir/analysis/mod.rs b/third_party/rust/bindgen/ir/analysis/mod.rs new file mode 100644 index 0000000000..40dfc6d644 --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/mod.rs @@ -0,0 +1,402 @@ +//! Fix-point analyses on the IR using the "monotone framework". +//! +//! A lattice is a set with a partial ordering between elements, where there is +//! a single least upper bound and a single greatest lower bound for every +//! subset. We are dealing with finite lattices, which means that each has a +//! finite number of elements, and it follows that there exists a single top and +//! a single bottom member of the lattice. For example, the power set of a +//! finite set forms a finite lattice where partial ordering is defined by set +//! inclusion, that is `a <= b` if `a` is a subset of `b`. Here is the finite +//! lattice constructed from the set {0,1,2}: +//! +//! ```text +//! .----- Top = {0,1,2} -----. +//! / | \ +//! / | \ +//! / | \ +//! {0,1} -------. {0,2} .--------- {1,2} +//! | \ / \ / | +//! | / \ | +//! | / \ / \ | +//! {0} --------' {1} `---------- {2} +//! \ | / +//! \ | / +//! \ | / +//! `------ Bottom = {} ------' +//! ``` +//! +//! A monotone function `f` is a function where if `x <= y`, then it holds that +//! `f(x) <= f(y)`. It should be clear that running a monotone function to a +//! fix-point on a finite lattice will always terminate: `f` can only "move" +//! along the lattice in a single direction, and therefore can only either find +//! a fix-point in the middle of the lattice or continue to the top or bottom +//! depending on whether it is ascending or descending the lattice respectively. +//! +//! For a deeper introduction to the general form of this kind of analysis, see +//! 
[Static Program Analysis by Anders Møller and Michael I. Schwartzbach][spa]. +//! +//! [spa]: https://cs.au.dk/~amoeller/spa/spa.pdf + +// Re-export individual analyses. +mod template_params; +pub use self::template_params::UsedTemplateParameters; +mod derive; +pub use self::derive::{as_cannot_derive_set, CannotDerive, DeriveTrait}; +mod has_vtable; +pub use self::has_vtable::{HasVtable, HasVtableAnalysis, HasVtableResult}; +mod has_destructor; +pub use self::has_destructor::HasDestructorAnalysis; +mod has_type_param_in_array; +pub use self::has_type_param_in_array::HasTypeParameterInArray; +mod has_float; +pub use self::has_float::HasFloat; +mod sizedness; +pub use self::sizedness::{Sizedness, SizednessAnalysis, SizednessResult}; + +use crate::ir::context::{BindgenContext, ItemId}; + +use crate::ir::traversal::{EdgeKind, Trace}; +use crate::HashMap; +use std::fmt; +use std::ops; + +/// An analysis in the monotone framework. +/// +/// Implementors of this trait must maintain the following two invariants: +/// +/// 1. The concrete data must be a member of a finite-height lattice. +/// 2. The concrete `constrain` method must be monotone: that is, +/// if `x <= y`, then `constrain(x) <= constrain(y)`. +/// +/// If these invariants do not hold, iteration to a fix-point might never +/// complete. +/// +/// For a simple example analysis, see the `ReachableFrom` type in the `tests` +/// module below. +pub trait MonotoneFramework: Sized + fmt::Debug { + /// The type of node in our dependency graph. + /// + /// This is just generic (and not `ItemId`) so that we can easily unit test + /// without constructing real `Item`s and their `ItemId`s. + type Node: Copy; + + /// Any extra data that is needed during computation. + /// + /// Again, this is just generic (and not `&BindgenContext`) so that we can + /// easily unit test without constructing real `BindgenContext`s full of + /// real `Item`s and real `ItemId`s. + type Extra: Sized; + + /// The final output of this analysis. 
Once we have reached a fix-point, we + /// convert `self` into this type, and return it as the final result of the + /// analysis. + type Output: From<Self> + fmt::Debug; + + /// Construct a new instance of this analysis. + fn new(extra: Self::Extra) -> Self; + + /// Get the initial set of nodes from which to start the analysis. Unless + /// you are sure of some domain-specific knowledge, this should be the + /// complete set of nodes. + fn initial_worklist(&self) -> Vec<Self::Node>; + + /// Update the analysis for the given node. + /// + /// If this results in changing our internal state (ie, we discovered that + /// we have not reached a fix-point and iteration should continue), return + /// `ConstrainResult::Changed`. Otherwise, return `ConstrainResult::Same`. + /// When `constrain` returns `ConstrainResult::Same` for all nodes in the + /// set, we have reached a fix-point and the analysis is complete. + fn constrain(&mut self, node: Self::Node) -> ConstrainResult; + + /// For each node `d` that depends on the given `node`'s current answer when + /// running `constrain(d)`, call `f(d)`. This informs us which new nodes to + /// queue up in the worklist when `constrain(node)` reports updated + /// information. + fn each_depending_on<F>(&self, node: Self::Node, f: F) + where + F: FnMut(Self::Node); +} + +/// Whether an analysis's `constrain` function modified the incremental results +/// or not. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ConstrainResult { + /// The incremental results were updated, and the fix-point computation + /// should continue. + Changed, + + /// The incremental results were not updated. 
+ Same, +} + +impl Default for ConstrainResult { + fn default() -> Self { + ConstrainResult::Same + } +} + +impl ops::BitOr for ConstrainResult { + type Output = Self; + + fn bitor(self, rhs: ConstrainResult) -> Self::Output { + if self == ConstrainResult::Changed || rhs == ConstrainResult::Changed { + ConstrainResult::Changed + } else { + ConstrainResult::Same + } + } +} + +impl ops::BitOrAssign for ConstrainResult { + fn bitor_assign(&mut self, rhs: ConstrainResult) { + *self = *self | rhs; + } +} + +/// Run an analysis in the monotone framework. +pub fn analyze<Analysis>(extra: Analysis::Extra) -> Analysis::Output +where + Analysis: MonotoneFramework, +{ + let mut analysis = Analysis::new(extra); + let mut worklist = analysis.initial_worklist(); + + while let Some(node) = worklist.pop() { + if let ConstrainResult::Changed = analysis.constrain(node) { + analysis.each_depending_on(node, |needs_work| { + worklist.push(needs_work); + }); + } + } + + analysis.into() +} + +/// Generate the dependency map for analysis +pub fn generate_dependencies<F>( + ctx: &BindgenContext, + consider_edge: F, +) -> HashMap<ItemId, Vec<ItemId>> +where + F: Fn(EdgeKind) -> bool, +{ + let mut dependencies = HashMap::default(); + + for &item in ctx.allowlisted_items() { + dependencies.entry(item).or_insert_with(Vec::new); + + { + // We reverse our natural IR graph edges to find dependencies + // between nodes. + item.trace( + ctx, + &mut |sub_item: ItemId, edge_kind| { + if ctx.allowlisted_items().contains(&sub_item) && + consider_edge(edge_kind) + { + dependencies + .entry(sub_item) + .or_insert_with(Vec::new) + .push(item); + } + }, + &(), + ); + } + } + dependencies +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::{HashMap, HashSet}; + + // Here we find the set of nodes that are reachable from any given + // node. This is a lattice mapping nodes to subsets of all nodes. Our join + // function is set union. 
+ // + // This is our test graph: + // + // +---+ +---+ + // | | | | + // | 1 | .----| 2 | + // | | | | | + // +---+ | +---+ + // | | ^ + // | | | + // | +---+ '------' + // '----->| | + // | 3 | + // .------| |------. + // | +---+ | + // | ^ | + // v | v + // +---+ | +---+ +---+ + // | | | | | | | + // | 4 | | | 5 |--->| 6 | + // | | | | | | | + // +---+ | +---+ +---+ + // | | | | + // | | | v + // | +---+ | +---+ + // | | | | | | + // '----->| 7 |<-----' | 8 | + // | | | | + // +---+ +---+ + // + // And here is the mapping from a node to the set of nodes that are + // reachable from it within the test graph: + // + // 1: {3,4,5,6,7,8} + // 2: {2} + // 3: {3,4,5,6,7,8} + // 4: {3,4,5,6,7,8} + // 5: {3,4,5,6,7,8} + // 6: {8} + // 7: {3,4,5,6,7,8} + // 8: {} + + #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] + struct Node(usize); + + #[derive(Clone, Debug, Default, PartialEq, Eq)] + struct Graph(HashMap<Node, Vec<Node>>); + + impl Graph { + fn make_test_graph() -> Graph { + let mut g = Graph::default(); + g.0.insert(Node(1), vec![Node(3)]); + g.0.insert(Node(2), vec![Node(2)]); + g.0.insert(Node(3), vec![Node(4), Node(5)]); + g.0.insert(Node(4), vec![Node(7)]); + g.0.insert(Node(5), vec![Node(6), Node(7)]); + g.0.insert(Node(6), vec![Node(8)]); + g.0.insert(Node(7), vec![Node(3)]); + g.0.insert(Node(8), vec![]); + g + } + + fn reverse(&self) -> Graph { + let mut reversed = Graph::default(); + for (node, edges) in self.0.iter() { + reversed.0.entry(*node).or_insert_with(Vec::new); + for referent in edges.iter() { + reversed + .0 + .entry(*referent) + .or_insert_with(Vec::new) + .push(*node); + } + } + reversed + } + } + + #[derive(Clone, Debug, PartialEq, Eq)] + struct ReachableFrom<'a> { + reachable: HashMap<Node, HashSet<Node>>, + graph: &'a Graph, + reversed: Graph, + } + + impl<'a> MonotoneFramework for ReachableFrom<'a> { + type Node = Node; + type Extra = &'a Graph; + type Output = HashMap<Node, HashSet<Node>>; + + fn new(graph: &'a Graph) -> ReachableFrom 
{ + let reversed = graph.reverse(); + ReachableFrom { + reachable: Default::default(), + graph, + reversed, + } + } + + fn initial_worklist(&self) -> Vec<Node> { + self.graph.0.keys().cloned().collect() + } + + fn constrain(&mut self, node: Node) -> ConstrainResult { + // The set of nodes reachable from a node `x` is + // + // reachable(x) = s_0 U s_1 U ... U reachable(s_0) U reachable(s_1) U ... + // + // where there exist edges from `x` to each of `s_0, s_1, ...`. + // + // Yes, what follows is a **terribly** inefficient set union + // implementation. Don't copy this code outside of this test! + + let original_size = self + .reachable + .entry(node) + .or_insert_with(HashSet::default) + .len(); + + for sub_node in self.graph.0[&node].iter() { + self.reachable.get_mut(&node).unwrap().insert(*sub_node); + + let sub_reachable = self + .reachable + .entry(*sub_node) + .or_insert_with(HashSet::default) + .clone(); + + for transitive in sub_reachable { + self.reachable.get_mut(&node).unwrap().insert(transitive); + } + } + + let new_size = self.reachable[&node].len(); + if original_size != new_size { + ConstrainResult::Changed + } else { + ConstrainResult::Same + } + } + + fn each_depending_on<F>(&self, node: Node, mut f: F) + where + F: FnMut(Node), + { + for dep in self.reversed.0[&node].iter() { + f(*dep); + } + } + } + + impl<'a> From<ReachableFrom<'a>> for HashMap<Node, HashSet<Node>> { + fn from(reachable: ReachableFrom<'a>) -> Self { + reachable.reachable + } + } + + #[test] + fn monotone() { + let g = Graph::make_test_graph(); + let reachable = analyze::<ReachableFrom>(&g); + println!("reachable = {:#?}", reachable); + + fn nodes<A>(nodes: A) -> HashSet<Node> + where + A: AsRef<[usize]>, + { + nodes.as_ref().iter().cloned().map(Node).collect() + } + + let mut expected = HashMap::default(); + expected.insert(Node(1), nodes([3, 4, 5, 6, 7, 8])); + expected.insert(Node(2), nodes([2])); + expected.insert(Node(3), nodes([3, 4, 5, 6, 7, 8])); + 
expected.insert(Node(4), nodes([3, 4, 5, 6, 7, 8])); + expected.insert(Node(5), nodes([3, 4, 5, 6, 7, 8])); + expected.insert(Node(6), nodes([8])); + expected.insert(Node(7), nodes([3, 4, 5, 6, 7, 8])); + expected.insert(Node(8), nodes([])); + println!("expected = {:#?}", expected); + + assert_eq!(reachable, expected); + } +} diff --git a/third_party/rust/bindgen/ir/analysis/sizedness.rs b/third_party/rust/bindgen/ir/analysis/sizedness.rs new file mode 100644 index 0000000000..251c3747b2 --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/sizedness.rs @@ -0,0 +1,361 @@ +//! Determining the sizedness of types (as base classes and otherwise). + +use super::{ + generate_dependencies, ConstrainResult, HasVtable, MonotoneFramework, +}; +use crate::ir::context::{BindgenContext, TypeId}; +use crate::ir::item::IsOpaque; +use crate::ir::traversal::EdgeKind; +use crate::ir::ty::TypeKind; +use crate::{Entry, HashMap}; +use std::{cmp, ops}; + +/// The result of the `Sizedness` analysis for an individual item. +/// +/// This is a chain lattice of the form: +/// +/// ```ignore +/// NonZeroSized +/// | +/// DependsOnTypeParam +/// | +/// ZeroSized +/// ``` +/// +/// We initially assume that all types are `ZeroSized` and then update our +/// understanding as we learn more about each type. +#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum SizednessResult { + /// The type is zero-sized. + /// + /// This means that if it is a C++ type, and is not being used as a base + /// member, then we must add an `_address` byte to enforce the + /// unique-address-per-distinct-object-instance rule. + ZeroSized, + + /// Whether this type is zero-sized or not depends on whether a type + /// parameter is zero-sized or not. 
+ /// + /// For example, given these definitions: + /// + /// ```c++ + /// template<class T> + /// class Flongo : public T {}; + /// + /// class Empty {}; + /// + /// class NonEmpty { int x; }; + /// ``` + /// + /// Then `Flongo<Empty>` is zero-sized, and needs an `_address` byte + /// inserted, while `Flongo<NonEmpty>` is *not* zero-sized, and should *not* + /// have an `_address` byte inserted. + /// + /// We don't properly handle this situation correctly right now: + /// https://github.com/rust-lang/rust-bindgen/issues/586 + DependsOnTypeParam, + + /// Has some size that is known to be greater than zero. That doesn't mean + /// it has a static size, but it is not zero sized for sure. In other words, + /// it might contain an incomplete array or some other dynamically sized + /// type. + NonZeroSized, +} + +impl Default for SizednessResult { + fn default() -> Self { + SizednessResult::ZeroSized + } +} + +impl SizednessResult { + /// Take the least upper bound of `self` and `rhs`. + pub fn join(self, rhs: Self) -> Self { + cmp::max(self, rhs) + } +} + +impl ops::BitOr for SizednessResult { + type Output = Self; + + fn bitor(self, rhs: SizednessResult) -> Self::Output { + self.join(rhs) + } +} + +impl ops::BitOrAssign for SizednessResult { + fn bitor_assign(&mut self, rhs: SizednessResult) { + *self = self.join(rhs) + } +} + +/// An analysis that computes the sizedness of all types. +/// +/// * For types with known sizes -- for example pointers, scalars, etc... -- +/// they are assigned `NonZeroSized`. +/// +/// * For compound structure types with one or more fields, they are assigned +/// `NonZeroSized`. +/// +/// * For compound structure types without any fields, the results of the bases +/// are `join`ed. +/// +/// * For type parameters, `DependsOnTypeParam` is assigned. +#[derive(Debug)] +pub struct SizednessAnalysis<'ctx> { + ctx: &'ctx BindgenContext, + dependencies: HashMap<TypeId, Vec<TypeId>>, + // Incremental results of the analysis. 
Missing entries are implicitly + // considered `ZeroSized`. + sized: HashMap<TypeId, SizednessResult>, +} + +impl<'ctx> SizednessAnalysis<'ctx> { + fn consider_edge(kind: EdgeKind) -> bool { + // These are the only edges that can affect whether a type is + // zero-sized or not. + matches!( + kind, + EdgeKind::TemplateArgument | + EdgeKind::TemplateParameterDefinition | + EdgeKind::TemplateDeclaration | + EdgeKind::TypeReference | + EdgeKind::BaseMember | + EdgeKind::Field + ) + } + + /// Insert an incremental result, and return whether this updated our + /// knowledge of types and we should continue the analysis. + fn insert( + &mut self, + id: TypeId, + result: SizednessResult, + ) -> ConstrainResult { + trace!("inserting {:?} for {:?}", result, id); + + if let SizednessResult::ZeroSized = result { + return ConstrainResult::Same; + } + + match self.sized.entry(id) { + Entry::Occupied(mut entry) => { + if *entry.get() < result { + entry.insert(result); + ConstrainResult::Changed + } else { + ConstrainResult::Same + } + } + Entry::Vacant(entry) => { + entry.insert(result); + ConstrainResult::Changed + } + } + } + + fn forward(&mut self, from: TypeId, to: TypeId) -> ConstrainResult { + match self.sized.get(&from).cloned() { + None => ConstrainResult::Same, + Some(r) => self.insert(to, r), + } + } +} + +impl<'ctx> MonotoneFramework for SizednessAnalysis<'ctx> { + type Node = TypeId; + type Extra = &'ctx BindgenContext; + type Output = HashMap<TypeId, SizednessResult>; + + fn new(ctx: &'ctx BindgenContext) -> SizednessAnalysis<'ctx> { + let dependencies = generate_dependencies(ctx, Self::consider_edge) + .into_iter() + .filter_map(|(id, sub_ids)| { + id.as_type_id(ctx).map(|id| { + ( + id, + sub_ids + .into_iter() + .filter_map(|s| s.as_type_id(ctx)) + .collect::<Vec<_>>(), + ) + }) + }) + .collect(); + + let sized = HashMap::default(); + + SizednessAnalysis { + ctx, + dependencies, + sized, + } + } + + fn initial_worklist(&self) -> Vec<TypeId> { + self.ctx + 
.allowlisted_items() + .iter() + .cloned() + .filter_map(|id| id.as_type_id(self.ctx)) + .collect() + } + + fn constrain(&mut self, id: TypeId) -> ConstrainResult { + trace!("constrain {:?}", id); + + if let Some(SizednessResult::NonZeroSized) = + self.sized.get(&id).cloned() + { + trace!(" already know it is not zero-sized"); + return ConstrainResult::Same; + } + + if id.has_vtable_ptr(self.ctx) { + trace!(" has an explicit vtable pointer, therefore is not zero-sized"); + return self.insert(id, SizednessResult::NonZeroSized); + } + + let ty = self.ctx.resolve_type(id); + + if id.is_opaque(self.ctx, &()) { + trace!(" type is opaque; checking layout..."); + let result = + ty.layout(self.ctx).map_or(SizednessResult::ZeroSized, |l| { + if l.size == 0 { + trace!(" ...layout has size == 0"); + SizednessResult::ZeroSized + } else { + trace!(" ...layout has size > 0"); + SizednessResult::NonZeroSized + } + }); + return self.insert(id, result); + } + + match *ty.kind() { + TypeKind::Void => { + trace!(" void is zero-sized"); + self.insert(id, SizednessResult::ZeroSized) + } + + TypeKind::TypeParam => { + trace!( + " type params sizedness depends on what they're \ + instantiated as" + ); + self.insert(id, SizednessResult::DependsOnTypeParam) + } + + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::Complex(..) | + TypeKind::Function(..) | + TypeKind::Enum(..) | + TypeKind::Reference(..) | + TypeKind::NullPtr | + TypeKind::ObjCId | + TypeKind::ObjCSel | + TypeKind::Pointer(..) => { + trace!(" {:?} is known not to be zero-sized", ty.kind()); + self.insert(id, SizednessResult::NonZeroSized) + } + + TypeKind::ObjCInterface(..) 
=> { + trace!(" obj-c interfaces always have at least the `isa` pointer"); + self.insert(id, SizednessResult::NonZeroSized) + } + + TypeKind::TemplateAlias(t, _) | + TypeKind::Alias(t) | + TypeKind::BlockPointer(t) | + TypeKind::ResolvedTypeRef(t) => { + trace!(" aliases and type refs forward to their inner type"); + self.forward(t, id) + } + + TypeKind::TemplateInstantiation(ref inst) => { + trace!( + " template instantiations are zero-sized if their \ + definition is zero-sized" + ); + self.forward(inst.template_definition(), id) + } + + TypeKind::Array(_, 0) => { + trace!(" arrays of zero elements are zero-sized"); + self.insert(id, SizednessResult::ZeroSized) + } + TypeKind::Array(..) => { + trace!(" arrays of > 0 elements are not zero-sized"); + self.insert(id, SizednessResult::NonZeroSized) + } + TypeKind::Vector(..) => { + trace!(" vectors are not zero-sized"); + self.insert(id, SizednessResult::NonZeroSized) + } + + TypeKind::Comp(ref info) => { + trace!(" comp considers its own fields and bases"); + + if !info.fields().is_empty() { + return self.insert(id, SizednessResult::NonZeroSized); + } + + let result = info + .base_members() + .iter() + .filter_map(|base| self.sized.get(&base.ty)) + .fold(SizednessResult::ZeroSized, |a, b| a.join(*b)); + + self.insert(id, result) + } + + TypeKind::Opaque => { + unreachable!("covered by the .is_opaque() check above") + } + + TypeKind::UnresolvedTypeRef(..) => { + unreachable!("Should have been resolved after parsing!"); + } + } + } + + fn each_depending_on<F>(&self, id: TypeId, mut f: F) + where + F: FnMut(TypeId), + { + if let Some(edges) = self.dependencies.get(&id) { + for ty in edges { + trace!("enqueue {:?} into worklist", ty); + f(*ty); + } + } + } +} + +impl<'ctx> From<SizednessAnalysis<'ctx>> for HashMap<TypeId, SizednessResult> { + fn from(analysis: SizednessAnalysis<'ctx>) -> Self { + // We let the lack of an entry mean "ZeroSized" to save space. 
+ extra_assert!(analysis + .sized + .values() + .all(|v| { *v != SizednessResult::ZeroSized })); + + analysis.sized + } +} + +/// A convenience trait for querying whether some type or id is sized. +/// +/// This is not for _computing_ whether the thing is sized, it is for looking up +/// the results of the `Sizedness` analysis's computations for a specific thing. +pub trait Sizedness { + /// Get the sizedness of this type. + fn sizedness(&self, ctx: &BindgenContext) -> SizednessResult; + + /// Is the sizedness for this type `SizednessResult::ZeroSized`? + fn is_zero_sized(&self, ctx: &BindgenContext) -> bool { + self.sizedness(ctx) == SizednessResult::ZeroSized + } +} diff --git a/third_party/rust/bindgen/ir/analysis/template_params.rs b/third_party/rust/bindgen/ir/analysis/template_params.rs new file mode 100644 index 0000000000..e88b774dee --- /dev/null +++ b/third_party/rust/bindgen/ir/analysis/template_params.rs @@ -0,0 +1,608 @@ +//! Discover which template type parameters are actually used. +//! +//! ### Why do we care? +//! +//! C++ allows ignoring template parameters, while Rust does not. Usually we can +//! blindly stick a `PhantomData<T>` inside a generic Rust struct to make up for +//! this. That doesn't work for templated type aliases, however: +//! +//! ```C++ +//! template <typename T> +//! using Fml = int; +//! ``` +//! +//! If we generate the naive Rust code for this alias, we get: +//! +//! ```ignore +//! pub type Fml<T> = ::std::os::raw::int; +//! ``` +//! +//! And this is rejected by `rustc` due to the unused type parameter. +//! +//! (Aside: in these simple cases, `libclang` will often just give us the +//! aliased type directly, and we will never even know we were dealing with +//! aliases, let alone templated aliases. It's the more convoluted scenarios +//! where we get to have some fun...) +//! +//! For such problematic template aliases, we could generate a tuple whose +//! second member is a `PhantomData<T>`. 
Or, if we wanted to go the extra mile, +//! we could even generate some smarter wrapper that implements `Deref`, +//! `DerefMut`, `From`, `Into`, `AsRef`, and `AsMut` to the actually aliased +//! type. However, this is still lackluster: +//! +//! 1. Even with a billion conversion-trait implementations, using the generated +//! bindings is rather un-ergonomic. +//! 2. With either of these solutions, we need to keep track of which aliases +//! we've transformed like this in order to generate correct uses of the +//! wrapped type. +//! +//! Given that we have to properly track which template parameters ended up used +//! for (2), we might as well leverage that information to make ergonomic +//! bindings that don't contain any unused type parameters at all, and +//! completely avoid the pain of (1). +//! +//! ### How do we determine which template parameters are used? +//! +//! Determining which template parameters are actually used is a trickier +//! problem than it might seem at a glance. On the one hand, trivial uses are +//! easy to detect: +//! +//! ```C++ +//! template <typename T> +//! class Foo { +//! T trivial_use_of_t; +//! }; +//! ``` +//! +//! It gets harder when determining if one template parameter is used depends on +//! determining if another template parameter is used. In this example, whether +//! `U` is used depends on whether `T` is used. +//! +//! ```C++ +//! template <typename T> +//! class DoesntUseT { +//! int x; +//! }; +//! +//! template <typename U> +//! class Fml { +//! DoesntUseT<U> lololol; +//! }; +//! ``` +//! +//! We can express the set of used template parameters as a constraint solving +//! problem (where the set of template parameters used by a given IR item is the +//! union of its sub-item's used template parameters) and iterate to a +//! fixed-point. +//! +//! We use the `ir::analysis::MonotoneFramework` infrastructure for this +//! fix-point analysis, where our lattice is the mapping from each IR item to +//! 
the powerset of the template parameters that appear in the input C++ header, +//! our join function is set union. The set of template parameters appearing in +//! the program is finite, as is the number of IR items. We start at our +//! lattice's bottom element: every item mapping to an empty set of template +//! parameters. Our analysis only adds members to each item's set of used +//! template parameters, never removes them, so it is monotone. Because our +//! lattice is finite and our constraint function is monotone, iteration to a +//! fix-point will terminate. +//! +//! See `src/ir/analysis.rs` for more. + +use super::{ConstrainResult, MonotoneFramework}; +use crate::ir::context::{BindgenContext, ItemId}; +use crate::ir::item::{Item, ItemSet}; +use crate::ir::template::{TemplateInstantiation, TemplateParameters}; +use crate::ir::traversal::{EdgeKind, Trace}; +use crate::ir::ty::TypeKind; +use crate::{HashMap, HashSet}; + +/// An analysis that finds for each IR item its set of template parameters that +/// it uses. 
+/// +/// We use the monotone constraint function `template_param_usage`, defined as +/// follows: +/// +/// * If `T` is a named template type parameter, it trivially uses itself: +/// +/// ```ignore +/// template_param_usage(T) = { T } +/// ``` +/// +/// * If `inst` is a template instantiation, `inst.args` are the template +/// instantiation's template arguments, `inst.def` is the template definition +/// being instantiated, and `inst.def.params` is the template definition's +/// template parameters, then the instantiation's usage is the union of each +/// of its arguments' usages *if* the corresponding template parameter is in +/// turn used by the template definition: +/// +/// ```ignore +/// template_param_usage(inst) = union( +/// template_param_usage(inst.args[i]) +/// for i in 0..length(inst.args) +/// if inst.def.params[i] in template_param_usage(inst.def) +/// ) +/// ``` +/// +/// * Finally, for all other IR item kinds, we use our lattice's `join` +/// operation: set union with each successor of the given item's template +/// parameter usage: +/// +/// ```ignore +/// template_param_usage(v) = +/// union(template_param_usage(w) for w in successors(v)) +/// ``` +/// +/// Note that we ignore certain edges in the graph, such as edges from a +/// template declaration to its template parameters' definitions for this +/// analysis. If we didn't, then we would mistakenly determine that every +/// template parameter is always used. +/// +/// The final wrinkle is handling of blocklisted types. Normally, we say that +/// the set of allowlisted items is the transitive closure of items explicitly +/// called out for allowlisting, *without* any items explicitly called out as +/// blocklisted. However, for the purposes of this analysis's correctness, we +/// simplify and run the analysis on the full transitive closure of +/// allowlisted items. 
We do, however, treat instantiations of blocklisted items +/// specially; see `constrain_instantiation_of_blocklisted_template` and its +/// documentation for details. +#[derive(Debug, Clone)] +pub struct UsedTemplateParameters<'ctx> { + ctx: &'ctx BindgenContext, + + // The Option is only there for temporary moves out of the hash map. See the + // comments in `UsedTemplateParameters::constrain` below. + used: HashMap<ItemId, Option<ItemSet>>, + + dependencies: HashMap<ItemId, Vec<ItemId>>, + + // The set of allowlisted items, without any blocklisted items reachable + // from the allowlisted items which would otherwise be considered + // allowlisted as well. + allowlisted_items: HashSet<ItemId>, +} + +impl<'ctx> UsedTemplateParameters<'ctx> { + fn consider_edge(kind: EdgeKind) -> bool { + match kind { + // For each of these kinds of edges, if the referent uses a template + // parameter, then it should be considered that the origin of the + // edge also uses the template parameter. + EdgeKind::TemplateArgument | + EdgeKind::BaseMember | + EdgeKind::Field | + EdgeKind::Constructor | + EdgeKind::Destructor | + EdgeKind::VarType | + EdgeKind::FunctionReturn | + EdgeKind::FunctionParameter | + EdgeKind::TypeReference => true, + + // An inner var or type using a template parameter is orthogonal + // from whether we use it. See template-param-usage-{6,11}.hpp. + EdgeKind::InnerVar | EdgeKind::InnerType => false, + + // We can't emit machine code for new monomorphizations of class + // templates' methods (and don't detect explicit instantiations) so + // we must ignore template parameters that are only used by + // methods. 
This doesn't apply to a function type's return or + // parameter types, however, because of type aliases of function + // pointers that use template parameters, eg + // tests/headers/struct_with_typedef_template_arg.hpp + EdgeKind::Method => false, + + // If we considered these edges, we would end up mistakenly claiming + // that every template parameter is always used. + EdgeKind::TemplateDeclaration | + EdgeKind::TemplateParameterDefinition => false, + + // Since we have to be careful about which edges we consider for + // this analysis to be correct, we ignore generic edges. We also + // avoid a `_` wild card to force authors of new edge kinds to + // determine whether they need to be considered by this analysis. + EdgeKind::Generic => false, + } + } + + fn take_this_id_usage_set<Id: Into<ItemId>>( + &mut self, + this_id: Id, + ) -> ItemSet { + let this_id = this_id.into(); + self.used + .get_mut(&this_id) + .expect( + "Should have a set of used template params for every item \ + id", + ) + .take() + .expect( + "Should maintain the invariant that all used template param \ + sets are `Some` upon entry of `constrain`", + ) + } + + /// We say that blocklisted items use all of their template parameters. The + /// blocklisted type is most likely implemented explicitly by the user, + /// since it won't be in the generated bindings, and we don't know exactly + /// what they'll do with template parameters, but we can push the issue down + /// the line to them. 
+ fn constrain_instantiation_of_blocklisted_template( + &self, + this_id: ItemId, + used_by_this_id: &mut ItemSet, + instantiation: &TemplateInstantiation, + ) { + trace!( + " instantiation of blocklisted template, uses all template \ + arguments" + ); + + let args = instantiation + .template_arguments() + .iter() + .map(|a| { + a.into_resolver() + .through_type_refs() + .through_type_aliases() + .resolve(self.ctx) + .id() + }) + .filter(|a| *a != this_id) + .flat_map(|a| { + self.used + .get(&a) + .expect("Should have a used entry for the template arg") + .as_ref() + .expect( + "Because a != this_id, and all used template \ + param sets other than this_id's are `Some`, \ + a's used template param set should be `Some`", + ) + .iter() + .cloned() + }); + + used_by_this_id.extend(args); + } + + /// A template instantiation's concrete template argument is only used if + /// the template definition uses the corresponding template parameter. + fn constrain_instantiation( + &self, + this_id: ItemId, + used_by_this_id: &mut ItemSet, + instantiation: &TemplateInstantiation, + ) { + trace!(" template instantiation"); + + let decl = self.ctx.resolve_type(instantiation.template_definition()); + let args = instantiation.template_arguments(); + + let params = decl.self_template_params(self.ctx); + + debug_assert!(this_id != instantiation.template_definition()); + let used_by_def = self.used + .get(&instantiation.template_definition().into()) + .expect("Should have a used entry for instantiation's template definition") + .as_ref() + .expect("And it should be Some because only this_id's set is None, and an \ + instantiation's template definition should never be the \ + instantiation itself"); + + for (arg, param) in args.iter().zip(params.iter()) { + trace!( + " instantiation's argument {:?} is used if definition's \ + parameter {:?} is used", + arg, + param + ); + + if used_by_def.contains(&param.into()) { + trace!(" param is used by template definition"); + + let arg = arg + 
.into_resolver() + .through_type_refs() + .through_type_aliases() + .resolve(self.ctx) + .id(); + + if arg == this_id { + continue; + } + + let used_by_arg = self + .used + .get(&arg) + .expect("Should have a used entry for the template arg") + .as_ref() + .expect( + "Because arg != this_id, and all used template \ + param sets other than this_id's are `Some`, \ + arg's used template param set should be \ + `Some`", + ) + .iter() + .cloned(); + used_by_this_id.extend(used_by_arg); + } + } + } + + /// The join operation on our lattice: the set union of all of this id's + /// successors. + fn constrain_join(&self, used_by_this_id: &mut ItemSet, item: &Item) { + trace!(" other item: join with successors' usage"); + + item.trace( + self.ctx, + &mut |sub_id, edge_kind| { + // Ignore ourselves, since union with ourself is a + // no-op. Ignore edges that aren't relevant to the + // analysis. + if sub_id == item.id() || !Self::consider_edge(edge_kind) { + return; + } + + let used_by_sub_id = self + .used + .get(&sub_id) + .expect("Should have a used set for the sub_id successor") + .as_ref() + .expect( + "Because sub_id != id, and all used template \ + param sets other than id's are `Some`, \ + sub_id's used template param set should be \ + `Some`", + ) + .iter() + .cloned(); + + trace!( + " union with {:?}'s usage: {:?}", + sub_id, + used_by_sub_id.clone().collect::<Vec<_>>() + ); + + used_by_this_id.extend(used_by_sub_id); + }, + &(), + ); + } +} + +impl<'ctx> MonotoneFramework for UsedTemplateParameters<'ctx> { + type Node = ItemId; + type Extra = &'ctx BindgenContext; + type Output = HashMap<ItemId, ItemSet>; + + fn new(ctx: &'ctx BindgenContext) -> UsedTemplateParameters<'ctx> { + let mut used = HashMap::default(); + let mut dependencies = HashMap::default(); + let allowlisted_items: HashSet<_> = + ctx.allowlisted_items().iter().cloned().collect(); + + let allowlisted_and_blocklisted_items: ItemSet = allowlisted_items + .iter() + .cloned() + .flat_map(|i| { + let mut 
reachable = vec![i]; + i.trace( + ctx, + &mut |s, _| { + reachable.push(s); + }, + &(), + ); + reachable + }) + .collect(); + + for item in allowlisted_and_blocklisted_items { + dependencies.entry(item).or_insert_with(Vec::new); + used.entry(item).or_insert_with(|| Some(ItemSet::new())); + + { + // We reverse our natural IR graph edges to find dependencies + // between nodes. + item.trace( + ctx, + &mut |sub_item: ItemId, _| { + used.entry(sub_item) + .or_insert_with(|| Some(ItemSet::new())); + dependencies + .entry(sub_item) + .or_insert_with(Vec::new) + .push(item); + }, + &(), + ); + } + + // Additionally, whether a template instantiation's template + // arguments are used depends on whether the template declaration's + // generic template parameters are used. + let item_kind = + ctx.resolve_item(item).as_type().map(|ty| ty.kind()); + if let Some(&TypeKind::TemplateInstantiation(ref inst)) = item_kind + { + let decl = ctx.resolve_type(inst.template_definition()); + let args = inst.template_arguments(); + + // Although template definitions should always have + // template parameters, there is a single exception: + // opaque templates. Hence the unwrap_or. + let params = decl.self_template_params(ctx); + + for (arg, param) in args.iter().zip(params.iter()) { + let arg = arg + .into_resolver() + .through_type_aliases() + .through_type_refs() + .resolve(ctx) + .id(); + + let param = param + .into_resolver() + .through_type_aliases() + .through_type_refs() + .resolve(ctx) + .id(); + + used.entry(arg).or_insert_with(|| Some(ItemSet::new())); + used.entry(param).or_insert_with(|| Some(ItemSet::new())); + + dependencies + .entry(arg) + .or_insert_with(Vec::new) + .push(param); + } + } + } + + if cfg!(feature = "testing_only_extra_assertions") { + // Invariant: The `used` map has an entry for every allowlisted + // item, as well as all explicitly blocklisted items that are + // reachable from allowlisted items. 
+ // + // Invariant: the `dependencies` map has an entry for every + // allowlisted item. + // + // (This is so that every item we call `constrain` on is guaranteed + // to have a set of template parameters, and we can allow + // blocklisted templates to use all of their parameters). + for item in allowlisted_items.iter() { + extra_assert!(used.contains_key(item)); + extra_assert!(dependencies.contains_key(item)); + item.trace( + ctx, + &mut |sub_item, _| { + extra_assert!(used.contains_key(&sub_item)); + extra_assert!(dependencies.contains_key(&sub_item)); + }, + &(), + ) + } + } + + UsedTemplateParameters { + ctx, + used, + dependencies, + allowlisted_items, + } + } + + fn initial_worklist(&self) -> Vec<ItemId> { + // The transitive closure of all allowlisted items, including explicitly + // blocklisted items. + self.ctx + .allowlisted_items() + .iter() + .cloned() + .flat_map(|i| { + let mut reachable = vec![i]; + i.trace( + self.ctx, + &mut |s, _| { + reachable.push(s); + }, + &(), + ); + reachable + }) + .collect() + } + + fn constrain(&mut self, id: ItemId) -> ConstrainResult { + // Invariant: all hash map entries' values are `Some` upon entering and + // exiting this method. + extra_assert!(self.used.values().all(|v| v.is_some())); + + // Take the set for this id out of the hash map while we mutate it based + // on other hash map entries. We *must* put it back into the hash map at + // the end of this method. This allows us to side-step HashMap's lack of + // an analog to slice::split_at_mut. + let mut used_by_this_id = self.take_this_id_usage_set(id); + + trace!("constrain {:?}", id); + trace!(" initially, used set is {:?}", used_by_this_id); + + let original_len = used_by_this_id.len(); + + let item = self.ctx.resolve_item(id); + let ty_kind = item.as_type().map(|ty| ty.kind()); + match ty_kind { + // Named template type parameters trivially use themselves. 
+ Some(&TypeKind::TypeParam) => { + trace!(" named type, trivially uses itself"); + used_by_this_id.insert(id); + } + // Template instantiations only use their template arguments if the + // template definition uses the corresponding template parameter. + Some(&TypeKind::TemplateInstantiation(ref inst)) => { + if self + .allowlisted_items + .contains(&inst.template_definition().into()) + { + self.constrain_instantiation( + id, + &mut used_by_this_id, + inst, + ); + } else { + self.constrain_instantiation_of_blocklisted_template( + id, + &mut used_by_this_id, + inst, + ); + } + } + // Otherwise, add the union of each of its referent item's template + // parameter usage. + _ => self.constrain_join(&mut used_by_this_id, item), + } + + trace!(" finally, used set is {:?}", used_by_this_id); + + let new_len = used_by_this_id.len(); + assert!( + new_len >= original_len, + "This is the property that ensures this function is monotone -- \ + if it doesn't hold, the analysis might never terminate!" + ); + + // Put the set back in the hash map and restore our invariant. 
+ debug_assert!(self.used[&id].is_none()); + self.used.insert(id, Some(used_by_this_id)); + extra_assert!(self.used.values().all(|v| v.is_some())); + + if new_len != original_len { + ConstrainResult::Changed + } else { + ConstrainResult::Same + } + } + + fn each_depending_on<F>(&self, item: ItemId, mut f: F) + where + F: FnMut(ItemId), + { + if let Some(edges) = self.dependencies.get(&item) { + for item in edges { + trace!("enqueue {:?} into worklist", item); + f(*item); + } + } + } +} + +impl<'ctx> From<UsedTemplateParameters<'ctx>> for HashMap<ItemId, ItemSet> { + fn from(used_templ_params: UsedTemplateParameters<'ctx>) -> Self { + used_templ_params + .used + .into_iter() + .map(|(k, v)| (k, v.unwrap())) + .collect() + } +} diff --git a/third_party/rust/bindgen/ir/annotations.rs b/third_party/rust/bindgen/ir/annotations.rs new file mode 100644 index 0000000000..288c11ebae --- /dev/null +++ b/third_party/rust/bindgen/ir/annotations.rs @@ -0,0 +1,211 @@ +//! Types and functions related to bindgen annotation comments. +//! +//! Users can add annotations in doc comments to types that they would like to +//! replace other types with, mark as opaque, etc. This module deals with all of +//! that stuff. + +use crate::clang; + +/// What kind of accessor should we provide for a field? +#[derive(Copy, PartialEq, Eq, Clone, Debug)] +pub enum FieldAccessorKind { + /// No accessor. + None, + /// Plain accessor. + Regular, + /// Unsafe accessor. + Unsafe, + /// Immutable accessor. + Immutable, +} + +/// Annotations for a given item, or a field. +/// +/// You can see the kind of comments that are accepted in the Doxygen +/// documentation: +/// +/// http://www.stack.nl/~dimitri/doxygen/manual/docblocks.html +#[derive(Default, Clone, PartialEq, Eq, Debug)] +pub struct Annotations { + /// Whether this item is marked as opaque. Only applies to types. + opaque: bool, + /// Whether this item should be hidden from the output. Only applies to + /// types, or enum variants. 
+ hide: bool, + /// Whether this type should be replaced by another. The name is a + /// namespace-aware path. + use_instead_of: Option<Vec<String>>, + /// Manually disable deriving copy/clone on this type. Only applies to + /// struct or union types. + disallow_copy: bool, + /// Manually disable deriving debug on this type. + disallow_debug: bool, + /// Manually disable deriving/implement default on this type. + disallow_default: bool, + /// Whether to add a #[must_use] annotation to this type. + must_use_type: bool, + /// Whether fields should be marked as private or not. You can set this on + /// structs (it will apply to all the fields), or individual fields. + private_fields: Option<bool>, + /// The kind of accessor this field will have. Also can be applied to + /// structs so all the fields inside share it by default. + accessor_kind: Option<FieldAccessorKind>, + /// Whether this enum variant should be constified. + /// + /// This is controlled by the `constant` attribute, this way: + /// + /// ```cpp + /// enum Foo { + /// Bar = 0, /**< <div rustbindgen constant></div> */ + /// Baz = 0, + /// }; + /// ``` + /// + /// In that case, bindgen will generate a constant for `Bar` instead of + /// `Baz`. + constify_enum_variant: bool, + /// List of explicit derives for this type. + derives: Vec<String>, +} + +fn parse_accessor(s: &str) -> FieldAccessorKind { + match s { + "false" => FieldAccessorKind::None, + "unsafe" => FieldAccessorKind::Unsafe, + "immutable" => FieldAccessorKind::Immutable, + _ => FieldAccessorKind::Regular, + } +} + +impl Annotations { + /// Construct new annotations for the given cursor and its bindgen comments + /// (if any). + pub fn new(cursor: &clang::Cursor) -> Option<Annotations> { + let mut anno = Annotations::default(); + let mut matched_one = false; + anno.parse(&cursor.comment(), &mut matched_one); + + if matched_one { + Some(anno) + } else { + None + } + } + + /// Should this type be hidden? 
+ pub fn hide(&self) -> bool { + self.hide + } + + /// Should this type be opaque? + pub fn opaque(&self) -> bool { + self.opaque + } + + /// For a given type, indicates the type it should replace. + /// + /// For example, in the following code: + /// + /// ```cpp + /// + /// /** <div rustbindgen replaces="Bar"></div> */ + /// struct Foo { int x; }; + /// + /// struct Bar { char foo; }; + /// ``` + /// + /// the generated code would look something like: + /// + /// ``` + /// /** <div rustbindgen replaces="Bar"></div> */ + /// struct Bar { + /// x: ::std::os::raw::c_int, + /// }; + /// ``` + /// + /// That is, code for `Foo` is used to generate `Bar`. + pub fn use_instead_of(&self) -> Option<&[String]> { + self.use_instead_of.as_deref() + } + + /// The list of derives that have been specified in this annotation. + pub fn derives(&self) -> &[String] { + &self.derives + } + + /// Should we avoid implementing the `Copy` trait? + pub fn disallow_copy(&self) -> bool { + self.disallow_copy + } + + /// Should we avoid implementing the `Debug` trait? + pub fn disallow_debug(&self) -> bool { + self.disallow_debug + } + + /// Should we avoid implementing the `Default` trait? + pub fn disallow_default(&self) -> bool { + self.disallow_default + } + + /// Should this type get a `#[must_use]` annotation? + pub fn must_use_type(&self) -> bool { + self.must_use_type + } + + /// Should the fields be private? + pub fn private_fields(&self) -> Option<bool> { + self.private_fields + } + + /// What kind of accessors should we provide for this type's fields? 
+ pub fn accessor_kind(&self) -> Option<FieldAccessorKind> { + self.accessor_kind + } + + fn parse(&mut self, comment: &clang::Comment, matched: &mut bool) { + use clang_sys::CXComment_HTMLStartTag; + if comment.kind() == CXComment_HTMLStartTag && + comment.get_tag_name() == "div" && + comment + .get_tag_attrs() + .next() + .map_or(false, |attr| attr.name == "rustbindgen") + { + *matched = true; + for attr in comment.get_tag_attrs() { + match attr.name.as_str() { + "opaque" => self.opaque = true, + "hide" => self.hide = true, + "nocopy" => self.disallow_copy = true, + "nodebug" => self.disallow_debug = true, + "nodefault" => self.disallow_default = true, + "mustusetype" => self.must_use_type = true, + "replaces" => { + self.use_instead_of = Some( + attr.value.split("::").map(Into::into).collect(), + ) + } + "derive" => self.derives.push(attr.value), + "private" => { + self.private_fields = Some(attr.value != "false") + } + "accessor" => { + self.accessor_kind = Some(parse_accessor(&attr.value)) + } + "constant" => self.constify_enum_variant = true, + _ => {} + } + } + } + + for child in comment.get_children() { + self.parse(&child, matched); + } + } + + /// Returns whether we've parsed a "constant" attribute. + pub fn constify_enum_variant(&self) -> bool { + self.constify_enum_variant + } +} diff --git a/third_party/rust/bindgen/ir/comment.rs b/third_party/rust/bindgen/ir/comment.rs new file mode 100644 index 0000000000..3eb17aacb9 --- /dev/null +++ b/third_party/rust/bindgen/ir/comment.rs @@ -0,0 +1,100 @@ +//! Utilities for manipulating C/C++ comments. + +/// The type of a comment. +#[derive(Debug, PartialEq, Eq)] +enum Kind { + /// A `///` comment, or something of the like. + /// All lines in a comment should start with the same symbol. + SingleLines, + /// A `/**` comment, where each other line can start with `*` and the + /// entire block ends with `*/`. + MultiLine, +} + +/// Preprocesses a C/C++ comment so that it is a valid Rust comment. 
+pub fn preprocess(comment: &str) -> String { + match self::kind(comment) { + Some(Kind::SingleLines) => preprocess_single_lines(comment), + Some(Kind::MultiLine) => preprocess_multi_line(comment), + None => comment.to_owned(), + } +} + +/// Gets the kind of the doc comment, if it is one. +fn kind(comment: &str) -> Option<Kind> { + if comment.starts_with("/*") { + Some(Kind::MultiLine) + } else if comment.starts_with("//") { + Some(Kind::SingleLines) + } else { + None + } +} + +/// Preprocesses multiple single line comments. +/// +/// Handles lines starting with both `//` and `///`. +fn preprocess_single_lines(comment: &str) -> String { + debug_assert!(comment.starts_with("//"), "comment is not single line"); + + let lines: Vec<_> = comment + .lines() + .map(|l| l.trim().trim_start_matches('/')) + .collect(); + lines.join("\n") +} + +fn preprocess_multi_line(comment: &str) -> String { + let comment = comment + .trim_start_matches('/') + .trim_end_matches('/') + .trim_end_matches('*'); + + // Strip any potential `*` characters preceding each line. + let mut lines: Vec<_> = comment + .lines() + .map(|line| line.trim().trim_start_matches('*').trim_start_matches('!')) + .skip_while(|line| line.trim().is_empty()) // Skip the first empty lines. + .collect(); + + // Remove the trailing line corresponding to the `*/`. 
+ if lines.last().map_or(false, |l| l.trim().is_empty()) { + lines.pop(); + } + + lines.join("\n") +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn picks_up_single_and_multi_line_doc_comments() { + assert_eq!(kind("/// hello"), Some(Kind::SingleLines)); + assert_eq!(kind("/** world */"), Some(Kind::MultiLine)); + } + + #[test] + fn processes_single_lines_correctly() { + assert_eq!(preprocess("///"), ""); + assert_eq!(preprocess("/// hello"), " hello"); + assert_eq!(preprocess("// hello"), " hello"); + assert_eq!(preprocess("// hello"), " hello"); + } + + #[test] + fn processes_multi_lines_correctly() { + assert_eq!(preprocess("/**/"), ""); + + assert_eq!( + preprocess("/** hello \n * world \n * foo \n */"), + " hello\n world\n foo" + ); + + assert_eq!( + preprocess("/**\nhello\n*world\n*foo\n*/"), + "hello\nworld\nfoo" + ); + } +} diff --git a/third_party/rust/bindgen/ir/comp.rs b/third_party/rust/bindgen/ir/comp.rs new file mode 100644 index 0000000000..039742a48d --- /dev/null +++ b/third_party/rust/bindgen/ir/comp.rs @@ -0,0 +1,1890 @@ +//! Compound types (unions and structs) in our intermediate representation. + +use super::analysis::Sizedness; +use super::annotations::Annotations; +use super::context::{BindgenContext, FunctionId, ItemId, TypeId, VarId}; +use super::dot::DotAttributes; +use super::item::{IsOpaque, Item}; +use super::layout::Layout; +use super::template::TemplateParameters; +use super::traversal::{EdgeKind, Trace, Tracer}; +use super::ty::RUST_DERIVE_IN_ARRAY_LIMIT; +use crate::clang; +use crate::codegen::struct_layout::{align_to, bytes_from_bits_pow2}; +use crate::ir::derive::CanDeriveCopy; +use crate::parse::{ClangItemParser, ParseError}; +use crate::HashMap; +use crate::NonCopyUnionStyle; +use peeking_take_while::PeekableExt; +use std::cmp; +use std::io; +use std::mem; + +/// The kind of compound type. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum CompKind { + /// A struct. + Struct, + /// A union. 
+ Union, +} + +/// The kind of C++ method. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum MethodKind { + /// A constructor. We represent it as method for convenience, to avoid code + /// duplication. + Constructor, + /// A destructor. + Destructor, + /// A virtual destructor. + VirtualDestructor { + /// Whether it's pure virtual. + pure_virtual: bool, + }, + /// A static method. + Static, + /// A normal method. + Normal, + /// A virtual method. + Virtual { + /// Whether it's pure virtual. + pure_virtual: bool, + }, +} + +impl MethodKind { + /// Is this a destructor method? + pub fn is_destructor(&self) -> bool { + matches!( + *self, + MethodKind::Destructor | MethodKind::VirtualDestructor { .. } + ) + } + + /// Is this a pure virtual method? + pub fn is_pure_virtual(&self) -> bool { + match *self { + MethodKind::Virtual { pure_virtual } | + MethodKind::VirtualDestructor { pure_virtual } => pure_virtual, + _ => false, + } + } +} + +/// A struct representing a C++ method, either static, normal, or virtual. +#[derive(Debug)] +pub struct Method { + kind: MethodKind, + /// The signature of the method. Take into account this is not a `Type` + /// item, but a `Function` one. + /// + /// This is tricky and probably this field should be renamed. + signature: FunctionId, + is_const: bool, +} + +impl Method { + /// Construct a new `Method`. + pub fn new( + kind: MethodKind, + signature: FunctionId, + is_const: bool, + ) -> Self { + Method { + kind, + signature, + is_const, + } + } + + /// What kind of method is this? + pub fn kind(&self) -> MethodKind { + self.kind + } + + /// Is this a constructor? + pub fn is_constructor(&self) -> bool { + self.kind == MethodKind::Constructor + } + + /// Is this a virtual method? + pub fn is_virtual(&self) -> bool { + matches!( + self.kind, + MethodKind::Virtual { .. } | MethodKind::VirtualDestructor { .. } + ) + } + + /// Is this a static method? 
+ pub fn is_static(&self) -> bool { + self.kind == MethodKind::Static + } + + /// Get the id for the `Function` signature for this method. + pub fn signature(&self) -> FunctionId { + self.signature + } + + /// Is this a const qualified method? + pub fn is_const(&self) -> bool { + self.is_const + } +} + +/// Methods common to the various field types. +pub trait FieldMethods { + /// Get the name of this field. + fn name(&self) -> Option<&str>; + + /// Get the type of this field. + fn ty(&self) -> TypeId; + + /// Get the comment for this field. + fn comment(&self) -> Option<&str>; + + /// If this is a bitfield, how many bits does it need? + fn bitfield_width(&self) -> Option<u32>; + + /// Is this field declared public? + fn is_public(&self) -> bool; + + /// Get the annotations for this field. + fn annotations(&self) -> &Annotations; + + /// The offset of the field (in bits) + fn offset(&self) -> Option<usize>; +} + +/// A contiguous set of logical bitfields that live within the same physical +/// allocation unit. See 9.2.4 [class.bit] in the C++ standard and [section +/// 2.4.II.1 in the Itanium C++ +/// ABI](http://itanium-cxx-abi.github.io/cxx-abi/abi.html#class-types). +#[derive(Debug)] +pub struct BitfieldUnit { + nth: usize, + layout: Layout, + bitfields: Vec<Bitfield>, +} + +impl BitfieldUnit { + /// Get the 1-based index of this bitfield unit within its containing + /// struct. Useful for generating a Rust struct's field name for this unit + /// of bitfields. + pub fn nth(&self) -> usize { + self.nth + } + + /// Get the layout within which these bitfields reside. + pub fn layout(&self) -> Layout { + self.layout + } + + /// Get the bitfields within this unit. + pub fn bitfields(&self) -> &[Bitfield] { + &self.bitfields + } +} + +/// A struct representing a C++ field. +#[derive(Debug)] +pub enum Field { + /// A normal data member. + DataMember(FieldData), + + /// A physical allocation unit containing many logical bitfields. 
+ Bitfields(BitfieldUnit), +} + +impl Field { + /// Get this field's layout. + pub fn layout(&self, ctx: &BindgenContext) -> Option<Layout> { + match *self { + Field::Bitfields(BitfieldUnit { layout, .. }) => Some(layout), + Field::DataMember(ref data) => { + ctx.resolve_type(data.ty).layout(ctx) + } + } + } +} + +impl Trace for Field { + type Extra = (); + + fn trace<T>(&self, _: &BindgenContext, tracer: &mut T, _: &()) + where + T: Tracer, + { + match *self { + Field::DataMember(ref data) => { + tracer.visit_kind(data.ty.into(), EdgeKind::Field); + } + Field::Bitfields(BitfieldUnit { ref bitfields, .. }) => { + for bf in bitfields { + tracer.visit_kind(bf.ty().into(), EdgeKind::Field); + } + } + } + } +} + +impl DotAttributes for Field { + fn dot_attributes<W>( + &self, + ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + match *self { + Field::DataMember(ref data) => data.dot_attributes(ctx, out), + Field::Bitfields(BitfieldUnit { + layout, + ref bitfields, + .. 
/// Emits the graphviz table rows describing a field.
impl DotAttributes for Field {
    fn dot_attributes<W>(
        &self,
        ctx: &BindgenContext,
        out: &mut W,
    ) -> io::Result<()>
    where
        W: io::Write,
    {
        match *self {
            // Plain members just forward to `FieldData`'s implementation.
            Field::DataMember(ref data) => data.dot_attributes(ctx, out),
            // A bitfield unit becomes a nested table: a header with the
            // unit's size and alignment, then one row per bitfield.
            Field::Bitfields(BitfieldUnit {
                layout,
                ref bitfields,
                ..
            }) => {
                // NOTE(review): the whitespace inside this raw string could
                // not be recovered from the flattened source; confirm it
                // against the original file before applying.
                writeln!(
                    out,
                    r#"<tr>
                              <td>bitfield unit</td>
                              <td>
                                <table border="0">
                                  <tr>
                                    <td>unit.size</td><td>{}</td>
                                  </tr>
                                  <tr>
                                    <td>unit.align</td><td>{}</td>
                                  </tr>
                         "#,
                    layout.size, layout.align
                )?;
                for bf in bitfields {
                    bf.dot_attributes(ctx, out)?;
                }
                // Close the nested table, cell and row opened above.
                writeln!(out, "</table></td></tr>")
            }
        }
    }
}

/// Emits one table row: the field name (or a placeholder for unnamed fields)
/// and the debug rendering of its type id.
impl DotAttributes for FieldData {
    fn dot_attributes<W>(
        &self,
        _ctx: &BindgenContext,
        out: &mut W,
    ) -> io::Result<()>
    where
        W: io::Write,
    {
        writeln!(
            out,
            "<tr><td>{}</td><td>{:?}</td></tr>",
            self.name().unwrap_or("(anonymous)"),
            self.ty()
        )
    }
}

/// Emits one table row: `name : width` and the debug rendering of the
/// bitfield's type id.
impl DotAttributes for Bitfield {
    fn dot_attributes<W>(
        &self,
        _ctx: &BindgenContext,
        out: &mut W,
    ) -> io::Result<()>
    where
        W: io::Write,
    {
        writeln!(
            out,
            "<tr><td>{} : {}</td><td>{:?}</td></tr>",
            self.name().unwrap_or("(anonymous)"),
            self.width(),
            self.ty()
        )
    }
}
+ pub fn offset_into_unit(&self) -> usize { + self.offset_into_unit + } + + /// Get the mask value that when &'ed with this bitfield's allocation unit + /// produces this bitfield's value. + pub fn mask(&self) -> u64 { + use std::u64; + + let unoffseted_mask = + if self.width() as u64 == mem::size_of::<u64>() as u64 * 8 { + u64::MAX + } else { + (1u64 << self.width()) - 1u64 + }; + + unoffseted_mask << self.offset_into_unit() + } + + /// Get the bit width of this bitfield. + pub fn width(&self) -> u32 { + self.data.bitfield_width().unwrap() + } + + /// Name of the generated Rust getter for this bitfield. + /// + /// Panics if called before assigning bitfield accessor names or if + /// this bitfield have no name. + pub fn getter_name(&self) -> &str { + assert!( + self.name().is_some(), + "`Bitfield::getter_name` called on anonymous field" + ); + self.getter_name.as_ref().expect( + "`Bitfield::getter_name` should only be called after\ + assigning bitfield accessor names", + ) + } + + /// Name of the generated Rust setter for this bitfield. + /// + /// Panics if called before assigning bitfield accessor names or if + /// this bitfield have no name. 
+ pub fn setter_name(&self) -> &str { + assert!( + self.name().is_some(), + "`Bitfield::setter_name` called on anonymous field" + ); + self.setter_name.as_ref().expect( + "`Bitfield::setter_name` should only be called\ + after assigning bitfield accessor names", + ) + } +} + +impl FieldMethods for Bitfield { + fn name(&self) -> Option<&str> { + self.data.name() + } + + fn ty(&self) -> TypeId { + self.data.ty() + } + + fn comment(&self) -> Option<&str> { + self.data.comment() + } + + fn bitfield_width(&self) -> Option<u32> { + self.data.bitfield_width() + } + + fn is_public(&self) -> bool { + self.data.is_public() + } + + fn annotations(&self) -> &Annotations { + self.data.annotations() + } + + fn offset(&self) -> Option<usize> { + self.data.offset() + } +} + +/// A raw field might be either of a plain data member or a bitfield within a +/// bitfield allocation unit, but we haven't processed it and determined which +/// yet (which would involve allocating it into a bitfield unit if it is a +/// bitfield). +#[derive(Debug)] +struct RawField(FieldData); + +impl RawField { + /// Construct a new `RawField`. 
+ fn new( + name: Option<String>, + ty: TypeId, + comment: Option<String>, + annotations: Option<Annotations>, + bitfield_width: Option<u32>, + public: bool, + offset: Option<usize>, + ) -> RawField { + RawField(FieldData { + name, + ty, + comment, + annotations: annotations.unwrap_or_default(), + bitfield_width, + public, + offset, + }) + } +} + +impl FieldMethods for RawField { + fn name(&self) -> Option<&str> { + self.0.name() + } + + fn ty(&self) -> TypeId { + self.0.ty() + } + + fn comment(&self) -> Option<&str> { + self.0.comment() + } + + fn bitfield_width(&self) -> Option<u32> { + self.0.bitfield_width() + } + + fn is_public(&self) -> bool { + self.0.is_public() + } + + fn annotations(&self) -> &Annotations { + self.0.annotations() + } + + fn offset(&self) -> Option<usize> { + self.0.offset() + } +} + +/// Convert the given ordered set of raw fields into a list of either plain data +/// members, and/or bitfield units containing multiple bitfields. +/// +/// If we do not have the layout for a bitfield's type, then we can't reliably +/// compute its allocation unit. In such cases, we return an error. +fn raw_fields_to_fields_and_bitfield_units<I>( + ctx: &BindgenContext, + raw_fields: I, + packed: bool, +) -> Result<(Vec<Field>, bool), ()> +where + I: IntoIterator<Item = RawField>, +{ + let mut raw_fields = raw_fields.into_iter().fuse().peekable(); + let mut fields = vec![]; + let mut bitfield_unit_count = 0; + + loop { + // While we have plain old data members, just keep adding them to our + // resulting fields. We introduce a scope here so that we can use + // `raw_fields` again after the `by_ref` iterator adaptor is dropped. + { + let non_bitfields = raw_fields + .by_ref() + .peeking_take_while(|f| f.bitfield_width().is_none()) + .map(|f| Field::DataMember(f.0)); + fields.extend(non_bitfields); + } + + // Now gather all the consecutive bitfields. 
Only consecutive bitfields + // may potentially share a bitfield allocation unit with each other in + // the Itanium C++ ABI. + let mut bitfields = raw_fields + .by_ref() + .peeking_take_while(|f| f.bitfield_width().is_some()) + .peekable(); + + if bitfields.peek().is_none() { + break; + } + + bitfields_to_allocation_units( + ctx, + &mut bitfield_unit_count, + &mut fields, + bitfields, + packed, + )?; + } + + assert!( + raw_fields.next().is_none(), + "The above loop should consume all items in `raw_fields`" + ); + + Ok((fields, bitfield_unit_count != 0)) +} + +/// Given a set of contiguous raw bitfields, group and allocate them into +/// (potentially multiple) bitfield units. +fn bitfields_to_allocation_units<E, I>( + ctx: &BindgenContext, + bitfield_unit_count: &mut usize, + fields: &mut E, + raw_bitfields: I, + packed: bool, +) -> Result<(), ()> +where + E: Extend<Field>, + I: IntoIterator<Item = RawField>, +{ + assert!(ctx.collected_typerefs()); + + // NOTE: What follows is reverse-engineered from LLVM's + // lib/AST/RecordLayoutBuilder.cpp + // + // FIXME(emilio): There are some differences between Microsoft and the + // Itanium ABI, but we'll ignore those and stick to Itanium for now. + // + // Also, we need to handle packed bitfields and stuff. + // + // TODO(emilio): Take into account C++'s wide bitfields, and + // packing, sigh. 
/// Given a set of contiguous raw bitfields, group and allocate them into
/// (potentially multiple) bitfield units.
///
/// Each finished unit is appended to `fields` as a `Field::Bitfields`, and
/// `bitfield_unit_count` is incremented once per unit so that units get
/// consecutive 1-based `nth` values.
///
/// # Errors
///
/// Returns `Err(())` as soon as the layout of a bitfield's type cannot be
/// determined.
///
/// # Panics
///
/// Panics if `ctx.collected_typerefs()` is false, or if one of the raw
/// fields has no bitfield width.
fn bitfields_to_allocation_units<E, I>(
    ctx: &BindgenContext,
    bitfield_unit_count: &mut usize,
    fields: &mut E,
    raw_bitfields: I,
    packed: bool,
) -> Result<(), ()>
where
    E: Extend<Field>,
    I: IntoIterator<Item = RawField>,
{
    assert!(ctx.collected_typerefs());

    // NOTE: What follows is reverse-engineered from LLVM's
    // lib/AST/RecordLayoutBuilder.cpp
    //
    // FIXME(emilio): There are some differences between Microsoft and the
    // Itanium ABI, but we'll ignore those and stick to Itanium for now.
    //
    // Also, we need to handle packed bitfields and stuff.
    //
    // TODO(emilio): Take into account C++'s wide bitfields, and
    // packing, sigh.

    /// Appends one finished allocation unit to `fields` and bumps the unit
    /// counter. The unit size is rounded up to whole bytes; the alignment is
    /// 1 for packed types and otherwise derived from `unit_align_in_bits`.
    fn flush_allocation_unit<E>(
        fields: &mut E,
        bitfield_unit_count: &mut usize,
        unit_size_in_bits: usize,
        unit_align_in_bits: usize,
        bitfields: Vec<Bitfield>,
        packed: bool,
    ) where
        E: Extend<Field>,
    {
        *bitfield_unit_count += 1;
        let align = if packed {
            1
        } else {
            bytes_from_bits_pow2(unit_align_in_bits)
        };
        let size = align_to(unit_size_in_bits, 8) / 8;
        let layout = Layout::new(size, align);
        fields.extend(Some(Field::Bitfields(BitfieldUnit {
            nth: *bitfield_unit_count,
            layout,
            bitfields,
        })));
    }

    // NOTE(review): `max_align` is updated below but never read afterwards
    // in this function.
    let mut max_align = 0;
    let mut unfilled_bits_in_unit = 0;
    let mut unit_size_in_bits = 0;
    let mut unit_align = 0;
    let mut bitfields_in_unit = vec![];

    // TODO(emilio): Determine this from attributes or pragma ms_struct
    // directives. Also, perhaps we should check if the target is MSVC?
    const is_ms_struct: bool = false;

    for bitfield in raw_bitfields {
        let bitfield_width = bitfield.bitfield_width().unwrap() as usize;
        let bitfield_layout =
            ctx.resolve_type(bitfield.ty()).layout(ctx).ok_or(())?;
        let bitfield_size = bitfield_layout.size;
        let bitfield_align = bitfield_layout.align;

        // By default the bitfield starts right after the bits used so far.
        let mut offset = unit_size_in_bits;
        if !packed {
            if is_ms_struct {
                if unit_size_in_bits != 0 &&
                    (bitfield_width == 0 ||
                        bitfield_width > unfilled_bits_in_unit)
                {
                    // We've reached the end of this allocation unit, so flush it
                    // and its bitfields.
                    unit_size_in_bits =
                        align_to(unit_size_in_bits, unit_align * 8);
                    flush_allocation_unit(
                        fields,
                        bitfield_unit_count,
                        unit_size_in_bits,
                        unit_align,
                        mem::take(&mut bitfields_in_unit),
                        packed,
                    );

                    // Now we're working on a fresh bitfield allocation unit, so reset
                    // the current unit size and alignment.
                    offset = 0;
                    unit_align = 0;
                }
            } else if offset != 0 &&
                (bitfield_width == 0 ||
                    (offset & (bitfield_align * 8 - 1)) + bitfield_width >
                        bitfield_size * 8)
            {
                // A zero-width bitfield, or one that would cross a boundary
                // of its type's storage, starts at the next aligned offset.
                offset = align_to(offset, bitfield_align * 8);
            }
        }

        // According to the x86[-64] ABI spec: "Unnamed bit-fields' types do not
        // affect the alignment of a structure or union". This makes sense: such
        // bit-fields are only used for padding, and we can't perform an
        // un-aligned read of something we can't read because we can't even name
        // it.
        if bitfield.name().is_some() {
            max_align = cmp::max(max_align, bitfield_align);

            // NB: The `bitfield_width` here is completely, absolutely
            // intentional. Alignment of the allocation unit is based on the
            // maximum bitfield width, not (directly) on the bitfields' types'
            // alignment.
            unit_align = cmp::max(unit_align, bitfield_width);
        }

        // Always keep all bitfields around. While unnamed bitfields are used
        // for padding (and usually not needed hereafter), large unnamed
        // bitfields over their types size cause weird allocation size behavior from clang.
        // Therefore, all bitfields needed to be kept around in order to check for this
        // and make the struct opaque in this case
        bitfields_in_unit.push(Bitfield::new(offset, bitfield));

        unit_size_in_bits = offset + bitfield_width;

        // Compute what the physical unit's final size would be given what we
        // have seen so far, and use that to compute how many bits are still
        // available in the unit.
        let data_size = align_to(unit_size_in_bits, bitfield_align * 8);
        unfilled_bits_in_unit = data_size - unit_size_in_bits;
    }

    if unit_size_in_bits != 0 {
        // Flush the last allocation unit and its bitfields.
        flush_allocation_unit(
            fields,
            bitfield_unit_count,
            unit_size_in_bits,
            unit_align,
            bitfields_in_unit,
            packed,
        );
    }

    Ok(())
}
+ flush_allocation_unit( + fields, + bitfield_unit_count, + unit_size_in_bits, + unit_align, + bitfields_in_unit, + packed, + ); + } + + Ok(()) +} + +/// A compound structure's fields are initially raw, and have bitfields that +/// have not been grouped into allocation units. During this time, the fields +/// are mutable and we build them up during parsing. +/// +/// Then, once resolving typerefs is completed, we compute all structs' fields' +/// bitfield allocation units, and they remain frozen and immutable forever +/// after. +#[derive(Debug)] +enum CompFields { + Before(Vec<RawField>), + After { + fields: Vec<Field>, + has_bitfield_units: bool, + }, + Error, +} + +impl Default for CompFields { + fn default() -> CompFields { + CompFields::Before(vec![]) + } +} + +impl CompFields { + fn append_raw_field(&mut self, raw: RawField) { + match *self { + CompFields::Before(ref mut raws) => { + raws.push(raw); + } + _ => { + panic!( + "Must not append new fields after computing bitfield allocation units" + ); + } + } + } + + fn compute_bitfield_units(&mut self, ctx: &BindgenContext, packed: bool) { + let raws = match *self { + CompFields::Before(ref mut raws) => mem::take(raws), + _ => { + panic!("Already computed bitfield units"); + } + }; + + let result = raw_fields_to_fields_and_bitfield_units(ctx, raws, packed); + + match result { + Ok((fields, has_bitfield_units)) => { + *self = CompFields::After { + fields, + has_bitfield_units, + }; + } + Err(()) => { + *self = CompFields::Error; + } + } + } + + fn deanonymize_fields(&mut self, ctx: &BindgenContext, methods: &[Method]) { + let fields = match *self { + CompFields::After { ref mut fields, .. } => fields, + // Nothing to do here. 
+ CompFields::Error => return, + CompFields::Before(_) => { + panic!("Not yet computed bitfield units."); + } + }; + + fn has_method( + methods: &[Method], + ctx: &BindgenContext, + name: &str, + ) -> bool { + methods.iter().any(|method| { + let method_name = ctx.resolve_func(method.signature()).name(); + method_name == name || ctx.rust_mangle(method_name) == name + }) + } + + struct AccessorNamesPair { + getter: String, + setter: String, + } + + let mut accessor_names: HashMap<String, AccessorNamesPair> = fields + .iter() + .flat_map(|field| match *field { + Field::Bitfields(ref bu) => &*bu.bitfields, + Field::DataMember(_) => &[], + }) + .filter_map(|bitfield| bitfield.name()) + .map(|bitfield_name| { + let bitfield_name = bitfield_name.to_string(); + let getter = { + let mut getter = + ctx.rust_mangle(&bitfield_name).to_string(); + if has_method(methods, ctx, &getter) { + getter.push_str("_bindgen_bitfield"); + } + getter + }; + let setter = { + let setter = format!("set_{}", bitfield_name); + let mut setter = ctx.rust_mangle(&setter).to_string(); + if has_method(methods, ctx, &setter) { + setter.push_str("_bindgen_bitfield"); + } + setter + }; + (bitfield_name, AccessorNamesPair { getter, setter }) + }) + .collect(); + + let mut anon_field_counter = 0; + for field in fields.iter_mut() { + match *field { + Field::DataMember(FieldData { ref mut name, .. 
}) => { + if name.is_some() { + continue; + } + + anon_field_counter += 1; + *name = Some(format!( + "{}{}", + ctx.options().anon_fields_prefix, + anon_field_counter + )); + } + Field::Bitfields(ref mut bu) => { + for bitfield in &mut bu.bitfields { + if bitfield.name().is_none() { + continue; + } + + if let Some(AccessorNamesPair { getter, setter }) = + accessor_names.remove(bitfield.name().unwrap()) + { + bitfield.getter_name = Some(getter); + bitfield.setter_name = Some(setter); + } + } + } + } + } + } +} + +impl Trace for CompFields { + type Extra = (); + + fn trace<T>(&self, context: &BindgenContext, tracer: &mut T, _: &()) + where + T: Tracer, + { + match *self { + CompFields::Error => {} + CompFields::Before(ref fields) => { + for f in fields { + tracer.visit_kind(f.ty().into(), EdgeKind::Field); + } + } + CompFields::After { ref fields, .. } => { + for f in fields { + f.trace(context, tracer, &()); + } + } + } + } +} + +/// Common data shared across different field types. +#[derive(Clone, Debug)] +pub struct FieldData { + /// The name of the field, empty if it's an unnamed bitfield width. + name: Option<String>, + + /// The inner type. + ty: TypeId, + + /// The doc comment on the field if any. + comment: Option<String>, + + /// Annotations for this field, or the default. + annotations: Annotations, + + /// If this field is a bitfield, and how many bits does it contain if it is. 
+ bitfield_width: Option<u32>, + + /// If the C++ field is declared `public` + public: bool, + + /// The offset of the field (in bits) + offset: Option<usize>, +} + +impl FieldMethods for FieldData { + fn name(&self) -> Option<&str> { + self.name.as_deref() + } + + fn ty(&self) -> TypeId { + self.ty + } + + fn comment(&self) -> Option<&str> { + self.comment.as_deref() + } + + fn bitfield_width(&self) -> Option<u32> { + self.bitfield_width + } + + fn is_public(&self) -> bool { + self.public + } + + fn annotations(&self) -> &Annotations { + &self.annotations + } + + fn offset(&self) -> Option<usize> { + self.offset + } +} + +/// The kind of inheritance a base class is using. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum BaseKind { + /// Normal inheritance, like: + /// + /// ```cpp + /// class A : public B {}; + /// ``` + Normal, + /// Virtual inheritance, like: + /// + /// ```cpp + /// class A: public virtual B {}; + /// ``` + Virtual, +} + +/// A base class. +#[derive(Clone, Debug)] +pub struct Base { + /// The type of this base class. + pub ty: TypeId, + /// The kind of inheritance we're doing. + pub kind: BaseKind, + /// Name of the field in which this base should be stored. + pub field_name: String, + /// Whether this base is inherited from publically. + pub is_pub: bool, +} + +impl Base { + /// Whether this base class is inheriting virtually. + pub fn is_virtual(&self) -> bool { + self.kind == BaseKind::Virtual + } + + /// Whether this base class should have it's own field for storage. + pub fn requires_storage(&self, ctx: &BindgenContext) -> bool { + // Virtual bases are already taken into account by the vtable + // pointer. + // + // FIXME(emilio): Is this always right? + if self.is_virtual() { + return false; + } + + // NB: We won't include zero-sized types in our base chain because they + // would contribute to our size given the dummy field we insert for + // zero-sized types. 
+ if self.ty.is_zero_sized(ctx) { + return false; + } + + true + } + + /// Whether this base is inherited from publically. + pub fn is_public(&self) -> bool { + self.is_pub + } +} + +/// A compound type. +/// +/// Either a struct or union, a compound type is built up from the combination +/// of fields which also are associated with their own (potentially compound) +/// type. +#[derive(Debug)] +pub struct CompInfo { + /// Whether this is a struct or a union. + kind: CompKind, + + /// The members of this struct or union. + fields: CompFields, + + /// The abstract template parameters of this class. Note that these are NOT + /// concrete template arguments, and should always be a + /// `Type(TypeKind::TypeParam(name))`. For concrete template arguments, see + /// `TypeKind::TemplateInstantiation`. + template_params: Vec<TypeId>, + + /// The method declarations inside this class, if in C++ mode. + methods: Vec<Method>, + + /// The different constructors this struct or class contains. + constructors: Vec<FunctionId>, + + /// The destructor of this type. The bool represents whether this destructor + /// is virtual. + destructor: Option<(MethodKind, FunctionId)>, + + /// Vector of classes this one inherits from. + base_members: Vec<Base>, + + /// The inner types that were declared inside this class, in something like: + /// + /// class Foo { + /// typedef int FooTy; + /// struct Bar { + /// int baz; + /// }; + /// } + /// + /// static Foo::Bar const = {3}; + inner_types: Vec<TypeId>, + + /// Set of static constants declared inside this class. + inner_vars: Vec<VarId>, + + /// Whether this type should generate an vtable (TODO: Should be able to + /// look at the virtual methods and ditch this field). + has_own_virtual_method: bool, + + /// Whether this type has destructor. + has_destructor: bool, + + /// Whether this type has a base type with more than one member. + /// + /// TODO: We should be able to compute this. 
+ has_nonempty_base: bool, + + /// If this type has a template parameter which is not a type (e.g.: a + /// size_t) + has_non_type_template_params: bool, + + /// Whether this type has a bit field member whose width couldn't be + /// evaluated (e.g. if it depends on a template parameter). We generate an + /// opaque type in this case. + has_unevaluable_bit_field_width: bool, + + /// Whether we saw `__attribute__((packed))` on or within this type. + packed_attr: bool, + + /// Used to know if we've found an opaque attribute that could cause us to + /// generate a type with invalid layout. This is explicitly used to avoid us + /// generating bad alignments when parsing types like max_align_t. + /// + /// It's not clear what the behavior should be here, if generating the item + /// and pray, or behave as an opaque type. + found_unknown_attr: bool, + + /// Used to indicate when a struct has been forward declared. Usually used + /// in headers so that APIs can't modify them directly. + is_forward_declaration: bool, +} + +impl CompInfo { + /// Construct a new compound type. + pub fn new(kind: CompKind) -> Self { + CompInfo { + kind, + fields: CompFields::default(), + template_params: vec![], + methods: vec![], + constructors: vec![], + destructor: None, + base_members: vec![], + inner_types: vec![], + inner_vars: vec![], + has_own_virtual_method: false, + has_destructor: false, + has_nonempty_base: false, + has_non_type_template_params: false, + has_unevaluable_bit_field_width: false, + packed_attr: false, + found_unknown_attr: false, + is_forward_declaration: false, + } + } + + /// Compute the layout of this type. + /// + /// This is called as a fallback under some circumstances where LLVM doesn't + /// give us the correct layout. + /// + /// If we're a union without known layout, we try to compute it from our + /// members. 
This is not ideal, but clang fails to report the size for these + /// kind of unions, see test/headers/template_union.hpp + pub fn layout(&self, ctx: &BindgenContext) -> Option<Layout> { + // We can't do better than clang here, sorry. + if self.kind == CompKind::Struct { + return None; + } + + // By definition, we don't have the right layout information here if + // we're a forward declaration. + if self.is_forward_declaration() { + return None; + } + + // empty union case + if !self.has_fields() { + return None; + } + + let mut max_size = 0; + // Don't allow align(0) + let mut max_align = 1; + self.each_known_field_layout(ctx, |layout| { + max_size = cmp::max(max_size, layout.size); + max_align = cmp::max(max_align, layout.align); + }); + + Some(Layout::new(max_size, max_align)) + } + + /// Get this type's set of fields. + pub fn fields(&self) -> &[Field] { + match self.fields { + CompFields::Error => &[], + CompFields::After { ref fields, .. } => fields, + CompFields::Before(..) => { + panic!("Should always have computed bitfield units first"); + } + } + } + + fn has_fields(&self) -> bool { + match self.fields { + CompFields::Error => false, + CompFields::After { ref fields, .. } => !fields.is_empty(), + CompFields::Before(ref raw_fields) => !raw_fields.is_empty(), + } + } + + fn each_known_field_layout( + &self, + ctx: &BindgenContext, + mut callback: impl FnMut(Layout), + ) { + match self.fields { + CompFields::Error => {} + CompFields::After { ref fields, .. } => { + for field in fields.iter() { + if let Some(layout) = field.layout(ctx) { + callback(layout); + } + } + } + CompFields::Before(ref raw_fields) => { + for field in raw_fields.iter() { + let field_ty = ctx.resolve_type(field.0.ty); + if let Some(layout) = field_ty.layout(ctx) { + callback(layout); + } + } + } + } + } + + fn has_bitfields(&self) -> bool { + match self.fields { + CompFields::Error => false, + CompFields::After { + has_bitfield_units, .. 
+ } => has_bitfield_units, + CompFields::Before(_) => { + panic!("Should always have computed bitfield units first"); + } + } + } + + /// Returns whether we have a too large bitfield unit, in which case we may + /// not be able to derive some of the things we should be able to normally + /// derive. + pub fn has_too_large_bitfield_unit(&self) -> bool { + if !self.has_bitfields() { + return false; + } + self.fields().iter().any(|field| match *field { + Field::DataMember(..) => false, + Field::Bitfields(ref unit) => { + unit.layout.size > RUST_DERIVE_IN_ARRAY_LIMIT + } + }) + } + + /// Does this type have any template parameters that aren't types + /// (e.g. int)? + pub fn has_non_type_template_params(&self) -> bool { + self.has_non_type_template_params + } + + /// Do we see a virtual function during parsing? + /// Get the has_own_virtual_method boolean. + pub fn has_own_virtual_method(&self) -> bool { + self.has_own_virtual_method + } + + /// Did we see a destructor when parsing this type? + pub fn has_own_destructor(&self) -> bool { + self.has_destructor + } + + /// Get this type's set of methods. + pub fn methods(&self) -> &[Method] { + &self.methods + } + + /// Get this type's set of constructors. + pub fn constructors(&self) -> &[FunctionId] { + &self.constructors + } + + /// Get this type's destructor. + pub fn destructor(&self) -> Option<(MethodKind, FunctionId)> { + self.destructor + } + + /// What kind of compound type is this? + pub fn kind(&self) -> CompKind { + self.kind + } + + /// Is this a union? + pub fn is_union(&self) -> bool { + self.kind() == CompKind::Union + } + + /// The set of types that this one inherits from. + pub fn base_members(&self) -> &[Base] { + &self.base_members + } + + /// Construct a new compound type from a Clang type. 
+ pub fn from_ty( + potential_id: ItemId, + ty: &clang::Type, + location: Option<clang::Cursor>, + ctx: &mut BindgenContext, + ) -> Result<Self, ParseError> { + use clang_sys::*; + assert!( + ty.template_args().is_none(), + "We handle template instantiations elsewhere" + ); + + let mut cursor = ty.declaration(); + let mut kind = Self::kind_from_cursor(&cursor); + if kind.is_err() { + if let Some(location) = location { + kind = Self::kind_from_cursor(&location); + cursor = location; + } + } + + let kind = kind?; + + debug!("CompInfo::from_ty({:?}, {:?})", kind, cursor); + + let mut ci = CompInfo::new(kind); + ci.is_forward_declaration = + location.map_or(true, |cur| match cur.kind() { + CXCursor_ParmDecl => true, + CXCursor_StructDecl | CXCursor_UnionDecl | + CXCursor_ClassDecl => !cur.is_definition(), + _ => false, + }); + + let mut maybe_anonymous_struct_field = None; + cursor.visit(|cur| { + if cur.kind() != CXCursor_FieldDecl { + if let Some((ty, clang_ty, public, offset)) = + maybe_anonymous_struct_field.take() + { + if cur.kind() == CXCursor_TypedefDecl && + cur.typedef_type().unwrap().canonical_type() == + clang_ty + { + // Typedefs of anonymous structs appear later in the ast + // than the struct itself, that would otherwise be an + // anonymous field. Detect that case here, and do + // nothing. 
+ } else { + let field = RawField::new( + None, ty, None, None, None, public, offset, + ); + ci.fields.append_raw_field(field); + } + } + } + + match cur.kind() { + CXCursor_FieldDecl => { + if let Some((ty, clang_ty, public, offset)) = + maybe_anonymous_struct_field.take() + { + let mut used = false; + cur.visit(|child| { + if child.cur_type() == clang_ty { + used = true; + } + CXChildVisit_Continue + }); + + if !used { + let field = RawField::new( + None, ty, None, None, None, public, offset, + ); + ci.fields.append_raw_field(field); + } + } + + let bit_width = if cur.is_bit_field() { + let width = cur.bit_width(); + + // Make opaque type if the bit width couldn't be + // evaluated. + if width.is_none() { + ci.has_unevaluable_bit_field_width = true; + return CXChildVisit_Break; + } + + width + } else { + None + }; + + let field_type = Item::from_ty_or_ref( + cur.cur_type(), + cur, + Some(potential_id), + ctx, + ); + + let comment = cur.raw_comment(); + let annotations = Annotations::new(&cur); + let name = cur.spelling(); + let is_public = cur.public_accessible(); + let offset = cur.offset_of_field().ok(); + + // Name can be empty if there are bitfields, for example, + // see tests/headers/struct_with_bitfields.h + assert!( + !name.is_empty() || bit_width.is_some(), + "Empty field name?" + ); + + let name = if name.is_empty() { None } else { Some(name) }; + + let field = RawField::new( + name, + field_type, + comment, + annotations, + bit_width, + is_public, + offset, + ); + ci.fields.append_raw_field(field); + + // No we look for things like attributes and stuff. 
+ cur.visit(|cur| { + if cur.kind() == CXCursor_UnexposedAttr { + ci.found_unknown_attr = true; + } + CXChildVisit_Continue + }); + } + CXCursor_UnexposedAttr => { + ci.found_unknown_attr = true; + } + CXCursor_EnumDecl | + CXCursor_TypeAliasDecl | + CXCursor_TypeAliasTemplateDecl | + CXCursor_TypedefDecl | + CXCursor_StructDecl | + CXCursor_UnionDecl | + CXCursor_ClassTemplate | + CXCursor_ClassDecl => { + // We can find non-semantic children here, clang uses a + // StructDecl to note incomplete structs that haven't been + // forward-declared before, see [1]. + // + // Also, clang seems to scope struct definitions inside + // unions, and other named struct definitions inside other + // structs to the whole translation unit. + // + // Let's just assume that if the cursor we've found is a + // definition, it's a valid inner type. + // + // [1]: https://github.com/rust-lang/rust-bindgen/issues/482 + let is_inner_struct = + cur.semantic_parent() == cursor || cur.is_definition(); + if !is_inner_struct { + return CXChildVisit_Continue; + } + + // Even if this is a definition, we may not be the semantic + // parent, see #1281. + let inner = Item::parse(cur, Some(potential_id), ctx) + .expect("Inner ClassDecl"); + + // If we avoided recursion parsing this type (in + // `Item::from_ty_with_id()`), then this might not be a + // valid type ID, so check and gracefully handle this. + if ctx.resolve_item_fallible(inner).is_some() { + let inner = inner.expect_type_id(ctx); + + ci.inner_types.push(inner); + + // A declaration of an union or a struct without name + // could also be an unnamed field, unfortunately. 
+ if cur.is_anonymous() && cur.kind() != CXCursor_EnumDecl + { + let ty = cur.cur_type(); + let public = cur.public_accessible(); + let offset = cur.offset_of_field().ok(); + + maybe_anonymous_struct_field = + Some((inner, ty, public, offset)); + } + } + } + CXCursor_PackedAttr => { + ci.packed_attr = true; + } + CXCursor_TemplateTypeParameter => { + let param = Item::type_param(None, cur, ctx).expect( + "Item::type_param should't fail when pointing \ + at a TemplateTypeParameter", + ); + ci.template_params.push(param); + } + CXCursor_CXXBaseSpecifier => { + let is_virtual_base = cur.is_virtual_base(); + ci.has_own_virtual_method |= is_virtual_base; + + let kind = if is_virtual_base { + BaseKind::Virtual + } else { + BaseKind::Normal + }; + + let field_name = match ci.base_members.len() { + 0 => "_base".into(), + n => format!("_base_{}", n), + }; + let type_id = + Item::from_ty_or_ref(cur.cur_type(), cur, None, ctx); + ci.base_members.push(Base { + ty: type_id, + kind, + field_name, + is_pub: cur.access_specifier() == + clang_sys::CX_CXXPublic, + }); + } + CXCursor_Constructor | CXCursor_Destructor | + CXCursor_CXXMethod => { + let is_virtual = cur.method_is_virtual(); + let is_static = cur.method_is_static(); + debug_assert!(!(is_static && is_virtual), "How?"); + + ci.has_destructor |= cur.kind() == CXCursor_Destructor; + ci.has_own_virtual_method |= is_virtual; + + // This used to not be here, but then I tried generating + // stylo bindings with this (without path filters), and + // cried a lot with a method in gfx/Point.h + // (ToUnknownPoint), that somehow was causing the same type + // to be inserted in the map two times. + // + // I couldn't make a reduced test case, but anyway... + // Methods of template functions not only used to be inlined, + // but also instantiated, and we wouldn't be able to call + // them, so just bail out. 
+ if !ci.template_params.is_empty() { + return CXChildVisit_Continue; + } + + // NB: This gets us an owned `Function`, not a + // `FunctionSig`. + let signature = + match Item::parse(cur, Some(potential_id), ctx) { + Ok(item) + if ctx + .resolve_item(item) + .kind() + .is_function() => + { + item + } + _ => return CXChildVisit_Continue, + }; + + let signature = signature.expect_function_id(ctx); + + match cur.kind() { + CXCursor_Constructor => { + ci.constructors.push(signature); + } + CXCursor_Destructor => { + let kind = if is_virtual { + MethodKind::VirtualDestructor { + pure_virtual: cur.method_is_pure_virtual(), + } + } else { + MethodKind::Destructor + }; + ci.destructor = Some((kind, signature)); + } + CXCursor_CXXMethod => { + let is_const = cur.method_is_const(); + let method_kind = if is_static { + MethodKind::Static + } else if is_virtual { + MethodKind::Virtual { + pure_virtual: cur.method_is_pure_virtual(), + } + } else { + MethodKind::Normal + }; + + let method = + Method::new(method_kind, signature, is_const); + + ci.methods.push(method); + } + _ => unreachable!("How can we see this here?"), + } + } + CXCursor_NonTypeTemplateParameter => { + ci.has_non_type_template_params = true; + } + CXCursor_VarDecl => { + let linkage = cur.linkage(); + if linkage != CXLinkage_External && + linkage != CXLinkage_UniqueExternal + { + return CXChildVisit_Continue; + } + + let visibility = cur.visibility(); + if visibility != CXVisibility_Default { + return CXChildVisit_Continue; + } + + if let Ok(item) = Item::parse(cur, Some(potential_id), ctx) + { + ci.inner_vars.push(item.as_var_id_unchecked()); + } + } + // Intentionally not handled + CXCursor_CXXAccessSpecifier | + CXCursor_CXXFinalAttr | + CXCursor_FunctionTemplate | + CXCursor_ConversionFunction => {} + _ => { + warn!( + "unhandled comp member `{}` (kind {:?}) in `{}` ({})", + cur.spelling(), + clang::kind_to_str(cur.kind()), + cursor.spelling(), + cur.location() + ); + } + } + CXChildVisit_Continue + }); + + 
if let Some((ty, _, public, offset)) = maybe_anonymous_struct_field { + let field = + RawField::new(None, ty, None, None, None, public, offset); + ci.fields.append_raw_field(field); + } + + Ok(ci) + } + + fn kind_from_cursor( + cursor: &clang::Cursor, + ) -> Result<CompKind, ParseError> { + use clang_sys::*; + Ok(match cursor.kind() { + CXCursor_UnionDecl => CompKind::Union, + CXCursor_ClassDecl | CXCursor_StructDecl => CompKind::Struct, + CXCursor_CXXBaseSpecifier | + CXCursor_ClassTemplatePartialSpecialization | + CXCursor_ClassTemplate => match cursor.template_kind() { + CXCursor_UnionDecl => CompKind::Union, + _ => CompKind::Struct, + }, + _ => { + warn!("Unknown kind for comp type: {:?}", cursor); + return Err(ParseError::Continue); + } + }) + } + + /// Get the set of types that were declared within this compound type + /// (e.g. nested class definitions). + pub fn inner_types(&self) -> &[TypeId] { + &self.inner_types + } + + /// Get the set of static variables declared within this compound type. + pub fn inner_vars(&self) -> &[VarId] { + &self.inner_vars + } + + /// Have we found an attribute that clang leaves unexposed (a + /// `CXCursor_UnexposedAttr` cursor) and that could therefore potentially + /// mess up the layout of this compound type? + pub fn found_unknown_attr(&self) -> bool { + self.found_unknown_attr + } + + /// Is this compound type packed? + pub fn is_packed( + &self, + ctx: &BindgenContext, + layout: Option<&Layout>, + ) -> bool { + if self.packed_attr { + return true; + } + + // Even though `libclang` doesn't expose `#pragma pack(...)`, we can + // detect it through its effects.
+ if let Some(parent_layout) = layout { + let mut packed = false; + self.each_known_field_layout(ctx, |layout| { + packed = packed || layout.align > parent_layout.align; + }); + if packed { + info!("Found a struct that was defined within `#pragma packed(...)`"); + return true; + } + + if self.has_own_virtual_method && parent_layout.align == 1 { + return true; + } + } + + false + } + + /// Returns true if this compound type has been forward declared. + pub fn is_forward_declaration(&self) -> bool { + self.is_forward_declaration + } + + /// Compute this compound structure's bitfield allocation units. + pub fn compute_bitfield_units( + &mut self, + ctx: &BindgenContext, + layout: Option<&Layout>, + ) { + let packed = self.is_packed(ctx, layout); + self.fields.compute_bitfield_units(ctx, packed) + } + + /// Assign a generated name to each anonymous field. + pub fn deanonymize_fields(&mut self, ctx: &BindgenContext) { + self.fields.deanonymize_fields(ctx, &self.methods); + } + + /// Returns whether the current union can be represented as a Rust `union`. + /// + /// Requirements: + /// 1. Current RustTarget allows for `untagged_union`. + /// 2. It's not a forward declaration. + /// 3. Each field can derive `Copy`, or the non-`Copy` union style that + /// applies to this union is not `BindgenWrapper` (e.g. ManuallyDrop). + /// 4. It's not zero-sized. + /// + /// Second boolean returns whether all fields can be copied (and thus + /// ManuallyDrop is not needed).
+ pub fn is_rust_union( + &self, + ctx: &BindgenContext, + layout: Option<&Layout>, + name: &str, + ) -> (bool, bool) { + if !self.is_union() { + return (false, false); + } + + if !ctx.options().rust_features().untagged_union { + return (false, false); + } + + if self.is_forward_declaration() { + return (false, false); + } + + let union_style = if ctx.options().bindgen_wrapper_union.matches(name) { + NonCopyUnionStyle::BindgenWrapper + } else if ctx.options().manually_drop_union.matches(name) { + NonCopyUnionStyle::ManuallyDrop + } else { + ctx.options().default_non_copy_union_style + }; + + let all_can_copy = self.fields().iter().all(|f| match *f { + Field::DataMember(ref field_data) => { + field_data.ty().can_derive_copy(ctx) + } + Field::Bitfields(_) => true, + }); + + if !all_can_copy && union_style == NonCopyUnionStyle::BindgenWrapper { + return (false, false); + } + + if layout.map_or(false, |l| l.size == 0) { + return (false, false); + } + + (true, all_can_copy) + } +} + +impl DotAttributes for CompInfo { + fn dot_attributes<W>( + &self, + ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + writeln!(out, "<tr><td>CompKind</td><td>{:?}</td></tr>", self.kind)?; + + if self.has_own_virtual_method { + writeln!(out, "<tr><td>has_vtable</td><td>true</td></tr>")?; + } + + if self.has_destructor { + writeln!(out, "<tr><td>has_destructor</td><td>true</td></tr>")?; + } + + if self.has_nonempty_base { + writeln!(out, "<tr><td>has_nonempty_base</td><td>true</td></tr>")?; + } + + if self.has_non_type_template_params { + writeln!( + out, + "<tr><td>has_non_type_template_params</td><td>true</td></tr>" + )?; + } + + if self.packed_attr { + writeln!(out, "<tr><td>packed_attr</td><td>true</td></tr>")?; + } + + if self.is_forward_declaration { + writeln!( + out, + "<tr><td>is_forward_declaration</td><td>true</td></tr>" + )?; + } + + if !self.fields().is_empty() { + writeln!(out, r#"<tr><td>fields</td><td><table border="0">"#)?; + for field 
in self.fields() { + field.dot_attributes(ctx, out)?; + } + writeln!(out, "</table></td></tr>")?; + } + + Ok(()) + } +} + +impl IsOpaque for CompInfo { + type Extra = Option<Layout>; + + fn is_opaque(&self, ctx: &BindgenContext, layout: &Option<Layout>) -> bool { + if self.has_non_type_template_params || + self.has_unevaluable_bit_field_width + { + return true; + } + + // When we do not have the layout for a bitfield's type (for example, it + // is a type parameter), then we can't compute bitfield units. We are + // left with no choice but to make the whole struct opaque, or else we + // might generate structs with incorrect sizes and alignments. + if let CompFields::Error = self.fields { + return true; + } + + // Bitfields whose width (in bytes) is larger than the size of their + // declared type have some strange things going on, and the best we can + // do is make the whole struct opaque. + if self.fields().iter().any(|f| match *f { + Field::DataMember(_) => false, + Field::Bitfields(ref unit) => unit.bitfields().iter().any(|bf| { + let bitfield_layout = ctx + .resolve_type(bf.ty()) + .layout(ctx) + .expect("Bitfield without layout? Gah!"); + bf.width() / 8 > bitfield_layout.size as u32 + }), + }) { + return true; + } + + if !ctx.options().rust_features().repr_packed_n { + // If we don't have `#[repr(packed(N))]`, the best we can + // do is make this struct opaque. + // + // See https://github.com/rust-lang/rust-bindgen/issues/537 and + // https://github.com/rust-lang/rust/issues/33158 + if self.is_packed(ctx, layout.as_ref()) && + layout.map_or(false, |l| l.align > 1) + { + warn!("Found a type that is both packed and aligned to greater than \ + 1; Rust before version 1.33 doesn't have `#[repr(packed(N))]`, so we \ + are treating it as opaque.
You may wish to set bindgen's rust target \ + version to 1.33 or later to enable `#[repr(packed(N))]` support."); + return true; + } + } + + false + } +} + +impl TemplateParameters for CompInfo { + fn self_template_params(&self, _ctx: &BindgenContext) -> Vec<TypeId> { + self.template_params.clone() + } +} + +impl Trace for CompInfo { + type Extra = Item; + + fn trace<T>(&self, context: &BindgenContext, tracer: &mut T, item: &Item) + where + T: Tracer, + { + for p in item.all_template_params(context) { + tracer.visit_kind(p.into(), EdgeKind::TemplateParameterDefinition); + } + + for ty in self.inner_types() { + tracer.visit_kind(ty.into(), EdgeKind::InnerType); + } + + for &var in self.inner_vars() { + tracer.visit_kind(var.into(), EdgeKind::InnerVar); + } + + for method in self.methods() { + tracer.visit_kind(method.signature.into(), EdgeKind::Method); + } + + if let Some((_kind, signature)) = self.destructor() { + tracer.visit_kind(signature.into(), EdgeKind::Destructor); + } + + for ctor in self.constructors() { + tracer.visit_kind(ctor.into(), EdgeKind::Constructor); + } + + // Base members and fields are not generated for opaque types (but all + // of the above things are) so stop here. + if item.is_opaque(context, &()) { + return; + } + + for base in self.base_members() { + tracer.visit_kind(base.ty.into(), EdgeKind::BaseMember); + } + + self.fields.trace(context, tracer, &()); + } +} diff --git a/third_party/rust/bindgen/ir/context.rs b/third_party/rust/bindgen/ir/context.rs new file mode 100644 index 0000000000..4623b25344 --- /dev/null +++ b/third_party/rust/bindgen/ir/context.rs @@ -0,0 +1,2858 @@ +//! Common context that is passed around during parsing and codegen. 
+ +use super::super::time::Timer; +use super::analysis::{ + analyze, as_cannot_derive_set, CannotDerive, DeriveTrait, + HasDestructorAnalysis, HasFloat, HasTypeParameterInArray, + HasVtableAnalysis, HasVtableResult, SizednessAnalysis, SizednessResult, + UsedTemplateParameters, +}; +use super::derive::{ + CanDerive, CanDeriveCopy, CanDeriveDebug, CanDeriveDefault, CanDeriveEq, + CanDeriveHash, CanDeriveOrd, CanDerivePartialEq, CanDerivePartialOrd, +}; +use super::function::Function; +use super::int::IntKind; +use super::item::{IsOpaque, Item, ItemAncestors, ItemSet}; +use super::item_kind::ItemKind; +use super::module::{Module, ModuleKind}; +use super::template::{TemplateInstantiation, TemplateParameters}; +use super::traversal::{self, Edge, ItemTraversal}; +use super::ty::{FloatKind, Type, TypeKind}; +use crate::clang::{self, Cursor}; +use crate::parse::ClangItemParser; +use crate::BindgenOptions; +use crate::{Entry, HashMap, HashSet}; +use cexpr; +use clang_sys; +use proc_macro2::{Ident, Span, TokenStream}; +use quote::ToTokens; +use std::borrow::Cow; +use std::cell::{Cell, RefCell}; +use std::collections::{BTreeSet, HashMap as StdHashMap}; +use std::iter::IntoIterator; +use std::mem; + +/// An identifier for some kind of IR item. +#[derive(Debug, Copy, Clone, Eq, PartialOrd, Ord, Hash)] +pub struct ItemId(usize); + +macro_rules! item_id_newtype { + ( + $( #[$attr:meta] )* + pub struct $name:ident(ItemId) + where + $( #[$checked_attr:meta] )* + checked = $checked:ident with $check_method:ident, + $( #[$expected_attr:meta] )* + expected = $expected:ident, + $( #[$unchecked_attr:meta] )* + unchecked = $unchecked:ident; + ) => { + $( #[$attr] )* + #[derive(Debug, Copy, Clone, Eq, PartialOrd, Ord, Hash)] + pub struct $name(ItemId); + + impl $name { + /// Create an `ItemResolver` from this id. 
+ pub fn into_resolver(self) -> ItemResolver { + let id: ItemId = self.into(); + id.into() + } + } + + impl<T> ::std::cmp::PartialEq<T> for $name + where + T: Copy + Into<ItemId> + { + fn eq(&self, rhs: &T) -> bool { + let rhs: ItemId = (*rhs).into(); + self.0 == rhs + } + } + + impl From<$name> for ItemId { + fn from(id: $name) -> ItemId { + id.0 + } + } + + impl<'a> From<&'a $name> for ItemId { + fn from(id: &'a $name) -> ItemId { + id.0 + } + } + + impl ItemId { + $( #[$checked_attr] )* + pub fn $checked(&self, ctx: &BindgenContext) -> Option<$name> { + if ctx.resolve_item(*self).kind().$check_method() { + Some($name(*self)) + } else { + None + } + } + + $( #[$expected_attr] )* + pub fn $expected(&self, ctx: &BindgenContext) -> $name { + self.$checked(ctx) + .expect(concat!( + stringify!($expected), + " called with ItemId that points to the wrong ItemKind" + )) + } + + $( #[$unchecked_attr] )* + pub fn $unchecked(&self) -> $name { + $name(*self) + } + } + } +} + +item_id_newtype! { + /// An identifier for an `Item` whose `ItemKind` is known to be + /// `ItemKind::Type`. + pub struct TypeId(ItemId) + where + /// Convert this `ItemId` into a `TypeId` if its associated item is a type, + /// otherwise return `None`. + checked = as_type_id with is_type, + + /// Convert this `ItemId` into a `TypeId`. + /// + /// If this `ItemId` does not point to a type, then panic. + expected = expect_type_id, + + /// Convert this `ItemId` into a `TypeId` without actually checking whether + /// this id actually points to a `Type`. + unchecked = as_type_id_unchecked; +} + +item_id_newtype! { + /// An identifier for an `Item` whose `ItemKind` is known to be + /// `ItemKind::Module`. + pub struct ModuleId(ItemId) + where + /// Convert this `ItemId` into a `ModuleId` if its associated item is a + /// module, otherwise return `None`. + checked = as_module_id with is_module, + + /// Convert this `ItemId` into a `ModuleId`. 
+ /// + /// If this `ItemId` does not point to a module, then panic. + expected = expect_module_id, + + /// Convert this `ItemId` into a `ModuleId` without actually checking + /// whether this id actually points to a `Module`. + unchecked = as_module_id_unchecked; +} + +item_id_newtype! { + /// An identifier for an `Item` whose `ItemKind` is known to be + /// `ItemKind::Var`. + pub struct VarId(ItemId) + where + /// Convert this `ItemId` into a `VarId` if its associated item is a var, + /// otherwise return `None`. + checked = as_var_id with is_var, + + /// Convert this `ItemId` into a `VarId`. + /// + /// If this `ItemId` does not point to a var, then panic. + expected = expect_var_id, + + /// Convert this `ItemId` into a `VarId` without actually checking whether + /// this id actually points to a `Var`. + unchecked = as_var_id_unchecked; +} + +item_id_newtype! { + /// An identifier for an `Item` whose `ItemKind` is known to be + /// `ItemKind::Function`. + pub struct FunctionId(ItemId) + where + /// Convert this `ItemId` into a `FunctionId` if its associated item is a function, + /// otherwise return `None`. + checked = as_function_id with is_function, + + /// Convert this `ItemId` into a `FunctionId`. + /// + /// If this `ItemId` does not point to a function, then panic. + expected = expect_function_id, + + /// Convert this `ItemId` into a `FunctionId` without actually checking whether + /// this id actually points to a `Function`. + unchecked = as_function_id_unchecked; +} + +impl From<ItemId> for usize { + fn from(id: ItemId) -> usize { + id.0 + } +} + +impl ItemId { + /// Get a numeric representation of this id. 
+ pub fn as_usize(&self) -> usize { + (*self).into() + } +} + +impl<T> ::std::cmp::PartialEq<T> for ItemId +where + T: Copy + Into<ItemId>, +{ + fn eq(&self, rhs: &T) -> bool { + let rhs: ItemId = (*rhs).into(); + self.0 == rhs.0 + } +} + +impl<T> CanDeriveDebug for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_debug(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_debug && ctx.lookup_can_derive_debug(*self) + } +} + +impl<T> CanDeriveDefault for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_default(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_default && ctx.lookup_can_derive_default(*self) + } +} + +impl<T> CanDeriveCopy for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_copy(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_copy && ctx.lookup_can_derive_copy(*self) + } +} + +impl<T> CanDeriveHash for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_hash(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_hash && ctx.lookup_can_derive_hash(*self) + } +} + +impl<T> CanDerivePartialOrd for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_partialord(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_partialord && + ctx.lookup_can_derive_partialeq_or_partialord(*self) == + CanDerive::Yes + } +} + +impl<T> CanDerivePartialEq for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_partialeq(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_partialeq && + ctx.lookup_can_derive_partialeq_or_partialord(*self) == + CanDerive::Yes + } +} + +impl<T> CanDeriveEq for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_eq(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_eq && + ctx.lookup_can_derive_partialeq_or_partialord(*self) == + CanDerive::Yes && + !ctx.lookup_has_float(*self) + } +} + +impl<T> CanDeriveOrd for T +where + T: Copy + Into<ItemId>, +{ + fn can_derive_ord(&self, ctx: &BindgenContext) -> bool { + ctx.options().derive_ord && + 
ctx.lookup_can_derive_partialeq_or_partialord(*self) == + CanDerive::Yes && + !ctx.lookup_has_float(*self) + } +} + +/// A key used to index a resolved type, so we only process it once. +/// +/// This is almost always a USR string (a unique identifier generated by +/// clang), but it can also be the canonical declaration if the type is unnamed, +/// in which case clang may generate the same USR for multiple nested unnamed +/// types. +#[derive(Eq, PartialEq, Hash, Debug)] +enum TypeKey { + Usr(String), + Declaration(Cursor), +} + +/// A context used during parsing and generation of structs. +#[derive(Debug)] +pub struct BindgenContext { + /// The list of all the items parsed so far, indexed by `ItemId`. + items: Vec<Option<Item>>, + + /// Clang USR to type map. This is needed to be able to associate types with + /// item ids during parsing. + types: HashMap<TypeKey, TypeId>, + + /// Maps from a cursor to the item id of the named template type parameter + /// for that cursor. + type_params: HashMap<clang::Cursor, TypeId>, + + /// A cursor to module map. Needed for a similar reason as the maps above. + modules: HashMap<Cursor, ModuleId>, + + /// The root module; this is guaranteed to be an item of kind Module. + root_module: ModuleId, + + /// Current module being traversed. + current_module: ModuleId, + + /// A HashMap keyed on a type definition, and whose value is the parent id + /// of the declaration. + /// + /// This is used to handle the cases where the semantic and the lexical + /// parents of the cursor differ, like when a nested class is defined + /// outside of the parent class. + semantic_parents: HashMap<clang::Cursor, ItemId>, + + /// A stack with the current type declarations and types we're parsing. This + /// is needed to avoid infinite recursion when parsing a type like: + /// + /// struct c { struct c* next; }; + /// + /// This means effectively, that a type has a potential ID before knowing if + /// it's a correct type. But that's not important in practice.
+ /// + /// We could also use the `types` HashMap, but my intention with it is that + /// only valid types and declarations end up there, and this could + /// potentially break that assumption. + currently_parsed_types: Vec<PartialType>, + + /// A map with all the already parsed macro names. This is done to avoid + /// hard errors while parsing duplicated macros, as well as to allow macro + /// expression parsing. + /// + /// This needs to be an std::HashMap because the cexpr API requires it. + parsed_macros: StdHashMap<Vec<u8>, cexpr::expr::EvalResult>, + + /// A set of all the included filenames. + deps: BTreeSet<String>, + + /// The active replacements collected from replaces="xxx" annotations. + replacements: HashMap<Vec<String>, ItemId>, + + collected_typerefs: bool, + + in_codegen: bool, + + /// The translation unit for parsing. + translation_unit: clang::TranslationUnit, + + /// Target information that can be useful for some stuff. + target_info: clang::TargetInfo, + + /// The options given by the user via cli or other medium. + options: BindgenOptions, + + /// Whether a bindgen complex was generated + generated_bindgen_complex: Cell<bool>, + + /// The set of `ItemId`s that are allowlisted. This is the very first thing + /// computed after parsing our IR, and before running any of our analyses. + allowlisted: Option<ItemSet>, + + /// Cache for calls to `ParseCallbacks::blocklisted_type_implements_trait` + blocklisted_types_implement_traits: + RefCell<HashMap<DeriveTrait, HashMap<ItemId, CanDerive>>>, + + /// The set of `ItemId`s that are allowlisted for code generation _and_ that + /// we should generate accounting for the codegen options. + /// + /// It's computed right after computing the allowlisted items. + codegen_items: Option<ItemSet>, + + /// Map from an item's id to the set of template parameter items that it + /// uses. See `ir::named` for more details. Always `Some` during the codegen + /// phase.
+ used_template_parameters: Option<HashMap<ItemId, ItemSet>>, + + /// The set of `TypeKind::Comp` items found during parsing that need their + /// bitfield allocation units computed. Drained in `compute_bitfield_units`. + need_bitfield_allocation: Vec<ItemId>, + + /// The set of (`ItemId`s of) types that can't derive debug. + /// + /// This is populated when we enter codegen by `compute_cannot_derive_debug` + /// and is always `None` before that and `Some` after. + cannot_derive_debug: Option<HashSet<ItemId>>, + + /// The set of (`ItemId`s of) types that can't derive default. + /// + /// This is populated when we enter codegen by `compute_cannot_derive_default` + /// and is always `None` before that and `Some` after. + cannot_derive_default: Option<HashSet<ItemId>>, + + /// The set of (`ItemId`s of) types that can't derive copy. + /// + /// This is populated when we enter codegen by `compute_cannot_derive_copy` + /// and is always `None` before that and `Some` after. + cannot_derive_copy: Option<HashSet<ItemId>>, + + /// The set of (`ItemId`s of) types that can't derive hash. + /// + /// This is populated when we enter codegen by `compute_can_derive_hash` + /// and is always `None` before that and `Some` after. + cannot_derive_hash: Option<HashSet<ItemId>>, + + /// A map from (`ItemId`s of) types to the `CanDerive` answer recorded for + /// deriving `PartialEq` or `PartialOrd`. + /// + /// This is populated when we enter codegen by + /// `compute_cannot_derive_partialord_partialeq_or_eq` and is always `None` + /// before that and `Some` after. + cannot_derive_partialeq_or_partialord: Option<HashMap<ItemId, CanDerive>>, + + /// The sizedness of types. + /// + /// This is populated by `compute_sizedness` and is always `None` before + /// that function is invoked and `Some` afterwards. + sizedness: Option<HashMap<TypeId, SizednessResult>>, + + /// A map from (`ItemId`s of) types to their vtable analysis result. + /// + /// Populated when we enter codegen by `compute_has_vtable`; always `None` + /// before that and `Some` after.
+ have_vtable: Option<HashMap<ItemId, HasVtableResult>>, + + /// The set of (`ItemId`s of) types that have a destructor. + /// + /// Populated when we enter codegen by `compute_has_destructor`; always `None` + /// before that and `Some` after. + have_destructor: Option<HashSet<ItemId>>, + + /// The set of (`ItemId`s of) types that have a type parameter in an array. + /// + /// Populated when we enter codegen by `compute_has_type_param_in_array`; always `None` + /// before that and `Some` after. + has_type_param_in_array: Option<HashSet<ItemId>>, + + /// The set of (`ItemId`s of) types that have a float. + /// + /// Populated when we enter codegen by `compute_has_float`; always `None` + /// before that and `Some` after. + has_float: Option<HashSet<ItemId>>, + + /// The set of warnings raised during binding generation. + warnings: Vec<String>, +} + +/// A traversal of allowlisted items, skipping any item that is +/// blocklisted. +struct AllowlistedItemsTraversal<'ctx> { + ctx: &'ctx BindgenContext, + traversal: ItemTraversal<'ctx, ItemSet, Vec<ItemId>>, +} + +impl<'ctx> Iterator for AllowlistedItemsTraversal<'ctx> { + type Item = ItemId; + + fn next(&mut self) -> Option<ItemId> { + loop { + let id = self.traversal.next()?; + + if self.ctx.resolve_item(id).is_blocklisted(self.ctx) { + continue; + } + + return Some(id); + } + } +} + +impl<'ctx> AllowlistedItemsTraversal<'ctx> { + /// Construct a new allowlisted items traversal. + pub fn new<R>( + ctx: &'ctx BindgenContext, + roots: R, + predicate: for<'a> fn(&'a BindgenContext, Edge) -> bool, + ) -> Self + where + R: IntoIterator<Item = ItemId>, + { + AllowlistedItemsTraversal { + ctx, + traversal: ItemTraversal::new(ctx, roots, predicate), + } + } +} + +impl BindgenContext { + /// Construct the context for the given `options`. + pub(crate) fn new( + options: BindgenOptions, + input_unsaved_files: &[clang::UnsavedFile], + ) -> Self { + // TODO(emilio): Use the CXTargetInfo here when available.
+ // + // see: https://reviews.llvm.org/D32389 + let index = clang::Index::new(false, true); + + let parse_options = + clang_sys::CXTranslationUnit_DetailedPreprocessingRecord; + + let translation_unit = { + let _t = + Timer::new("translation_unit").with_output(options.time_phases); + + clang::TranslationUnit::parse( + &index, + "", + &options.clang_args, + input_unsaved_files, + parse_options, + ).expect("libclang error; possible causes include: +- Invalid flag syntax +- Unrecognized flags +- Invalid flag arguments +- File I/O errors +- Host vs. target architecture mismatch +If you encounter an error missing from this list, please file an issue or a PR!") + }; + + let target_info = clang::TargetInfo::new(&translation_unit); + let root_module = Self::build_root_module(ItemId(0)); + let root_module_id = root_module.id().as_module_id_unchecked(); + + // depfiles need to include the explicitly listed headers too + let deps = options.input_headers.iter().cloned().collect(); + + BindgenContext { + items: vec![Some(root_module)], + deps, + types: Default::default(), + type_params: Default::default(), + modules: Default::default(), + root_module: root_module_id, + current_module: root_module_id, + semantic_parents: Default::default(), + currently_parsed_types: vec![], + parsed_macros: Default::default(), + replacements: Default::default(), + collected_typerefs: false, + in_codegen: false, + translation_unit, + target_info, + options, + generated_bindgen_complex: Cell::new(false), + allowlisted: None, + blocklisted_types_implement_traits: Default::default(), + codegen_items: None, + used_template_parameters: None, + need_bitfield_allocation: Default::default(), + cannot_derive_debug: None, + cannot_derive_default: None, + cannot_derive_copy: None, + cannot_derive_hash: None, + cannot_derive_partialeq_or_partialord: None, + sizedness: None, + have_vtable: None, + have_destructor: None, + has_type_param_in_array: None, + has_float: None, + warnings: Vec::new(), + } + } + + 
/// Whether the target triple of the current translation unit names a
/// wasm32 target (i.e. it starts with `wasm32-`).
pub fn is_target_wasm32(&self) -> bool {
    let triple = &self.target_info.triple;
    triple.starts_with("wasm32-")
}

/// Creates a timer for the bindgen phase called `name`.
///
/// The timer's output is enabled according to the `time_phases` option;
/// when enabled it reports to stderr as it is dropped, otherwise it does
/// nothing.
pub fn timer<'a>(&self, name: &'a str) -> Timer<'a> {
    let show_output = self.options.time_phases;
    Timer::new(name).with_output(show_output)
}

/// Returns the pointer size for the target of the current translation,
/// computed as the target's pointer width divided by eight.
pub fn target_pointer_size(&self) -> usize {
    let width = self.target_info.pointer_width;
    width / 8
}

/// The stack of partially parsed types that we are currently in the
/// middle of parsing, outermost first.
pub fn currently_parsed_types(&self) -> &[PartialType] {
    self.currently_parsed_types.as_slice()
}

/// Start parsing `partial_ty`: push it onto the `currently_parsed_types`
/// stack so that a reference to it encountered while parsing it does not
/// send us into infinite recursion.
pub fn begin_parsing(&mut self, partial_ty: PartialType) {
    self.currently_parsed_types.push(partial_ty);
}

/// Finish parsing the current partial type: pop it off the
/// `currently_parsed_types` stack and return it.
///
/// Panics if no type is currently being parsed.
pub fn finish_parsing(&mut self) -> PartialType {
    let finished = self.currently_parsed_types.pop();
    finished.expect(
        "should have been parsing a type, if we finished parsing a type",
    )
}

/// Record `filename` as an included file, notifying every registered
/// parse callback before adding it to the dependency set.
pub fn include_file(&mut self, filename: String) {
    self.options()
        .parse_callbacks
        .iter()
        .for_each(|cb| cb.include_file(&filename));
    self.deps.insert(filename);
}

/// The set of files included so far.
pub fn deps(&self) -> &BTreeSet<String> {
    &self.deps
}

/// Define a new item.
///
/// This inserts it into the internal items set, and its type into the
/// internal types set.
pub fn add_item(
    &mut self,
    item: Item,
    declaration: Option<Cursor>,
    location: Option<Cursor>,
) {
    debug!(
        "BindgenContext::add_item({:?}, declaration: {:?}, loc: {:?}",
        item, declaration, location
    );
    // A type may only come without a declaration cursor when it is a
    // builtin or type parameter, is opaque, or is a not-yet-resolved
    // reference.
    debug_assert!(
        declaration.is_some() ||
            !item.kind().is_type() ||
            item.kind().expect_type().is_builtin_or_type_param() ||
            item.kind().expect_type().is_opaque(self, &item) ||
            item.kind().expect_type().is_unresolved_ref(),
        "Adding a type without declaration?"
    );

    // Capture everything we need from `item` now: it is moved into
    // `self.items` below.
    let id = item.id();
    let is_type = item.kind().is_type();
    let is_unnamed = is_type && item.expect_type().name().is_none();
    let is_template_instantiation =
        is_type && item.expect_type().is_template_instantiation();

    // Must happen before the item is stored: `add_item_to_module` asserts
    // that no item is registered under this id yet.
    if item.id() != self.root_module {
        self.add_item_to_module(&item);
    }

    // Remember compound types so that `compute_bitfield_units` can visit
    // them later.
    if is_type && item.expect_type().is_comp() {
        self.need_bitfield_allocation.push(id);
    }

    let old_item = mem::replace(&mut self.items[id.0], Some(item));
    assert!(
        old_item.is_none(),
        "should not have already associated an item with the given id"
    );

    // Only types are tracked in the declaration -> type map, and template
    // instantiations are never tracked there.
    if !is_type || is_template_instantiation {
        return;
    }
    if let Some(mut declaration) = declaration {
        // Fall back to the location cursor when the declaration is invalid
        // and the location is template-like.
        if !declaration.is_valid() {
            if let Some(location) = location {
                if location.is_template_like() {
                    declaration = location;
                }
            }
        }
        declaration = declaration.canonical();
        if !declaration.is_valid() {
            // This could happen, for example, with types like `int*` or
            // similar.
            //
            // Fortunately, we don't care about those types being
            // duplicated, so we can just ignore them.
            debug!(
                "Invalid declaration {:?} found for type {:?}",
                declaration,
                self.resolve_item_fallible(id)
                    .unwrap()
                    .kind()
                    .expect_type()
            );
            return;
        }

        // Unnamed items can have a USR, but they can't be referenced from
        // other sites explicitly and the USR can match if the unnamed items
        // are nested, so key them by their declaration cursor rather than
        // by USR.
        let key = if is_unnamed {
            TypeKey::Declaration(declaration)
        } else if let Some(usr) = declaration.usr() {
            TypeKey::Usr(usr)
        } else {
            warn!(
                "Valid declaration with no USR: {:?}, {:?}",
                declaration, location
            );
            TypeKey::Declaration(declaration)
        };

        let old = self.types.insert(key, id.as_type_id_unchecked());
        debug_assert_eq!(old, None);
    }
}

/// Ensure that every item (other than the root module) is in a module's
/// children list. This is to make sure that every allowlisted item gets
/// codegen'd, even if its parent is not allowlisted. See issue #769 for
/// details.
///
/// The item is added to its parent's children when the parent is a
/// module, and to the current module's children otherwise.
///
/// # Panics
///
/// Panics if `item` is the root module, or if an item is already stored
/// under `item`'s id (so this must be called before the item is inserted
/// into `self.items`).
fn add_item_to_module(&mut self, item: &Item) {
    assert!(item.id() != self.root_module);
    assert!(self.resolve_item_fallible(item.id()).is_none());

    // Preferred home: the item's own parent, when that parent is a module.
    if let Some(ref mut parent) = self.items[item.parent_id().0] {
        if let Some(module) = parent.as_module_mut() {
            debug!(
                "add_item_to_module: adding {:?} as child of parent module {:?}",
                item.id(),
                item.parent_id()
            );

            module.children_mut().insert(item.id());
            return;
        }
    }

    // Otherwise (parent missing or not a module) fall back to the module
    // we are currently in.
    debug!(
        "add_item_to_module: adding {:?} as child of current module {:?}",
        item.id(),
        self.current_module
    );

    self.items[(self.current_module.0).0]
        .as_mut()
        .expect("Should always have an item for self.current_module")
        .as_module_mut()
        .expect("self.current_module should always be a module")
        .children_mut()
        .insert(item.id());
}

/// Add a new named template type parameter to this context's item set.
+ pub fn add_type_param(&mut self, item: Item, definition: clang::Cursor) { + debug!( + "BindgenContext::add_type_param: item = {:?}; definition = {:?}", + item, definition + ); + + assert!( + item.expect_type().is_type_param(), + "Should directly be a named type, not a resolved reference or anything" + ); + assert_eq!( + definition.kind(), + clang_sys::CXCursor_TemplateTypeParameter + ); + + self.add_item_to_module(&item); + + let id = item.id(); + let old_item = mem::replace(&mut self.items[id.0], Some(item)); + assert!( + old_item.is_none(), + "should not have already associated an item with the given id" + ); + + let old_named_ty = self + .type_params + .insert(definition, id.as_type_id_unchecked()); + assert!( + old_named_ty.is_none(), + "should not have already associated a named type with this id" + ); + } + + /// Get the named type defined at the given cursor location, if we've + /// already added one. + pub fn get_type_param(&self, definition: &clang::Cursor) -> Option<TypeId> { + assert_eq!( + definition.kind(), + clang_sys::CXCursor_TemplateTypeParameter + ); + self.type_params.get(definition).cloned() + } + + // TODO: Move all this syntax crap to other part of the code. + + /// Mangles a name so it doesn't conflict with any keyword. 
+ #[rustfmt::skip] + pub fn rust_mangle<'a>(&self, name: &'a str) -> Cow<'a, str> { + if name.contains('@') || + name.contains('?') || + name.contains('$') || + matches!( + name, + "abstract" | "alignof" | "as" | "async" | "await" | "become" | + "box" | "break" | "const" | "continue" | "crate" | "do" | + "dyn" | "else" | "enum" | "extern" | "false" | "final" | + "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | + "macro" | "match" | "mod" | "move" | "mut" | "offsetof" | + "override" | "priv" | "proc" | "pub" | "pure" | "ref" | + "return" | "Self" | "self" | "sizeof" | "static" | + "struct" | "super" | "trait" | "true" | "try" | "type" | "typeof" | + "unsafe" | "unsized" | "use" | "virtual" | "where" | + "while" | "yield" | "str" | "bool" | "f32" | "f64" | + "usize" | "isize" | "u128" | "i128" | "u64" | "i64" | + "u32" | "i32" | "u16" | "i16" | "u8" | "i8" | "_" + ) + { + let mut s = name.to_owned(); + s = s.replace('@', "_"); + s = s.replace('?', "_"); + s = s.replace('$', "_"); + s.push('_'); + return Cow::Owned(s); + } + Cow::Borrowed(name) + } + + /// Returns a mangled name as a rust identifier. + pub fn rust_ident<S>(&self, name: S) -> Ident + where + S: AsRef<str>, + { + self.rust_ident_raw(self.rust_mangle(name.as_ref())) + } + + /// Returns a mangled name as a rust identifier. + pub fn rust_ident_raw<T>(&self, name: T) -> Ident + where + T: AsRef<str>, + { + Ident::new(name.as_ref(), Span::call_site()) + } + + /// Iterate over all items that have been defined. + pub fn items(&self) -> impl Iterator<Item = (ItemId, &Item)> { + self.items.iter().enumerate().filter_map(|(index, item)| { + let item = item.as_ref()?; + Some((ItemId(index), item)) + }) + } + + /// Have we collected all unresolved type references yet? + pub fn collected_typerefs(&self) -> bool { + self.collected_typerefs + } + + /// Gather all the unresolved type references. 
+ fn collect_typerefs( + &mut self, + ) -> Vec<(ItemId, clang::Type, clang::Cursor, Option<ItemId>)> { + debug_assert!(!self.collected_typerefs); + self.collected_typerefs = true; + let mut typerefs = vec![]; + + for (id, item) in self.items() { + let kind = item.kind(); + let ty = match kind.as_type() { + Some(ty) => ty, + None => continue, + }; + + if let TypeKind::UnresolvedTypeRef(ref ty, loc, parent_id) = + *ty.kind() + { + typerefs.push((id, *ty, loc, parent_id)); + }; + } + typerefs + } + + /// Collect all of our unresolved type references and resolve them. + fn resolve_typerefs(&mut self) { + let _t = self.timer("resolve_typerefs"); + + let typerefs = self.collect_typerefs(); + + for (id, ty, loc, parent_id) in typerefs { + let _resolved = + { + let resolved = Item::from_ty(&ty, loc, parent_id, self) + .unwrap_or_else(|_| { + warn!("Could not resolve type reference, falling back \ + to opaque blob"); + Item::new_opaque_type(self.next_item_id(), &ty, self) + }); + + let item = self.items[id.0].as_mut().unwrap(); + *item.kind_mut().as_type_mut().unwrap().kind_mut() = + TypeKind::ResolvedTypeRef(resolved); + resolved + }; + + // Something in the STL is trolling me. I don't need this assertion + // right now, but worth investigating properly once this lands. + // + // debug_assert!(self.items.get(&resolved).is_some(), "How?"); + // + // if let Some(parent_id) = parent_id { + // assert_eq!(self.items[&resolved].parent_id(), parent_id); + // } + } + } + + /// Temporarily loan `Item` with the given `ItemId`. This provides means to + /// mutably borrow `Item` while having a reference to `BindgenContext`. + /// + /// `Item` with the given `ItemId` is removed from the context, given + /// closure is executed and then `Item` is placed back. + /// + /// # Panics + /// + /// Panics if attempt to resolve given `ItemId` inside the given + /// closure is made. 
fn with_loaned_item<F, T>(&mut self, id: ItemId, f: F) -> T
where
    F: (FnOnce(&BindgenContext, &mut Item) -> T),
{
    // Take the item out of its slot so that the closure can hold `&mut Item`
    // and `&BindgenContext` at the same time. Panics if the slot is empty.
    let mut item = self.items[id.0].take().unwrap();

    let result = f(self, &mut item);

    // Put the item back; the slot must still be empty.
    let existing = mem::replace(&mut self.items[id.0], Some(item));
    assert!(existing.is_none());

    result
}

/// Compute the bitfield allocation units for all `TypeKind::Comp` items we
/// parsed.
///
/// Drains `need_bitfield_allocation`. Panics if type references have not
/// been collected yet.
fn compute_bitfield_units(&mut self) {
    let _t = self.timer("compute_bitfield_units");

    assert!(self.collected_typerefs());

    // Move the pending list out of `self` so that we can mutably borrow
    // `self` while iterating over it.
    let need_bitfield_allocation =
        mem::take(&mut self.need_bitfield_allocation);
    for id in need_bitfield_allocation {
        self.with_loaned_item(id, |ctx, item| {
            let ty = item.kind_mut().as_type_mut().unwrap();
            let layout = ty.layout(ctx);
            ty.as_comp_mut()
                .unwrap()
                .compute_bitfield_units(ctx, layout.as_ref());
        });
    }
}

/// Assign a new generated name for each anonymous field.
fn deanonymize_fields(&mut self) {
    let _t = self.timer("deanonymize_fields");

    // Collect the ids first: the loop below needs `&mut self`, which cannot
    // coexist with the borrow held by `self.items()`.
    let comp_item_ids: Vec<ItemId> = self
        .items()
        .filter_map(|(id, item)| {
            if item.kind().as_type()?.is_comp() {
                return Some(id);
            }
            None
        })
        .collect();

    for id in comp_item_ids {
        self.with_loaned_item(id, |ctx, item| {
            item.kind_mut()
                .as_type_mut()
                .unwrap()
                .as_comp_mut()
                .unwrap()
                .deanonymize_fields(ctx);
        });
    }
}

/// Iterate over all items and replace any item that has been named in a
/// `replaces="SomeType"` annotation with the replacement type.
///
/// The replaced item becomes a `ResolvedTypeRef` to its replacement, and
/// the replacement is re-parented (and moved between module children
/// sets) to where the replaced item was declared.
fn process_replacements(&mut self) {
    let _t = self.timer("process_replacements");
    if self.replacements.is_empty() {
        debug!("No replacements to process");
        return;
    }

    // FIXME: This is linear, but the replaces="xxx" annotation was already
    // there, and for better or worse it's useful, sigh...
    //
    // We leverage the ResolvedTypeRef thing, though, which is cool :P.

    // Phase 1: find every (replaced type, replacement type) pair.
    let mut replacements = vec![];

    for (id, item) in self.items() {
        // Items that are themselves replacements are never replaced.
        if item.annotations().use_instead_of().is_some() {
            continue;
        }

        // Calls to `canonical_name` are expensive, so eagerly filter out
        // items that cannot be replaced.
        let ty = match item.kind().as_type() {
            Some(ty) => ty,
            None => continue,
        };

        match *ty.kind() {
            TypeKind::Comp(..) |
            TypeKind::TemplateAlias(..) |
            TypeKind::Enum(..) |
            TypeKind::Alias(..) => {}
            _ => continue,
        }

        // The first path component is skipped for the lookup.
        let path = item.path_for_allowlisting(self);
        let replacement = self.replacements.get(&path[1..]);

        if let Some(replacement) = replacement {
            if *replacement != id {
                // We set this just after parsing the annotation. It's
                // very unlikely, but this can happen.
                if self.resolve_item_fallible(*replacement).is_some() {
                    replacements.push((
                        id.expect_type_id(self),
                        replacement.expect_type_id(self),
                    ));
                }
            }
        }
    }

    // Phase 2: apply the replacements.
    for (id, replacement_id) in replacements {
        debug!("Replacing {:?} with {:?}", id, replacement_id);
        let new_parent = {
            let item_id: ItemId = id.into();
            let item = self.items[item_id.0].as_mut().unwrap();
            *item.kind_mut().as_type_mut().unwrap().kind_mut() =
                TypeKind::ResolvedTypeRef(replacement_id);
            item.parent_id()
        };

        // Relocate the replacement item from where it was declared, to
        // where the thing it is replacing was declared.
        //
        // First, we'll make sure that its parent id is correct.

        let old_parent = self.resolve_item(replacement_id).parent_id();
        if new_parent == old_parent {
            // Same parent and therefore also same containing
            // module. Nothing to do here.
            continue;
        }

        let replacement_item_id: ItemId = replacement_id.into();
        self.items[replacement_item_id.0]
            .as_mut()
            .unwrap()
            .set_parent_for_replacement(new_parent);

        // Second, make sure that it is in the correct module's children
        // set.

        // The old module is the ancestor of the old parent (or the root
        // module) that currently lists the replacement among its children.
        let old_module = {
            let immut_self = &*self;
            old_parent
                .ancestors(immut_self)
                .chain(Some(immut_self.root_module.into()))
                .find(|id| {
                    let item = immut_self.resolve_item(*id);
                    item.as_module().map_or(false, |m| {
                        m.children().contains(&replacement_id.into())
                    })
                })
        };
        let old_module = old_module
            .expect("Every replacement item should be in a module");

        // The new module is the nearest module ancestor of the new parent,
        // defaulting to the root module.
        let new_module = {
            let immut_self = &*self;
            new_parent
                .ancestors(immut_self)
                .find(|id| immut_self.resolve_item(*id).is_module())
        };
        let new_module =
            new_module.unwrap_or_else(|| self.root_module.into());

        if new_module == old_module {
            // Already in the correct module.
            continue;
        }

        self.items[old_module.0]
            .as_mut()
            .unwrap()
            .as_module_mut()
            .unwrap()
            .children_mut()
            .remove(&replacement_id.into());

        self.items[new_module.0]
            .as_mut()
            .unwrap()
            .as_module_mut()
            .unwrap()
            .children_mut()
            .insert(replacement_id.into());
    }
}

/// Enter the code generation phase, invoke the given callback `cb`, and
/// leave the code generation phase.
///
/// Consumes the context and returns the callback's result together with
/// the options and the warnings collected so far.
pub(crate) fn gen<F, Out>(
    mut self,
    cb: F,
) -> (Out, BindgenOptions, Vec<String>)
where
    F: FnOnce(&Self) -> Out,
{
    self.in_codegen = true;

    // `compute_bitfield_units` asserts that typerefs were collected, so
    // `resolve_typerefs` has to run first.
    self.resolve_typerefs();
    self.compute_bitfield_units();
    self.process_replacements();

    self.deanonymize_fields();

    self.assert_no_dangling_references();

    // Compute the allowlisted set after processing replacements and
    // resolving type refs, as those are the final mutations of the IR
    // graph, and their completion means that the IR graph is now frozen.
    self.compute_allowlisted_and_codegen_items();

    // Make sure to do this after processing replacements, since that messes
    // with the parentage and module children, and we want to assert that it
    // messes with them correctly.
    self.assert_every_item_in_a_module();

    self.compute_has_vtable();
    self.compute_sizedness();
    self.compute_has_destructor();
    self.find_used_template_parameters();
    self.compute_cannot_derive_debug();
    self.compute_cannot_derive_default();
    self.compute_cannot_derive_copy();
    self.compute_has_type_param_in_array();
    self.compute_has_float();
    self.compute_cannot_derive_hash();
    self.compute_cannot_derive_partialord_partialeq_or_eq();

    let ret = cb(&self);
    (ret, self.options, self.warnings)
}

/// When the `testing_only_extra_assertions` feature is enabled, this
/// function walks the IR graph and asserts that we do not have any edges
/// referencing an ItemId for which we do not have an associated IR item.
fn assert_no_dangling_references(&self) {
    if cfg!(feature = "testing_only_extra_assertions") {
        for _ in self.assert_no_dangling_item_traversal() {
            // The iterator's next method does the asserting for us.
        }
    }
}

/// Build the traversal used by `assert_no_dangling_references`: it starts
/// from every defined item and follows all edges.
///
/// Panics if we are not in the codegen phase or if the current module is
/// not the root module.
fn assert_no_dangling_item_traversal(
    &self,
) -> traversal::AssertNoDanglingItemsTraversal {
    assert!(self.in_codegen_phase());
    assert!(self.current_module == self.root_module);

    let roots = self.items().map(|(id, _)| id);
    traversal::AssertNoDanglingItemsTraversal::new(
        self,
        roots,
        traversal::all_edges,
    )
}

/// When the `testing_only_extra_assertions` feature is enabled, walk over
/// every item and ensure that it is in the children set of one of its
/// module ancestors.
fn assert_every_item_in_a_module(&self) {
    if cfg!(feature = "testing_only_extra_assertions") {
        assert!(self.in_codegen_phase());
        assert!(self.current_module == self.root_module);

        for (id, _item) in self.items() {
            // The root module has no parent module to be a child of.
            if id == self.root_module {
                continue;
            }

            assert!(
                {
                    // Look through type references and aliases first, then
                    // check the resolved item against each of its ancestors
                    // (plus the root module).
                    let id = id
                        .into_resolver()
                        .through_type_refs()
                        .through_type_aliases()
                        .resolve(self)
                        .id();
                    id.ancestors(self)
                        .chain(Some(self.root_module.into()))
                        .any(|ancestor| {
                            debug!(
                                "Checking if {:?} is a child of {:?}",
                                id, ancestor
                            );
                            self.resolve_item(ancestor)
                                .as_module()
                                .map_or(false, |m| {
                                    m.children().contains(&id)
                                })
                        })
                },
                "{:?} should be in some ancestor module's children set",
                id
            );
        }
    }
}

/// Compute for every type whether it is sized or not, and whether it is
/// sized or not as a base class.
///
/// Panics if sizedness has already been computed.
fn compute_sizedness(&mut self) {
    let _t = self.timer("compute_sizedness");
    assert!(self.sizedness.is_none());
    self.sizedness = Some(analyze::<SizednessAnalysis>(self));
}

/// Look up whether the type with the given id is sized or not.
///
/// Types without a recorded result are reported as
/// `SizednessResult::ZeroSized`.
///
/// Panics if called outside the codegen phase.
pub fn lookup_sizedness(&self, id: TypeId) -> SizednessResult {
    assert!(
        self.in_codegen_phase(),
        "We only compute sizedness after we've entered codegen"
    );

    self.sizedness
        .as_ref()
        .unwrap()
        .get(&id)
        .cloned()
        .unwrap_or(SizednessResult::ZeroSized)
}

/// Compute whether the type has vtable.
///
/// Panics if this has already been computed.
fn compute_has_vtable(&mut self) {
    let _t = self.timer("compute_has_vtable");
    assert!(self.have_vtable.is_none());
    self.have_vtable = Some(analyze::<HasVtableAnalysis>(self));
}

/// Look up whether the item with `id` has vtable or not.
///
/// Items without a recorded result are reported as `HasVtableResult::No`.
///
/// Panics if called outside the codegen phase.
pub fn lookup_has_vtable(&self, id: TypeId) -> HasVtableResult {
    assert!(
        self.in_codegen_phase(),
        "We only compute vtables when we enter codegen"
    );

    // Look up the computed value for whether the item with `id` has a
    // vtable or not.
    self.have_vtable
        .as_ref()
        .unwrap()
        .get(&id.into())
        .cloned()
        .unwrap_or(HasVtableResult::No)
}

/// Compute whether the type has a destructor.
///
/// Panics if this has already been computed.
fn compute_has_destructor(&mut self) {
    let _t = self.timer("compute_has_destructor");
    assert!(self.have_destructor.is_none());
    self.have_destructor = Some(analyze::<HasDestructorAnalysis>(self));
}

/// Look up whether the item with `id` has a destructor.
///
/// Panics if called outside the codegen phase.
pub fn lookup_has_destructor(&self, id: TypeId) -> bool {
    assert!(
        self.in_codegen_phase(),
        "We only compute destructors when we enter codegen"
    );

    self.have_destructor.as_ref().unwrap().contains(&id.into())
}

/// Compute, for each item, the set of template parameters it uses, and
/// store the result in `used_template_parameters`.
///
/// With recursive allowlisting enabled this runs the
/// `UsedTemplateParameters` analysis; otherwise every allowlisted item is
/// simply recorded as using all of its own template parameters.
fn find_used_template_parameters(&mut self) {
    let _t = self.timer("find_used_template_parameters");
    if self.options.allowlist_recursively {
        let used_params = analyze::<UsedTemplateParameters>(self);
        self.used_template_parameters = Some(used_params);
    } else {
        // If you aren't recursively allowlisting, then we can't really make
        // any sense of template parameter usage, and you're on your own.
        let mut used_params = HashMap::default();
        for &id in self.allowlisted_items() {
            used_params.entry(id).or_insert_with(|| {
                id.self_template_params(self)
                    .into_iter()
                    .map(|p| p.into())
                    .collect()
            });
        }
        self.used_template_parameters = Some(used_params);
    }
}

/// Return `true` if `item` uses the given `template_param`, `false`
/// otherwise.
///
/// This method may only be called during the codegen phase, because the
/// template usage information is only computed as we enter the codegen
/// phase.
///
/// If the item is blocklisted, then we say that it always uses the template
/// parameter. This is a little subtle. The template parameter usage
/// analysis only considers allowlisted items, and if any blocklisted item
/// shows up in the generated bindings, it is the user's responsibility to
/// manually provide a definition for them. To give them the most
/// flexibility when doing that, we assume that they use every template
/// parameter and always pass template arguments through in instantiations.
pub fn uses_template_parameter(
    &self,
    item: ItemId,
    template_param: TypeId,
) -> bool {
    assert!(
        self.in_codegen_phase(),
        "We only compute template parameter usage as we enter codegen"
    );

    if self.resolve_item(item).is_blocklisted(self) {
        return true;
    }

    // Look through type references and aliases before checking the set of
    // used parameters.
    let template_param = template_param
        .into_resolver()
        .through_type_refs()
        .through_type_aliases()
        .resolve(self)
        .id();

    self.used_template_parameters
        .as_ref()
        .expect("should have found template parameter usage if we're in codegen")
        .get(&item)
        .map_or(false, |items_used_params| items_used_params.contains(&template_param))
}

/// Return `true` if `item` uses any unbound, generic template parameters,
/// `false` otherwise.
///
/// Has the same restrictions that `uses_template_parameter` has.
pub fn uses_any_template_parameters(&self, item: ItemId) -> bool {
    assert!(
        self.in_codegen_phase(),
        "We only compute template parameter usage as we enter codegen"
    );

    self.used_template_parameters
        .as_ref()
        .expect(
            "should have template parameter usage info in codegen phase",
        )
        .get(&item)
        .map_or(false, |used| !used.is_empty())
}

// This deserves a comment. Builtin types don't get a valid declaration, so
// we can't add it to the cursor->type map.
//
// That being said, they're not generated anyway, and are few, so the
// duplication and special-casing is fine.
//
// If at some point we care about the memory here, probably a map TypeKind
// -> builtin type ItemId would be the best to improve that.
+ fn add_builtin_item(&mut self, item: Item) { + debug!("add_builtin_item: item = {:?}", item); + debug_assert!(item.kind().is_type()); + self.add_item_to_module(&item); + let id = item.id(); + let old_item = mem::replace(&mut self.items[id.0], Some(item)); + assert!(old_item.is_none(), "Inserted type twice?"); + } + + fn build_root_module(id: ItemId) -> Item { + let module = Module::new(Some("root".into()), ModuleKind::Normal); + Item::new(id, None, None, id, ItemKind::Module(module), None) + } + + /// Get the root module. + pub fn root_module(&self) -> ModuleId { + self.root_module + } + + /// Resolve a type with the given id. + /// + /// Panics if there is no item for the given `TypeId` or if the resolved + /// item is not a `Type`. + pub fn resolve_type(&self, type_id: TypeId) -> &Type { + self.resolve_item(type_id).kind().expect_type() + } + + /// Resolve a function with the given id. + /// + /// Panics if there is no item for the given `FunctionId` or if the resolved + /// item is not a `Function`. + pub fn resolve_func(&self, func_id: FunctionId) -> &Function { + self.resolve_item(func_id).kind().expect_function() + } + + /// Resolve the given `ItemId` as a type, or `None` if there is no item with + /// the given id. + /// + /// Panics if the id resolves to an item that is not a type. + pub fn safe_resolve_type(&self, type_id: TypeId) -> Option<&Type> { + self.resolve_item_fallible(type_id) + .map(|t| t.kind().expect_type()) + } + + /// Resolve the given `ItemId` into an `Item`, or `None` if no such item + /// exists. + pub fn resolve_item_fallible<Id: Into<ItemId>>( + &self, + id: Id, + ) -> Option<&Item> { + self.items.get(id.into().0)?.as_ref() + } + + /// Resolve the given `ItemId` into an `Item`. + /// + /// Panics if the given id does not resolve to any item. 
+ pub fn resolve_item<Id: Into<ItemId>>(&self, item_id: Id) -> &Item { + let item_id = item_id.into(); + match self.resolve_item_fallible(item_id) { + Some(item) => item, + None => panic!("Not an item: {:?}", item_id), + } + } + + /// Get the current module. + pub fn current_module(&self) -> ModuleId { + self.current_module + } + + /// Add a semantic parent for a given type definition. + /// + /// We do this from the type declaration, in order to be able to find the + /// correct type definition afterwards. + /// + /// TODO(emilio): We could consider doing this only when + /// declaration.lexical_parent() != definition.lexical_parent(), but it's + /// not sure it's worth it. + pub fn add_semantic_parent( + &mut self, + definition: clang::Cursor, + parent_id: ItemId, + ) { + self.semantic_parents.insert(definition, parent_id); + } + + /// Returns a known semantic parent for a given definition. + pub fn known_semantic_parent( + &self, + definition: clang::Cursor, + ) -> Option<ItemId> { + self.semantic_parents.get(&definition).cloned() + } + + /// Given a cursor pointing to the location of a template instantiation, + /// return a tuple of the form `(declaration_cursor, declaration_id, + /// num_expected_template_args)`. + /// + /// Note that `declaration_id` is not guaranteed to be in the context's item + /// set! It is possible that it is a partial type that we are still in the + /// middle of parsing. 
fn get_declaration_info_for_template_instantiation(
    &self,
    instantiation: &Cursor,
) -> Option<(Cursor, ItemId, usize)> {
    // First try the canonical declaration of the instantiation's type, if
    // we have already resolved it. Declarations with zero template
    // parameters yield `None`.
    instantiation
        .cur_type()
        .canonical_declaration(Some(instantiation))
        .and_then(|canon_decl| {
            self.get_resolved_type(&canon_decl).and_then(
                |template_decl_id| {
                    let num_params =
                        template_decl_id.num_self_template_params(self);
                    if num_params == 0 {
                        None
                    } else {
                        Some((
                            *canon_decl.cursor(),
                            template_decl_id.into(),
                            num_params,
                        ))
                    }
                },
            )
        })
        .or_else(|| {
            // If we haven't already parsed the declaration of
            // the template being instantiated, then it *must*
            // be on the stack of types we are currently
            // parsing. If it wasn't then clang would have
            // already errored out before we started
            // constructing our IR because you can't instantiate
            // a template until it is fully defined.
            instantiation
                .referenced()
                .and_then(|referenced| {
                    self.currently_parsed_types()
                        .iter()
                        .find(|partial_ty| *partial_ty.decl() == referenced)
                        .cloned()
                })
                .and_then(|template_decl| {
                    let num_template_params =
                        template_decl.num_self_template_params(self);
                    if num_template_params == 0 {
                        None
                    } else {
                        Some((
                            *template_decl.decl(),
                            template_decl.id(),
                            num_template_params,
                        ))
                    }
                })
        })
}

/// Parse a template instantiation, e.g. `Foo<int>`.
///
/// This is surprisingly difficult to do with libclang, due to the fact that
/// it doesn't provide explicit template argument information, except for
/// function template declarations(!?!??!).
///
/// The only way to do this is manually inspecting the AST and looking for
/// TypeRefs and TemplateRefs inside. This, unfortunately, doesn't work for
/// more complex cases, see the comment on the assertion below.
///
/// To add insult to injury, the AST itself has structure that doesn't make
/// sense. Sometimes `Foo<Bar<int>>` has an AST with nesting like you might
/// expect: `(Foo (Bar (int)))`. Other times, the AST we get is completely
/// flat: `(Foo Bar int)`.
///
/// To see an example of what this method handles:
///
/// ```c++
/// template<typename T>
/// class Incomplete {
///   T p;
/// };
///
/// template<typename U>
/// class Foo {
///   Incomplete<U> bar;
/// };
/// ```
///
/// Finally, template instantiations are always children of the current
/// module. They use their template's definition for their name, so the
/// parent is only useful for ensuring that their layout tests get
/// codegen'd.
///
/// Returns `None` (after logging a warning) when the template has no
/// known template parameters, when a non-type (const value) argument is
/// found, or when the number of collected arguments does not match the
/// number of expected ones. On success the new item is stored under
/// `with_id` and that id is returned as a `TypeId`.
fn instantiate_template(
    &mut self,
    with_id: ItemId,
    template: TypeId,
    ty: &clang::Type,
    location: clang::Cursor,
) -> Option<TypeId> {
    let num_expected_args =
        self.resolve_type(template).num_self_template_params(self);
    if num_expected_args == 0 {
        warn!(
            "Tried to instantiate a template for which we could not \
             determine any template parameters"
        );
        return None;
    }

    // `args` is built in reverse (children are walked back to front) and
    // flipped into source order at the end.
    let mut args = vec![];
    let mut found_const_arg = false;
    let mut children = location.collect_children();

    if children.iter().all(|c| !c.has_children()) {
        // This is insanity... If clang isn't giving us a properly nested
        // AST for which template arguments belong to which template we are
        // instantiating, we'll need to construct it ourselves. However,
        // there is an extra `NamespaceRef, NamespaceRef, ..., TemplateRef`
        // representing a reference to the outermost template declaration
        // that we need to filter out of the children. We need to do this
        // filtering because we already know which template declaration is
        // being specialized via the `location`'s type, and if we do not
        // filter it out, we'll add an extra layer of template instantiation
        // by accident.
        let idx = children
            .iter()
            .position(|c| c.kind() == clang_sys::CXCursor_TemplateRef);
        if let Some(idx) = idx {
            if children
                .iter()
                .take(idx)
                .all(|c| c.kind() == clang_sys::CXCursor_NamespaceRef)
            {
                children = children.into_iter().skip(idx + 1).collect();
            }
        }
    }

    for child in children.iter().rev() {
        match child.kind() {
            clang_sys::CXCursor_TypeRef |
            clang_sys::CXCursor_TypedefDecl |
            clang_sys::CXCursor_TypeAliasDecl => {
                // The `with_id` id will potentially end up unused if we give up
                // on this type (for example, because it has const value
                // template args), so if we pass `with_id` as the parent, it is
                // potentially a dangling reference. Instead, use the canonical
                // template declaration as the parent. It is already parsed and
                // has a known-resolvable `ItemId`.
                let ty = Item::from_ty_or_ref(
                    child.cur_type(),
                    *child,
                    Some(template.into()),
                    self,
                );
                args.push(ty);
            }
            clang_sys::CXCursor_TemplateRef => {
                let (
                    template_decl_cursor,
                    template_decl_id,
                    num_expected_template_args,
                ) = self.get_declaration_info_for_template_instantiation(
                    child,
                )?;

                if num_expected_template_args == 0 ||
                    child.has_at_least_num_children(
                        num_expected_template_args,
                    )
                {
                    // Do a happy little parse. See comment in the TypeRef
                    // match arm about parent IDs.
                    let ty = Item::from_ty_or_ref(
                        child.cur_type(),
                        *child,
                        Some(template.into()),
                        self,
                    );
                    args.push(ty);
                } else {
                    // This is the case mentioned in the doc comment where
                    // clang gives us a flattened AST and we have to
                    // reconstruct which template arguments go to which
                    // instantiation :(
                    let args_len = args.len();
                    if args_len < num_expected_template_args {
                        warn!(
                            "Found a template instantiation without \
                             enough template arguments"
                        );
                        return None;
                    }

                    // The most recently pushed arguments belong to this
                    // nested template; take them off the end and restore
                    // their source order.
                    let mut sub_args: Vec<_> = args
                        .drain(args_len - num_expected_template_args..)
                        .collect();
                    sub_args.reverse();

                    let sub_name = Some(template_decl_cursor.spelling());
                    let sub_inst = TemplateInstantiation::new(
                        // This isn't guaranteed to be a type that we've
                        // already finished parsing yet.
                        template_decl_id.as_type_id_unchecked(),
                        sub_args,
                    );
                    let sub_kind =
                        TypeKind::TemplateInstantiation(sub_inst);
                    let sub_ty = Type::new(
                        sub_name,
                        template_decl_cursor
                            .cur_type()
                            .fallible_layout(self)
                            .ok(),
                        sub_kind,
                        false,
                    );
                    let sub_id = self.next_item_id();
                    let sub_item = Item::new(
                        sub_id,
                        None,
                        None,
                        self.current_module.into(),
                        ItemKind::Type(sub_ty),
                        Some(child.location()),
                    );

                    // Bypass all the validations in add_item explicitly.
                    debug!(
                        "instantiate_template: inserting nested \
                         instantiation item: {:?}",
                        sub_item
                    );
                    self.add_item_to_module(&sub_item);
                    debug_assert_eq!(sub_id, sub_item.id());
                    self.items[sub_id.0] = Some(sub_item);
                    args.push(sub_id.as_type_id_unchecked());
                }
            }
            _ => {
                // Any other cursor kind is treated as a non-type argument;
                // keep scanning, but remember that we have to give up.
                warn!(
                    "Found template arg cursor we can't handle: {:?}",
                    child
                );
                found_const_arg = true;
            }
        }
    }

    if found_const_arg {
        // This is a dependently typed template instantiation. That is, an
        // instantiation of a template with one or more const values as
        // template arguments, rather than only types as template
        // arguments. For example, `Foo<true, 5>` versus `Bar<bool, int>`.
        // We can't handle these instantiations, so just punt in this
        // situation...
        warn!(
            "Found template instantiated with a const value; \
             bindgen can't handle this kind of template instantiation!"
        );
        return None;
    }

    if args.len() != num_expected_args {
        warn!(
            "Found a template with an unexpected number of template \
             arguments"
        );
        return None;
    }

    // Arguments were collected back to front; restore source order.
    args.reverse();
    let type_kind = TypeKind::TemplateInstantiation(
        TemplateInstantiation::new(template, args),
    );
    let name = ty.spelling();
    let name = if name.is_empty() { None } else { Some(name) };
    let ty = Type::new(
        name,
        ty.fallible_layout(self).ok(),
        type_kind,
        ty.is_const(),
    );
    let item = Item::new(
        with_id,
        None,
        None,
        self.current_module.into(),
        ItemKind::Type(ty),
        Some(location.location()),
    );

    // Bypass all the validations in add_item explicitly.
    debug!("instantiate_template: inserting item: {:?}", item);
    self.add_item_to_module(&item);
    debug_assert_eq!(with_id, item.id());
    self.items[with_id.0] = Some(item);
    Some(with_id.as_type_id_unchecked())
}

/// If we have already resolved the type for the given type declaration,
/// return its `ItemId`. Otherwise, return `None`.
///
/// The lookup is attempted by declaration cursor first and then, failing
/// that, by the cursor's USR.
pub fn get_resolved_type(
    &self,
    decl: &clang::CanonicalTypeDeclaration,
) -> Option<TypeId> {
    self.types
        .get(&TypeKey::Declaration(*decl.cursor()))
        .or_else(|| {
            decl.cursor()
                .usr()
                .and_then(|usr| self.types.get(&TypeKey::Usr(usr)))
        })
        .cloned()
}

/// Looks up for an already resolved type, either because it's builtin, or
/// because we already have it in the map.
pub fn builtin_or_resolved_ty(
    &mut self,
    with_id: ItemId,
    parent_id: Option<ItemId>,
    ty: &clang::Type,
    location: Option<clang::Cursor>,
) -> Option<TypeId> {
    use clang_sys::{CXCursor_TypeAliasTemplateDecl, CXCursor_TypeRef};
    debug!(
        "builtin_or_resolved_ty: {:?}, {:?}, {:?}, {:?}",
        ty, location, with_id, parent_id
    );

    if let Some(decl) = ty.canonical_declaration(location.as_ref()) {
        if let Some(id) = self.get_resolved_type(&decl) {
            debug!(
                "Already resolved ty {:?}, {:?}, {:?} {:?}",
                id, decl, ty, location
            );
            // If the declaration already exists, then either:
            //
            // * the declaration is a template declaration of some sort,
            //   and we are looking at an instantiation or specialization
            //   of it, or
            // * we have already parsed and resolved this type, and
            //   there's nothing left to do.
            if let Some(location) = location {
                if decl.cursor().is_template_like() &&
                    *ty != decl.cursor().cur_type()
                {
                    // For specialized type aliases, there's no way to get the
                    // template parameters as of this writing (for a struct
                    // specialization we wouldn't be in this branch anyway).
                    //
                    // Explicitly return `None` if there aren't any
                    // unspecialized parameters (contains any `TypeRef`) so we
                    // resolve the canonical type if there is one and it's
                    // exposed.
                    //
                    // This is _tricky_, I know :(
                    if decl.cursor().kind() ==
                        CXCursor_TypeAliasTemplateDecl &&
                        !location.contains_cursor(CXCursor_TypeRef) &&
                        ty.canonical_type().is_valid_and_exposed()
                    {
                        return None;
                    }

                    // If the instantiation cannot be built, fall back to the
                    // id of the already-resolved declaration itself.
                    return self
                        .instantiate_template(with_id, id, ty, location)
                        .or(Some(id));
                }
            }

            // Plain already-resolved type: register a `ResolvedTypeRef`
            // wrapper under `with_id` that points at the existing item.
            return Some(self.build_ty_wrapper(with_id, id, parent_id, ty));
        }
    }

    // No resolved declaration was found; the only remaining option handled
    // here is a builtin type.
    debug!("Not resolved, maybe builtin?");
    self.build_builtin_ty(ty)
}

/// Make a new item that is a resolved type reference to the `wrapped_id`.
///
/// This is unfortunately a lot of bloat, but is needed to properly track
/// constness et al.
///
/// We should probably make the constness tracking separate, so it doesn't
/// bloat that much, but hey, we already bloat the heck out of builtin
/// types.
pub fn build_ty_wrapper(
    &mut self,
    with_id: ItemId,
    wrapped_id: TypeId,
    parent_id: Option<ItemId>,
    ty: &clang::Type,
) -> TypeId {
    // The wrapper inherits its constness from the clang type.
    self.build_wrapper(with_id, wrapped_id, parent_id, ty, ty.is_const())
}

/// A wrapper over a type that adds a const qualifier explicitly.
///
/// Needed to handle const methods in C++, wrapping the type.
pub fn build_const_wrapper(
    &mut self,
    with_id: ItemId,
    wrapped_id: TypeId,
    parent_id: Option<ItemId>,
    ty: &clang::Type,
) -> TypeId {
    self.build_wrapper(
        with_id, wrapped_id, parent_id, ty, /* is_const = */ true,
    )
}

/// Shared implementation of `build_ty_wrapper` and `build_const_wrapper`.
///
/// Creates a `TypeKind::ResolvedTypeRef(wrapped_id)` type with the spelling
/// and (if available) layout of `ty` and the given `is_const`, registers it
/// as the item `with_id` via `add_builtin_item`, and returns `with_id` as a
/// `TypeId`. The item's parent is `parent_id`, or the current module when
/// `parent_id` is `None`.
fn build_wrapper(
    &mut self,
    with_id: ItemId,
    wrapped_id: TypeId,
    parent_id: Option<ItemId>,
    ty: &clang::Type,
    is_const: bool,
) -> TypeId {
    let spelling = ty.spelling();
    let layout = ty.fallible_layout(self).ok();
    let location = ty.declaration().location();
    let type_kind = TypeKind::ResolvedTypeRef(wrapped_id);
    let ty = Type::new(Some(spelling), layout, type_kind, is_const);
    let item = Item::new(
        with_id,
        None,
        None,
        parent_id.unwrap_or_else(|| self.current_module.into()),
        ItemKind::Type(ty),
        Some(location),
    );
    self.add_builtin_item(item);
    with_id.as_type_id_unchecked()
}

/// Returns the next item id to be used for an item.
+ pub fn next_item_id(&mut self) -> ItemId { + let ret = ItemId(self.items.len()); + self.items.push(None); + ret + } + + fn build_builtin_ty(&mut self, ty: &clang::Type) -> Option<TypeId> { + use clang_sys::*; + let type_kind = match ty.kind() { + CXType_NullPtr => TypeKind::NullPtr, + CXType_Void => TypeKind::Void, + CXType_Bool => TypeKind::Int(IntKind::Bool), + CXType_Int => TypeKind::Int(IntKind::Int), + CXType_UInt => TypeKind::Int(IntKind::UInt), + CXType_Char_S => TypeKind::Int(IntKind::Char { is_signed: true }), + CXType_Char_U => TypeKind::Int(IntKind::Char { is_signed: false }), + CXType_SChar => TypeKind::Int(IntKind::SChar), + CXType_UChar => TypeKind::Int(IntKind::UChar), + CXType_Short => TypeKind::Int(IntKind::Short), + CXType_UShort => TypeKind::Int(IntKind::UShort), + CXType_WChar => TypeKind::Int(IntKind::WChar), + CXType_Char16 => TypeKind::Int(IntKind::U16), + CXType_Char32 => TypeKind::Int(IntKind::U32), + CXType_Long => TypeKind::Int(IntKind::Long), + CXType_ULong => TypeKind::Int(IntKind::ULong), + CXType_LongLong => TypeKind::Int(IntKind::LongLong), + CXType_ULongLong => TypeKind::Int(IntKind::ULongLong), + CXType_Int128 => TypeKind::Int(IntKind::I128), + CXType_UInt128 => TypeKind::Int(IntKind::U128), + CXType_Float => TypeKind::Float(FloatKind::Float), + CXType_Double => TypeKind::Float(FloatKind::Double), + CXType_LongDouble => TypeKind::Float(FloatKind::LongDouble), + CXType_Float128 => TypeKind::Float(FloatKind::Float128), + CXType_Complex => { + let float_type = + ty.elem_type().expect("Not able to resolve complex type?"); + let float_kind = match float_type.kind() { + CXType_Float => FloatKind::Float, + CXType_Double => FloatKind::Double, + CXType_LongDouble => FloatKind::LongDouble, + CXType_Float128 => FloatKind::Float128, + _ => panic!( + "Non floating-type complex? 
{:?}, {:?}", + ty, float_type, + ), + }; + TypeKind::Complex(float_kind) + } + _ => return None, + }; + + let spelling = ty.spelling(); + let is_const = ty.is_const(); + let layout = ty.fallible_layout(self).ok(); + let location = ty.declaration().location(); + let ty = Type::new(Some(spelling), layout, type_kind, is_const); + let id = self.next_item_id(); + let item = Item::new( + id, + None, + None, + self.root_module.into(), + ItemKind::Type(ty), + Some(location), + ); + self.add_builtin_item(item); + Some(id.as_type_id_unchecked()) + } + + /// Get the current Clang translation unit that is being processed. + pub fn translation_unit(&self) -> &clang::TranslationUnit { + &self.translation_unit + } + + /// Have we parsed the macro named `macro_name` already? + pub fn parsed_macro(&self, macro_name: &[u8]) -> bool { + self.parsed_macros.contains_key(macro_name) + } + + /// Get the currently parsed macros. + pub fn parsed_macros( + &self, + ) -> &StdHashMap<Vec<u8>, cexpr::expr::EvalResult> { + debug_assert!(!self.in_codegen_phase()); + &self.parsed_macros + } + + /// Mark the macro named `macro_name` as parsed. + pub fn note_parsed_macro( + &mut self, + id: Vec<u8>, + value: cexpr::expr::EvalResult, + ) { + self.parsed_macros.insert(id, value); + } + + /// Are we in the codegen phase? + pub fn in_codegen_phase(&self) -> bool { + self.in_codegen + } + + /// Mark the type with the given `name` as replaced by the type with id + /// `potential_ty`. + /// + /// Replacement types are declared using the `replaces="xxx"` annotation, + /// and implies that the original type is hidden. 
pub fn replace(&mut self, name: &[String], potential_ty: ItemId) {
    // Only the first replacement registered for a given path wins; later
    // ones are reported with a warning and otherwise ignored.
    match self.replacements.entry(name.into()) {
        Entry::Vacant(entry) => {
            debug!(
                "Defining replacement for {:?} as {:?}",
                name, potential_ty
            );
            entry.insert(potential_ty);
        }
        Entry::Occupied(occupied) => {
            warn!(
                "Replacement for {:?} already defined as {:?}; \
                 ignoring duplicate replacement definition as {:?}",
                name,
                occupied.get(),
                potential_ty
            );
        }
    }
}

/// Has the item with the given `path` and `id` been replaced by another
/// type?
///
/// Returns `true` only when a replacement is registered for `path` and that
/// replacement is a different item than `id`.
pub fn is_replaced_type<Id: Into<ItemId>>(
    &self,
    path: &[String],
    id: Id,
) -> bool {
    let id = id.into();
    matches!(self.replacements.get(path), Some(replaced_by) if *replaced_by != id)
}

/// Is the type with the given `path` marked as opaque?
///
/// The first component of `path` is skipped and the rest is joined with
/// `::` before matching against the `opaque_types` option, so `path` must
/// not be empty. Debug builds assert that we are in the codegen phase.
pub fn opaque_by_name(&self, path: &[String]) -> bool {
    debug_assert!(
        self.in_codegen_phase(),
        "You're not supposed to call this yet"
    );
    self.options.opaque_types.matches(path[1..].join("::"))
}

/// Get the options used to configure this bindgen context.
pub(crate) fn options(&self) -> &BindgenOptions {
    &self.options
}

/// Tokenizes a namespace cursor in order to get the name and kind of the
/// namespace.
///
/// The name starts out as the cursor's spelling (if non-empty) and is only
/// taken from the tokens when the spelling was empty. Panics if `cursor` is
/// not a `CXCursor_Namespace` cursor.
fn tokenize_namespace(
    &self,
    cursor: &clang::Cursor,
) -> (Option<String>, ModuleKind) {
    assert_eq!(
        cursor.kind(),
        ::clang_sys::CXCursor_Namespace,
        "Be a nice person"
    );

    let mut module_name = None;
    let spelling = cursor.spelling();
    if !spelling.is_empty() {
        module_name = Some(spelling)
    }

    let mut kind = ModuleKind::Normal;
    // Set once a token has been seen after which the namespace name (or the
    // opening brace of an anonymous namespace) is expected.
    let mut looking_for_name = false;
    for token in cursor.tokens().iter() {
        match token.spelling() {
            b"inline" => {
                debug_assert!(
                    kind != ModuleKind::Inline,
                    "Multiple inline keywords?"
                );
                kind = ModuleKind::Inline;
                // When hitting a nested inline namespace we get a spelling
                // that looks like ["inline", "foo"]. Deal with it properly.
                looking_for_name = true;
            }
            // The double colon allows us to handle nested namespaces like
            // namespace foo::bar { }
            //
            // libclang still gives us two namespace cursors, which is cool,
            // but the tokenization of the second begins with the double
            // colon. That's ok, so we only need to handle the weird
            // tokenization here.
            b"namespace" | b"::" => {
                looking_for_name = true;
            }
            b"{" => {
                // This should be an anonymous namespace.
                assert!(looking_for_name);
                break;
            }
            name => {
                if looking_for_name {
                    // Keep the cursor spelling if there was one; the token
                    // is only used as a fallback name.
                    if module_name.is_none() {
                        module_name = Some(
                            String::from_utf8_lossy(name).into_owned(),
                        );
                    }
                    break;
                } else {
                    // This is _likely_, but not certainly, a macro that's
                    // been placed just before the namespace keyword.
                    // Unfortunately, clang tokens don't let us easily see
                    // through the ifdef tokens, so we don't know what this
                    // token should really be. Instead of panicking though,
                    // we warn the user that we assumed the token was blank,
                    // and then move on.
                    //
                    // See also https://github.com/rust-lang/rust-bindgen/issues/1676.
                    warn!(
                        "Ignored unknown namespace prefix '{}' at {:?} in {:?}",
                        String::from_utf8_lossy(name),
                        token,
                        cursor
                    );
                }
            }
        }
    }

    (module_name, kind)
}

/// Given a CXCursor_Namespace cursor, return the item id of the
/// corresponding module, or create one on the fly.
+ pub fn module(&mut self, cursor: clang::Cursor) -> ModuleId { + use clang_sys::*; + assert_eq!(cursor.kind(), CXCursor_Namespace, "Be a nice person"); + let cursor = cursor.canonical(); + if let Some(id) = self.modules.get(&cursor) { + return *id; + } + + let (module_name, kind) = self.tokenize_namespace(&cursor); + + let module_id = self.next_item_id(); + let module = Module::new(module_name, kind); + let module = Item::new( + module_id, + None, + None, + self.current_module.into(), + ItemKind::Module(module), + Some(cursor.location()), + ); + + let module_id = module.id().as_module_id_unchecked(); + self.modules.insert(cursor, module_id); + + self.add_item(module, None, None); + + module_id + } + + /// Start traversing the module with the given `module_id`, invoke the + /// callback `cb`, and then return to traversing the original module. + pub fn with_module<F>(&mut self, module_id: ModuleId, cb: F) + where + F: FnOnce(&mut Self), + { + debug_assert!(self.resolve_item(module_id).kind().is_module(), "Wat"); + + let previous_id = self.current_module; + self.current_module = module_id; + + cb(self); + + self.current_module = previous_id; + } + + /// Iterate over all (explicitly or transitively) allowlisted items. + /// + /// If no items are explicitly allowlisted, then all items are considered + /// allowlisted. + pub fn allowlisted_items(&self) -> &ItemSet { + assert!(self.in_codegen_phase()); + assert!(self.current_module == self.root_module); + + self.allowlisted.as_ref().unwrap() + } + + /// Check whether a particular blocklisted type implements a trait or not. + /// Results may be cached. 
pub fn blocklisted_type_implements_trait(
    &self,
    item: &Item,
    derive_trait: DeriveTrait,
) -> CanDerive {
    assert!(self.in_codegen_phase());
    assert!(self.current_module == self.root_module);

    // The answer is cached per (trait, item id); the closure below only
    // runs the first time a given pair is queried.
    *self
        .blocklisted_types_implement_traits
        .borrow_mut()
        .entry(derive_trait)
        .or_default()
        .entry(item.id())
        .or_insert_with(|| {
            item.expect_type()
                .name()
                .and_then(|name| {
                    if self.options.parse_callbacks.is_empty() {
                        // Sized integer types from <stdint.h> get mapped to Rust primitive
                        // types regardless of whether they are blocklisted, so ensure that
                        // standard traits are considered derivable for them too.
                        if self.is_stdint_type(name) {
                            Some(CanDerive::Yes)
                        } else {
                            Some(CanDerive::No)
                        }
                    } else {
                        // Otherwise defer to the user-provided callbacks.
                        self.options.last_callback(|cb| {
                            cb.blocklisted_type_implements_trait(
                                name,
                                derive_trait,
                            )
                        })
                    }
                })
                // Unnamed types, or no answer from the callbacks.
                .unwrap_or(CanDerive::No)
        })
}

/// Is the given type a type from <stdint.h> that corresponds to a Rust primitive type?
///
/// `size_t` and `ssize_t` only count when the `size_t_is_usize` option is
/// set.
pub fn is_stdint_type(&self, name: &str) -> bool {
    match name {
        "int8_t" | "uint8_t" | "int16_t" | "uint16_t" | "int32_t" |
        "uint32_t" | "int64_t" | "uint64_t" | "uintptr_t" |
        "intptr_t" | "ptrdiff_t" => true,
        "size_t" | "ssize_t" => self.options.size_t_is_usize,
        _ => false,
    }
}

/// Get a reference to the set of items we should generate.
///
/// Panics unless we are in the codegen phase, back at the root module, and
/// the set has already been computed.
pub fn codegen_items(&self) -> &ItemSet {
    assert!(self.in_codegen_phase());
    assert!(self.current_module == self.root_module);
    self.codegen_items.as_ref().unwrap()
}

/// Compute the allowlisted items set and populate `self.allowlisted`.
///
/// Also populates `self.codegen_items`, and records a warning for every
/// `--allowlist-function`, `--allowlist-var` and `--allowlist-type` pattern
/// that did not match anything.
fn compute_allowlisted_and_codegen_items(&mut self) {
    assert!(self.in_codegen_phase());
    assert!(self.current_module == self.root_module);
    assert!(self.allowlisted.is_none());
    let _t = self.timer("compute_allowlisted_and_codegen_items");

    let roots = {
        let mut roots = self
            .items()
            // Only consider roots that are enabled for codegen.
            .filter(|&(_, item)| item.is_enabled_for_codegen(self))
            .filter(|&(_, item)| {
                // If nothing is explicitly allowlisted, then everything is fair
                // game.
                if self.options().allowlisted_types.is_empty() &&
                    self.options().allowlisted_functions.is_empty() &&
                    self.options().allowlisted_vars.is_empty() &&
                    self.options().allowlisted_files.is_empty()
                {
                    return true;
                }

                // If this is a type that explicitly replaces another, we assume
                // you know what you're doing.
                if item.annotations().use_instead_of().is_some() {
                    return true;
                }

                // Items with a source location in an explicitly allowlisted file
                // are always included.
                if !self.options().allowlisted_files.is_empty() {
                    if let Some(location) = item.location() {
                        let (file, _, _, _) = location.location();
                        if let Some(filename) = file.name() {
                            if self
                                .options()
                                .allowlisted_files
                                .matches(filename)
                            {
                                return true;
                            }
                        }
                    }
                }

                // The first path component is dropped before matching
                // against the allowlist patterns.
                let name = item.path_for_allowlisting(self)[1..].join("::");
                debug!("allowlisted_items: testing {:?}", name);
                match *item.kind() {
                    ItemKind::Module(..) => true,
                    ItemKind::Function(_) => {
                        self.options().allowlisted_functions.matches(&name)
                    }
                    ItemKind::Var(_) => {
                        self.options().allowlisted_vars.matches(&name)
                    }
                    ItemKind::Type(ref ty) => {
                        if self.options().allowlisted_types.matches(&name) {
                            return true;
                        }

                        // Auto-allowlist types that don't need code
                        // generation if not allowlisting recursively, to
                        // make the #[derive] analysis not be lame.
                        if !self.options().allowlist_recursively {
                            match *ty.kind() {
                                TypeKind::Void |
                                TypeKind::NullPtr |
                                TypeKind::Int(..) |
                                TypeKind::Float(..) |
                                TypeKind::Complex(..) |
                                TypeKind::Array(..) |
                                TypeKind::Vector(..) |
                                TypeKind::Pointer(..) |
                                TypeKind::Reference(..) |
                                TypeKind::Function(..) |
                                TypeKind::ResolvedTypeRef(..) |
                                TypeKind::Opaque |
                                TypeKind::TypeParam => return true,
                                _ => {}
                            }
                            if self.is_stdint_type(&name) {
                                return true;
                            }
                        }

                        // Unnamed top-level enums are special and we
                        // allowlist them via the `allowlisted_vars` filter,
                        // since they're effectively top-level constants,
                        // and there's no way for them to be referenced
                        // consistently.
                        let parent = self.resolve_item(item.parent_id());
                        if !parent.is_module() {
                            return false;
                        }

                        let enum_ = match *ty.kind() {
                            TypeKind::Enum(ref e) => e,
                            _ => return false,
                        };

                        if ty.name().is_some() {
                            return false;
                        }

                        // Test each variant as `<parent path>::<variant>`;
                        // the variant name is pushed and popped so the same
                        // prefix vector is reused for every variant.
                        let mut prefix_path =
                            parent.path_for_allowlisting(self).clone();
                        enum_.variants().iter().any(|variant| {
                            prefix_path.push(
                                variant.name_for_allowlisting().into(),
                            );
                            let name = prefix_path[1..].join("::");
                            prefix_path.pop().unwrap();
                            self.options().allowlisted_vars.matches(name)
                        })
                    }
                }
            })
            .map(|(id, _)| id)
            .collect::<Vec<_>>();

        // The reversal preserves the expected ordering of traversal,
        // resulting in more stable-ish bindgen-generated names for
        // anonymous types (like unions).
        roots.reverse();
        roots
    };

    let allowlisted_items_predicate =
        if self.options().allowlist_recursively {
            traversal::all_edges
        } else {
            // Only follow InnerType edges from the allowlisted roots.
            // Such inner types (e.g. anonymous structs/unions) are
            // always emitted by codegen, and they need to be allowlisted
            // to make sure they are processed by e.g. the derive analysis.
            traversal::only_inner_type_edges
        };

    let allowlisted = AllowlistedItemsTraversal::new(
        self,
        roots.clone(),
        allowlisted_items_predicate,
    )
    .collect::<ItemSet>();

    // When allowlisting recursively, the codegen set comes from a second
    // traversal that follows codegen edges; otherwise it is exactly the
    // allowlisted set.
    let codegen_items = if self.options().allowlist_recursively {
        AllowlistedItemsTraversal::new(
            self,
            roots,
            traversal::codegen_edges,
        )
        .collect::<ItemSet>()
    } else {
        allowlisted.clone()
    };

    self.allowlisted = Some(allowlisted);
    self.codegen_items = Some(codegen_items);

    // Collect the messages first, then log and store them in a second pass.
    let mut warnings = Vec::new();

    for item in self.options().allowlisted_functions.unmatched_items() {
        warnings
            .push(format!("unused option: --allowlist-function {}", item));
    }

    for item in self.options().allowlisted_vars.unmatched_items() {
        warnings.push(format!("unused option: --allowlist-var {}", item));
    }

    for item in self.options().allowlisted_types.unmatched_items() {
        warnings.push(format!("unused option: --allowlist-type {}", item));
    }

    for msg in warnings {
        warn!("{}", msg);
        self.warnings.push(msg);
    }
}

/// Convenient method for getting the prefix to use for most traits in
/// codegen depending on the `use_core` option.
///
/// Yields the identifier `core` when `use_core` is set and `std` otherwise.
pub fn trait_prefix(&self) -> Ident {
    if self.options().use_core {
        self.rust_ident_raw("core")
    } else {
        self.rust_ident_raw("std")
    }
}

/// Call if a bindgen complex is generated
pub fn generated_bindgen_complex(&self) {
    self.generated_bindgen_complex.set(true)
}

/// Whether we need to generate the bindgen complex type
///
/// This is `true` once `generated_bindgen_complex` has been called.
pub fn need_bindgen_complex_type(&self) -> bool {
    self.generated_bindgen_complex.get()
}

/// Compute whether we can derive debug.
+ fn compute_cannot_derive_debug(&mut self) { + let _t = self.timer("compute_cannot_derive_debug"); + assert!(self.cannot_derive_debug.is_none()); + if self.options.derive_debug { + self.cannot_derive_debug = + Some(as_cannot_derive_set(analyze::<CannotDerive>(( + self, + DeriveTrait::Debug, + )))); + } + } + + /// Look up whether the item with `id` can + /// derive debug or not. + pub fn lookup_can_derive_debug<Id: Into<ItemId>>(&self, id: Id) -> bool { + let id = id.into(); + assert!( + self.in_codegen_phase(), + "We only compute can_derive_debug when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` can + // derive debug or not. + !self.cannot_derive_debug.as_ref().unwrap().contains(&id) + } + + /// Compute whether we can derive default. + fn compute_cannot_derive_default(&mut self) { + let _t = self.timer("compute_cannot_derive_default"); + assert!(self.cannot_derive_default.is_none()); + if self.options.derive_default { + self.cannot_derive_default = + Some(as_cannot_derive_set(analyze::<CannotDerive>(( + self, + DeriveTrait::Default, + )))); + } + } + + /// Look up whether the item with `id` can + /// derive default or not. + pub fn lookup_can_derive_default<Id: Into<ItemId>>(&self, id: Id) -> bool { + let id = id.into(); + assert!( + self.in_codegen_phase(), + "We only compute can_derive_default when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` can + // derive default or not. + !self.cannot_derive_default.as_ref().unwrap().contains(&id) + } + + /// Compute whether we can derive copy. + fn compute_cannot_derive_copy(&mut self) { + let _t = self.timer("compute_cannot_derive_copy"); + assert!(self.cannot_derive_copy.is_none()); + self.cannot_derive_copy = + Some(as_cannot_derive_set(analyze::<CannotDerive>(( + self, + DeriveTrait::Copy, + )))); + } + + /// Compute whether we can derive hash. 
+ fn compute_cannot_derive_hash(&mut self) { + let _t = self.timer("compute_cannot_derive_hash"); + assert!(self.cannot_derive_hash.is_none()); + if self.options.derive_hash { + self.cannot_derive_hash = + Some(as_cannot_derive_set(analyze::<CannotDerive>(( + self, + DeriveTrait::Hash, + )))); + } + } + + /// Look up whether the item with `id` can + /// derive hash or not. + pub fn lookup_can_derive_hash<Id: Into<ItemId>>(&self, id: Id) -> bool { + let id = id.into(); + assert!( + self.in_codegen_phase(), + "We only compute can_derive_debug when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` can + // derive hash or not. + !self.cannot_derive_hash.as_ref().unwrap().contains(&id) + } + + /// Compute whether we can derive PartialOrd, PartialEq or Eq. + fn compute_cannot_derive_partialord_partialeq_or_eq(&mut self) { + let _t = self.timer("compute_cannot_derive_partialord_partialeq_or_eq"); + assert!(self.cannot_derive_partialeq_or_partialord.is_none()); + if self.options.derive_partialord || + self.options.derive_partialeq || + self.options.derive_eq + { + self.cannot_derive_partialeq_or_partialord = + Some(analyze::<CannotDerive>(( + self, + DeriveTrait::PartialEqOrPartialOrd, + ))); + } + } + + /// Look up whether the item with `id` can derive `Partial{Eq,Ord}`. + pub fn lookup_can_derive_partialeq_or_partialord<Id: Into<ItemId>>( + &self, + id: Id, + ) -> CanDerive { + let id = id.into(); + assert!( + self.in_codegen_phase(), + "We only compute can_derive_partialeq_or_partialord when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` can + // derive partialeq or not. + self.cannot_derive_partialeq_or_partialord + .as_ref() + .unwrap() + .get(&id) + .cloned() + .unwrap_or(CanDerive::Yes) + } + + /// Look up whether the item with `id` can derive `Copy` or not. 
+ pub fn lookup_can_derive_copy<Id: Into<ItemId>>(&self, id: Id) -> bool { + assert!( + self.in_codegen_phase(), + "We only compute can_derive_debug when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` can + // derive `Copy` or not. + let id = id.into(); + + !self.lookup_has_type_param_in_array(id) && + !self.cannot_derive_copy.as_ref().unwrap().contains(&id) + } + + /// Compute whether the type has type parameter in array. + fn compute_has_type_param_in_array(&mut self) { + let _t = self.timer("compute_has_type_param_in_array"); + assert!(self.has_type_param_in_array.is_none()); + self.has_type_param_in_array = + Some(analyze::<HasTypeParameterInArray>(self)); + } + + /// Look up whether the item with `id` has type parameter in array or not. + pub fn lookup_has_type_param_in_array<Id: Into<ItemId>>( + &self, + id: Id, + ) -> bool { + assert!( + self.in_codegen_phase(), + "We only compute has array when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` has + // type parameter in array or not. + self.has_type_param_in_array + .as_ref() + .unwrap() + .contains(&id.into()) + } + + /// Compute whether the type has float. + fn compute_has_float(&mut self) { + let _t = self.timer("compute_has_float"); + assert!(self.has_float.is_none()); + if self.options.derive_eq || self.options.derive_ord { + self.has_float = Some(analyze::<HasFloat>(self)); + } + } + + /// Look up whether the item with `id` has array or not. + pub fn lookup_has_float<Id: Into<ItemId>>(&self, id: Id) -> bool { + assert!( + self.in_codegen_phase(), + "We only compute has float when we enter codegen" + ); + + // Look up the computed value for whether the item with `id` has + // float or not. + self.has_float.as_ref().unwrap().contains(&id.into()) + } + + /// Check if `--no-partialeq` flag is enabled for this item. 
pub fn no_partialeq_by_name(&self, item: &Item) -> bool {
    // As with the other `*_by_name` checks below, the first path component
    // is skipped and the rest is joined with `::` before matching.
    let name = item.path_for_allowlisting(self)[1..].join("::");
    self.options().no_partialeq_types.matches(name)
}

/// Check if `--no-copy` flag is enabled for this item.
pub fn no_copy_by_name(&self, item: &Item) -> bool {
    let name = item.path_for_allowlisting(self)[1..].join("::");
    self.options().no_copy_types.matches(name)
}

/// Check if `--no-debug` flag is enabled for this item.
pub fn no_debug_by_name(&self, item: &Item) -> bool {
    let name = item.path_for_allowlisting(self)[1..].join("::");
    self.options().no_debug_types.matches(name)
}

/// Check if `--no-default` flag is enabled for this item.
pub fn no_default_by_name(&self, item: &Item) -> bool {
    let name = item.path_for_allowlisting(self)[1..].join("::");
    self.options().no_default_types.matches(name)
}

/// Check if `--no-hash` flag is enabled for this item.
pub fn no_hash_by_name(&self, item: &Item) -> bool {
    let name = item.path_for_allowlisting(self)[1..].join("::");
    self.options().no_hash_types.matches(name)
}

/// Check if `--must-use-type` flag is enabled for this item.
pub fn must_use_type_by_name(&self, item: &Item) -> bool {
    let name = item.path_for_allowlisting(self)[1..].join("::");
    self.options().must_use_types.matches(name)
}

/// Wrap `tokens` in an `unsafe { ... }` block when the `wrap_unsafe_ops`
/// option is set; otherwise return the tokens unchanged.
pub(crate) fn wrap_unsafe_ops(&self, tokens: impl ToTokens) -> TokenStream {
    if self.options.wrap_unsafe_ops {
        quote!(unsafe { #tokens })
    } else {
        tokens.into_token_stream()
    }
}
// Closes the enclosing `impl` block, which starts above this excerpt.
}

/// A builder struct for configuring item resolution options.
#[derive(Debug, Copy, Clone)]
pub struct ItemResolver {
    // The item id at which resolution starts.
    id: ItemId,
    // Whether `resolve` follows `TypeKind::ResolvedTypeRef` items.
    through_type_refs: bool,
    // Whether `resolve` follows `TypeKind::Alias` items.
    through_type_aliases: bool,
}

impl ItemId {
    /// Create an `ItemResolver` from this item id.
    pub fn into_resolver(self) -> ItemResolver {
        self.into()
    }
}

/// Anything convertible into an `ItemId` can be turned into a resolver with
/// both "resolve through" options disabled.
impl<T> From<T> for ItemResolver
where
    T: Into<ItemId>,
{
    fn from(id: T) -> ItemResolver {
        ItemResolver::new(id)
    }
}

impl ItemResolver {
    /// Construct a new `ItemResolver` from the given id.
    ///
    /// Both `through_type_refs` and `through_type_aliases` start disabled.
    pub fn new<Id: Into<ItemId>>(id: Id) -> ItemResolver {
        let id = id.into();
        ItemResolver {
            id,
            through_type_refs: false,
            through_type_aliases: false,
        }
    }

    /// Keep resolving through `TypeKind::ResolvedTypeRef` items.
    pub fn through_type_refs(mut self) -> ItemResolver {
        self.through_type_refs = true;
        self
    }

    /// Keep resolving through `TypeKind::Alias` items.
    pub fn through_type_aliases(mut self) -> ItemResolver {
        self.through_type_aliases = true;
        self
    }

    /// Finish configuring and perform the actual item resolution.
    ///
    /// Starting from the configured id, follows resolved type references
    /// and/or aliases (as enabled) until reaching an item of another kind,
    /// and returns that item. If an id is visited twice, the item at that id
    /// is returned instead of looping forever.
    pub fn resolve(self, ctx: &BindgenContext) -> &Item {
        assert!(ctx.collected_typerefs());

        let mut id = self.id;
        let mut seen_ids = HashSet::default();
        loop {
            let item = ctx.resolve_item(id);

            // Detect cycles and bail out. These can happen in certain cases
            // involving incomplete qualified dependent types (#2085).
            if !seen_ids.insert(id) {
                return item;
            }

            let ty_kind = item.as_type().map(|t| t.kind());
            match ty_kind {
                Some(&TypeKind::ResolvedTypeRef(next_id))
                    if self.through_type_refs =>
                {
                    id = next_id.into();
                }
                // We intentionally ignore template aliases here, as they are
                // more complicated, and don't represent a simple renaming of
                // some type.
                Some(&TypeKind::Alias(next_id))
                    if self.through_type_aliases =>
                {
                    id = next_id.into();
                }
                _ => return item,
            }
        }
    }
}

/// A type that we are in the middle of parsing.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct PartialType {
    decl: Cursor,
    // Just an ItemId, and not a TypeId, because we haven't finished this type
    // yet, so there's still time for things to go wrong.
    id: ItemId,
}

impl PartialType {
    /// Construct a new `PartialType`.
    pub fn new(decl: Cursor, id: ItemId) -> PartialType {
        // assert!(decl == decl.canonical());
        PartialType { decl, id }
    }

    /// The cursor pointing to this partial type's declaration location.
    pub fn decl(&self) -> &Cursor {
        &self.decl
    }

    /// The item ID allocated for this type. This is *NOT* a key for an entry in
    /// the context's item set yet!
    pub fn id(&self) -> ItemId {
        self.id
    }
}

impl TemplateParameters for PartialType {
    fn self_template_params(&self, _ctx: &BindgenContext) -> Vec<TypeId> {
        // Maybe at some point we will eagerly parse named types, but for now we
        // don't and this information is unavailable.
        vec![]
    }

    fn num_self_template_params(&self, _ctx: &BindgenContext) -> usize {
        // Wouldn't it be nice if libclang would reliably give us this
        // information‽
        match self.decl().kind() {
            clang_sys::CXCursor_ClassTemplate |
            clang_sys::CXCursor_FunctionTemplate |
            clang_sys::CXCursor_TypeAliasTemplateDecl => {
                // Count the template parameter cursors (type, template
                // template and non-type) that the visitor is called with;
                // the visitor always asks to continue.
                let mut num_params = 0;
                self.decl().visit(|c| {
                    match c.kind() {
                        clang_sys::CXCursor_TemplateTypeParameter |
                        clang_sys::CXCursor_TemplateTemplateParameter |
                        clang_sys::CXCursor_NonTypeTemplateParameter => {
                            num_params += 1;
                        }
                        _ => {}
                    };
                    clang_sys::CXChildVisit_Continue
                });
                num_params
            }
            // Any other kind of declaration reports no template parameters.
            _ => 0,
        }
    }
}
diff --git a/third_party/rust/bindgen/ir/derive.rs b/third_party/rust/bindgen/ir/derive.rs
new file mode 100644
index 0000000000..594ce2ab8f
--- /dev/null
+++ b/third_party/rust/bindgen/ir/derive.rs
@@ -0,0 +1,135 @@
//! Traits for determining whether we can derive traits for a thing or not.
//!
//! These traits tend to come in pairs:
//!
//! 1. A "trivial" version, whose implementations aren't allowed to recursively
//!    look at other types or the results of fix point analyses.
//!
//! 2. A "normal" version, whose implementations simply query the results of a
//!    fix point analysis.
//!
+//! The former is used by the analyses when creating the results queried by the +//! second. + +use super::context::BindgenContext; + +use std::cmp; +use std::ops; + +/// A trait that encapsulates the logic for whether or not we can derive `Debug` +/// for a given thing. +pub trait CanDeriveDebug { + /// Return `true` if `Debug` can be derived for this thing, `false` + /// otherwise. + fn can_derive_debug(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive `Copy` +/// for a given thing. +pub trait CanDeriveCopy { + /// Return `true` if `Copy` can be derived for this thing, `false` + /// otherwise. + fn can_derive_copy(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive +/// `Default` for a given thing. +pub trait CanDeriveDefault { + /// Return `true` if `Default` can be derived for this thing, `false` + /// otherwise. + fn can_derive_default(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive `Hash` +/// for a given thing. +pub trait CanDeriveHash { + /// Return `true` if `Hash` can be derived for this thing, `false` + /// otherwise. + fn can_derive_hash(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive +/// `PartialEq` for a given thing. +pub trait CanDerivePartialEq { + /// Return `true` if `PartialEq` can be derived for this thing, `false` + /// otherwise. + fn can_derive_partialeq(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive +/// `PartialOrd` for a given thing. +pub trait CanDerivePartialOrd { + /// Return `true` if `PartialOrd` can be derived for this thing, `false` + /// otherwise. 
+ fn can_derive_partialord(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive `Eq` +/// for a given thing. +pub trait CanDeriveEq { + /// Return `true` if `Eq` can be derived for this thing, `false` otherwise. + fn can_derive_eq(&self, ctx: &BindgenContext) -> bool; +} + +/// A trait that encapsulates the logic for whether or not we can derive `Ord` +/// for a given thing. +pub trait CanDeriveOrd { + /// Return `true` if `Ord` can be derived for this thing, `false` otherwise. + fn can_derive_ord(&self, ctx: &BindgenContext) -> bool; +} + +/// Whether it is possible or not to automatically derive trait for an item. +/// +/// ```ignore +/// No +/// ^ +/// | +/// Manually +/// ^ +/// | +/// Yes +/// ``` +/// +/// Initially we assume that we can derive trait for all types and then +/// update our understanding as we learn more about each type. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum CanDerive { + /// Yes, we can derive automatically. + Yes, + + /// The only thing that stops us from automatically deriving is that + /// array with more than maximum number of elements is used. + /// + /// This means we probably can "manually" implement such trait. + Manually, + + /// No, we cannot. + No, +} + +impl Default for CanDerive { + fn default() -> CanDerive { + CanDerive::Yes + } +} + +impl CanDerive { + /// Take the least upper bound of `self` and `rhs`. + pub fn join(self, rhs: Self) -> Self { + cmp::max(self, rhs) + } +} + +impl ops::BitOr for CanDerive { + type Output = Self; + + fn bitor(self, rhs: Self) -> Self::Output { + self.join(rhs) + } +} + +impl ops::BitOrAssign for CanDerive { + fn bitor_assign(&mut self, rhs: Self) { + *self = self.join(rhs) + } +} diff --git a/third_party/rust/bindgen/ir/dot.rs b/third_party/rust/bindgen/ir/dot.rs new file mode 100644 index 0000000000..f7d07f19e2 --- /dev/null +++ b/third_party/rust/bindgen/ir/dot.rs @@ -0,0 +1,86 @@ +//! 
Generating Graphviz `dot` files from our IR. + +use super::context::{BindgenContext, ItemId}; +use super::traversal::Trace; +use std::fs::File; +use std::io::{self, Write}; +use std::path::Path; + +/// A trait for anything that can write attributes as `<table>` rows to a dot +/// file. +pub trait DotAttributes { + /// Write this thing's attributes to the given output. Each attribute must + /// be its own `<tr>...</tr>`. + fn dot_attributes<W>( + &self, + ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write; +} + +/// Write a graphviz dot file containing our IR. +pub fn write_dot_file<P>(ctx: &BindgenContext, path: P) -> io::Result<()> +where + P: AsRef<Path>, +{ + let file = File::create(path)?; + let mut dot_file = io::BufWriter::new(file); + writeln!(&mut dot_file, "digraph {{")?; + + let mut err: Option<io::Result<_>> = None; + + for (id, item) in ctx.items() { + let is_allowlisted = ctx.allowlisted_items().contains(&id); + + writeln!( + &mut dot_file, + r#"{} [fontname="courier", color={}, label=< <table border="0" align="left">"#, + id.as_usize(), + if is_allowlisted { "black" } else { "gray" } + )?; + item.dot_attributes(ctx, &mut dot_file)?; + writeln!(&mut dot_file, r#"</table> >];"#)?; + + item.trace( + ctx, + &mut |sub_id: ItemId, edge_kind| { + if err.is_some() { + return; + } + + match writeln!( + &mut dot_file, + "{} -> {} [label={:?}, color={}];", + id.as_usize(), + sub_id.as_usize(), + edge_kind, + if is_allowlisted { "black" } else { "gray" } + ) { + Ok(_) => {} + Err(e) => err = Some(Err(e)), + } + }, + &(), + ); + + if let Some(err) = err { + return err; + } + + if let Some(module) = item.as_module() { + for child in module.children() { + writeln!( + &mut dot_file, + "{} -> {} [style=dotted, color=gray]", + item.id().as_usize(), + child.as_usize() + )?; + } + } + } + + writeln!(&mut dot_file, "}}")?; + Ok(()) +} diff --git a/third_party/rust/bindgen/ir/enum_ty.rs b/third_party/rust/bindgen/ir/enum_ty.rs new file mode 
100644 index 0000000000..39677e93fd --- /dev/null +++ b/third_party/rust/bindgen/ir/enum_ty.rs @@ -0,0 +1,320 @@ +//! Intermediate representation for C/C++ enumerations. + +use super::super::codegen::EnumVariation; +use super::context::{BindgenContext, TypeId}; +use super::item::Item; +use super::ty::{Type, TypeKind}; +use crate::clang; +use crate::ir::annotations::Annotations; +use crate::parse::{ClangItemParser, ParseError}; +use crate::regex_set::RegexSet; + +/// An enum representing custom handling that can be given to a variant. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum EnumVariantCustomBehavior { + /// This variant will be a module containing constants. + ModuleConstify, + /// This variant will be constified, that is, forced to generate a constant. + Constify, + /// This variant will be hidden entirely from the resulting enum. + Hide, +} + +/// A C/C++ enumeration. +#[derive(Debug)] +pub struct Enum { + /// The representation used for this enum; it should be an `IntKind` type or + /// an alias to one. + /// + /// It's `None` if the enum is a forward declaration and isn't defined + /// anywhere else, see `tests/headers/func_ptr_in_struct.h`. + repr: Option<TypeId>, + + /// The different variants, with explicit values. + variants: Vec<EnumVariant>, +} + +impl Enum { + /// Construct a new `Enum` with the given representation and variants. + pub fn new(repr: Option<TypeId>, variants: Vec<EnumVariant>) -> Self { + Enum { repr, variants } + } + + /// Get this enumeration's representation. + pub fn repr(&self) -> Option<TypeId> { + self.repr + } + + /// Get this enumeration's variants. + pub fn variants(&self) -> &[EnumVariant] { + &self.variants + } + + /// Construct an enumeration from the given Clang type. 
+ pub fn from_ty( + ty: &clang::Type, + ctx: &mut BindgenContext, + ) -> Result<Self, ParseError> { + use clang_sys::*; + debug!("Enum::from_ty {:?}", ty); + + if ty.kind() != CXType_Enum { + return Err(ParseError::Continue); + } + + let declaration = ty.declaration().canonical(); + let repr = declaration + .enum_type() + .and_then(|et| Item::from_ty(&et, declaration, None, ctx).ok()); + let mut variants = vec![]; + + let variant_ty = + repr.and_then(|r| ctx.resolve_type(r).safe_canonical_type(ctx)); + let is_bool = variant_ty.map_or(false, Type::is_bool); + + // Assume signedness since the default type by the C standard is an int. + let is_signed = variant_ty.map_or(true, |ty| match *ty.kind() { + TypeKind::Int(ref int_kind) => int_kind.is_signed(), + ref other => { + panic!("Since when enums can be non-integers? {:?}", other) + } + }); + + let type_name = ty.spelling(); + let type_name = if type_name.is_empty() { + None + } else { + Some(type_name) + }; + let type_name = type_name.as_deref(); + + let definition = declaration.definition().unwrap_or(declaration); + definition.visit(|cursor| { + if cursor.kind() == CXCursor_EnumConstantDecl { + let value = if is_bool { + cursor.enum_val_boolean().map(EnumVariantValue::Boolean) + } else if is_signed { + cursor.enum_val_signed().map(EnumVariantValue::Signed) + } else { + cursor.enum_val_unsigned().map(EnumVariantValue::Unsigned) + }; + if let Some(val) = value { + let name = cursor.spelling(); + let annotations = Annotations::new(&cursor); + let custom_behavior = ctx + .options() + .last_callback(|callbacks| { + callbacks + .enum_variant_behavior(type_name, &name, val) + }) + .or_else(|| { + let annotations = annotations.as_ref()?; + if annotations.hide() { + Some(EnumVariantCustomBehavior::Hide) + } else if annotations.constify_enum_variant() { + Some(EnumVariantCustomBehavior::Constify) + } else { + None + } + }); + + let new_name = ctx + .options() + .last_callback(|callbacks| { + 
callbacks.enum_variant_name(type_name, &name, val) + }) + .or_else(|| { + annotations + .as_ref()? + .use_instead_of()? + .last() + .cloned() + }) + .unwrap_or_else(|| name.clone()); + + let comment = cursor.raw_comment(); + variants.push(EnumVariant::new( + new_name, + name, + comment, + val, + custom_behavior, + )); + } + } + CXChildVisit_Continue + }); + Ok(Enum::new(repr, variants)) + } + + fn is_matching_enum( + &self, + ctx: &BindgenContext, + enums: &RegexSet, + item: &Item, + ) -> bool { + let path = item.path_for_allowlisting(ctx); + let enum_ty = item.expect_type(); + + if enums.matches(path[1..].join("::")) { + return true; + } + + // Test the variants if the enum is anonymous. + if enum_ty.name().is_some() { + return false; + } + + self.variants().iter().any(|v| enums.matches(v.name())) + } + + /// Returns the final representation of the enum. + pub fn computed_enum_variation( + &self, + ctx: &BindgenContext, + item: &Item, + ) -> EnumVariation { + // ModuleConsts has higher precedence before Rust in order to avoid + // problems with overlapping match patterns. 
+ if self.is_matching_enum( + ctx, + &ctx.options().constified_enum_modules, + item, + ) { + EnumVariation::ModuleConsts + } else if self.is_matching_enum( + ctx, + &ctx.options().bitfield_enums, + item, + ) { + EnumVariation::NewType { + is_bitfield: true, + is_global: false, + } + } else if self.is_matching_enum(ctx, &ctx.options().newtype_enums, item) + { + EnumVariation::NewType { + is_bitfield: false, + is_global: false, + } + } else if self.is_matching_enum( + ctx, + &ctx.options().newtype_global_enums, + item, + ) { + EnumVariation::NewType { + is_bitfield: false, + is_global: true, + } + } else if self.is_matching_enum( + ctx, + &ctx.options().rustified_enums, + item, + ) { + EnumVariation::Rust { + non_exhaustive: false, + } + } else if self.is_matching_enum( + ctx, + &ctx.options().rustified_non_exhaustive_enums, + item, + ) { + EnumVariation::Rust { + non_exhaustive: true, + } + } else if self.is_matching_enum( + ctx, + &ctx.options().constified_enums, + item, + ) { + EnumVariation::Consts + } else { + ctx.options().default_enum_style + } + } +} + +/// A single enum variant, to be contained only in an enum. +#[derive(Debug)] +pub struct EnumVariant { + /// The name of the variant. + name: String, + + /// The original name of the variant (without user mangling) + name_for_allowlisting: String, + + /// An optional doc comment. + comment: Option<String>, + + /// The integer value of the variant. + val: EnumVariantValue, + + /// The custom behavior this variant may have, if any. + custom_behavior: Option<EnumVariantCustomBehavior>, +} + +/// A constant value assigned to an enumeration variant. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum EnumVariantValue { + /// A boolean constant. + Boolean(bool), + + /// A signed constant. + Signed(i64), + + /// An unsigned constant. + Unsigned(u64), +} + +impl EnumVariant { + /// Construct a new enumeration variant from the given parts. 
+ pub fn new( + name: String, + name_for_allowlisting: String, + comment: Option<String>, + val: EnumVariantValue, + custom_behavior: Option<EnumVariantCustomBehavior>, + ) -> Self { + EnumVariant { + name, + name_for_allowlisting, + comment, + val, + custom_behavior, + } + } + + /// Get this variant's name. + pub fn name(&self) -> &str { + &self.name + } + + /// Get this variant's name. + pub fn name_for_allowlisting(&self) -> &str { + &self.name_for_allowlisting + } + + /// Get this variant's value. + pub fn val(&self) -> EnumVariantValue { + self.val + } + + /// Get this variant's documentation. + pub fn comment(&self) -> Option<&str> { + self.comment.as_deref() + } + + /// Returns whether this variant should be enforced to be a constant by code + /// generation. + pub fn force_constification(&self) -> bool { + self.custom_behavior + .map_or(false, |b| b == EnumVariantCustomBehavior::Constify) + } + + /// Returns whether the current variant should be hidden completely from the + /// resulting rust enum. + pub fn hidden(&self) -> bool { + self.custom_behavior + .map_or(false, |b| b == EnumVariantCustomBehavior::Hide) + } +} diff --git a/third_party/rust/bindgen/ir/function.rs b/third_party/rust/bindgen/ir/function.rs new file mode 100644 index 0000000000..7dbbb8f849 --- /dev/null +++ b/third_party/rust/bindgen/ir/function.rs @@ -0,0 +1,747 @@ +//! Intermediate representation for C/C++ functions and methods. 
+ +use super::comp::MethodKind; +use super::context::{BindgenContext, TypeId}; +use super::dot::DotAttributes; +use super::item::Item; +use super::traversal::{EdgeKind, Trace, Tracer}; +use super::ty::TypeKind; +use crate::clang::{self, Attribute}; +use crate::parse::{ + ClangItemParser, ClangSubItemParser, ParseError, ParseResult, +}; +use clang_sys::{self, CXCallingConv}; +use proc_macro2; +use quote; +use quote::TokenStreamExt; +use std::io; +use std::str::FromStr; + +const RUST_DERIVE_FUNPTR_LIMIT: usize = 12; + +/// What kind of a function are we looking at? +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum FunctionKind { + /// A plain, free function. + Function, + /// A method of some kind. + Method(MethodKind), +} + +impl FunctionKind { + /// Given a clang cursor, return the kind of function it represents, or + /// `None` otherwise. + pub fn from_cursor(cursor: &clang::Cursor) -> Option<FunctionKind> { + // FIXME(emilio): Deduplicate logic with `ir::comp`. + Some(match cursor.kind() { + clang_sys::CXCursor_FunctionDecl => FunctionKind::Function, + clang_sys::CXCursor_Constructor => { + FunctionKind::Method(MethodKind::Constructor) + } + clang_sys::CXCursor_Destructor => { + FunctionKind::Method(if cursor.method_is_virtual() { + MethodKind::VirtualDestructor { + pure_virtual: cursor.method_is_pure_virtual(), + } + } else { + MethodKind::Destructor + }) + } + clang_sys::CXCursor_CXXMethod => { + if cursor.method_is_virtual() { + FunctionKind::Method(MethodKind::Virtual { + pure_virtual: cursor.method_is_pure_virtual(), + }) + } else if cursor.method_is_static() { + FunctionKind::Method(MethodKind::Static) + } else { + FunctionKind::Method(MethodKind::Normal) + } + } + _ => return None, + }) + } +} + +/// The style of linkage +#[derive(Debug, Clone, Copy)] +pub enum Linkage { + /// Externally visible and can be linked against + External, + /// Not exposed externally. 
'static inline' functions will have this kind of linkage + Internal, +} + +/// A function declaration, with a signature, arguments, and argument names. +/// +/// The argument names vector must be the same length as the ones in the +/// signature. +#[derive(Debug)] +pub struct Function { + /// The name of this function. + name: String, + + /// The mangled name, that is, the symbol. + mangled_name: Option<String>, + + /// The id pointing to the current function signature. + signature: TypeId, + + /// The doc comment on the function, if any. + comment: Option<String>, + + /// The kind of function this is. + kind: FunctionKind, + + /// The linkage of the function. + linkage: Linkage, +} + +impl Function { + /// Construct a new function. + pub fn new( + name: String, + mangled_name: Option<String>, + signature: TypeId, + comment: Option<String>, + kind: FunctionKind, + linkage: Linkage, + ) -> Self { + Function { + name, + mangled_name, + signature, + comment, + kind, + linkage, + } + } + + /// Get this function's name. + pub fn name(&self) -> &str { + &self.name + } + + /// Get this function's name. + pub fn mangled_name(&self) -> Option<&str> { + self.mangled_name.as_deref() + } + + /// Get this function's signature type. + pub fn signature(&self) -> TypeId { + self.signature + } + + /// Get this function's comment. + pub fn comment(&self) -> Option<&str> { + self.comment.as_deref() + } + + /// Get this function's kind. + pub fn kind(&self) -> FunctionKind { + self.kind + } + + /// Get this function's linkage. 
+ pub fn linkage(&self) -> Linkage { + self.linkage + } +} + +impl DotAttributes for Function { + fn dot_attributes<W>( + &self, + _ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + if let Some(ref mangled) = self.mangled_name { + let mangled: String = + mangled.chars().flat_map(|c| c.escape_default()).collect(); + writeln!( + out, + "<tr><td>mangled name</td><td>{}</td></tr>", + mangled + )?; + } + + Ok(()) + } +} + +/// A valid rust ABI. +#[derive(Debug, Copy, Clone, Hash, Eq, PartialEq)] +pub enum Abi { + /// The default C ABI. + C, + /// The "stdcall" ABI. + Stdcall, + /// The "fastcall" ABI. + Fastcall, + /// The "thiscall" ABI. + ThisCall, + /// The "vectorcall" ABI. + Vectorcall, + /// The "aapcs" ABI. + Aapcs, + /// The "win64" ABI. + Win64, + /// The "C-unwind" ABI. + CUnwind, +} + +impl FromStr for Abi { + type Err = String; + + fn from_str(s: &str) -> Result<Self, Self::Err> { + match s { + "C" => Ok(Self::C), + "stdcall" => Ok(Self::Stdcall), + "fastcall" => Ok(Self::Fastcall), + "thiscall" => Ok(Self::ThisCall), + "vectorcall" => Ok(Self::Vectorcall), + "aapcs" => Ok(Self::Aapcs), + "win64" => Ok(Self::Win64), + "C-unwind" => Ok(Self::CUnwind), + _ => Err(format!("Invalid or unknown ABI {:?}", s)), + } + } +} + +impl std::fmt::Display for Abi { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let s = match *self { + Self::C => "C", + Self::Stdcall => "stdcall", + Self::Fastcall => "fastcall", + Self::ThisCall => "thiscall", + Self::Vectorcall => "vectorcall", + Self::Aapcs => "aapcs", + Self::Win64 => "win64", + Self::CUnwind => "C-unwind", + }; + + s.fmt(f) + } +} + +impl quote::ToTokens for Abi { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + let abi = self.to_string(); + tokens.append_all(quote! { #abi }); + } +} + +/// An ABI extracted from a clang cursor. +#[derive(Debug, Copy, Clone)] +pub(crate) enum ClangAbi { + Known(Abi), + /// An unknown or invalid ABI. 
+ Unknown(CXCallingConv), +} + +impl ClangAbi { + /// Returns whether this Abi is known or not. + fn is_unknown(&self) -> bool { + matches!(*self, ClangAbi::Unknown(..)) + } +} + +impl quote::ToTokens for ClangAbi { + fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) { + match *self { + Self::Known(abi) => abi.to_tokens(tokens), + Self::Unknown(cc) => panic!( + "Cannot turn unknown calling convention to tokens: {:?}", + cc + ), + } + } +} + +/// A function signature. +#[derive(Debug)] +pub struct FunctionSig { + /// The return type of the function. + return_type: TypeId, + + /// The type of the arguments, optionally with the name of the argument when + /// declared. + argument_types: Vec<(Option<String>, TypeId)>, + + /// Whether this function is variadic. + is_variadic: bool, + is_divergent: bool, + + /// Whether this function's return value must be used. + must_use: bool, + + /// The ABI of this function. + abi: ClangAbi, +} + +fn get_abi(cc: CXCallingConv) -> ClangAbi { + use clang_sys::*; + match cc { + CXCallingConv_Default => ClangAbi::Known(Abi::C), + CXCallingConv_C => ClangAbi::Known(Abi::C), + CXCallingConv_X86StdCall => ClangAbi::Known(Abi::Stdcall), + CXCallingConv_X86FastCall => ClangAbi::Known(Abi::Fastcall), + CXCallingConv_X86ThisCall => ClangAbi::Known(Abi::ThisCall), + CXCallingConv_X86VectorCall => ClangAbi::Known(Abi::Vectorcall), + CXCallingConv_AAPCS => ClangAbi::Known(Abi::Aapcs), + CXCallingConv_X86_64Win64 => ClangAbi::Known(Abi::Win64), + other => ClangAbi::Unknown(other), + } +} + +/// Get the mangled name for the cursor's referent. +pub fn cursor_mangling( + ctx: &BindgenContext, + cursor: &clang::Cursor, +) -> Option<String> { + if !ctx.options().enable_mangling { + return None; + } + + // We early return here because libclang may crash in some case + // if we pass in a variable inside a partial specialized template. + // See rust-lang/rust-bindgen#67, and rust-lang/rust-bindgen#462. 
+ if cursor.is_in_non_fully_specialized_template() { + return None; + } + + let is_destructor = cursor.kind() == clang_sys::CXCursor_Destructor; + if let Ok(mut manglings) = cursor.cxx_manglings() { + while let Some(m) = manglings.pop() { + // Only generate the destructor group 1, see below. + if is_destructor && !m.ends_with("D1Ev") { + continue; + } + + return Some(m); + } + } + + let mut mangling = cursor.mangling(); + if mangling.is_empty() { + return None; + } + + if is_destructor { + // With old (3.8-) libclang versions, and the Itanium ABI, clang returns + // the "destructor group 0" symbol, which means that it'll try to free + // memory, which definitely isn't what we want. + // + // Explicitly force the destructor group 1 symbol. + // + // See http://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special + // for the reference, and http://stackoverflow.com/a/6614369/1091587 for + // a more friendly explanation. + // + // We don't need to do this for constructors since clang seems to always + // have returned the C1 constructor. + // + // FIXME(emilio): Can a legit symbol in other ABIs end with this string? + // I don't think so, but if it can this would become a linker error + // anyway, not an invalid free at runtime. + // + // TODO(emilio, #611): Use cpp_demangle if this becomes nastier with + // time. + if mangling.ends_with("D0Ev") { + let new_len = mangling.len() - 4; + mangling.truncate(new_len); + mangling.push_str("D1Ev"); + } + } + + Some(mangling) +} + +fn args_from_ty_and_cursor( + ty: &clang::Type, + cursor: &clang::Cursor, + ctx: &mut BindgenContext, +) -> Vec<(Option<String>, TypeId)> { + let cursor_args = cursor.args().unwrap_or_default().into_iter(); + let type_args = ty.args().unwrap_or_default().into_iter(); + + // Argument types can be found in either the cursor or the type, but argument names may only be + // found on the cursor. 
We often have access to both a type and a cursor for each argument, but + // in some cases we may only have one. + // + // Prefer using the type as the source of truth for the argument's type, but fall back to + // inspecting the cursor (this happens for Objective C interfaces). + // + // Prefer using the cursor for the argument's type, but fall back to using the parent's cursor + // (this happens for function pointer return types). + cursor_args + .map(Some) + .chain(std::iter::repeat(None)) + .zip(type_args.map(Some).chain(std::iter::repeat(None))) + .take_while(|(cur, ty)| cur.is_some() || ty.is_some()) + .map(|(arg_cur, arg_ty)| { + let name = arg_cur.map(|a| a.spelling()).and_then(|name| { + if name.is_empty() { + None + } else { + Some(name) + } + }); + + let cursor = arg_cur.unwrap_or(*cursor); + let ty = arg_ty.unwrap_or_else(|| cursor.cur_type()); + (name, Item::from_ty_or_ref(ty, cursor, None, ctx)) + }) + .collect() +} + +impl FunctionSig { + /// Construct a new function signature from the given Clang type. + pub fn from_ty( + ty: &clang::Type, + cursor: &clang::Cursor, + ctx: &mut BindgenContext, + ) -> Result<Self, ParseError> { + use clang_sys::*; + debug!("FunctionSig::from_ty {:?} {:?}", ty, cursor); + + // Skip function templates + let kind = cursor.kind(); + if kind == CXCursor_FunctionTemplate { + return Err(ParseError::Continue); + } + + let spelling = cursor.spelling(); + + // Don't parse operatorxx functions in C++ + let is_operator = |spelling: &str| { + spelling.starts_with("operator") && + !clang::is_valid_identifier(spelling) + }; + if is_operator(&spelling) { + return Err(ParseError::Continue); + } + + // Constructors of non-type template parameter classes for some reason + // include the template parameter in their name. Just skip them, since + // we don't handle well non-type template parameters anyway. 
+ if (kind == CXCursor_Constructor || kind == CXCursor_Destructor) && + spelling.contains('<') + { + return Err(ParseError::Continue); + } + + let cursor = if cursor.is_valid() { + *cursor + } else { + ty.declaration() + }; + + let mut args = match kind { + CXCursor_FunctionDecl | + CXCursor_Constructor | + CXCursor_CXXMethod | + CXCursor_ObjCInstanceMethodDecl | + CXCursor_ObjCClassMethodDecl => { + args_from_ty_and_cursor(ty, &cursor, ctx) + } + _ => { + // For non-CXCursor_FunctionDecl, visiting the cursor's children + // is the only reliable way to get parameter names. + let mut args = vec![]; + cursor.visit(|c| { + if c.kind() == CXCursor_ParmDecl { + let ty = + Item::from_ty_or_ref(c.cur_type(), c, None, ctx); + let name = c.spelling(); + let name = + if name.is_empty() { None } else { Some(name) }; + args.push((name, ty)); + } + CXChildVisit_Continue + }); + + if args.is_empty() { + // FIXME(emilio): Sometimes libclang doesn't expose the + // right AST for functions tagged as stdcall and such... + // + // https://bugs.llvm.org/show_bug.cgi?id=45919 + args_from_ty_and_cursor(ty, &cursor, ctx) + } else { + args + } + } + }; + + let (must_use, mut is_divergent) = + if ctx.options().enable_function_attribute_detection { + let [must_use, no_return, no_return_cpp] = cursor.has_attrs(&[ + Attribute::MUST_USE, + Attribute::NO_RETURN, + Attribute::NO_RETURN_CPP, + ]); + (must_use, no_return || no_return_cpp) + } else { + Default::default() + }; + + // This looks easy to break but the clang parser keeps the type spelling clean even if + // other attributes are added. + is_divergent = + is_divergent || ty.spelling().contains("__attribute__((noreturn))"); + + let is_method = kind == CXCursor_CXXMethod; + let is_constructor = kind == CXCursor_Constructor; + let is_destructor = kind == CXCursor_Destructor; + if (is_constructor || is_destructor || is_method) && + cursor.lexical_parent() != cursor.semantic_parent() + { + // Only parse constructors once. 
+ return Err(ParseError::Continue); + } + + if is_method || is_constructor || is_destructor { + let is_const = is_method && cursor.method_is_const(); + let is_virtual = is_method && cursor.method_is_virtual(); + let is_static = is_method && cursor.method_is_static(); + if !is_static && !is_virtual { + let parent = cursor.semantic_parent(); + let class = Item::parse(parent, None, ctx) + .expect("Expected to parse the class"); + // The `class` most likely is not finished parsing yet, so use + // the unchecked variant. + let class = class.as_type_id_unchecked(); + + let class = if is_const { + let const_class_id = ctx.next_item_id(); + ctx.build_const_wrapper( + const_class_id, + class, + None, + &parent.cur_type(), + ) + } else { + class + }; + + let ptr = + Item::builtin_type(TypeKind::Pointer(class), false, ctx); + args.insert(0, (Some("this".into()), ptr)); + } else if is_virtual { + let void = Item::builtin_type(TypeKind::Void, false, ctx); + let ptr = + Item::builtin_type(TypeKind::Pointer(void), false, ctx); + args.insert(0, (Some("this".into()), ptr)); + } + } + + let ty_ret_type = if kind == CXCursor_ObjCInstanceMethodDecl || + kind == CXCursor_ObjCClassMethodDecl + { + ty.ret_type() + .or_else(|| cursor.ret_type()) + .ok_or(ParseError::Continue)? + } else { + ty.ret_type().ok_or(ParseError::Continue)? + }; + + let ret = if is_constructor && ctx.is_target_wasm32() { + // Constructors in Clang wasm32 target return a pointer to the object + // being constructed. + let void = Item::builtin_type(TypeKind::Void, false, ctx); + Item::builtin_type(TypeKind::Pointer(void), false, ctx) + } else { + Item::from_ty_or_ref(ty_ret_type, cursor, None, ctx) + }; + + // Clang plays with us at "find the calling convention", see #549 and + // co. This seems to be a better fix than that commit. 
+ let mut call_conv = ty.call_conv(); + if let Some(ty) = cursor.cur_type().canonical_type().pointee_type() { + let cursor_call_conv = ty.call_conv(); + if cursor_call_conv != CXCallingConv_Invalid { + call_conv = cursor_call_conv; + } + } + + let abi = get_abi(call_conv); + + if abi.is_unknown() { + warn!("Unknown calling convention: {:?}", call_conv); + } + + Ok(FunctionSig { + return_type: ret, + argument_types: args, + is_variadic: ty.is_variadic(), + is_divergent, + must_use, + abi, + }) + } + + /// Get this function signature's return type. + pub fn return_type(&self) -> TypeId { + self.return_type + } + + /// Get this function signature's argument (name, type) pairs. + pub fn argument_types(&self) -> &[(Option<String>, TypeId)] { + &self.argument_types + } + + /// Get this function signature's ABI. + pub(crate) fn abi( + &self, + ctx: &BindgenContext, + name: Option<&str>, + ) -> ClangAbi { + // FIXME (pvdrz): Try to do this check lazily instead. Maybe store the ABI inside `ctx` + // instead?. + if let Some(name) = name { + if let Some((abi, _)) = ctx + .options() + .abi_overrides + .iter() + .find(|(_, regex_set)| regex_set.matches(name)) + { + ClangAbi::Known(*abi) + } else { + self.abi + } + } else { + self.abi + } + } + + /// Is this function signature variadic? + pub fn is_variadic(&self) -> bool { + // Clang reports some functions as variadic when they *might* be + // variadic. We do the argument check because rust doesn't codegen well + // variadic functions without an initial argument. + self.is_variadic && !self.argument_types.is_empty() + } + + /// Must this function's return value be used? + pub fn must_use(&self) -> bool { + self.must_use + } + + /// Are function pointers with this signature able to derive Rust traits? + /// Rust only supports deriving traits for function pointers with a limited + /// number of parameters and a couple ABIs. 
+ /// + /// For more details, see: + /// + /// * https://github.com/rust-lang/rust-bindgen/issues/547, + /// * https://github.com/rust-lang/rust/issues/38848, + /// * and https://github.com/rust-lang/rust/issues/40158 + pub fn function_pointers_can_derive(&self) -> bool { + if self.argument_types.len() > RUST_DERIVE_FUNPTR_LIMIT { + return false; + } + + matches!(self.abi, ClangAbi::Known(Abi::C) | ClangAbi::Unknown(..)) + } + + pub(crate) fn is_divergent(&self) -> bool { + self.is_divergent + } +} + +impl ClangSubItemParser for Function { + fn parse( + cursor: clang::Cursor, + context: &mut BindgenContext, + ) -> Result<ParseResult<Self>, ParseError> { + use clang_sys::*; + + let kind = match FunctionKind::from_cursor(&cursor) { + None => return Err(ParseError::Continue), + Some(k) => k, + }; + + debug!("Function::parse({:?}, {:?})", cursor, cursor.cur_type()); + + let visibility = cursor.visibility(); + if visibility != CXVisibility_Default { + return Err(ParseError::Continue); + } + + if cursor.access_specifier() == CX_CXXPrivate { + return Err(ParseError::Continue); + } + + if cursor.is_inlined_function() || + cursor + .definition() + .map_or(false, |x| x.is_inlined_function()) + { + if !context.options().generate_inline_functions { + return Err(ParseError::Continue); + } + if cursor.is_deleted_function() { + return Err(ParseError::Continue); + } + } + + let linkage = cursor.linkage(); + let linkage = match linkage { + CXLinkage_External | CXLinkage_UniqueExternal => Linkage::External, + CXLinkage_Internal => Linkage::Internal, + _ => return Err(ParseError::Continue), + }; + + // Grab the signature using Item::from_ty. + let sig = Item::from_ty(&cursor.cur_type(), cursor, None, context)?; + + let mut name = cursor.spelling(); + assert!(!name.is_empty(), "Empty function name?"); + + if cursor.kind() == CXCursor_Destructor { + // Remove the leading `~`. 
/// The flavor of integral type being represented.
///
/// The variant order is significant: `PartialOrd`/`Ord` are derived from it.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum IntKind {
    /// A `bool`.
    Bool,

    /// A `signed char`.
    SChar,

    /// An `unsigned char`.
    UChar,

    /// A `wchar_t`.
    WChar,

    /// A platform-dependent `char` type, carrying its signedness.
    Char {
        /// Whether `char` is signed on the target platform.
        is_signed: bool,
    },

    /// A `short`.
    Short,

    /// An `unsigned short`.
    UShort,

    /// An `int`.
    Int,

    /// An `unsigned int`.
    UInt,

    /// A `long`.
    Long,

    /// An `unsigned long`.
    ULong,

    /// A `long long`.
    LongLong,

    /// An `unsigned long long`.
    ULongLong,

    /// An 8-bit signed integer.
    I8,

    /// An 8-bit unsigned integer.
    U8,

    /// A 16-bit signed integer.
    I16,

    /// Either a `char16_t` or a `wchar_t`.
    U16,

    /// A 32-bit signed integer.
    I32,

    /// A 32-bit unsigned integer.
    U32,

    /// A 64-bit signed integer.
    I64,

    /// A 64-bit unsigned integer.
    U64,

    /// An `int128_t`.
    I128,

    /// A `uint128_t`.
    U128,

    /// A custom integer type, used to allow custom macro types depending on
    /// range.
    Custom {
        /// The name of the type, which is used without modification.
        name: &'static str,
        /// Whether the type is signed or not.
        is_signed: bool,
    },
}

impl IntKind {
    /// Returns `true` when this integral type is signed.
    pub fn is_signed(&self) -> bool {
        match *self {
            // These variants carry their own signedness.
            IntKind::Char { is_signed } |
            IntKind::Custom { is_signed, .. } => is_signed,

            IntKind::SChar |
            IntKind::Short |
            IntKind::Int |
            IntKind::Long |
            IntKind::LongLong |
            IntKind::I8 |
            IntKind::I16 |
            IntKind::I32 |
            IntKind::I64 |
            IntKind::I128 => true,

            // TODO(emilio): wchar_t can in theory be signed, but we have no
            // way to know whether it is or not right now (unlike char,
            // there's no WChar_S / WChar_U).
            IntKind::Bool |
            IntKind::UChar |
            IntKind::UShort |
            IntKind::UInt |
            IntKind::ULong |
            IntKind::ULongLong |
            IntKind::U8 |
            IntKind::U16 |
            IntKind::WChar |
            IntKind::U32 |
            IntKind::U64 |
            IntKind::U128 => false,
        }
    }

    /// Returns the size in bytes for the kinds whose size is fixed, and
    /// `None` for every other kind.
    ///
    /// This is to alleviate libclang sometimes not giving us a layout (like
    /// in the case when an enum is defined inside a class with template
    /// parameters).
    pub fn known_size(&self) -> Option<usize> {
        match *self {
            IntKind::Bool |
            IntKind::UChar |
            IntKind::SChar |
            IntKind::U8 |
            IntKind::I8 |
            IntKind::Char { .. } => Some(1),
            IntKind::U16 | IntKind::I16 => Some(2),
            IntKind::U32 | IntKind::I32 => Some(4),
            IntKind::U64 | IntKind::I64 => Some(8),
            IntKind::I128 | IntKind::U128 => Some(16),
            _ => None,
        }
    }

    /// Whether `val` is representable sign-wise by this kind: negative
    /// values require a signed kind, non-negative values always match.
    pub fn signedness_matches(&self, val: i64) -> bool {
        self.is_signed() || val >= 0
    }
}
/// Like `ItemCanonicalName`, but yields the whole path that has to be
/// followed to reach an item instead of just its final name.
///
/// To contrast with `canonical_name`, here's an example:
///
/// ```c++
/// namespace foo {
///     const int BAR = 3;
/// }
/// ```
///
/// For `BAR`, the canonical path is `vec!["foo", "BAR"]`, while the
/// canonical name is just `"BAR"`.
pub trait ItemCanonicalPath {
    /// Get the namespace-aware canonical path for this item. This means that
    /// if namespaces are disabled, you'll get a single item, and otherwise
    /// you get the whole path.
    fn namespace_aware_canonical_path(
        &self,
        ctx: &BindgenContext,
    ) -> Vec<String>;

    /// Get the canonical path for this item.
    fn canonical_path(&self, ctx: &BindgenContext) -> Vec<String>;
}
+ fn ancestors<'a>(&self, ctx: &'a BindgenContext) -> ItemAncestorsIter<'a>; +} + +#[cfg(testing_only_extra_assertions)] +type DebugOnlyItemSet = ItemSet; + +#[cfg(not(testing_only_extra_assertions))] +struct DebugOnlyItemSet; + +#[cfg(not(testing_only_extra_assertions))] +impl DebugOnlyItemSet { + fn new() -> Self { + DebugOnlyItemSet + } + + fn contains(&self, _id: &ItemId) -> bool { + false + } + + fn insert(&mut self, _id: ItemId) {} +} + +/// An iterator over an item and its ancestors. +pub struct ItemAncestorsIter<'a> { + item: ItemId, + ctx: &'a BindgenContext, + seen: DebugOnlyItemSet, +} + +impl<'a> ItemAncestorsIter<'a> { + fn new<Id: Into<ItemId>>(ctx: &'a BindgenContext, id: Id) -> Self { + ItemAncestorsIter { + item: id.into(), + ctx, + seen: DebugOnlyItemSet::new(), + } + } +} + +impl<'a> Iterator for ItemAncestorsIter<'a> { + type Item = ItemId; + + fn next(&mut self) -> Option<Self::Item> { + let item = self.ctx.resolve_item(self.item); + + if item.parent_id() == self.item { + None + } else { + self.item = item.parent_id(); + + extra_assert!(!self.seen.contains(&item.id())); + self.seen.insert(item.id()); + + Some(item.id()) + } + } +} + +impl<T> AsTemplateParam for T +where + T: Copy + Into<ItemId>, +{ + type Extra = (); + + fn as_template_param( + &self, + ctx: &BindgenContext, + _: &(), + ) -> Option<TypeId> { + ctx.resolve_item((*self).into()).as_template_param(ctx, &()) + } +} + +impl AsTemplateParam for Item { + type Extra = (); + + fn as_template_param( + &self, + ctx: &BindgenContext, + _: &(), + ) -> Option<TypeId> { + self.kind.as_template_param(ctx, self) + } +} + +impl AsTemplateParam for ItemKind { + type Extra = Item; + + fn as_template_param( + &self, + ctx: &BindgenContext, + item: &Item, + ) -> Option<TypeId> { + match *self { + ItemKind::Type(ref ty) => ty.as_template_param(ctx, item), + ItemKind::Module(..) | + ItemKind::Function(..) | + ItemKind::Var(..) 
=> None, + } + } +} + +impl<T> ItemCanonicalName for T +where + T: Copy + Into<ItemId>, +{ + fn canonical_name(&self, ctx: &BindgenContext) -> String { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.resolve_item(*self).canonical_name(ctx) + } +} + +impl<T> ItemCanonicalPath for T +where + T: Copy + Into<ItemId>, +{ + fn namespace_aware_canonical_path( + &self, + ctx: &BindgenContext, + ) -> Vec<String> { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.resolve_item(*self).namespace_aware_canonical_path(ctx) + } + + fn canonical_path(&self, ctx: &BindgenContext) -> Vec<String> { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.resolve_item(*self).canonical_path(ctx) + } +} + +impl<T> ItemAncestors for T +where + T: Copy + Into<ItemId>, +{ + fn ancestors<'a>(&self, ctx: &'a BindgenContext) -> ItemAncestorsIter<'a> { + ItemAncestorsIter::new(ctx, *self) + } +} + +impl ItemAncestors for Item { + fn ancestors<'a>(&self, ctx: &'a BindgenContext) -> ItemAncestorsIter<'a> { + self.id().ancestors(ctx) + } +} + +impl<Id> Trace for Id +where + Id: Copy + Into<ItemId>, +{ + type Extra = (); + + fn trace<T>(&self, ctx: &BindgenContext, tracer: &mut T, extra: &()) + where + T: Tracer, + { + ctx.resolve_item(*self).trace(ctx, tracer, extra); + } +} + +impl Trace for Item { + type Extra = (); + + fn trace<T>(&self, ctx: &BindgenContext, tracer: &mut T, _extra: &()) + where + T: Tracer, + { + // Even if this item is blocklisted/hidden, we want to trace it. It is + // traversal iterators' consumers' responsibility to filter items as + // needed. Generally, this filtering happens in the implementation of + // `Iterator` for `allowlistedItems`. Fully tracing blocklisted items is + // necessary for things like the template parameter usage analysis to + // function correctly. 
+ + match *self.kind() { + ItemKind::Type(ref ty) => { + // There are some types, like resolved type references, where we + // don't want to stop collecting types even though they may be + // opaque. + if ty.should_be_traced_unconditionally() || + !self.is_opaque(ctx, &()) + { + ty.trace(ctx, tracer, self); + } + } + ItemKind::Function(ref fun) => { + // Just the same way, it has not real meaning for a function to + // be opaque, so we trace across it. + tracer.visit(fun.signature().into()); + } + ItemKind::Var(ref var) => { + tracer.visit_kind(var.ty().into(), EdgeKind::VarType); + } + ItemKind::Module(_) => { + // Module -> children edges are "weak", and we do not want to + // trace them. If we did, then allowlisting wouldn't work as + // expected: everything in every module would end up + // allowlisted. + // + // TODO: make a new edge kind for module -> children edges and + // filter them during allowlisting traversals. + } + } + } +} + +impl CanDeriveDebug for Item { + fn can_derive_debug(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_debug(ctx) + } +} + +impl CanDeriveDefault for Item { + fn can_derive_default(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_default(ctx) + } +} + +impl CanDeriveCopy for Item { + fn can_derive_copy(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_copy(ctx) + } +} + +impl CanDeriveHash for Item { + fn can_derive_hash(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_hash(ctx) + } +} + +impl CanDerivePartialOrd for Item { + fn can_derive_partialord(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_partialord(ctx) + } +} + +impl CanDerivePartialEq for Item { + fn can_derive_partialeq(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_partialeq(ctx) + } +} + +impl CanDeriveEq for Item { + fn can_derive_eq(&self, ctx: &BindgenContext) -> bool { + self.id().can_derive_eq(ctx) + } +} + +impl CanDeriveOrd for Item { + fn can_derive_ord(&self, ctx: 
&BindgenContext) -> bool { + self.id().can_derive_ord(ctx) + } +} + +/// An item is the base of the bindgen representation, it can be either a +/// module, a type, a function, or a variable (see `ItemKind` for more +/// information). +/// +/// Items refer to each other by `ItemId`. Every item has its parent's +/// id. Depending on the kind of item this is, it may also refer to other items, +/// such as a compound type item referring to other types. Collectively, these +/// references form a graph. +/// +/// The entry-point to this graph is the "root module": a meta-item used to hold +/// all top-level items. +/// +/// An item may have a comment, and annotations (see the `annotations` module). +/// +/// Note that even though we parse all the types of annotations in comments, not +/// all of them apply to every item. Those rules are described in the +/// `annotations` module. +#[derive(Debug)] +pub struct Item { + /// This item's id. + id: ItemId, + + /// The item's local id, unique only amongst its siblings. Only used for + /// anonymous items. + /// + /// Lazily initialized in local_id(). + /// + /// Note that only structs, unions, and enums get a local type id. In any + /// case this is an implementation detail. + local_id: LazyCell<usize>, + + /// The next local id to use for a child or template instantiation. + next_child_local_id: Cell<usize>, + + /// A cached copy of the canonical name, as returned by `canonical_name`. + /// + /// This is a fairly used operation during codegen so this makes bindgen + /// considerably faster in those cases. + canonical_name: LazyCell<String>, + + /// The path to use for allowlisting and other name-based checks, as + /// returned by `path_for_allowlisting`, lazily constructed. + path_for_allowlisting: LazyCell<Vec<String>>, + + /// A doc comment over the item, if any. + comment: Option<String>, + /// Annotations extracted from the doc comment, or the default ones + /// otherwise. 
+ annotations: Annotations, + /// An item's parent id. This will most likely be a class where this item + /// was declared, or a module, etc. + /// + /// All the items have a parent, except the root module, in which case the + /// parent id is its own id. + parent_id: ItemId, + /// The item kind. + kind: ItemKind, + /// The source location of the item. + location: Option<clang::SourceLocation>, +} + +impl AsRef<ItemId> for Item { + fn as_ref(&self) -> &ItemId { + &self.id + } +} + +impl Item { + /// Construct a new `Item`. + pub fn new( + id: ItemId, + comment: Option<String>, + annotations: Option<Annotations>, + parent_id: ItemId, + kind: ItemKind, + location: Option<clang::SourceLocation>, + ) -> Self { + debug_assert!(id != parent_id || kind.is_module()); + Item { + id, + local_id: LazyCell::new(), + next_child_local_id: Cell::new(1), + canonical_name: LazyCell::new(), + path_for_allowlisting: LazyCell::new(), + parent_id, + comment, + annotations: annotations.unwrap_or_default(), + kind, + location, + } + } + + /// Construct a new opaque item type. + pub fn new_opaque_type( + with_id: ItemId, + ty: &clang::Type, + ctx: &mut BindgenContext, + ) -> TypeId { + let location = ty.declaration().location(); + let ty = Opaque::from_clang_ty(ty, ctx); + let kind = ItemKind::Type(ty); + let parent = ctx.root_module().into(); + ctx.add_item( + Item::new(with_id, None, None, parent, kind, Some(location)), + None, + None, + ); + with_id.as_type_id_unchecked() + } + + /// Get this `Item`'s identifier. + pub fn id(&self) -> ItemId { + self.id + } + + /// Get this `Item`'s parent's identifier. + /// + /// For the root module, the parent's ID is its own ID. + pub fn parent_id(&self) -> ItemId { + self.parent_id + } + + /// Set this item's parent id. + /// + /// This is only used so replacements get generated in the proper module. 
+ pub fn set_parent_for_replacement<Id: Into<ItemId>>(&mut self, id: Id) { + self.parent_id = id.into(); + } + + /// Returns the depth this item is indented to. + /// + /// FIXME(emilio): This may need fixes for the enums within modules stuff. + pub fn codegen_depth(&self, ctx: &BindgenContext) -> usize { + if !ctx.options().enable_cxx_namespaces { + return 0; + } + + self.ancestors(ctx) + .filter(|id| { + ctx.resolve_item(*id).as_module().map_or(false, |module| { + !module.is_inline() || + ctx.options().conservative_inline_namespaces + }) + }) + .count() + + 1 + } + + /// Get this `Item`'s comment, if it has any, already preprocessed and with + /// the right indentation. + pub fn comment(&self, ctx: &BindgenContext) -> Option<String> { + if !ctx.options().generate_comments { + return None; + } + + self.comment + .as_ref() + .map(|comment| ctx.options().process_comment(comment)) + } + + /// What kind of item is this? + pub fn kind(&self) -> &ItemKind { + &self.kind + } + + /// Get a mutable reference to this item's kind. + pub fn kind_mut(&mut self) -> &mut ItemKind { + &mut self.kind + } + + /// Where in the source is this item located? + pub fn location(&self) -> Option<&clang::SourceLocation> { + self.location.as_ref() + } + + /// Get an identifier that differentiates this item from its siblings. + /// + /// This should stay relatively stable in the face of code motion outside or + /// below this item's lexical scope, meaning that this can be useful for + /// generating relatively stable identifiers within a scope. + pub fn local_id(&self, ctx: &BindgenContext) -> usize { + *self.local_id.borrow_with(|| { + let parent = ctx.resolve_item(self.parent_id); + parent.next_child_local_id() + }) + } + + /// Get an identifier that differentiates a child of this item of other + /// related items. + /// + /// This is currently used for anonymous items, and template instantiation + /// tests, in both cases in order to reduce noise when system headers are at + /// place. 
+ pub fn next_child_local_id(&self) -> usize { + let local_id = self.next_child_local_id.get(); + self.next_child_local_id.set(local_id + 1); + local_id + } + + /// Returns whether this item is a top-level item, from the point of view of + /// bindgen. + /// + /// This point of view changes depending on whether namespaces are enabled + /// or not. That way, in the following example: + /// + /// ```c++ + /// namespace foo { + /// static int var; + /// } + /// ``` + /// + /// `var` would be a toplevel item if namespaces are disabled, but won't if + /// they aren't. + /// + /// This function is used to determine when the codegen phase should call + /// `codegen` on an item, since any item that is not top-level will be + /// generated by its parent. + pub fn is_toplevel(&self, ctx: &BindgenContext) -> bool { + // FIXME: Workaround for some types falling behind when parsing weird + // stl classes, for example. + if ctx.options().enable_cxx_namespaces && + self.kind().is_module() && + self.id() != ctx.root_module() + { + return false; + } + + let mut parent = self.parent_id; + loop { + let parent_item = match ctx.resolve_item_fallible(parent) { + Some(item) => item, + None => return false, + }; + + if parent_item.id() == ctx.root_module() { + return true; + } else if ctx.options().enable_cxx_namespaces || + !parent_item.kind().is_module() + { + return false; + } + + parent = parent_item.parent_id(); + } + } + + /// Get a reference to this item's underlying `Type`. Panic if this is some + /// other kind of item. + pub fn expect_type(&self) -> &Type { + self.kind().expect_type() + } + + /// Get a reference to this item's underlying `Type`, or `None` if this is + /// some other kind of item. + pub fn as_type(&self) -> Option<&Type> { + self.kind().as_type() + } + + /// Get a reference to this item's underlying `Function`. Panic if this is + /// some other kind of item. 
+ pub fn expect_function(&self) -> &Function { + self.kind().expect_function() + } + + /// Is this item a module? + pub fn is_module(&self) -> bool { + matches!(self.kind, ItemKind::Module(..)) + } + + /// Get this item's annotations. + pub fn annotations(&self) -> &Annotations { + &self.annotations + } + + /// Whether this item should be blocklisted. + /// + /// This may be due to either annotations or to other kind of configuration. + pub fn is_blocklisted(&self, ctx: &BindgenContext) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + if self.annotations.hide() { + return true; + } + + if !ctx.options().blocklisted_files.is_empty() { + if let Some(location) = &self.location { + let (file, _, _, _) = location.location(); + if let Some(filename) = file.name() { + if ctx.options().blocklisted_files.matches(filename) { + return true; + } + } + } + } + + let path = self.path_for_allowlisting(ctx); + let name = path[1..].join("::"); + ctx.options().blocklisted_items.matches(&name) || + match self.kind { + ItemKind::Type(..) => { + ctx.options().blocklisted_types.matches(&name) || + ctx.is_replaced_type(path, self.id) + } + ItemKind::Function(..) => { + ctx.options().blocklisted_functions.matches(&name) + } + // TODO: Add constant / namespace blocklisting? + ItemKind::Var(..) | ItemKind::Module(..) => false, + } + } + + /// Is this a reference to another type? + pub fn is_type_ref(&self) -> bool { + self.as_type().map_or(false, |ty| ty.is_type_ref()) + } + + /// Is this item a var type? + pub fn is_var(&self) -> bool { + matches!(*self.kind(), ItemKind::Var(..)) + } + + /// Take out item NameOptions + pub fn name<'a>(&'a self, ctx: &'a BindgenContext) -> NameOptions<'a> { + NameOptions::new(self, ctx) + } + + /// Get the target item id for name generation. 
+ fn name_target(&self, ctx: &BindgenContext) -> ItemId { + let mut targets_seen = DebugOnlyItemSet::new(); + let mut item = self; + + loop { + extra_assert!(!targets_seen.contains(&item.id())); + targets_seen.insert(item.id()); + + if self.annotations().use_instead_of().is_some() { + return self.id(); + } + + match *item.kind() { + ItemKind::Type(ref ty) => match *ty.kind() { + TypeKind::ResolvedTypeRef(inner) => { + item = ctx.resolve_item(inner); + } + TypeKind::TemplateInstantiation(ref inst) => { + item = ctx.resolve_item(inst.template_definition()); + } + _ => return item.id(), + }, + _ => return item.id(), + } + } + } + + /// Create a fully disambiguated name for an item, including template + /// parameters if it is a type + pub fn full_disambiguated_name(&self, ctx: &BindgenContext) -> String { + let mut s = String::new(); + let level = 0; + self.push_disambiguated_name(ctx, &mut s, level); + s + } + + /// Helper function for full_disambiguated_name + fn push_disambiguated_name( + &self, + ctx: &BindgenContext, + to: &mut String, + level: u8, + ) { + to.push_str(&self.canonical_name(ctx)); + if let ItemKind::Type(ref ty) = *self.kind() { + if let TypeKind::TemplateInstantiation(ref inst) = *ty.kind() { + to.push_str(&format!("_open{}_", level)); + for arg in inst.template_arguments() { + arg.into_resolver() + .through_type_refs() + .resolve(ctx) + .push_disambiguated_name(ctx, to, level + 1); + to.push('_'); + } + to.push_str(&format!("close{}", level)); + } + } + } + + /// Get this function item's name, or `None` if this item is not a function. + fn func_name(&self) -> Option<&str> { + match *self.kind() { + ItemKind::Function(ref func) => Some(func.name()), + _ => None, + } + } + + /// Get the overload index for this method. If this is not a method, return + /// `None`. 
+ fn overload_index(&self, ctx: &BindgenContext) -> Option<usize> { + self.func_name().and_then(|func_name| { + let parent = ctx.resolve_item(self.parent_id()); + if let ItemKind::Type(ref ty) = *parent.kind() { + if let TypeKind::Comp(ref ci) = *ty.kind() { + // All the constructors have the same name, so no need to + // resolve and check. + return ci + .constructors() + .iter() + .position(|c| *c == self.id()) + .or_else(|| { + ci.methods() + .iter() + .filter(|m| { + let item = ctx.resolve_item(m.signature()); + let func = item.expect_function(); + func.name() == func_name + }) + .position(|m| m.signature() == self.id()) + }); + } + } + + None + }) + } + + /// Get this item's base name (aka non-namespaced name). + fn base_name(&self, ctx: &BindgenContext) -> String { + if let Some(path) = self.annotations().use_instead_of() { + return path.last().unwrap().clone(); + } + + match *self.kind() { + ItemKind::Var(ref var) => var.name().to_owned(), + ItemKind::Module(ref module) => { + module.name().map(ToOwned::to_owned).unwrap_or_else(|| { + format!("_bindgen_mod_{}", self.exposed_id(ctx)) + }) + } + ItemKind::Type(ref ty) => { + ty.sanitized_name(ctx).map(Into::into).unwrap_or_else(|| { + format!("_bindgen_ty_{}", self.exposed_id(ctx)) + }) + } + ItemKind::Function(ref fun) => { + let mut name = fun.name().to_owned(); + + if let Some(idx) = self.overload_index(ctx) { + if idx > 0 { + write!(&mut name, "{}", idx).unwrap(); + } + } + + name + } + } + } + + fn is_anon(&self) -> bool { + match self.kind() { + ItemKind::Module(module) => module.name().is_none(), + ItemKind::Type(ty) => ty.name().is_none(), + ItemKind::Function(_) => false, + ItemKind::Var(_) => false, + } + } + + /// Get the canonical name without taking into account the replaces + /// annotation. + /// + /// This is the base logic used to implement hiding and replacing via + /// annotations, and also to implement proper name mangling. 
/// Get the canonical name without taking into account the replaces
/// annotation.
///
/// This is the base logic used to implement hiding and replacing via
/// annotations, and also to implement proper name mangling.
///
/// The idea is that each generated type in the same "level" (read: module
/// or namespace) has a unique canonical name.
///
/// This name should be derived from the immutable state contained in the
/// type and the parent chain, since it should be consistent.
///
/// If `BindgenOptions::disable_nested_struct_naming` is true then the
/// returned name is the innermost non-anonymous name plus all the
/// anonymous base names that follow.
///
/// # Panics
///
/// Panics if the name target has a `use_instead_of` override with an empty
/// path while C++ namespaces are enabled.
pub fn real_canonical_name(
    &self,
    ctx: &BindgenContext,
    opt: &NameOptions,
) -> String {
    let target = ctx.resolve_item(self.name_target(ctx));

    // Short-circuit if the target has an override, and just use that: the
    // last path component with namespaces enabled, the whole path joined
    // with `_` otherwise.
    if let Some(path) = target.annotations.use_instead_of() {
        if ctx.options().enable_cxx_namespaces {
            return path.last().unwrap().clone();
        }
        return path.join("_");
    }

    let base_name = target.base_name(ctx);

    // Named template type arguments are never namespaced, and never
    // mangled.
    if target.is_template_param(ctx, &()) {
        return base_name;
    }

    // Lazy iterator over the ids of the target's ancestors, starting at
    // its parent and never including the root module.
    let mut ids_iter = target
        .parent_id()
        .ancestors(ctx)
        .filter(|id| *id != ctx.root_module())
        .take_while(|id| {
            // When opt.within_namespaces is set, stop at the first
            // ancestor that is a module (whether inline or not).
            !opt.within_namespaces || !ctx.resolve_item(*id).is_module()
        })
        .filter(|id| {
            // Unless inline namespaces are handled conservatively, inline
            // modules are skipped and contribute no name component.
            if !ctx.options().conservative_inline_namespaces {
                if let ItemKind::Module(ref module) =
                    *ctx.resolve_item(*id).kind()
                {
                    return !module.is_inline();
                }
            }

            true
        });

    let ids: Vec<_> = if ctx.options().disable_nested_struct_naming {
        let mut ids = Vec::new();

        // If target is anonymous we need to find its first named ancestor,
        // keeping every anonymous ancestor on the way. A named target gets
        // no ancestor components at all.
        if target.is_anon() {
            for id in ids_iter.by_ref() {
                ids.push(id);

                if !ctx.resolve_item(id).is_anon() {
                    break;
                }
            }
        }

        ids
    } else {
        ids_iter.collect()
    };

    // Concatenate this item's ancestors' names together, using each
    // ancestor's own name target and dropping empty names.
    let mut names: Vec<_> = ids
        .into_iter()
        .map(|id| {
            let item = ctx.resolve_item(id);
            let target = ctx.resolve_item(item.name_target(ctx));
            target.base_name(ctx)
        })
        .filter(|name| !name.is_empty())
        .collect();

    // Ancestors were collected starting from the parent; flip them so the
    // outermost one comes first.
    names.reverse();

    if !base_name.is_empty() {
        names.push(base_name);
    }

    // NOTE(review): the prefix is taken from `self`, not from `target` —
    // confirm this is intended when the two differ.
    if ctx.options().c_naming {
        if let Some(prefix) = self.c_naming_prefix() {
            names.insert(0, prefix.to_string());
        }
    }

    let name = names.join("_");

    // Let the last callback that returns a name for `item_name` override
    // the computed one, but only when user mangling was requested.
    let name = if opt.user_mangled == UserMangled::Yes {
        ctx.options()
            .last_callback(|callbacks| callbacks.item_name(&name))
            .unwrap_or(name)
    } else {
        name
    };

    ctx.rust_mangle(&name).into_owned()
}
+ pub fn as_module_mut(&mut self) -> Option<&mut Module> { + match self.kind { + ItemKind::Module(ref mut module) => Some(module), + _ => None, + } + } + + /// Returns whether the item is a constified module enum + fn is_constified_enum_module(&self, ctx: &BindgenContext) -> bool { + // Do not jump through aliases, except for aliases that point to a type + // with the same name, since we dont generate coe for them. + let item = self.id.into_resolver().through_type_refs().resolve(ctx); + let type_ = match *item.kind() { + ItemKind::Type(ref type_) => type_, + _ => return false, + }; + + match *type_.kind() { + TypeKind::Enum(ref enum_) => { + enum_.computed_enum_variation(ctx, self) == + EnumVariation::ModuleConsts + } + TypeKind::Alias(inner_id) => { + // TODO(emilio): Make this "hop through type aliases that aren't + // really generated" an option in `ItemResolver`? + let inner_item = ctx.resolve_item(inner_id); + let name = item.canonical_name(ctx); + + if inner_item.canonical_name(ctx) == name { + inner_item.is_constified_enum_module(ctx) + } else { + false + } + } + _ => false, + } + } + + /// Is this item of a kind that is enabled for code generation? + pub fn is_enabled_for_codegen(&self, ctx: &BindgenContext) -> bool { + let cc = &ctx.options().codegen_config; + match *self.kind() { + ItemKind::Module(..) => true, + ItemKind::Var(_) => cc.vars(), + ItemKind::Type(_) => cc.types(), + ItemKind::Function(ref f) => match f.kind() { + FunctionKind::Function => cc.functions(), + FunctionKind::Method(MethodKind::Constructor) => { + cc.constructors() + } + FunctionKind::Method(MethodKind::Destructor) | + FunctionKind::Method(MethodKind::VirtualDestructor { + .. + }) => cc.destructors(), + FunctionKind::Method(MethodKind::Static) | + FunctionKind::Method(MethodKind::Normal) | + FunctionKind::Method(MethodKind::Virtual { .. 
}) => { + cc.methods() + } + }, + } + } + + /// Returns the path we should use for allowlisting / blocklisting, which + /// doesn't include user-mangling. + pub fn path_for_allowlisting(&self, ctx: &BindgenContext) -> &Vec<String> { + self.path_for_allowlisting + .borrow_with(|| self.compute_path(ctx, UserMangled::No)) + } + + fn compute_path( + &self, + ctx: &BindgenContext, + mangled: UserMangled, + ) -> Vec<String> { + if let Some(path) = self.annotations().use_instead_of() { + let mut ret = + vec![ctx.resolve_item(ctx.root_module()).name(ctx).get()]; + ret.extend_from_slice(path); + return ret; + } + + let target = ctx.resolve_item(self.name_target(ctx)); + let mut path: Vec<_> = target + .ancestors(ctx) + .chain(iter::once(ctx.root_module().into())) + .map(|id| ctx.resolve_item(id)) + .filter(|item| { + item.id() == target.id() || + item.as_module().map_or(false, |module| { + !module.is_inline() || + ctx.options().conservative_inline_namespaces + }) + }) + .map(|item| { + ctx.resolve_item(item.name_target(ctx)) + .name(ctx) + .within_namespaces() + .user_mangled(mangled) + .get() + }) + .collect(); + path.reverse(); + path + } + + /// Returns a prefix for the canonical name when C naming is enabled. + fn c_naming_prefix(&self) -> Option<&str> { + let ty = match self.kind { + ItemKind::Type(ref ty) => ty, + _ => return None, + }; + + Some(match ty.kind() { + TypeKind::Comp(ref ci) => match ci.kind() { + CompKind::Struct => "struct", + CompKind::Union => "union", + }, + TypeKind::Enum(..) => "enum", + _ => return None, + }) + } + + /// Whether this is a #[must_use] type. 
+ pub fn must_use(&self, ctx: &BindgenContext) -> bool { + self.annotations().must_use_type() || ctx.must_use_type_by_name(self) + } +} + +impl<T> IsOpaque for T +where + T: Copy + Into<ItemId>, +{ + type Extra = (); + + fn is_opaque(&self, ctx: &BindgenContext, _: &()) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.resolve_item((*self).into()).is_opaque(ctx, &()) + } +} + +impl IsOpaque for Item { + type Extra = (); + + fn is_opaque(&self, ctx: &BindgenContext, _: &()) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + self.annotations.opaque() || + self.as_type().map_or(false, |ty| ty.is_opaque(ctx, self)) || + ctx.opaque_by_name(self.path_for_allowlisting(ctx)) + } +} + +impl<T> HasVtable for T +where + T: Copy + Into<ItemId>, +{ + fn has_vtable(&self, ctx: &BindgenContext) -> bool { + let id: ItemId = (*self).into(); + id.as_type_id(ctx).map_or(false, |id| { + !matches!(ctx.lookup_has_vtable(id), HasVtableResult::No) + }) + } + + fn has_vtable_ptr(&self, ctx: &BindgenContext) -> bool { + let id: ItemId = (*self).into(); + id.as_type_id(ctx).map_or(false, |id| { + matches!(ctx.lookup_has_vtable(id), HasVtableResult::SelfHasVtable) + }) + } +} + +impl HasVtable for Item { + fn has_vtable(&self, ctx: &BindgenContext) -> bool { + self.id().has_vtable(ctx) + } + + fn has_vtable_ptr(&self, ctx: &BindgenContext) -> bool { + self.id().has_vtable_ptr(ctx) + } +} + +impl<T> Sizedness for T +where + T: Copy + Into<ItemId>, +{ + fn sizedness(&self, ctx: &BindgenContext) -> SizednessResult { + let id: ItemId = (*self).into(); + id.as_type_id(ctx) + .map_or(SizednessResult::default(), |id| ctx.lookup_sizedness(id)) + } +} + +impl Sizedness for Item { + fn sizedness(&self, ctx: &BindgenContext) -> SizednessResult { + self.id().sizedness(ctx) + } +} + +impl<T> HasTypeParamInArray for T +where + T: Copy + Into<ItemId>, +{ + fn has_type_param_in_array(&self, ctx: 
&BindgenContext) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.lookup_has_type_param_in_array(*self) + } +} + +impl HasTypeParamInArray for Item { + fn has_type_param_in_array(&self, ctx: &BindgenContext) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.lookup_has_type_param_in_array(self.id()) + } +} + +impl<T> HasFloat for T +where + T: Copy + Into<ItemId>, +{ + fn has_float(&self, ctx: &BindgenContext) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.lookup_has_float(*self) + } +} + +impl HasFloat for Item { + fn has_float(&self, ctx: &BindgenContext) -> bool { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + ctx.lookup_has_float(self.id()) + } +} + +/// A set of items. +pub type ItemSet = BTreeSet<ItemId>; + +impl DotAttributes for Item { + fn dot_attributes<W>( + &self, + ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + writeln!( + out, + "<tr><td>{:?}</td></tr> + <tr><td>name</td><td>{}</td></tr>", + self.id, + self.name(ctx).get() + )?; + + if self.is_opaque(ctx, &()) { + writeln!(out, "<tr><td>opaque</td><td>true</td></tr>")?; + } + + self.kind.dot_attributes(ctx, out) + } +} + +impl<T> TemplateParameters for T +where + T: Copy + Into<ItemId>, +{ + fn self_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> { + ctx.resolve_item_fallible(*self) + .map_or(vec![], |item| item.self_template_params(ctx)) + } +} + +impl TemplateParameters for Item { + fn self_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> { + self.kind.self_template_params(ctx) + } +} + +impl TemplateParameters for ItemKind { + fn self_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> { + match *self { + ItemKind::Type(ref ty) => ty.self_template_params(ctx), + // If we start emitting bindings to explicitly 
instantiated + // functions, then we'll need to check ItemKind::Function for + // template params. + ItemKind::Function(_) | ItemKind::Module(_) | ItemKind::Var(_) => { + vec![] + } + } + } +} + +// An utility function to handle recursing inside nested types. +fn visit_child( + cur: clang::Cursor, + id: ItemId, + ty: &clang::Type, + parent_id: Option<ItemId>, + ctx: &mut BindgenContext, + result: &mut Result<TypeId, ParseError>, +) -> clang_sys::CXChildVisitResult { + use clang_sys::*; + if result.is_ok() { + return CXChildVisit_Break; + } + + *result = Item::from_ty_with_id(id, ty, cur, parent_id, ctx); + + match *result { + Ok(..) => CXChildVisit_Break, + Err(ParseError::Recurse) => { + cur.visit(|c| visit_child(c, id, ty, parent_id, ctx, result)); + CXChildVisit_Continue + } + Err(ParseError::Continue) => CXChildVisit_Continue, + } +} + +impl ClangItemParser for Item { + fn builtin_type( + kind: TypeKind, + is_const: bool, + ctx: &mut BindgenContext, + ) -> TypeId { + // Feel free to add more here, I'm just lazy. + match kind { + TypeKind::Void | + TypeKind::Int(..) | + TypeKind::Pointer(..) | + TypeKind::Float(..) => {} + _ => panic!("Unsupported builtin type"), + } + + let ty = Type::new(None, None, kind, is_const); + let id = ctx.next_item_id(); + let module = ctx.root_module().into(); + ctx.add_item( + Item::new(id, None, None, module, ItemKind::Type(ty), None), + None, + None, + ); + id.as_type_id_unchecked() + } + + fn parse( + cursor: clang::Cursor, + parent_id: Option<ItemId>, + ctx: &mut BindgenContext, + ) -> Result<ItemId, ParseError> { + use crate::ir::var::Var; + use clang_sys::*; + + if !cursor.is_valid() { + return Err(ParseError::Continue); + } + + let comment = cursor.raw_comment(); + let annotations = Annotations::new(&cursor); + + let current_module = ctx.current_module().into(); + let relevant_parent_id = parent_id.unwrap_or(current_module); + + macro_rules! 
try_parse { + ($what:ident) => { + match $what::parse(cursor, ctx) { + Ok(ParseResult::New(item, declaration)) => { + let id = ctx.next_item_id(); + + ctx.add_item( + Item::new( + id, + comment, + annotations, + relevant_parent_id, + ItemKind::$what(item), + Some(cursor.location()), + ), + declaration, + Some(cursor), + ); + return Ok(id); + } + Ok(ParseResult::AlreadyResolved(id)) => { + return Ok(id); + } + Err(ParseError::Recurse) => return Err(ParseError::Recurse), + Err(ParseError::Continue) => {} + } + }; + } + + try_parse!(Module); + + // NOTE: Is extremely important to parse functions and vars **before** + // types. Otherwise we can parse a function declaration as a type + // (which is legal), and lose functions to generate. + // + // In general, I'm not totally confident this split between + // ItemKind::Function and TypeKind::FunctionSig is totally worth it, but + // I guess we can try. + try_parse!(Function); + try_parse!(Var); + + // Types are sort of special, so to avoid parsing template classes + // twice, handle them separately. + { + let definition = cursor.definition(); + let applicable_cursor = definition.unwrap_or(cursor); + + let relevant_parent_id = match definition { + Some(definition) => { + if definition != cursor { + ctx.add_semantic_parent(definition, relevant_parent_id); + return Ok(Item::from_ty_or_ref( + applicable_cursor.cur_type(), + cursor, + parent_id, + ctx, + ) + .into()); + } + ctx.known_semantic_parent(definition) + .or(parent_id) + .unwrap_or_else(|| ctx.current_module().into()) + } + None => relevant_parent_id, + }; + + match Item::from_ty( + &applicable_cursor.cur_type(), + applicable_cursor, + Some(relevant_parent_id), + ctx, + ) { + Ok(ty) => return Ok(ty.into()), + Err(ParseError::Recurse) => return Err(ParseError::Recurse), + Err(ParseError::Continue) => {} + } + } + + // Guess how does clang treat extern "C" blocks? 
+ if cursor.kind() == CXCursor_UnexposedDecl { + Err(ParseError::Recurse) + } else { + // We allowlist cursors here known to be unhandled, to prevent being + // too noisy about this. + match cursor.kind() { + CXCursor_MacroDefinition | + CXCursor_MacroExpansion | + CXCursor_UsingDeclaration | + CXCursor_UsingDirective | + CXCursor_StaticAssert | + CXCursor_FunctionTemplate => { + debug!( + "Unhandled cursor kind {:?}: {:?}", + cursor.kind(), + cursor + ); + } + CXCursor_InclusionDirective => { + let file = cursor.get_included_file_name(); + match file { + None => { + warn!( + "Inclusion of a nameless file in {:?}", + cursor + ); + } + Some(filename) => { + ctx.include_file(filename); + } + } + } + _ => { + // ignore toplevel operator overloads + let spelling = cursor.spelling(); + if !spelling.starts_with("operator") { + warn!( + "Unhandled cursor kind {:?}: {:?}", + cursor.kind(), + cursor + ); + } + } + } + + Err(ParseError::Continue) + } + } + + fn from_ty_or_ref( + ty: clang::Type, + location: clang::Cursor, + parent_id: Option<ItemId>, + ctx: &mut BindgenContext, + ) -> TypeId { + let id = ctx.next_item_id(); + Self::from_ty_or_ref_with_id(id, ty, location, parent_id, ctx) + } + + /// Parse a C++ type. If we find a reference to a type that has not been + /// defined yet, use `UnresolvedTypeRef` as a placeholder. + /// + /// This logic is needed to avoid parsing items with the incorrect parent + /// and it's sort of complex to explain, so I'll just point to + /// `tests/headers/typeref.hpp` to see the kind of constructs that forced + /// this. + /// + /// Typerefs are resolved once parsing is completely done, see + /// `BindgenContext::resolve_typerefs`. 
+ fn from_ty_or_ref_with_id( + potential_id: ItemId, + ty: clang::Type, + location: clang::Cursor, + parent_id: Option<ItemId>, + ctx: &mut BindgenContext, + ) -> TypeId { + debug!( + "from_ty_or_ref_with_id: {:?} {:?}, {:?}, {:?}", + potential_id, ty, location, parent_id + ); + + if ctx.collected_typerefs() { + debug!("refs already collected, resolving directly"); + return Item::from_ty_with_id( + potential_id, + &ty, + location, + parent_id, + ctx, + ) + .unwrap_or_else(|_| Item::new_opaque_type(potential_id, &ty, ctx)); + } + + if let Some(ty) = ctx.builtin_or_resolved_ty( + potential_id, + parent_id, + &ty, + Some(location), + ) { + debug!("{:?} already resolved: {:?}", ty, location); + return ty; + } + + debug!("New unresolved type reference: {:?}, {:?}", ty, location); + + let is_const = ty.is_const(); + let kind = TypeKind::UnresolvedTypeRef(ty, location, parent_id); + let current_module = ctx.current_module(); + + ctx.add_item( + Item::new( + potential_id, + None, + None, + parent_id.unwrap_or_else(|| current_module.into()), + ItemKind::Type(Type::new(None, None, kind, is_const)), + Some(location.location()), + ), + None, + None, + ); + potential_id.as_type_id_unchecked() + } + + fn from_ty( + ty: &clang::Type, + location: clang::Cursor, + parent_id: Option<ItemId>, + ctx: &mut BindgenContext, + ) -> Result<TypeId, ParseError> { + let id = ctx.next_item_id(); + Item::from_ty_with_id(id, ty, location, parent_id, ctx) + } + + /// This is one of the trickiest methods you'll find (probably along with + /// some of the ones that handle templates in `BindgenContext`). + /// + /// This method parses a type, given the potential id of that type (if + /// parsing it was correct), an optional location we're scanning, which is + /// critical some times to obtain information, an optional parent item id, + /// that will, if it's `None`, become the current module id, and the + /// context. 
+ fn from_ty_with_id( + id: ItemId, + ty: &clang::Type, + location: clang::Cursor, + parent_id: Option<ItemId>, + ctx: &mut BindgenContext, + ) -> Result<TypeId, ParseError> { + use clang_sys::*; + + debug!( + "Item::from_ty_with_id: {:?}\n\ + \tty = {:?},\n\ + \tlocation = {:?}", + id, ty, location + ); + + if ty.kind() == clang_sys::CXType_Unexposed || + location.cur_type().kind() == clang_sys::CXType_Unexposed + { + if ty.is_associated_type() || + location.cur_type().is_associated_type() + { + return Ok(Item::new_opaque_type(id, ty, ctx)); + } + + if let Some(param_id) = Item::type_param(None, location, ctx) { + return Ok(ctx.build_ty_wrapper(id, param_id, None, ty)); + } + } + + // Treat all types that are declared inside functions as opaque. The Rust binding + // won't be able to do anything with them anyway. + // + // (If we don't do this check here, we can have subtle logic bugs because we generally + // ignore function bodies. See issue #2036.) + if let Some(ref parent) = ty.declaration().fallible_semantic_parent() { + if FunctionKind::from_cursor(parent).is_some() { + debug!("Skipping type declared inside function: {:?}", ty); + return Ok(Item::new_opaque_type(id, ty, ctx)); + } + } + + let decl = { + let canonical_def = ty.canonical_type().declaration().definition(); + canonical_def.unwrap_or_else(|| ty.declaration()) + }; + + let comment = decl.raw_comment().or_else(|| location.raw_comment()); + let annotations = + Annotations::new(&decl).or_else(|| Annotations::new(&location)); + + if let Some(ref annotations) = annotations { + if let Some(replaced) = annotations.use_instead_of() { + ctx.replace(replaced, id); + } + } + + if let Some(ty) = + ctx.builtin_or_resolved_ty(id, parent_id, ty, Some(location)) + { + return Ok(ty); + } + + // First, check we're not recursing. 
+ let mut valid_decl = decl.kind() != CXCursor_NoDeclFound; + let declaration_to_look_for = if valid_decl { + decl.canonical() + } else if location.kind() == CXCursor_ClassTemplate { + valid_decl = true; + location + } else { + decl + }; + + if valid_decl { + if let Some(partial) = ctx + .currently_parsed_types() + .iter() + .find(|ty| *ty.decl() == declaration_to_look_for) + { + debug!("Avoiding recursion parsing type: {:?}", ty); + // Unchecked because we haven't finished this type yet. + return Ok(partial.id().as_type_id_unchecked()); + } + } + + let current_module = ctx.current_module().into(); + let partial_ty = PartialType::new(declaration_to_look_for, id); + if valid_decl { + ctx.begin_parsing(partial_ty); + } + + let result = Type::from_clang_ty(id, ty, location, parent_id, ctx); + let relevant_parent_id = parent_id.unwrap_or(current_module); + let ret = match result { + Ok(ParseResult::AlreadyResolved(ty)) => { + Ok(ty.as_type_id_unchecked()) + } + Ok(ParseResult::New(item, declaration)) => { + ctx.add_item( + Item::new( + id, + comment, + annotations, + relevant_parent_id, + ItemKind::Type(item), + Some(location.location()), + ), + declaration, + Some(location), + ); + Ok(id.as_type_id_unchecked()) + } + Err(ParseError::Continue) => Err(ParseError::Continue), + Err(ParseError::Recurse) => { + debug!("Item::from_ty recursing in the ast"); + let mut result = Err(ParseError::Recurse); + + // Need to pop here, otherwise we'll get stuck. + // + // TODO: Find a nicer interface, really. Also, the + // declaration_to_look_for suspiciously shares a lot of + // logic with ir::context, so we should refactor that. 
+ if valid_decl { + let finished = ctx.finish_parsing(); + assert_eq!(*finished.decl(), declaration_to_look_for); + } + + location.visit(|cur| { + visit_child(cur, id, ty, parent_id, ctx, &mut result) + }); + + if valid_decl { + let partial_ty = + PartialType::new(declaration_to_look_for, id); + ctx.begin_parsing(partial_ty); + } + + // If we have recursed into the AST all we know, and we still + // haven't found what we've got, let's just try and make a named + // type. + // + // This is what happens with some template members, for example. + if let Err(ParseError::Recurse) = result { + warn!( + "Unknown type, assuming named template type: \ + id = {:?}; spelling = {}", + id, + ty.spelling() + ); + Item::type_param(Some(id), location, ctx) + .map(Ok) + .unwrap_or(Err(ParseError::Recurse)) + } else { + result + } + } + }; + + if valid_decl { + let partial_ty = ctx.finish_parsing(); + assert_eq!(*partial_ty.decl(), declaration_to_look_for); + } + + ret + } + + /// A named type is a template parameter, e.g., the "T" in Foo<T>. They're + /// always local so it's the only exception when there's no declaration for + /// a type. + fn type_param( + with_id: Option<ItemId>, + location: clang::Cursor, + ctx: &mut BindgenContext, + ) -> Option<TypeId> { + let ty = location.cur_type(); + + debug!( + "Item::type_param:\n\ + \twith_id = {:?},\n\ + \tty = {} {:?},\n\ + \tlocation: {:?}", + with_id, + ty.spelling(), + ty, + location + ); + + if ty.kind() != clang_sys::CXType_Unexposed { + // If the given cursor's type's kind is not Unexposed, then we + // aren't looking at a template parameter. This check may need to be + // updated in the future if they start properly exposing template + // type parameters. + return None; + } + + let ty_spelling = ty.spelling(); + + // Clang does not expose any information about template type parameters + // via their clang::Type, nor does it give us their canonical cursors + // the straightforward way. 
However, there are three situations from + // which we can find the definition of the template type parameter, if + // the cursor is indeed looking at some kind of a template type + // parameter or use of one: + // + // 1. The cursor is pointing at the template type parameter's + // definition. This is the trivial case. + // + // (kind = TemplateTypeParameter, ...) + // + // 2. The cursor is pointing at a TypeRef whose referenced() cursor is + // situation (1). + // + // (kind = TypeRef, + // referenced = (kind = TemplateTypeParameter, ...), + // ...) + // + // 3. The cursor is pointing at some use of a template type parameter + // (for example, in a FieldDecl), and this cursor has a child cursor + // whose spelling is the same as the parent's type's spelling, and whose + // kind is a TypeRef of the situation (2) variety. + // + // (kind = FieldDecl, + // type = (kind = Unexposed, + // spelling = "T", + // ...), + // children = + // (kind = TypeRef, + // spelling = "T", + // referenced = (kind = TemplateTypeParameter, + // spelling = "T", + // ...), + // ...) + // ...) + // + // TODO: The alternative to this hacky pattern matching would be to + // maintain proper scopes of template parameters while parsing and use + // de Bruijn indices to access template parameters, which clang exposes + // in the cursor's type's canonical type's spelling: + // "type-parameter-x-y". That is probably a better approach long-term, + // but maintaining these scopes properly would require more changes to + // the whole libclang -> IR parsing code. + + fn is_template_with_spelling( + refd: &clang::Cursor, + spelling: &str, + ) -> bool { + lazy_static! { + static ref ANON_TYPE_PARAM_RE: regex::Regex = + regex::Regex::new(r"^type\-parameter\-\d+\-\d+$").unwrap(); + } + + if refd.kind() != clang_sys::CXCursor_TemplateTypeParameter { + return false; + } + + let refd_spelling = refd.spelling(); + refd_spelling == spelling || + // Allow for anonymous template parameters. 
+ (refd_spelling.is_empty() && ANON_TYPE_PARAM_RE.is_match(spelling.as_ref())) + } + + let definition = if is_template_with_spelling(&location, &ty_spelling) { + // Situation (1) + location + } else if location.kind() == clang_sys::CXCursor_TypeRef { + // Situation (2) + match location.referenced() { + Some(refd) + if is_template_with_spelling(&refd, &ty_spelling) => + { + refd + } + _ => return None, + } + } else { + // Situation (3) + let mut definition = None; + + location.visit(|child| { + let child_ty = child.cur_type(); + // NOTE(review): `child_ty.kind()` is a *type* kind, yet it is compared + // with the *cursor*-kind constant `CXCursor_TypeRef`; situation (3) + // talks about a child cursor of kind TypeRef, so `child.kind()` may + // have been intended here -- TODO confirm before changing. + if child_ty.kind() == clang_sys::CXCursor_TypeRef && + child_ty.spelling() == ty_spelling + { + match child.referenced() { + Some(refd) + if is_template_with_spelling( + &refd, + &ty_spelling, + ) => + { + definition = Some(refd); + return clang_sys::CXChildVisit_Break; + } + _ => {} + } + } + + clang_sys::CXChildVisit_Continue + }); + + definition? + }; + assert!(is_template_with_spelling(&definition, &ty_spelling)); + + // Named types are always parented to the root module. They are never + // referenced with namespace prefixes, and they can't inherit anything + // from their parent either, so it is simplest to just hang them off + // something we know will always exist. + let parent = ctx.root_module().into(); + + if let Some(id) = ctx.get_type_param(&definition) { + if let Some(with_id) = with_id { + return Some(ctx.build_ty_wrapper( + with_id, + id, + Some(parent), + &ty, + )); + } else { + return Some(id); + } + } + + // See tests/headers/const_tparam.hpp and + // tests/headers/variadic_tname.hpp. 
+ let name = ty_spelling.replace("const ", "").replace('.', ""); + + let id = with_id.unwrap_or_else(|| ctx.next_item_id()); + let item = Item::new( + id, + None, + None, + parent, + ItemKind::Type(Type::named(name)), + Some(location.location()), + ); + ctx.add_type_param(item, definition); + Some(id.as_type_id_unchecked()) + } +} + +impl ItemCanonicalName for Item { + fn canonical_name(&self, ctx: &BindgenContext) -> String { + debug_assert!( + ctx.in_codegen_phase(), + "You're not supposed to call this yet" + ); + self.canonical_name + .borrow_with(|| { + let in_namespace = ctx.options().enable_cxx_namespaces || + ctx.options().disable_name_namespacing; + + if in_namespace { + self.name(ctx).within_namespaces().get() + } else { + self.name(ctx).get() + } + }) + .clone() + } +} + +impl ItemCanonicalPath for Item { + fn namespace_aware_canonical_path( + &self, + ctx: &BindgenContext, + ) -> Vec<String> { + let mut path = self.canonical_path(ctx); + + // ASSUMPTION: (disable_name_namespacing && cxx_namespaces) + // is equivalent to + // disable_name_namespacing + if ctx.options().disable_name_namespacing { + // Only keep the last item in path + let split_idx = path.len() - 1; + path = path.split_off(split_idx); + } else if !ctx.options().enable_cxx_namespaces { + // Ignore first item "root" + path = vec![path[1..].join("_")]; + } + + if self.is_constified_enum_module(ctx) { + path.push(CONSTIFIED_ENUM_MODULE_REPR_NAME.into()); + } + + path + } + + fn canonical_path(&self, ctx: &BindgenContext) -> Vec<String> { + self.compute_path(ctx, UserMangled::Yes) + } +} + +/// Whether to use the user-mangled name (mangled by the `item_name` callback) or +/// not. +/// +/// Most of the callers probably want just yes, but the ones dealing with +/// allowlisting and blocklisting don't. +#[derive(Copy, Clone, Debug, PartialEq)] +enum UserMangled { + No, + Yes, +} + +/// Builder struct for naming variations, which hold inside different +/// flags for naming options. 
+#[derive(Debug)] +pub struct NameOptions<'a> { + item: &'a Item, + ctx: &'a BindgenContext, + within_namespaces: bool, + user_mangled: UserMangled, +} + +impl<'a> NameOptions<'a> { + /// Construct a new `NameOptions` + pub fn new(item: &'a Item, ctx: &'a BindgenContext) -> Self { + NameOptions { + item, + ctx, + within_namespaces: false, + user_mangled: UserMangled::Yes, + } + } + + /// Construct the name without the item's containing C++ namespaces mangled + /// into it. In other words, the item's name within the item's namespace. + pub fn within_namespaces(&mut self) -> &mut Self { + self.within_namespaces = true; + self + } + + fn user_mangled(&mut self, user_mangled: UserMangled) -> &mut Self { + self.user_mangled = user_mangled; + self + } + + /// Construct a name `String` + pub fn get(&self) -> String { + self.item.real_canonical_name(self.ctx, self) + } +} diff --git a/third_party/rust/bindgen/ir/item_kind.rs b/third_party/rust/bindgen/ir/item_kind.rs new file mode 100644 index 0000000000..4a12fef40d --- /dev/null +++ b/third_party/rust/bindgen/ir/item_kind.rs @@ -0,0 +1,147 @@ +//! Different variants of an `Item` in our intermediate representation. + +use super::context::BindgenContext; +use super::dot::DotAttributes; +use super::function::Function; +use super::module::Module; +use super::ty::Type; +use super::var::Var; +use std::io; + +/// An item we parse and translate. +#[derive(Debug)] +pub enum ItemKind { + /// A module, created implicitly once (the root module), or via C++ + /// namespaces. + Module(Module), + + /// A type declared in any of the multiple ways it can be declared. + Type(Type), + + /// A function or method declaration. + Function(Function), + + /// A variable declaration, most likely a static. + Var(Var), +} + +impl ItemKind { + /// Get a reference to this `ItemKind`'s underlying `Module`, or `None` if it + /// is some other kind. 
+ pub fn as_module(&self) -> Option<&Module> { + match *self { + ItemKind::Module(ref module) => Some(module), + _ => None, + } + } + + /// Transform our `ItemKind` into a string. + pub fn kind_name(&self) -> &'static str { + match *self { + ItemKind::Module(..) => "Module", + ItemKind::Type(..) => "Type", + ItemKind::Function(..) => "Function", + ItemKind::Var(..) => "Var", + } + } + + /// Is this a module? + pub fn is_module(&self) -> bool { + self.as_module().is_some() + } + + /// Get a reference to this `ItemKind`'s underlying `Module`, or panic if it + /// is some other kind. + pub fn expect_module(&self) -> &Module { + self.as_module().expect("Not a module") + } + + /// Get a reference to this `ItemKind`'s underlying `Function`, or `None` if + /// it is some other kind. + pub fn as_function(&self) -> Option<&Function> { + match *self { + ItemKind::Function(ref func) => Some(func), + _ => None, + } + } + + /// Is this a function? + pub fn is_function(&self) -> bool { + self.as_function().is_some() + } + + /// Get a reference to this `ItemKind`'s underlying `Function`, or panic if + /// it is some other kind. + pub fn expect_function(&self) -> &Function { + self.as_function().expect("Not a function") + } + + /// Get a reference to this `ItemKind`'s underlying `Type`, or `None` if + /// it is some other kind. + pub fn as_type(&self) -> Option<&Type> { + match *self { + ItemKind::Type(ref ty) => Some(ty), + _ => None, + } + } + + /// Get a mutable reference to this `ItemKind`'s underlying `Type`, or `None` + /// if it is some other kind. + pub fn as_type_mut(&mut self) -> Option<&mut Type> { + match *self { + ItemKind::Type(ref mut ty) => Some(ty), + _ => None, + } + } + + /// Is this a type? + pub fn is_type(&self) -> bool { + self.as_type().is_some() + } + + /// Get a reference to this `ItemKind`'s underlying `Type`, or panic if it is + /// some other kind. 
+ pub fn expect_type(&self) -> &Type { + self.as_type().expect("Not a type") + } + + /// Get a reference to this `ItemKind`'s underlying `Var`, or `None` if it is + /// some other kind. + pub fn as_var(&self) -> Option<&Var> { + match *self { + ItemKind::Var(ref v) => Some(v), + _ => None, + } + } + + /// Is this a variable? + pub fn is_var(&self) -> bool { + self.as_var().is_some() + } + + /// Get a reference to this `ItemKind`'s underlying `Var`, or panic if it is + /// some other kind. + pub fn expect_var(&self) -> &Var { + self.as_var().expect("Not a var") + } +} + +impl DotAttributes for ItemKind { + fn dot_attributes<W>( + &self, + ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + writeln!(out, "<tr><td>kind</td><td>{}</td></tr>", self.kind_name())?; + + match *self { + ItemKind::Module(ref module) => module.dot_attributes(ctx, out), + ItemKind::Type(ref ty) => ty.dot_attributes(ctx, out), + ItemKind::Function(ref func) => func.dot_attributes(ctx, out), + ItemKind::Var(ref var) => var.dot_attributes(ctx, out), + } + } +} diff --git a/third_party/rust/bindgen/ir/layout.rs b/third_party/rust/bindgen/ir/layout.rs new file mode 100644 index 0000000000..6f4503070a --- /dev/null +++ b/third_party/rust/bindgen/ir/layout.rs @@ -0,0 +1,143 @@ +//! Intermediate representation for the physical layout of some type. + +use super::derive::CanDerive; +use super::ty::{Type, TypeKind, RUST_DERIVE_IN_ARRAY_LIMIT}; +use crate::clang; +use crate::ir::context::BindgenContext; +use std::cmp; + +/// A type that represents the struct layout of a type. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct Layout { + /// The size (in bytes) of this layout. + pub size: usize, + /// The alignment (in bytes) of this layout. + pub align: usize, + /// Whether this layout's members are packed or not. 
+ pub packed: bool, +} + +#[test] +fn test_layout_for_size() { + use std::mem; + + let ptr_size = mem::size_of::<*mut ()>(); + assert_eq!( + Layout::for_size_internal(ptr_size, ptr_size), + Layout::new(ptr_size, ptr_size) + ); + assert_eq!( + Layout::for_size_internal(ptr_size, 3 * ptr_size), + Layout::new(3 * ptr_size, ptr_size) + ); +} + +impl Layout { + /// Gets the integer type name for a given known size. + pub fn known_type_for_size( + ctx: &BindgenContext, + size: usize, + ) -> Option<&'static str> { + Some(match size { + 16 if ctx.options().rust_features.i128_and_u128 => "u128", + 8 => "u64", + 4 => "u32", + 2 => "u16", + 1 => "u8", + _ => return None, + }) + } + + /// Construct a new `Layout` with the given `size` and `align`. It is not + /// packed. + pub fn new(size: usize, align: usize) -> Self { + Layout { + size, + align, + packed: false, + } + } + + fn for_size_internal(ptr_size: usize, size: usize) -> Self { + let mut next_align = 2; + while size % next_align == 0 && next_align <= ptr_size { + next_align *= 2; + } + Layout { + size, + align: next_align / 2, + packed: false, + } + } + + /// Creates a non-packed layout for a given size, trying to use the maximum + /// alignment possible. + pub fn for_size(ctx: &BindgenContext, size: usize) -> Self { + Self::for_size_internal(ctx.target_pointer_size(), size) + } + + /// Is this a zero-sized layout? + pub fn is_zero(&self) -> bool { + self.size == 0 && self.align == 0 + } + + /// Construct a zero-sized layout. + pub fn zero() -> Self { + Self::new(0, 0) + } + + /// Get this layout as an opaque type. + pub fn opaque(&self) -> Opaque { + Opaque(*self) + } +} + +/// When we are treating a type as opaque, it is just a blob with a `Layout`. +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct Opaque(pub Layout); + +impl Opaque { + /// Construct a new opaque type from the given clang type. 
+ pub fn from_clang_ty(ty: &clang::Type, ctx: &BindgenContext) -> Type { + let layout = Layout::new(ty.size(ctx), ty.align(ctx)); + let ty_kind = TypeKind::Opaque; + let is_const = ty.is_const(); + Type::new(None, Some(layout), ty_kind, is_const) + } + + /// Return the known rust type we should use to create a correctly-aligned + /// field with this layout. + pub fn known_rust_type_for_array( + &self, + ctx: &BindgenContext, + ) -> Option<&'static str> { + Layout::known_type_for_size(ctx, self.0.align) + } + + /// Return the array size that an opaque type for this layout should have if + /// we know the correct type for it, or `None` otherwise. + pub fn array_size(&self, ctx: &BindgenContext) -> Option<usize> { + if self.known_rust_type_for_array(ctx).is_some() { + Some(self.0.size / cmp::max(self.0.align, 1)) + } else { + None + } + } + + /// Return `true` if this opaque layout's array size will fit within the + /// maximum number of array elements that Rust allows deriving traits + /// with. Return `false` otherwise. + pub fn array_size_within_derive_limit( + &self, + ctx: &BindgenContext, + ) -> CanDerive { + if self + .array_size(ctx) + .map_or(false, |size| size <= RUST_DERIVE_IN_ARRAY_LIMIT) + { + CanDerive::Yes + } else { + CanDerive::Manually + } + } +} diff --git a/third_party/rust/bindgen/ir/mod.rs b/third_party/rust/bindgen/ir/mod.rs new file mode 100644 index 0000000000..8f6a2dac88 --- /dev/null +++ b/third_party/rust/bindgen/ir/mod.rs @@ -0,0 +1,24 @@ +//! The ir module defines bindgen's intermediate representation. +//! +//! Parsing C/C++ generates the IR, while code generation outputs Rust code from +//! the IR. 
+ +pub mod analysis; +pub mod annotations; +pub mod comment; +pub mod comp; +pub mod context; +pub mod derive; +pub mod dot; +pub mod enum_ty; +pub mod function; +pub mod int; +pub mod item; +pub mod item_kind; +pub mod layout; +pub mod module; +pub mod objc; +pub mod template; +pub mod traversal; +pub mod ty; +pub mod var; diff --git a/third_party/rust/bindgen/ir/module.rs b/third_party/rust/bindgen/ir/module.rs new file mode 100644 index 0000000000..d5aca94a6e --- /dev/null +++ b/third_party/rust/bindgen/ir/module.rs @@ -0,0 +1,95 @@ +//! Intermediate representation for modules (AKA C++ namespaces). + +use super::context::BindgenContext; +use super::dot::DotAttributes; +use super::item::ItemSet; +use crate::clang; +use crate::parse::{ClangSubItemParser, ParseError, ParseResult}; +use crate::parse_one; +use std::io; + +/// Whether this module is inline or not. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ModuleKind { + /// This module is not inline. + Normal, + /// This module is inline, as in `inline namespace foo {}`. + Inline, +} + +/// A module, as in, a C++ namespace. +#[derive(Clone, Debug)] +pub struct Module { + /// The name of the module, or none if it's anonymous. + name: Option<String>, + /// The kind of module this is. + kind: ModuleKind, + /// The children of this module, just here for convenience. + children: ItemSet, +} + +impl Module { + /// Construct a new `Module`. + pub fn new(name: Option<String>, kind: ModuleKind) -> Self { + Module { + name, + kind, + children: ItemSet::new(), + } + } + + /// Get this module's name. + pub fn name(&self) -> Option<&str> { + self.name.as_deref() + } + + /// Get a mutable reference to this module's children. + pub fn children_mut(&mut self) -> &mut ItemSet { + &mut self.children + } + + /// Get this module's children. + pub fn children(&self) -> &ItemSet { + &self.children + } + + /// Whether this namespace is inline. 
+ pub fn is_inline(&self) -> bool { + self.kind == ModuleKind::Inline + } +} + +impl DotAttributes for Module { + fn dot_attributes<W>( + &self, + _ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + writeln!(out, "<tr><td>ModuleKind</td><td>{:?}</td></tr>", self.kind) + } +} + +impl ClangSubItemParser for Module { + fn parse( + cursor: clang::Cursor, + ctx: &mut BindgenContext, + ) -> Result<ParseResult<Self>, ParseError> { + use clang_sys::*; + match cursor.kind() { + CXCursor_Namespace => { + let module_id = ctx.module(cursor); + ctx.with_module(module_id, |ctx| { + cursor.visit(|cursor| { + parse_one(ctx, cursor, Some(module_id.into())) + }) + }); + + Ok(ParseResult::AlreadyResolved(module_id.into())) + } + _ => Err(ParseError::Continue), + } + } +} diff --git a/third_party/rust/bindgen/ir/objc.rs b/third_party/rust/bindgen/ir/objc.rs new file mode 100644 index 0000000000..0845ad0fde --- /dev/null +++ b/third_party/rust/bindgen/ir/objc.rs @@ -0,0 +1,329 @@ +//! 
Objective C types + +use super::context::{BindgenContext, ItemId}; +use super::function::FunctionSig; +use super::item::Item; +use super::traversal::{Trace, Tracer}; +use super::ty::TypeKind; +use crate::clang; +use crate::parse::ClangItemParser; +use clang_sys::CXChildVisit_Continue; +use clang_sys::CXCursor_ObjCCategoryDecl; +use clang_sys::CXCursor_ObjCClassMethodDecl; +use clang_sys::CXCursor_ObjCClassRef; +use clang_sys::CXCursor_ObjCInstanceMethodDecl; +use clang_sys::CXCursor_ObjCProtocolDecl; +use clang_sys::CXCursor_ObjCProtocolRef; +use clang_sys::CXCursor_ObjCSuperClassRef; +use clang_sys::CXCursor_TemplateTypeParameter; +use proc_macro2::{Ident, Span, TokenStream}; + +/// Objective C interface as used in TypeKind +/// +/// Also protocols and categories are parsed as this type +#[derive(Debug)] +pub struct ObjCInterface { + /// The name + /// like, NSObject + name: String, + + category: Option<String>, + + is_protocol: bool, + + /// The list of template names, almost always ObjectType or KeyType + pub template_names: Vec<String>, + + /// The list of protocols that this interface conforms to. + pub conforms_to: Vec<ItemId>, + + /// The direct parent for this interface. + pub parent_class: Option<ItemId>, + + /// List of the methods defined in this interface + methods: Vec<ObjCMethod>, + + class_methods: Vec<ObjCMethod>, +} + +/// The objective c methods +#[derive(Debug)] +pub struct ObjCMethod { + /// The original method selector name + /// like, dataWithBytes:length: + name: String, + + /// Method name as converted to rust + /// like, dataWithBytes_length_ + rust_name: String, + + signature: FunctionSig, + + /// Is class method? 
+ is_class_method: bool, +} + +impl ObjCInterface { + fn new(name: &str) -> ObjCInterface { + ObjCInterface { + name: name.to_owned(), + category: None, + is_protocol: false, + template_names: Vec::new(), + parent_class: None, + conforms_to: Vec::new(), + methods: Vec::new(), + class_methods: Vec::new(), + } + } + + /// The name + /// like, NSObject + pub fn name(&self) -> &str { + self.name.as_ref() + } + + /// Formats the name for rust + /// Plain interfaces are like INSObject, categories are like NSObject_NSCoderMethods + /// and protocols are like PNSObject + pub fn rust_name(&self) -> String { + if let Some(ref cat) = self.category { + format!("{}_{}", self.name(), cat) + } else if self.is_protocol { + format!("P{}", self.name()) + } else { + format!("I{}", self.name()) + } + } + + /// Is this a template interface? + pub fn is_template(&self) -> bool { + !self.template_names.is_empty() + } + + /// List of the methods defined in this interface + pub fn methods(&self) -> &Vec<ObjCMethod> { + &self.methods + } + + /// Is this a protocol? + pub fn is_protocol(&self) -> bool { + self.is_protocol + } + + /// Is this a category? 
+ pub fn is_category(&self) -> bool { + self.category.is_some() + } + + /// List of the class methods defined in this interface + pub fn class_methods(&self) -> &Vec<ObjCMethod> { + &self.class_methods + } + + /// Parses the Objective C interface from the cursor + pub fn from_ty( + cursor: &clang::Cursor, + ctx: &mut BindgenContext, + ) -> Option<Self> { + let name = cursor.spelling(); + let mut interface = Self::new(&name); + + if cursor.kind() == CXCursor_ObjCProtocolDecl { + interface.is_protocol = true; + } + + cursor.visit(|c| { + match c.kind() { + CXCursor_ObjCClassRef => { + if cursor.kind() == CXCursor_ObjCCategoryDecl { + // We are actually a category extension, and we found the reference + // to the original interface, so name this interface appropriately + interface.name = c.spelling(); + interface.category = Some(cursor.spelling()); + } + } + CXCursor_ObjCProtocolRef => { + // Gather protocols this interface conforms to + let needle = format!("P{}", c.spelling()); + let items_map = ctx.items(); + debug!( + "Interface {} conforms to {}, find the item", + interface.name, needle + ); + + for (id, item) in items_map { + if let Some(ty) = item.as_type() { + if let TypeKind::ObjCInterface(ref protocol) = + *ty.kind() + { + if protocol.is_protocol { + debug!( + "Checking protocol {}, ty.name {:?}", + protocol.name, + ty.name() + ); + if Some(needle.as_ref()) == ty.name() { + debug!( + "Found conforming protocol {:?}", + item + ); + interface.conforms_to.push(id); + break; + } + } + } + } + } + } + CXCursor_ObjCInstanceMethodDecl | + CXCursor_ObjCClassMethodDecl => { + let name = c.spelling(); + let signature = + FunctionSig::from_ty(&c.cur_type(), &c, ctx) + .expect("Invalid function sig"); + let is_class_method = + c.kind() == CXCursor_ObjCClassMethodDecl; + let method = + ObjCMethod::new(&name, signature, is_class_method); + interface.add_method(method); + } + CXCursor_TemplateTypeParameter => { + let name = c.spelling(); + 
interface.template_names.push(name); + } + CXCursor_ObjCSuperClassRef => { + let item = Item::from_ty_or_ref(c.cur_type(), c, None, ctx); + interface.parent_class = Some(item.into()); + } + _ => {} + } + CXChildVisit_Continue + }); + Some(interface) + } + + fn add_method(&mut self, method: ObjCMethod) { + if method.is_class_method { + self.class_methods.push(method); + } else { + self.methods.push(method); + } + } +} + +impl ObjCMethod { + fn new( + name: &str, + signature: FunctionSig, + is_class_method: bool, + ) -> ObjCMethod { + let split_name: Vec<&str> = name.split(':').collect(); + + let rust_name = split_name.join("_"); + + ObjCMethod { + name: name.to_owned(), + rust_name, + signature, + is_class_method, + } + } + + /// The original method selector name + /// like, dataWithBytes:length: + pub fn name(&self) -> &str { + self.name.as_ref() + } + + /// Method name as converted to rust + /// like, dataWithBytes_length_ + pub fn rust_name(&self) -> &str { + self.rust_name.as_ref() + } + + /// Returns the methods signature as FunctionSig + pub fn signature(&self) -> &FunctionSig { + &self.signature + } + + /// Is this a class method? + pub fn is_class_method(&self) -> bool { + self.is_class_method + } + + /// Formats the method call + pub fn format_method_call(&self, args: &[TokenStream]) -> TokenStream { + let split_name: Vec<Option<Ident>> = self + .name + .split(':') + .map(|name| { + if name.is_empty() { + None + } else { + Some(Ident::new(name, Span::call_site())) + } + }) + .collect(); + + // No arguments + if args.is_empty() && split_name.len() == 1 { + let name = &split_name[0]; + return quote! 
{ + #name + }; + } + + // Check right amount of arguments + assert!( + args.len() == split_name.len() - 1, + "Incorrect method name or arguments for objc method, {:?} vs {:?}", + args, + split_name + ); + + // Get arguments without type signatures to pass to `msg_send!` + let mut args_without_types = vec![]; + for arg in args.iter() { + let arg = arg.to_string(); + let name_and_sig: Vec<&str> = arg.split(' ').collect(); + let name = name_and_sig[0]; + args_without_types.push(Ident::new(name, Span::call_site())) + } + + let args = split_name.into_iter().zip(args_without_types).map( + |(arg, arg_val)| { + if let Some(arg) = arg { + quote! { #arg: #arg_val } + } else { + quote! { #arg_val: #arg_val } + } + }, + ); + + quote! { + #( #args )* + } + } +} + +impl Trace for ObjCInterface { + type Extra = (); + + fn trace<T>(&self, context: &BindgenContext, tracer: &mut T, _: &()) + where + T: Tracer, + { + for method in &self.methods { + method.signature.trace(context, tracer, &()); + } + + for class_method in &self.class_methods { + class_method.signature.trace(context, tracer, &()); + } + + for protocol in &self.conforms_to { + tracer.visit(*protocol); + } + } +} diff --git a/third_party/rust/bindgen/ir/template.rs b/third_party/rust/bindgen/ir/template.rs new file mode 100644 index 0000000000..8b06748e2c --- /dev/null +++ b/third_party/rust/bindgen/ir/template.rs @@ -0,0 +1,343 @@ +//! Template declaration and instantiation related things. +//! +//! The nomenclature surrounding templates is often confusing, so here are a few +//! brief definitions: +//! +//! * "Template definition": a class/struct/alias/function definition that takes +//! generic template parameters. For example: +//! +//! ```c++ +//! template<typename T> +//! class List<T> { +//! // ... +//! }; +//! ``` +//! +//! * "Template instantiation": an instantiation is a use of a template with +//! concrete template arguments. For example, `List<int>`. +//! +//! 
* "Template specialization": an alternative template definition providing a +//! custom definition for instantiations with the matching template +//! arguments. This C++ feature is unsupported by bindgen. For example: +//! +//! ```c++ +//! template<> +//! class List<int> { +//! // Special layout for int lists... +//! }; +//! ``` + +use super::context::{BindgenContext, ItemId, TypeId}; +use super::item::{IsOpaque, Item, ItemAncestors}; +use super::traversal::{EdgeKind, Trace, Tracer}; +use crate::clang; +use crate::parse::ClangItemParser; + +/// Template declaration (and such declaration's template parameters) related +/// methods. +/// +/// This trait's methods distinguish between `None` and `Some([])` for +/// declarations that are not templates and template declarations with zero +/// parameters, in general. +/// +/// Consider this example: +/// +/// ```c++ +/// template <typename T, typename U> +/// class Foo { +/// T use_of_t; +/// U use_of_u; +/// +/// template <typename V> +/// using Bar = V*; +/// +/// class Inner { +/// T x; +/// U y; +/// Bar<int> z; +/// }; +/// +/// template <typename W> +/// class Lol { +/// // No use of W, but here's a use of T. +/// T t; +/// }; +/// +/// template <typename X> +/// class Wtf { +/// // X is not used because W is not used. +/// Lol<X> lololol; +/// }; +/// }; +/// +/// class Qux { +/// int y; +/// }; +/// ``` +/// +/// The following table depicts the results of each trait method when invoked on +/// each of the declarations above: +/// +/// +------+----------------------+--------------------------+------------------------+---- +/// |Decl. | self_template_params | num_self_template_params | all_template_parameters| ... +/// +------+----------------------+--------------------------+------------------------+---- +/// |Foo | [T, U] | 2 | [T, U] | ... +/// |Bar | [V] | 1 | [T, U, V] | ... +/// |Inner | [] | 0 | [T, U] | ... +/// |Lol | [W] | 1 | [T, U, W] | ... +/// |Wtf | [X] | 1 | [T, U, X] | ... 
+/// |Qux | [] | 0 | [] | ... +/// +------+----------------------+--------------------------+------------------------+---- +/// +/// ----+------+-----+----------------------+ +/// ... |Decl. | ... | used_template_params | +/// ----+------+-----+----------------------+ +/// ... |Foo | ... | [T, U] | +/// ... |Bar | ... | [V] | +/// ... |Inner | ... | [] | +/// ... |Lol | ... | [T] | +/// ... |Wtf | ... | [T] | +/// ... |Qux | ... | [] | +/// ----+------+-----+----------------------+ +pub trait TemplateParameters: Sized { + /// Get the set of `ItemId`s that make up this template declaration's free + /// template parameters. + /// + /// Note that these might *not* all be named types: C++ allows + /// constant-value template parameters as well as template-template + /// parameters. Of course, Rust does not allow generic parameters to be + /// anything but types, so we must treat them as opaque, and avoid + /// instantiating them. + fn self_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId>; + + /// Get the number of free template parameters this template declaration + /// has. + fn num_self_template_params(&self, ctx: &BindgenContext) -> usize { + self.self_template_params(ctx).len() + } + + /// Get the complete set of template parameters that can affect this + /// declaration. + /// + /// Note that this item doesn't need to be a template declaration itself for + /// `Some` to be returned here (in contrast to `self_template_params`). If + /// this item is a member of a template declaration, then the parent's + /// template parameters are included here. + /// + /// In the example above, `Inner` depends on both of the `T` and `U` type + /// parameters, even though it is not itself a template declaration and + /// therefore has no type parameters itself. Perhaps it helps to think about + /// how we would fully reference such a member type in C++: + /// `Foo<int,char>::Inner`. 
`Foo` *must* be instantiated with template + /// arguments before we can gain access to the `Inner` member type. + fn all_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> + where + Self: ItemAncestors, + { + let mut ancestors: Vec<_> = self.ancestors(ctx).collect(); + ancestors.reverse(); + ancestors + .into_iter() + .flat_map(|id| id.self_template_params(ctx).into_iter()) + .collect() + } + + /// Get only the set of template parameters that this item uses. This is a + /// subset of `all_template_params` and does not necessarily contain any of + /// `self_template_params`. + fn used_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> + where + Self: AsRef<ItemId>, + { + assert!( + ctx.in_codegen_phase(), + "template parameter usage is not computed until codegen" + ); + + let id = *self.as_ref(); + ctx.resolve_item(id) + .all_template_params(ctx) + .into_iter() + .filter(|p| ctx.uses_template_parameter(id, *p)) + .collect() + } +} + +/// A trait for things which may or may not be a named template type parameter. +pub trait AsTemplateParam { + /// Any extra information the implementor might need to make this decision. + type Extra; + + /// Convert this thing to the item id of a named template type parameter. + fn as_template_param( + &self, + ctx: &BindgenContext, + extra: &Self::Extra, + ) -> Option<TypeId>; + + /// Is this a named template type parameter? + fn is_template_param( + &self, + ctx: &BindgenContext, + extra: &Self::Extra, + ) -> bool { + self.as_template_param(ctx, extra).is_some() + } +} + +/// A concrete instantiation of a generic template. +#[derive(Clone, Debug)] +pub struct TemplateInstantiation { + /// The template definition which this is instantiating. + definition: TypeId, + /// The concrete template arguments, which will be substituted in the + /// definition for the generic template parameters. + args: Vec<TypeId>, +} + +impl TemplateInstantiation { + /// Construct a new template instantiation from the given parts. 
+ pub fn new<I>(definition: TypeId, args: I) -> TemplateInstantiation + where + I: IntoIterator<Item = TypeId>, + { + TemplateInstantiation { + definition, + args: args.into_iter().collect(), + } + } + + /// Get the template definition for this instantiation. + pub fn template_definition(&self) -> TypeId { + self.definition + } + + /// Get the concrete template arguments used in this instantiation. + pub fn template_arguments(&self) -> &[TypeId] { + &self.args[..] + } + + /// Parse a `TemplateInstantiation` from a clang `Type`. + pub fn from_ty( + ty: &clang::Type, + ctx: &mut BindgenContext, + ) -> Option<TemplateInstantiation> { + use clang_sys::*; + + let template_args = ty.template_args().map_or(vec![], |args| match ty + .canonical_type() + .template_args() + { + Some(canonical_args) => { + let arg_count = args.len(); + args.chain(canonical_args.skip(arg_count)) + .filter(|t| t.kind() != CXType_Invalid) + .map(|t| { + Item::from_ty_or_ref(t, t.declaration(), None, ctx) + }) + .collect() + } + None => args + .filter(|t| t.kind() != CXType_Invalid) + .map(|t| Item::from_ty_or_ref(t, t.declaration(), None, ctx)) + .collect(), + }); + + let declaration = ty.declaration(); + let definition = if declaration.kind() == CXCursor_TypeAliasTemplateDecl + { + Some(declaration) + } else { + declaration.specialized().or_else(|| { + let mut template_ref = None; + ty.declaration().visit(|child| { + if child.kind() == CXCursor_TemplateRef { + template_ref = Some(child); + return CXVisit_Break; + } + + // Instantiations of template aliases might have the + // TemplateRef to the template alias definition arbitrarily + // deep, so we need to recurse here and not only visit + // direct children. 
+ CXChildVisit_Recurse + }); + + template_ref.and_then(|cur| cur.referenced()) + }) + }; + + let definition = match definition { + Some(def) => def, + None => { + if !ty.declaration().is_builtin() { + warn!( + "Could not find template definition for template \ + instantiation" + ); + } + return None; + } + }; + + let template_definition = + Item::from_ty_or_ref(definition.cur_type(), definition, None, ctx); + + Some(TemplateInstantiation::new( + template_definition, + template_args, + )) + } +} + +impl IsOpaque for TemplateInstantiation { + type Extra = Item; + + /// Is this an opaque template instantiation? + fn is_opaque(&self, ctx: &BindgenContext, item: &Item) -> bool { + if self.template_definition().is_opaque(ctx, &()) { + return true; + } + + // TODO(#774): This doesn't properly handle opaque instantiations where + // an argument is itself an instantiation because `canonical_name` does + // not insert the template arguments into the name, i.e. for nested + // template arguments it creates "Foo" instead of "Foo<int>". The fully + // correct fix is to make `canonical_{name,path}` include template + // arguments properly. 
+ + let mut path = item.path_for_allowlisting(ctx).clone(); + let args: Vec<_> = self + .template_arguments() + .iter() + .map(|arg| { + let arg_path = + ctx.resolve_item(*arg).path_for_allowlisting(ctx); + arg_path[1..].join("::") + }) + .collect(); + { + let last = path.last_mut().unwrap(); + last.push('<'); + last.push_str(&args.join(", ")); + last.push('>'); + } + + ctx.opaque_by_name(&path) + } +} + +impl Trace for TemplateInstantiation { + type Extra = (); + + fn trace<T>(&self, _ctx: &BindgenContext, tracer: &mut T, _: &()) + where + T: Tracer, + { + tracer + .visit_kind(self.definition.into(), EdgeKind::TemplateDeclaration); + for arg in self.template_arguments() { + tracer.visit_kind(arg.into(), EdgeKind::TemplateArgument); + } + } +} diff --git a/third_party/rust/bindgen/ir/traversal.rs b/third_party/rust/bindgen/ir/traversal.rs new file mode 100644 index 0000000000..f14483f295 --- /dev/null +++ b/third_party/rust/bindgen/ir/traversal.rs @@ -0,0 +1,478 @@ +//! Traversal of the graph of IR items and types. + +use super::context::{BindgenContext, ItemId}; +use super::item::ItemSet; +use std::collections::{BTreeMap, VecDeque}; + +/// An outgoing edge in the IR graph is a reference from some item to another +/// item: +/// +/// from --> to +/// +/// The `from` is left implicit: it is the concrete `Trace` implementer which +/// yielded this outgoing edge. +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct Edge { + to: ItemId, + kind: EdgeKind, +} + +impl Edge { + /// Construct a new edge whose referent is `to` and is of the given `kind`. + pub fn new(to: ItemId, kind: EdgeKind) -> Edge { + Edge { to, kind } + } +} + +impl From<Edge> for ItemId { + fn from(val: Edge) -> Self { + val.to + } +} + +/// The kind of edge reference. This is useful when we wish to only consider +/// certain kinds of edges for a particular traversal or analysis. 
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum EdgeKind { + /// A generic, catch-all edge. + Generic, + + /// An edge from a template declaration, to the definition of a named type + /// parameter. For example, the edge from `Foo<T>` to `T` in the following + /// snippet: + /// + /// ```C++ + /// template<typename T> + /// class Foo { }; + /// ``` + TemplateParameterDefinition, + + /// An edge from a template instantiation to the template declaration that + /// is being instantiated. For example, the edge from `Foo<int>` to + /// `Foo<T>`: + /// + /// ```C++ + /// template<typename T> + /// class Foo { }; + /// + /// using Bar = Foo<int>; + /// ``` + TemplateDeclaration, + + /// An edge from a template instantiation to its template argument. For + /// example, `Foo<Bar>` to `Bar`: + /// + /// ```C++ + /// template<typename T> + /// class Foo { }; + /// + /// class Bar { }; + /// + /// using FooBar = Foo<Bar>; + /// ``` + TemplateArgument, + + /// An edge from a compound type to one of its base member types. For + /// example, the edge from `Bar` to `Foo`: + /// + /// ```C++ + /// class Foo { }; + /// + /// class Bar : public Foo { }; + /// ``` + BaseMember, + + /// An edge from a compound type to the types of one of its fields. For + /// example, the edge from `Foo` to `int`: + /// + /// ```C++ + /// class Foo { + /// int x; + /// }; + /// ``` + Field, + + /// An edge from a class or struct type to an inner type member. For + /// example, the edge from `Foo` to `Foo::Bar` here: + /// + /// ```C++ + /// class Foo { + /// struct Bar { }; + /// }; + /// ``` + InnerType, + + /// An edge from a class or struct type to an inner static variable. For + /// example, the edge from `Foo` to `Foo::BAR` here: + /// + /// ```C++ + /// class Foo { + /// static const char* BAR; + /// }; + /// ``` + InnerVar, + + /// An edge from a class or struct type to one of its method functions. 
For + /// example, the edge from `Foo` to `Foo::bar`: + /// + /// ```C++ + /// class Foo { + /// bool bar(int x, int y); + /// }; + /// ``` + Method, + + /// An edge from a class or struct type to one of its constructor + /// functions. For example, the edge from `Foo` to `Foo::Foo(int x, int y)`: + /// + /// ```C++ + /// class Foo { + /// int my_x; + /// int my_y; + /// + /// public: + /// Foo(int x, int y); + /// }; + /// ``` + Constructor, + + /// An edge from a class or struct type to its destructor function. For + /// example, the edge from `Doggo` to `Doggo::~Doggo()`: + /// + /// ```C++ + /// struct Doggo { + /// char* wow; + /// + /// public: + /// ~Doggo(); + /// }; + /// ``` + Destructor, + + /// An edge from a function declaration to its return type. For example, the + /// edge from `foo` to `int`: + /// + /// ```C++ + /// int foo(char* string); + /// ``` + FunctionReturn, + + /// An edge from a function declaration to one of its parameter types. For + /// example, the edge from `foo` to `char*`: + /// + /// ```C++ + /// int foo(char* string); + /// ``` + FunctionParameter, + + /// An edge from a static variable to its type. For example, the edge from + /// `FOO` to `const char*`: + /// + /// ```C++ + /// static const char* FOO; + /// ``` + VarType, + + /// An edge from a non-templated alias or typedef to the referenced type. + TypeReference, +} + +/// A predicate to allow visiting only sub-sets of the whole IR graph by +/// excluding certain edges from being followed by the traversal. +/// +/// The predicate must return true if the traversal should follow this edge +/// and visit everything that is reachable through it. +pub type TraversalPredicate = for<'a> fn(&'a BindgenContext, Edge) -> bool; + +/// A `TraversalPredicate` implementation that follows all edges, and therefore +/// traversals using this predicate will see the whole IR graph reachable from +/// the traversal's roots. 
+pub fn all_edges(_: &BindgenContext, _: Edge) -> bool { + true +} + +/// A `TraversalPredicate` implementation that only follows +/// `EdgeKind::InnerType` edges, and therefore traversals using this predicate +/// will only visit the traversal's roots and their inner types. This is used +/// in no-recursive-allowlist mode, where inner types such as anonymous +/// structs/unions still need to be processed. +pub fn only_inner_type_edges(_: &BindgenContext, edge: Edge) -> bool { + edge.kind == EdgeKind::InnerType +} + +/// A `TraversalPredicate` implementation that only follows edges to items that +/// are enabled for code generation. This lets us skip considering items +/// which are not reachable from code generation. +pub fn codegen_edges(ctx: &BindgenContext, edge: Edge) -> bool { + let cc = &ctx.options().codegen_config; + match edge.kind { + EdgeKind::Generic => { + ctx.resolve_item(edge.to).is_enabled_for_codegen(ctx) + } + + // We statically know the kind of item that non-generic edges can point + // to, so we don't need to actually resolve the item and check + // `Item::is_enabled_for_codegen`. + EdgeKind::TemplateParameterDefinition | + EdgeKind::TemplateArgument | + EdgeKind::TemplateDeclaration | + EdgeKind::BaseMember | + EdgeKind::Field | + EdgeKind::InnerType | + EdgeKind::FunctionReturn | + EdgeKind::FunctionParameter | + EdgeKind::VarType | + EdgeKind::TypeReference => cc.types(), + EdgeKind::InnerVar => cc.vars(), + EdgeKind::Method => cc.methods(), + EdgeKind::Constructor => cc.constructors(), + EdgeKind::Destructor => cc.destructors(), + } +} + +/// The storage for the set of items that have been seen (although their +/// outgoing edges might not have been fully traversed yet) in an active +/// traversal. +pub trait TraversalStorage<'ctx> { + /// Construct a new instance of this TraversalStorage, for a new traversal. + fn new(ctx: &'ctx BindgenContext) -> Self; + + /// Add the given item to the storage. 
If the item has never been seen + /// before, return `true`. Otherwise, return `false`. + /// + /// The `from` item is the item from which we discovered this item, or is + /// `None` if this item is a root. + fn add(&mut self, from: Option<ItemId>, item: ItemId) -> bool; +} + +impl<'ctx> TraversalStorage<'ctx> for ItemSet { + fn new(_: &'ctx BindgenContext) -> Self { + ItemSet::new() + } + + fn add(&mut self, _: Option<ItemId>, item: ItemId) -> bool { + self.insert(item) + } +} + +/// A `TraversalStorage` implementation that keeps track of how we first reached +/// each item. This is useful for providing debug assertions with meaningful +/// diagnostic messages about dangling items. +#[derive(Debug)] +pub struct Paths<'ctx>(BTreeMap<ItemId, ItemId>, &'ctx BindgenContext); + +impl<'ctx> TraversalStorage<'ctx> for Paths<'ctx> { + fn new(ctx: &'ctx BindgenContext) -> Self { + Paths(BTreeMap::new(), ctx) + } + + fn add(&mut self, from: Option<ItemId>, item: ItemId) -> bool { + let newly_discovered = + self.0.insert(item, from.unwrap_or(item)).is_none(); + + if self.1.resolve_item_fallible(item).is_none() { + // Walk predecessor links back to a root (a root is recorded as its + // own predecessor) so the panic can report how the dangling id was + // reached. + let mut path = vec![]; + let mut current = item; + loop { + let predecessor = *self.0.get(&current).expect( + "We know we found this item id, so it must have a \ + predecessor", + ); + if predecessor == current { + break; + } + path.push(predecessor); + current = predecessor; + } + path.reverse(); + panic!( + "Found reference to dangling id = {:?}\nvia path = {:?}", + item, path + ); + } + + newly_discovered + } +} + +/// The queue of seen-but-not-yet-traversed items. +/// +/// Using a FIFO queue with a traversal will yield a breadth-first traversal, +/// while using a LIFO queue will result in a depth-first traversal of the IR +/// graph. +pub trait TraversalQueue: Default { + /// Add a newly discovered item to the queue. + fn push(&mut self, item: ItemId); + + /// Pop the next item to traverse, if any. 
+ fn next(&mut self) -> Option<ItemId>; +} + +impl TraversalQueue for Vec<ItemId> { + fn push(&mut self, item: ItemId) { + self.push(item); + } + + fn next(&mut self) -> Option<ItemId> { + self.pop() + } +} + +impl TraversalQueue for VecDeque<ItemId> { + fn push(&mut self, item: ItemId) { + self.push_back(item); + } + + fn next(&mut self) -> Option<ItemId> { + self.pop_front() + } +} + +/// Something that can receive edges from a `Trace` implementation. +pub trait Tracer { + /// Note an edge between items. Called from within a `Trace` implementation. + fn visit_kind(&mut self, item: ItemId, kind: EdgeKind); + + /// A synonym for `tracer.visit_kind(item, EdgeKind::Generic)`. + fn visit(&mut self, item: ItemId) { + self.visit_kind(item, EdgeKind::Generic); + } +} + +impl<F> Tracer for F +where + F: FnMut(ItemId, EdgeKind), +{ + fn visit_kind(&mut self, item: ItemId, kind: EdgeKind) { + (*self)(item, kind) + } +} + +/// Trace all of the outgoing edges to other items. Implementations should call +/// one of `tracer.visit(edge)` or `tracer.visit_kind(edge, EdgeKind::Whatever)` +/// for each of their outgoing edges. +pub trait Trace { + /// If a particular type needs extra information beyond what it has in + /// `self` and `context` to find its referenced items, its implementation + /// can define this associated type, forcing callers to pass the needed + /// information through. + type Extra; + + /// Trace all of this item's outgoing edges to other items. + fn trace<T>( + &self, + context: &BindgenContext, + tracer: &mut T, + extra: &Self::Extra, + ) where + T: Tracer; +} + +/// A graph traversal of the transitive closure of references between items. +/// +/// See `BindgenContext::allowlisted_items` for more information. +pub struct ItemTraversal<'ctx, Storage, Queue> +where + Storage: TraversalStorage<'ctx>, + Queue: TraversalQueue, +{ + ctx: &'ctx BindgenContext, + + /// The set of items we have seen thus far in this traversal. 
+ seen: Storage, + + /// The set of items that we have seen, but have yet to traverse. + queue: Queue, + + /// The predicate that determines which edges this traversal will follow. + predicate: TraversalPredicate, + + /// The item we are currently traversing. + currently_traversing: Option<ItemId>, +} + +impl<'ctx, Storage, Queue> ItemTraversal<'ctx, Storage, Queue> +where + Storage: TraversalStorage<'ctx>, + Queue: TraversalQueue, +{ + /// Begin a new traversal, starting from the given roots. + pub fn new<R>( + ctx: &'ctx BindgenContext, + roots: R, + predicate: TraversalPredicate, + ) -> ItemTraversal<'ctx, Storage, Queue> + where + R: IntoIterator<Item = ItemId>, + { + let mut seen = Storage::new(ctx); + let mut queue = Queue::default(); + + for id in roots { + seen.add(None, id); + queue.push(id); + } + + ItemTraversal { + ctx, + seen, + queue, + predicate, + currently_traversing: None, + } + } +} + +impl<'ctx, Storage, Queue> Tracer for ItemTraversal<'ctx, Storage, Queue> +where + Storage: TraversalStorage<'ctx>, + Queue: TraversalQueue, +{ + fn visit_kind(&mut self, item: ItemId, kind: EdgeKind) { + let edge = Edge::new(item, kind); + if !(self.predicate)(self.ctx, edge) { + return; + } + + let is_newly_discovered = + self.seen.add(self.currently_traversing, item); + if is_newly_discovered { + self.queue.push(item) + } + } +} + +impl<'ctx, Storage, Queue> Iterator for ItemTraversal<'ctx, Storage, Queue> +where + Storage: TraversalStorage<'ctx>, + Queue: TraversalQueue, +{ + type Item = ItemId; + + fn next(&mut self) -> Option<Self::Item> { + let id = self.queue.next()?; + + let newly_discovered = self.seen.add(None, id); + debug_assert!( + !newly_discovered, + "should have already seen anything we get out of our queue" + ); + debug_assert!( + self.ctx.resolve_item_fallible(id).is_some(), + "should only get IDs of actual items in our context during traversal" + ); + + self.currently_traversing = Some(id); + id.trace(self.ctx, self, &()); + 
self.currently_traversing = None; + + Some(id) + } +} + +/// An iterator to find any dangling items. +/// +/// See `BindgenContext::assert_no_dangling_item_traversal` for more +/// information. +pub type AssertNoDanglingItemsTraversal<'ctx> = + ItemTraversal<'ctx, Paths<'ctx>, VecDeque<ItemId>>; diff --git a/third_party/rust/bindgen/ir/ty.rs b/third_party/rust/bindgen/ir/ty.rs new file mode 100644 index 0000000000..fd6108f774 --- /dev/null +++ b/third_party/rust/bindgen/ir/ty.rs @@ -0,0 +1,1287 @@ +//! Everything related to types in our intermediate representation. + +use super::comp::CompInfo; +use super::context::{BindgenContext, ItemId, TypeId}; +use super::dot::DotAttributes; +use super::enum_ty::Enum; +use super::function::FunctionSig; +use super::int::IntKind; +use super::item::{IsOpaque, Item}; +use super::layout::{Layout, Opaque}; +use super::objc::ObjCInterface; +use super::template::{ + AsTemplateParam, TemplateInstantiation, TemplateParameters, +}; +use super::traversal::{EdgeKind, Trace, Tracer}; +use crate::clang::{self, Cursor}; +use crate::parse::{ClangItemParser, ParseError, ParseResult}; +use std::borrow::Cow; +use std::io; + +/// The base representation of a type in bindgen. +/// +/// A type has an optional name, which if present cannot be empty, a `layout` +/// (size, alignment and packedness) if known, a `Kind`, which determines which +/// kind of type it is, and whether the type is const. +#[derive(Debug)] +pub struct Type { + /// The name of the type, or None if it was an unnamed struct or union. + name: Option<String>, + /// The layout of the type, if known. + layout: Option<Layout>, + /// The inner kind of the type + kind: TypeKind, + /// Whether this type is const-qualified. + is_const: bool, +} + +/// The maximum number of items in an array for which Rust implements common +/// traits, and so if we have a type containing an array with more than this +/// many items, we won't be able to derive common traits on that type. 
+/// +pub const RUST_DERIVE_IN_ARRAY_LIMIT: usize = 32; + +impl Type { + /// Get the underlying `CompInfo` for this type, or `None` if this is some + /// other kind of type. + pub fn as_comp(&self) -> Option<&CompInfo> { + match self.kind { + TypeKind::Comp(ref ci) => Some(ci), + _ => None, + } + } + + /// Get the underlying `CompInfo` for this type as a mutable reference, or + /// `None` if this is some other kind of type. + pub fn as_comp_mut(&mut self) -> Option<&mut CompInfo> { + match self.kind { + TypeKind::Comp(ref mut ci) => Some(ci), + _ => None, + } + } + + /// Construct a new `Type`. + pub fn new( + name: Option<String>, + layout: Option<Layout>, + kind: TypeKind, + is_const: bool, + ) -> Self { + Type { + name, + layout, + kind, + is_const, + } + } + + /// Which kind of type is this? + pub fn kind(&self) -> &TypeKind { + &self.kind + } + + /// Get a mutable reference to this type's kind. + pub fn kind_mut(&mut self) -> &mut TypeKind { + &mut self.kind + } + + /// Get this type's name. + pub fn name(&self) -> Option<&str> { + self.name.as_deref() + } + + /// Whether this is a block pointer type. + pub fn is_block_pointer(&self) -> bool { + matches!(self.kind, TypeKind::BlockPointer(..)) + } + + /// Is this a compound type? + pub fn is_comp(&self) -> bool { + matches!(self.kind, TypeKind::Comp(..)) + } + + /// Is this a union? + pub fn is_union(&self) -> bool { + match self.kind { + TypeKind::Comp(ref comp) => comp.is_union(), + _ => false, + } + } + + /// Is this type of kind `TypeKind::TypeParam`? + pub fn is_type_param(&self) -> bool { + matches!(self.kind, TypeKind::TypeParam) + } + + /// Is this a template instantiation type? + pub fn is_template_instantiation(&self) -> bool { + matches!(self.kind, TypeKind::TemplateInstantiation(..)) + } + + /// Is this a template alias type? + pub fn is_template_alias(&self) -> bool { + matches!(self.kind, TypeKind::TemplateAlias(..)) + } + + /// Is this a function type? 
+ pub fn is_function(&self) -> bool { + matches!(self.kind, TypeKind::Function(..)) + } + + /// Is this an enum type? + pub fn is_enum(&self) -> bool { + matches!(self.kind, TypeKind::Enum(..)) + } + + /// Is this either a builtin or named type? + pub fn is_builtin_or_type_param(&self) -> bool { + matches!( + self.kind, + TypeKind::Void | + TypeKind::NullPtr | + TypeKind::Function(..) | + TypeKind::Array(..) | + TypeKind::Reference(..) | + TypeKind::Pointer(..) | + TypeKind::Int(..) | + TypeKind::Float(..) | + TypeKind::TypeParam + ) + } + + /// Creates a new named type, with name `name`. + pub fn named(name: String) -> Self { + let name = if name.is_empty() { None } else { Some(name) }; + Self::new(name, None, TypeKind::TypeParam, false) + } + + /// Is this a floating point type? + pub fn is_float(&self) -> bool { + matches!(self.kind, TypeKind::Float(..)) + } + + /// Is this a boolean type? + pub fn is_bool(&self) -> bool { + matches!(self.kind, TypeKind::Int(IntKind::Bool)) + } + + /// Is this an integer type? + pub fn is_integer(&self) -> bool { + matches!(self.kind, TypeKind::Int(..)) + } + + /// Cast this type to an integer kind, or `None` if it is not an integer + /// type. + pub fn as_integer(&self) -> Option<IntKind> { + match self.kind { + TypeKind::Int(int_kind) => Some(int_kind), + _ => None, + } + } + + /// Is this a `const` qualified type? + pub fn is_const(&self) -> bool { + self.is_const + } + + /// Is this a reference to another type? + pub fn is_type_ref(&self) -> bool { + matches!( + self.kind, + TypeKind::ResolvedTypeRef(_) | TypeKind::UnresolvedTypeRef(_, _, _) + ) + } + + /// Is this an unresolved reference? + pub fn is_unresolved_ref(&self) -> bool { + matches!(self.kind, TypeKind::UnresolvedTypeRef(_, _, _)) + } + + /// Is this a incomplete array type? 
+ pub fn is_incomplete_array(&self, ctx: &BindgenContext) -> Option<ItemId> { + match self.kind { + TypeKind::Array(item, len) => { + if len == 0 { + Some(item.into()) + } else { + None + } + } + TypeKind::ResolvedTypeRef(inner) => { + ctx.resolve_type(inner).is_incomplete_array(ctx) + } + _ => None, + } + } + + /// What is the layout of this type? + pub fn layout(&self, ctx: &BindgenContext) -> Option<Layout> { + self.layout.or_else(|| { + match self.kind { + TypeKind::Comp(ref ci) => ci.layout(ctx), + TypeKind::Array(inner, length) if length == 0 => Some( + Layout::new(0, ctx.resolve_type(inner).layout(ctx)?.align), + ), + // FIXME(emilio): This is a hack for anonymous union templates. + // Use the actual pointer size! + TypeKind::Pointer(..) => Some(Layout::new( + ctx.target_pointer_size(), + ctx.target_pointer_size(), + )), + TypeKind::ResolvedTypeRef(inner) => { + ctx.resolve_type(inner).layout(ctx) + } + _ => None, + } + }) + } + + /// Whether this named type is an invalid C++ identifier. This is done to + /// avoid generating invalid code with some cases we can't handle, see: + /// + /// tests/headers/381-decltype-alias.hpp + pub fn is_invalid_type_param(&self) -> bool { + match self.kind { + TypeKind::TypeParam => { + let name = self.name().expect("Unnamed named type?"); + !clang::is_valid_identifier(name) + } + _ => false, + } + } + + /// Takes `name`, and returns a suitable identifier representation for it. + fn sanitize_name(name: &str) -> Cow<str> { + if clang::is_valid_identifier(name) { + return Cow::Borrowed(name); + } + + let name = name.replace(|c| c == ' ' || c == ':' || c == '.', "_"); + Cow::Owned(name) + } + + /// Get this type's santizied name. 
pub fn sanitized_name<'a>(
        &'a self,
        ctx: &BindgenContext,
    ) -> Option<Cow<'a, str>> {
        // Pointers, references and arrays have no name of their own; their
        // name is derived from the inner type's name plus a prefix.
        let name_info = match *self.kind() {
            TypeKind::Pointer(inner) => Some((inner, Cow::Borrowed("ptr"))),
            TypeKind::Reference(inner) => Some((inner, Cow::Borrowed("ref"))),
            TypeKind::Array(inner, length) => {
                Some((inner, format!("array{}", length).into()))
            }
            _ => None,
        };
        if let Some((inner, prefix)) = name_info {
            ctx.resolve_item(inner)
                .expect_type()
                .sanitized_name(ctx)
                .map(|name| format!("{}_{}", prefix, name).into())
        } else {
            self.name().map(Self::sanitize_name)
        }
    }

    /// See safe_canonical_type.
    ///
    /// Panics if the canonical type cannot be found, i.e. if an unresolved
    /// type reference is reached.
    pub fn canonical_type<'tr>(
        &'tr self,
        ctx: &'tr BindgenContext,
    ) -> &'tr Type {
        self.safe_canonical_type(ctx)
            .expect("Should have been resolved after parsing!")
    }

    /// Returns the canonical type of this type, that is, the "inner type".
    ///
    /// For example, for a `typedef`, the canonical type would be the
    /// `typedef`ed type, for a template instantiation, would be the template
    /// it's specializing, and so on. Return None if the type is unresolved.
    pub fn safe_canonical_type<'tr>(
        &'tr self,
        ctx: &'tr BindgenContext,
    ) -> Option<&'tr Type> {
        match self.kind {
            // These kinds are already canonical.
            TypeKind::TypeParam |
            TypeKind::Array(..) |
            TypeKind::Vector(..) |
            TypeKind::Comp(..) |
            TypeKind::Opaque |
            TypeKind::Int(..) |
            TypeKind::Float(..) |
            TypeKind::Complex(..) |
            TypeKind::Function(..) |
            TypeKind::Enum(..) |
            TypeKind::Reference(..) |
            TypeKind::Void |
            TypeKind::NullPtr |
            TypeKind::Pointer(..) |
            TypeKind::BlockPointer(..) |
            TypeKind::ObjCId |
            TypeKind::ObjCSel |
            TypeKind::ObjCInterface(..) => Some(self),

            // Indirections: follow them to the type they point at.
            TypeKind::ResolvedTypeRef(inner) |
            TypeKind::Alias(inner) |
            TypeKind::TemplateAlias(inner, _) => {
                ctx.resolve_type(inner).safe_canonical_type(ctx)
            }
            TypeKind::TemplateInstantiation(ref inst) => ctx
                .resolve_type(inst.template_definition())
                .safe_canonical_type(ctx),

            TypeKind::UnresolvedTypeRef(..) => None,
        }
    }

    /// There are some types we don't want to stop at when finding an opaque
    /// item, so we can arrive to the proper item that needs to be generated.
    pub fn should_be_traced_unconditionally(&self) -> bool {
        matches!(
            self.kind,
            TypeKind::Comp(..) |
                TypeKind::Function(..) |
                TypeKind::Pointer(..) |
                TypeKind::Array(..) |
                TypeKind::Reference(..) |
                TypeKind::TemplateInstantiation(..) |
                TypeKind::ResolvedTypeRef(..)
        )
    }
}

impl IsOpaque for Type {
    type Extra = Item;

    /// A type is opaque if its kind is `Opaque`, or if the template
    /// instantiation, compound type or resolved reference it wraps is opaque.
    fn is_opaque(&self, ctx: &BindgenContext, item: &Item) -> bool {
        match self.kind {
            TypeKind::Opaque => true,
            TypeKind::TemplateInstantiation(ref inst) => {
                inst.is_opaque(ctx, item)
            }
            TypeKind::Comp(ref comp) => comp.is_opaque(ctx, &self.layout),
            TypeKind::ResolvedTypeRef(to) => to.is_opaque(ctx, &()),
            _ => false,
        }
    }
}

impl AsTemplateParam for Type {
    type Extra = Item;

    /// Delegates to the type's kind.
    fn as_template_param(
        &self,
        ctx: &BindgenContext,
        item: &Item,
    ) -> Option<TypeId> {
        self.kind.as_template_param(ctx, item)
    }
}

impl AsTemplateParam for TypeKind {
    type Extra = Item;

    /// A `TypeParam` is its own template parameter; a resolved reference
    /// defers to its target; no other kind is a template parameter.
    fn as_template_param(
        &self,
        ctx: &BindgenContext,
        item: &Item,
    ) -> Option<TypeId> {
        match *self {
            TypeKind::TypeParam => Some(item.id().expect_type_id(ctx)),
            TypeKind::ResolvedTypeRef(id) => id.as_template_param(ctx, &()),
            _ => None,
        }
    }
}

impl DotAttributes for Type {
    /// Write this type's layout (if known), constness, and kind-specific
    /// attributes as table rows.
    fn dot_attributes<W>(
        &self,
        ctx: &BindgenContext,
        out: &mut W,
    ) -> io::Result<()>
    where
        W: io::Write,
    {
        if let Some(ref layout) = self.layout {
            // NOTE(review): this literal spans two source lines, so the raw
            // newline and the following indentation are part of the output.
            writeln!(
                out,
                "<tr><td>size</td><td>{}</td></tr>
                           <tr><td>align</td><td>{}</td></tr>",
                layout.size, layout.align
            )?;
            if layout.packed {
                writeln!(out, "<tr><td>packed</td><td>true</td></tr>")?;
            }
        }

        if self.is_const {
            writeln!(out, "<tr><td>const</td><td>true</td></tr>")?;
        }

        self.kind.dot_attributes(ctx, out)
    }
}

impl DotAttributes for TypeKind {
    /// Write the kind's name and, for compound types, the attributes of the
    /// underlying `CompInfo`.
    fn dot_attributes<W>(
        &self,
        ctx: &BindgenContext,
        out: &mut W,
    ) -> io::Result<()>
    where
        W: io::Write,
    {
        writeln!(
            out,
            "<tr><td>type kind</td><td>{}</td></tr>",
            self.kind_name()
        )?;

        if let TypeKind::Comp(ref comp) = *self {
            comp.dot_attributes(ctx, out)?;
        }

        Ok(())
    }
}

impl TypeKind {
    /// The name of this variant, as a static string.
    fn kind_name(&self) -> &'static str {
        match *self {
            TypeKind::Void => "Void",
            TypeKind::NullPtr => "NullPtr",
            TypeKind::Comp(..) => "Comp",
            TypeKind::Opaque => "Opaque",
            TypeKind::Int(..) => "Int",
            TypeKind::Float(..) => "Float",
            TypeKind::Complex(..) => "Complex",
            TypeKind::Alias(..) => "Alias",
            TypeKind::TemplateAlias(..) => "TemplateAlias",
            TypeKind::Array(..) => "Array",
            TypeKind::Vector(..) => "Vector",
            TypeKind::Function(..) => "Function",
            TypeKind::Enum(..) => "Enum",
            TypeKind::Pointer(..) => "Pointer",
            TypeKind::BlockPointer(..) => "BlockPointer",
            TypeKind::Reference(..) => "Reference",
            TypeKind::TemplateInstantiation(..) => "TemplateInstantiation",
            TypeKind::UnresolvedTypeRef(..) => "UnresolvedTypeRef",
            TypeKind::ResolvedTypeRef(..) => "ResolvedTypeRef",
            TypeKind::TypeParam => "TypeParam",
            TypeKind::ObjCInterface(..) => "ObjCInterface",
            TypeKind::ObjCId => "ObjCId",
            TypeKind::ObjCSel => "ObjCSel",
        }
    }
}

// A plain identifier is a valid type parameter name.
#[test]
fn is_invalid_type_param_valid() {
    let ty = Type::new(Some("foo".into()), None, TypeKind::TypeParam, false);
    assert!(!ty.is_invalid_type_param())
}

// Underscores and digits are fine as long as the name does not start with a
// digit.
#[test]
fn is_invalid_type_param_valid_underscore_and_numbers() {
    let ty = Type::new(
        Some("_foo123456789_".into()),
        None,
        TypeKind::TypeParam,
        false,
    );
    assert!(!ty.is_invalid_type_param())
}

// Kinds other than `TypeParam` are never reported as invalid type parameters.
#[test]
fn is_invalid_type_param_valid_unnamed_kind() {
    let ty = Type::new(Some("foo".into()), None, TypeKind::Void, false);
    assert!(!ty.is_invalid_type_param())
}

// A leading digit makes the name invalid.
#[test]
fn is_invalid_type_param_invalid_start() {
    let ty = Type::new(Some("1foo".into()), None, TypeKind::TypeParam, false);
    assert!(ty.is_invalid_type_param())
}

// A non-identifier character after the first one makes the name invalid.
#[test]
fn is_invalid_type_param_invalid_remaing() {
    let ty = Type::new(Some("foo-".into()), None, TypeKind::TypeParam, false);
    assert!(ty.is_invalid_type_param())
}

// An unnamed type parameter makes `is_invalid_type_param` panic.
#[test]
#[should_panic]
fn is_invalid_type_param_unnamed() {
    let ty = Type::new(None, None, TypeKind::TypeParam, false);
    assert!(ty.is_invalid_type_param())
}

// The empty string is not a valid identifier.
#[test]
fn is_invalid_type_param_empty_name() {
    let ty = Type::new(Some("".into()), None, TypeKind::TypeParam, false);
    assert!(ty.is_invalid_type_param())
}

impl TemplateParameters for Type {
    /// Delegates to the type's kind.
    fn self_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> {
        self.kind.self_template_params(ctx)
    }
}

impl TemplateParameters for TypeKind {
    /// Only resolved references (via their target), compound types and
    /// template aliases report template parameters; every other kind yields
    /// an empty list.
    fn self_template_params(&self, ctx: &BindgenContext) -> Vec<TypeId> {
        match *self {
            TypeKind::ResolvedTypeRef(id) => {
                ctx.resolve_type(id).self_template_params(ctx)
            }
            TypeKind::Comp(ref comp) => comp.self_template_params(ctx),
            TypeKind::TemplateAlias(_, ref args) => args.clone(),

            TypeKind::Opaque |
            TypeKind::TemplateInstantiation(..) |
            TypeKind::Void |
            TypeKind::NullPtr |
            TypeKind::Int(_) |
            TypeKind::Float(_) |
            TypeKind::Complex(_) |
            TypeKind::Array(..) |
            TypeKind::Vector(..) |
            TypeKind::Function(_) |
            TypeKind::Enum(_) |
            TypeKind::Pointer(_) |
            TypeKind::BlockPointer(_) |
            TypeKind::Reference(_) |
            TypeKind::UnresolvedTypeRef(..) |
            TypeKind::TypeParam |
            TypeKind::Alias(_) |
            TypeKind::ObjCId |
            TypeKind::ObjCSel |
            TypeKind::ObjCInterface(_) => vec![],
        }
    }
}

/// The kind of float this type represents.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum FloatKind {
    /// A `float`.
    Float,
    /// A `double`.
    Double,
    /// A `long double`.
    LongDouble,
    /// A `__float128`.
    Float128,
}

/// The different kinds of types that we can parse.
#[derive(Debug)]
pub enum TypeKind {
    /// The void type.
    Void,

    /// The `nullptr_t` type.
    NullPtr,

    /// A compound type, that is, a class, struct, or union.
    Comp(CompInfo),

    /// An opaque type that we just don't understand. All usage of this should
    /// result in an opaque blob of bytes generated from the containing type's
    /// layout.
    Opaque,

    /// An integer type, of a given kind. `bool` and `char` are also considered
    /// integers.
    Int(IntKind),

    /// A floating point type.
    Float(FloatKind),

    /// A complex floating point type.
    Complex(FloatKind),

    /// A type alias, with a name, that points to another type.
    Alias(TypeId),

    /// A templated alias, pointing to an inner type, just as `Alias`, but with
    /// template parameters.
    TemplateAlias(TypeId, Vec<TypeId>),

    /// A packed vector type: element type, number of elements
    Vector(TypeId, usize),

    /// An array of a type and a length.
    Array(TypeId, usize),

    /// A function type, with a given signature.
    Function(FunctionSig),

    /// An `enum` type.
    Enum(Enum),

    /// A pointer to a type. The only field is the id of the pointee type.
    Pointer(TypeId),

    /// A pointer to an Apple block.
    BlockPointer(TypeId),

    /// A reference to a type, as in: int& foo().
    Reference(TypeId),

    /// An instantiation of an abstract template definition with a set of
    /// concrete template arguments.
    TemplateInstantiation(TemplateInstantiation),

    /// A reference to a yet-to-resolve type. This stores the clang cursor
    /// itself, and postpones its resolution.
    ///
    /// These are gone in a phase after parsing where these are mapped to
    /// already known types, and are converted to ResolvedTypeRef.
    ///
    /// see tests/headers/typeref.hpp to see somewhere where this is a problem.
    UnresolvedTypeRef(
        clang::Type,
        clang::Cursor,
        /* parent_id */
        Option<ItemId>,
    ),

    /// An indirection to another type.
    ///
    /// These are generated after we resolve a forward declaration, or when we
    /// replace one type with another.
    ResolvedTypeRef(TypeId),

    /// A named type, that is, a template parameter.
    TypeParam,

    /// Objective C interface. Always referenced through a pointer
    ObjCInterface(ObjCInterface),

    /// Objective C 'id' type, points to any object
    ObjCId,

    /// Objective C selector type
    ObjCSel,
}

impl Type {
    /// This is another of the nasty methods. This one is the one that takes
    /// care of the core logic of converting a clang type to a `Type`.
    ///
    /// It's sort of nasty and full of special-casing, but hopefully the
    /// comments in every special case justify why they're there.
///
    /// On success this returns either `ParseResult::AlreadyResolved` (the
    /// type maps to an item that already exists or was created on the way) or
    /// `ParseResult::New` with the freshly built `Type`.
    ///
    /// # Errors
    ///
    /// Returns `ParseError::Recurse` when the caller should look inside the
    /// cursor instead, and `ParseError::Continue` when the type cannot be
    /// handled here (dependent, undeduced `auto`, unsupported or invalid
    /// types). Errors from `FunctionSig::from_ty` and from parsing a vector's
    /// element type are propagated.
    ///
    /// # Panics
    ///
    /// Several branches `unwrap`/`expect` on information clang is assumed to
    /// provide for the given type kind (pointee and element types, referenced
    /// cursors, compound/enum/interface parsing).
    pub fn from_clang_ty(
        potential_id: ItemId,
        ty: &clang::Type,
        location: Cursor,
        parent_id: Option<ItemId>,
        ctx: &mut BindgenContext,
    ) -> Result<ParseResult<Self>, ParseError> {
        use clang_sys::*;
        {
            // Fast path: builtin types and types we have already parsed.
            let already_resolved = ctx.builtin_or_resolved_ty(
                potential_id,
                parent_id,
                ty,
                Some(location),
            );
            if let Some(ty) = already_resolved {
                debug!("{:?} already resolved: {:?}", ty, location);
                return Ok(ParseResult::AlreadyResolved(ty.into()));
            }
        }

        let layout = ty.fallible_layout(ctx).ok();
        let cursor = ty.declaration();
        let is_anonymous = cursor.is_anonymous();
        // Anonymous declarations and empty spellings both yield no name.
        let mut name = if is_anonymous {
            None
        } else {
            Some(cursor.spelling()).filter(|n| !n.is_empty())
        };

        debug!(
            "from_clang_ty: {:?}, ty: {:?}, loc: {:?}",
            potential_id, ty, location
        );
        debug!("currently_parsed_types: {:?}", ctx.currently_parsed_types());

        let canonical_ty = ty.canonical_type();

        // Parse objc protocols as if they were interfaces
        let mut ty_kind = ty.kind();
        match location.kind() {
            CXCursor_ObjCProtocolDecl | CXCursor_ObjCCategoryDecl => {
                ty_kind = CXType_ObjCInterface
            }
            _ => {}
        }

        // Objective C template type parameter
        // FIXME: This is probably wrong, we are attempting to find the
        //        objc template params, which seem to manifest as a typedef.
        //        We are rewriting them as id to suppress multiple conflicting
        //        typedefs at root level
        if ty_kind == CXType_Typedef {
            let is_template_type_param =
                ty.declaration().kind() == CXCursor_TemplateTypeParameter;
            let is_canonical_objcpointer =
                canonical_ty.kind() == CXType_ObjCObjectPointer;

            // We have found a template type for objc interface
            if is_canonical_objcpointer && is_template_type_param {
                // Objective-C generics are just ids with fancy name.
                // To keep it simple, just name them ids
                name = Some("id".to_owned());
            }
        }

        if location.kind() == CXCursor_ClassTemplatePartialSpecialization {
            // Sorry! (Not sorry)
            warn!(
                "Found a partial template specialization; bindgen does not \
                 support partial template specialization! Constructing \
                 opaque type instead."
            );
            return Ok(ParseResult::New(
                Opaque::from_clang_ty(&canonical_ty, ctx),
                None,
            ));
        }

        let kind = if location.kind() == CXCursor_TemplateRef ||
            (ty.template_args().is_some() && ty_kind != CXType_Typedef)
        {
            // This is a template instantiation.
            match TemplateInstantiation::from_ty(ty, ctx) {
                Some(inst) => TypeKind::TemplateInstantiation(inst),
                None => TypeKind::Opaque,
            }
        } else {
            match ty_kind {
                CXType_Unexposed
                    if *ty != canonical_ty &&
                        canonical_ty.kind() != CXType_Invalid &&
                        ty.ret_type().is_none() &&
                        // Sometime clang desugars some types more than
                        // what we need, specially with function
                        // pointers.
                        //
                        // We should also try the solution of inverting
                        // those checks instead of doing this, that is,
                        // something like:
                        //
                        // CXType_Unexposed if ty.ret_type().is_some()
                        //   => { ... }
                        //
                        // etc.
                        !canonical_ty.spelling().contains("type-parameter") =>
                {
                    debug!("Looking for canonical type: {:?}", canonical_ty);
                    return Self::from_clang_ty(
                        potential_id,
                        &canonical_ty,
                        location,
                        parent_id,
                        ctx,
                    );
                }
                CXType_Unexposed | CXType_Invalid => {
                    // For some reason Clang doesn't give us any hint in some
                    // situations where we should generate a function pointer (see
                    // tests/headers/func_ptr_in_struct.h), so we do a guess here
                    // trying to see if it has a valid return type.
                    if ty.ret_type().is_some() {
                        let signature =
                            FunctionSig::from_ty(ty, &location, ctx)?;
                        TypeKind::Function(signature)
                    // Same here, with template specialisations we can safely
                    // assume this is a Comp(..)
                    } else if ty.is_fully_instantiated_template() {
                        debug!(
                            "Template specialization: {:?}, {:?} {:?}",
                            ty, location, canonical_ty
                        );
                        let complex = CompInfo::from_ty(
                            potential_id,
                            ty,
                            Some(location),
                            ctx,
                        )
                        .expect("C'mon");
                        TypeKind::Comp(complex)
                    } else {
                        match location.kind() {
                            CXCursor_CXXBaseSpecifier |
                            CXCursor_ClassTemplate => {
                                if location.kind() == CXCursor_CXXBaseSpecifier
                                {
                                    // In the case we're parsing a base specifier
                                    // inside an unexposed or invalid type, it means
                                    // that we're parsing one of two things:
                                    //
                                    //  * A template parameter.
                                    //  * A complex class that isn't exposed.
                                    //
                                    // This means, unfortunately, that there's no
                                    // good way to differentiate between them.
                                    //
                                    // Probably we could try to look at the
                                    // declaration and complicate more this logic,
                                    // but we'll keep it simple... if it's a valid
                                    // C++ identifier, we'll consider it as a
                                    // template parameter.
                                    //
                                    // This is because:
                                    //
                                    //  * We expect every other base that is a
                                    //    proper identifier (that is, a simple
                                    //    struct/union declaration), to be exposed,
                                    //    so this path can't be reached in that
                                    //    case.
                                    //
                                    //  * Quite conveniently, complex base
                                    //    specifiers preserve their full names (that
                                    //    is: Foo<T> instead of Foo). We can take
                                    //    advantage of this.
                                    //
                                    // If we find some edge case where this doesn't
                                    // work (which I guess is unlikely, see the
                                    // different test cases[1][2][3][4]), we'd need
                                    // to find more creative ways of differentiating
                                    // these two cases.
                                    //
                                    // [1]: inherit_named.hpp
                                    // [2]: forward-inherit-struct-with-fields.hpp
                                    // [3]: forward-inherit-struct.hpp
                                    // [4]: inherit-namespaced.hpp
                                    if location.spelling().chars().all(|c| {
                                        c.is_alphanumeric() || c == '_'
                                    }) {
                                        return Err(ParseError::Recurse);
                                    }
                                } else {
                                    name = Some(location.spelling());
                                }

                                let complex = CompInfo::from_ty(
                                    potential_id,
                                    ty,
                                    Some(location),
                                    ctx,
                                );
                                match complex {
                                    Ok(complex) => TypeKind::Comp(complex),
                                    Err(_) => {
                                        warn!(
                                            "Could not create complex type \
                                             from class template or base \
                                             specifier, using opaque blob"
                                        );
                                        let opaque =
                                            Opaque::from_clang_ty(ty, ctx);
                                        return Ok(ParseResult::New(
                                            opaque, None,
                                        ));
                                    }
                                }
                            }
                            CXCursor_TypeAliasTemplateDecl => {
                                debug!("TypeAliasTemplateDecl");

                                // We need to manually unwind this one.
                                let mut inner = Err(ParseError::Continue);
                                let mut args = vec![];

                                location.visit(|cur| {
                                    match cur.kind() {
                                        CXCursor_TypeAliasDecl => {
                                            let current = cur.cur_type();

                                            debug_assert_eq!(
                                                current.kind(),
                                                CXType_Typedef
                                            );

                                            name = Some(location.spelling());

                                            let inner_ty = cur
                                                .typedef_type()
                                                .expect("Not valid Type?");
                                            inner = Ok(Item::from_ty_or_ref(
                                                inner_ty,
                                                cur,
                                                Some(potential_id),
                                                ctx,
                                            ));
                                        }
                                        CXCursor_TemplateTypeParameter => {
                                            let param = Item::type_param(
                                                None, cur, ctx,
                                            )
                                            .expect(
                                                "Item::type_param shouldn't \
                                                 ever fail if we are looking \
                                                 at a TemplateTypeParameter",
                                            );
                                            args.push(param);
                                        }
                                        _ => {}
                                    }
                                    CXChildVisit_Continue
                                });

                                // `inner` stays `Err` if no `TypeAliasDecl`
                                // child was visited.
                                let inner_type = match inner {
                                    Ok(inner) => inner,
                                    Err(..) => {
                                        warn!(
                                            "Failed to parse template alias \
                                             {:?}",
                                            location
                                        );
                                        return Err(ParseError::Continue);
                                    }
                                };

                                TypeKind::TemplateAlias(inner_type, args)
                            }
                            CXCursor_TemplateRef => {
                                let referenced = location.referenced().unwrap();
                                let referenced_ty = referenced.cur_type();

                                debug!(
                                    "TemplateRef: location = {:?}; referenced = \
                                     {:?}; referenced_ty = {:?}",
                                    location,
                                    referenced,
                                    referenced_ty
                                );

                                return Self::from_clang_ty(
                                    potential_id,
                                    &referenced_ty,
                                    referenced,
                                    parent_id,
                                    ctx,
                                );
                            }
                            CXCursor_TypeRef => {
                                let referenced = location.referenced().unwrap();
                                let referenced_ty = referenced.cur_type();
                                let declaration = referenced_ty.declaration();

                                debug!(
                                    "TypeRef: location = {:?}; referenced = \
                                     {:?}; referenced_ty = {:?}",
                                    location, referenced, referenced_ty
                                );

                                let id = Item::from_ty_or_ref_with_id(
                                    potential_id,
                                    referenced_ty,
                                    declaration,
                                    parent_id,
                                    ctx,
                                );
                                return Ok(ParseResult::AlreadyResolved(
                                    id.into(),
                                ));
                            }
                            CXCursor_NamespaceRef => {
                                return Err(ParseError::Continue);
                            }
                            _ => {
                                if ty.kind() == CXType_Unexposed {
                                    warn!(
                                        "Unexposed type {:?}, recursing inside, \
                                         loc: {:?}",
                                        ty,
                                        location
                                    );
                                    return Err(ParseError::Recurse);
                                }

                                warn!("invalid type {:?}", ty);
                                return Err(ParseError::Continue);
                            }
                        }
                    }
                }
                CXType_Auto => {
                    // If the canonical type is the `auto` type itself, there
                    // is nothing deduced to parse.
                    if canonical_ty == *ty {
                        debug!("Couldn't find deduced type: {:?}", ty);
                        return Err(ParseError::Continue);
                    }

                    return Self::from_clang_ty(
                        potential_id,
                        &canonical_ty,
                        location,
                        parent_id,
                        ctx,
                    );
                }
                // NOTE: We don't resolve pointers eagerly because the pointee type
                // might not have been parsed, and if it contains templates or
                // something else we might get confused, see the comment inside
                // TypeRef.
                //
                // We might need to, though, if the context is already in the
                // process of resolving them.
                CXType_ObjCObjectPointer |
                CXType_MemberPointer |
                CXType_Pointer => {
                    let mut pointee = ty.pointee_type().unwrap();
                    if *ty != canonical_ty {
                        let canonical_pointee =
                            canonical_ty.pointee_type().unwrap();
                        // clang sometimes loses pointee constness here, see
                        // #2244.
                        if canonical_pointee.is_const() != pointee.is_const() {
                            pointee = canonical_pointee;
                        }
                    }
                    let inner =
                        Item::from_ty_or_ref(pointee, location, None, ctx);
                    TypeKind::Pointer(inner)
                }
                CXType_BlockPointer => {
                    let pointee = ty.pointee_type().expect("Not valid Type?");
                    let inner =
                        Item::from_ty_or_ref(pointee, location, None, ctx);
                    TypeKind::BlockPointer(inner)
                }
                // XXX: RValueReference is most likely wrong, but I don't think we
                // can even add bindings for that, so huh.
                CXType_RValueReference | CXType_LValueReference => {
                    let inner = Item::from_ty_or_ref(
                        ty.pointee_type().unwrap(),
                        location,
                        None,
                        ctx,
                    );
                    TypeKind::Reference(inner)
                }
                // XXX DependentSizedArray is wrong
                CXType_VariableArray | CXType_DependentSizedArray => {
                    let inner = Item::from_ty(
                        ty.elem_type().as_ref().unwrap(),
                        location,
                        None,
                        ctx,
                    )
                    .expect("Not able to resolve array element?");
                    // Arrays without a constant size are modelled as a
                    // pointer to their element type.
                    TypeKind::Pointer(inner)
                }
                CXType_IncompleteArray => {
                    let inner = Item::from_ty(
                        ty.elem_type().as_ref().unwrap(),
                        location,
                        None,
                        ctx,
                    )
                    .expect("Not able to resolve array element?");
                    // Incomplete arrays are represented with length 0.
                    TypeKind::Array(inner, 0)
                }
                CXType_FunctionNoProto | CXType_FunctionProto => {
                    let signature = FunctionSig::from_ty(ty, &location, ctx)?;
                    TypeKind::Function(signature)
                }
                CXType_Typedef => {
                    let inner = cursor.typedef_type().expect("Not valid Type?");
                    let inner_id =
                        Item::from_ty_or_ref(inner, location, None, ctx);
                    if inner_id == potential_id {
                        warn!(
                            "Generating oqaque type instead of self-referential \
                            typedef");
                        // This can happen if we bail out of recursive situations
                        // within the clang parsing.
                        TypeKind::Opaque
                    } else {
                        // Check if this type definition is an alias to a pointer of a `struct` /
                        // `union` / `enum` with the same name and add the `_ptr` suffix to it to
                        // avoid name collisions.
                        if let Some(ref mut name) = name {
                            if inner.kind() == CXType_Pointer &&
                                !ctx.options().c_naming
                            {
                                let pointee = inner.pointee_type().unwrap();
                                if pointee.kind() == CXType_Elaborated &&
                                    pointee.declaration().spelling() == *name
                                {
                                    *name += "_ptr";
                                }
                            }
                        }
                        TypeKind::Alias(inner_id)
                    }
                }
                CXType_Enum => {
                    let enum_ = Enum::from_ty(ty, ctx).expect("Not an enum?");

                    if !is_anonymous {
                        let pretty_name = ty.spelling();
                        if clang::is_valid_identifier(&pretty_name) {
                            name = Some(pretty_name);
                        }
                    }

                    TypeKind::Enum(enum_)
                }
                CXType_Record => {
                    let complex = CompInfo::from_ty(
                        potential_id,
                        ty,
                        Some(location),
                        ctx,
                    )
                    .expect("Not a complex type?");

                    if !is_anonymous {
                        // The pretty-printed name may contain typedefed name,
                        // but may also be "struct (anonymous at .h:1)"
                        let pretty_name = ty.spelling();
                        if clang::is_valid_identifier(&pretty_name) {
                            name = Some(pretty_name);
                        }
                    }

                    TypeKind::Comp(complex)
                }
                CXType_Vector => {
                    let inner = Item::from_ty(
                        ty.elem_type().as_ref().unwrap(),
                        location,
                        None,
                        ctx,
                    )?;
                    TypeKind::Vector(inner, ty.num_elements().unwrap())
                }
                CXType_ConstantArray => {
                    let inner = Item::from_ty(
                        ty.elem_type().as_ref().unwrap(),
                        location,
                        None,
                        ctx,
                    )
                    .expect("Not able to resolve array element?");
                    TypeKind::Array(inner, ty.num_elements().unwrap())
                }
                CXType_Elaborated => {
                    // Parse the type the elaborated type names instead.
                    return Self::from_clang_ty(
                        potential_id,
                        &ty.named(),
                        location,
                        parent_id,
                        ctx,
                    );
                }
                CXType_ObjCId => TypeKind::ObjCId,
                CXType_ObjCSel => TypeKind::ObjCSel,
                CXType_ObjCClass | CXType_ObjCInterface => {
                    let interface = ObjCInterface::from_ty(&location, ctx)
                        .expect("Not a valid objc interface?");
                    if !is_anonymous {
                        name = Some(interface.rust_name());
                    }
                    TypeKind::ObjCInterface(interface)
                }
                CXType_Dependent => {
                    return Err(ParseError::Continue);
                }
                _ => {
                    warn!(
                        "unsupported type: kind = {:?}; ty = {:?}; at {:?}",
                        ty.kind(),
                        ty,
                        location
                    );
                    return Err(ParseError::Continue);
                }
            }
        };

        // Several branches above may have replaced the name; make sure an
        // empty one is never stored.
        name = name.filter(|n| !n.is_empty());

        // A constant array counts as const when its element type is const.
        let is_const = ty.is_const() ||
            (ty.kind() == CXType_ConstantArray &&
                ty.elem_type()
                    .map_or(false, |element| element.is_const()));

        let ty = Type::new(name, layout, kind, is_const);
        // TODO: maybe declaration.canonical()?
        Ok(ParseResult::New(ty, Some(cursor.canonical())))
    }
}

impl Trace for Type {
    type Extra = Item;

    /// Report every item this type refers to directly, with an edge kind
    /// describing the relationship.
    fn trace<T>(&self, context: &BindgenContext, tracer: &mut T, item: &Item)
    where
        T: Tracer,
    {
        if self
            .name()
            .map_or(false, |name| context.is_stdint_type(name))
        {
            // These types are special-cased in codegen and don't need to be traversed.
            return;
        }
        match *self.kind() {
            TypeKind::Pointer(inner) |
            TypeKind::Reference(inner) |
            TypeKind::Array(inner, _) |
            TypeKind::Vector(inner, _) |
            TypeKind::BlockPointer(inner) |
            TypeKind::Alias(inner) |
            TypeKind::ResolvedTypeRef(inner) => {
                tracer.visit_kind(inner.into(), EdgeKind::TypeReference);
            }
            TypeKind::TemplateAlias(inner, ref template_params) => {
                tracer.visit_kind(inner.into(), EdgeKind::TypeReference);
                for param in template_params {
                    tracer.visit_kind(
                        param.into(),
                        EdgeKind::TemplateParameterDefinition,
                    );
                }
            }
            TypeKind::TemplateInstantiation(ref inst) => {
                inst.trace(context, tracer, &());
            }
            TypeKind::Comp(ref ci) => ci.trace(context, tracer, item),
            TypeKind::Function(ref sig) => sig.trace(context, tracer, &()),
            TypeKind::Enum(ref en) => {
                if let Some(repr) = en.repr() {
                    tracer.visit(repr.into());
                }
            }
            TypeKind::UnresolvedTypeRef(_, _, Some(id)) => {
                tracer.visit(id);
            }

            TypeKind::ObjCInterface(ref interface) => {
                interface.trace(context, tracer, &());
            }

            // None of these variants have edges to other items and types.
            TypeKind::Opaque |
            TypeKind::UnresolvedTypeRef(_, _, None) |
            TypeKind::TypeParam |
            TypeKind::Void |
            TypeKind::NullPtr |
            TypeKind::Int(_) |
            TypeKind::Float(_) |
            TypeKind::Complex(_) |
            TypeKind::ObjCId |
            TypeKind::ObjCSel => {}
        }
    }
}
diff --git a/third_party/rust/bindgen/ir/var.rs b/third_party/rust/bindgen/ir/var.rs
new file mode 100644
index 0000000000..c86742ff69
--- /dev/null
+++ b/third_party/rust/bindgen/ir/var.rs
@@ -0,0 +1,414 @@
//! Intermediate representation of variables.

use super::super::codegen::MacroTypeVariation;
use super::context::{BindgenContext, TypeId};
use super::dot::DotAttributes;
use super::function::cursor_mangling;
use super::int::IntKind;
use super::item::Item;
use super::ty::{FloatKind, TypeKind};
use crate::callbacks::MacroParsingBehavior;
use crate::clang;
use crate::clang::ClangToken;
use crate::parse::{
    ClangItemParser, ClangSubItemParser, ParseError, ParseResult,
};
use cexpr;
use std::io;
use std::num::Wrapping;

/// The type for a constant variable.
#[derive(Debug)]
pub enum VarType {
    /// A boolean.
    Bool(bool),
    /// An integer.
    Int(i64),
    /// A floating point number.
    Float(f64),
    /// A character.
    Char(u8),
    /// A string, not necessarily well-formed utf-8.
    String(Vec<u8>),
}

/// A `Var` is our intermediate representation of a variable.
#[derive(Debug)]
pub struct Var {
    /// The name of the variable.
    name: String,
    /// The mangled name of the variable.
    mangled_name: Option<String>,
    /// The type of the variable.
    ty: TypeId,
    /// The value of the variable, that needs to be suitable for `ty`.
    val: Option<VarType>,
    /// Whether this variable is const.
    is_const: bool,
}

impl Var {
    /// Construct a new `Var`.
+ pub fn new( + name: String, + mangled_name: Option<String>, + ty: TypeId, + val: Option<VarType>, + is_const: bool, + ) -> Var { + assert!(!name.is_empty()); + Var { + name, + mangled_name, + ty, + val, + is_const, + } + } + + /// Is this variable `const` qualified? + pub fn is_const(&self) -> bool { + self.is_const + } + + /// The value of this constant variable, if any. + pub fn val(&self) -> Option<&VarType> { + self.val.as_ref() + } + + /// Get this variable's type. + pub fn ty(&self) -> TypeId { + self.ty + } + + /// Get this variable's name. + pub fn name(&self) -> &str { + &self.name + } + + /// Get this variable's mangled name. + pub fn mangled_name(&self) -> Option<&str> { + self.mangled_name.as_deref() + } +} + +impl DotAttributes for Var { + fn dot_attributes<W>( + &self, + _ctx: &BindgenContext, + out: &mut W, + ) -> io::Result<()> + where + W: io::Write, + { + if self.is_const { + writeln!(out, "<tr><td>const</td><td>true</td></tr>")?; + } + + if let Some(ref mangled) = self.mangled_name { + writeln!( + out, + "<tr><td>mangled name</td><td>{}</td></tr>", + mangled + )?; + } + + Ok(()) + } +} + +fn default_macro_constant_type(ctx: &BindgenContext, value: i64) -> IntKind { + if value < 0 || + ctx.options().default_macro_constant_type == + MacroTypeVariation::Signed + { + if value < i32::min_value() as i64 || value > i32::max_value() as i64 { + IntKind::I64 + } else if !ctx.options().fit_macro_constants || + value < i16::min_value() as i64 || + value > i16::max_value() as i64 + { + IntKind::I32 + } else if value < i8::min_value() as i64 || + value > i8::max_value() as i64 + { + IntKind::I16 + } else { + IntKind::I8 + } + } else if value > u32::max_value() as i64 { + IntKind::U64 + } else if !ctx.options().fit_macro_constants || + value > u16::max_value() as i64 + { + IntKind::U32 + } else if value > u8::max_value() as i64 { + IntKind::U16 + } else { + IntKind::U8 + } +} + +/// Parses tokens from a CXCursor_MacroDefinition pointing into a function-like 
+/// macro, and calls the func_macro callback. +fn handle_function_macro( + cursor: &clang::Cursor, + callbacks: &dyn crate::callbacks::ParseCallbacks, +) { + let is_closing_paren = |t: &ClangToken| { + // Test cheap token kind before comparing exact spellings. + t.kind == clang_sys::CXToken_Punctuation && t.spelling() == b")" + }; + let tokens: Vec<_> = cursor.tokens().iter().collect(); + if let Some(boundary) = tokens.iter().position(is_closing_paren) { + let mut spelled = tokens.iter().map(ClangToken::spelling); + // Add 1, to convert index to length. + let left = spelled.by_ref().take(boundary + 1); + let left = left.collect::<Vec<_>>().concat(); + if let Ok(left) = String::from_utf8(left) { + let right: Vec<_> = spelled.collect(); + callbacks.func_macro(&left, &right); + } + } +} + +impl ClangSubItemParser for Var { + fn parse( + cursor: clang::Cursor, + ctx: &mut BindgenContext, + ) -> Result<ParseResult<Self>, ParseError> { + use cexpr::expr::EvalResult; + use cexpr::literal::CChar; + use clang_sys::*; + match cursor.kind() { + CXCursor_MacroDefinition => { + for callbacks in &ctx.options().parse_callbacks { + match callbacks.will_parse_macro(&cursor.spelling()) { + MacroParsingBehavior::Ignore => { + return Err(ParseError::Continue); + } + MacroParsingBehavior::Default => {} + } + + if cursor.is_macro_function_like() { + handle_function_macro(&cursor, callbacks.as_ref()); + // We handled the macro, skip macro processing below. + return Err(ParseError::Continue); + } + } + + let value = parse_macro(ctx, &cursor); + + let (id, value) = match value { + Some(v) => v, + None => return Err(ParseError::Continue), + }; + + assert!(!id.is_empty(), "Empty macro name?"); + + let previously_defined = ctx.parsed_macro(&id); + + // NB: It's important to "note" the macro even if the result is + // not an integer, otherwise we might loose other kind of + // derived macros. 
+ ctx.note_parsed_macro(id.clone(), value.clone()); + + if previously_defined { + let name = String::from_utf8(id).unwrap(); + warn!("Duplicated macro definition: {}", name); + return Err(ParseError::Continue); + } + + // NOTE: Unwrapping, here and above, is safe, because the + // identifier of a token comes straight from clang, and we + // enforce utf8 there, so we should have already panicked at + // this point. + let name = String::from_utf8(id).unwrap(); + let (type_kind, val) = match value { + EvalResult::Invalid => return Err(ParseError::Continue), + EvalResult::Float(f) => { + (TypeKind::Float(FloatKind::Double), VarType::Float(f)) + } + EvalResult::Char(c) => { + let c = match c { + CChar::Char(c) => { + assert_eq!(c.len_utf8(), 1); + c as u8 + } + CChar::Raw(c) => { + assert!(c <= ::std::u8::MAX as u64); + c as u8 + } + }; + + (TypeKind::Int(IntKind::U8), VarType::Char(c)) + } + EvalResult::Str(val) => { + let char_ty = Item::builtin_type( + TypeKind::Int(IntKind::U8), + true, + ctx, + ); + for callbacks in &ctx.options().parse_callbacks { + callbacks.str_macro(&name, &val); + } + (TypeKind::Pointer(char_ty), VarType::String(val)) + } + EvalResult::Int(Wrapping(value)) => { + let kind = ctx + .options() + .last_callback(|c| c.int_macro(&name, value)) + .unwrap_or_else(|| { + default_macro_constant_type(ctx, value) + }); + + (TypeKind::Int(kind), VarType::Int(value)) + } + }; + + let ty = Item::builtin_type(type_kind, true, ctx); + + Ok(ParseResult::New( + Var::new(name, None, ty, Some(val), true), + Some(cursor), + )) + } + CXCursor_VarDecl => { + let name = cursor.spelling(); + if name.is_empty() { + warn!("Empty constant name?"); + return Err(ParseError::Continue); + } + + let ty = cursor.cur_type(); + + // TODO(emilio): do we have to special-case constant arrays in + // some other places? 
+ let is_const = ty.is_const() || + ([CXType_ConstantArray, CXType_IncompleteArray] + .contains(&ty.kind()) && + ty.elem_type() + .map_or(false, |element| element.is_const())); + + let ty = match Item::from_ty(&ty, cursor, None, ctx) { + Ok(ty) => ty, + Err(e) => { + assert!( + matches!(ty.kind(), CXType_Auto | CXType_Unexposed), + "Couldn't resolve constant type, and it \ + wasn't an nondeductible auto type or unexposed \ + type!" + ); + return Err(e); + } + }; + + // Note: Ty might not be totally resolved yet, see + // tests/headers/inner_const.hpp + // + // That's fine because in that case we know it's not a literal. + let canonical_ty = ctx + .safe_resolve_type(ty) + .and_then(|t| t.safe_canonical_type(ctx)); + + let is_integer = canonical_ty.map_or(false, |t| t.is_integer()); + let is_float = canonical_ty.map_or(false, |t| t.is_float()); + + // TODO: We could handle `char` more gracefully. + // TODO: Strings, though the lookup is a bit more hard (we need + // to look at the canonical type of the pointee too, and check + // is char, u8, or i8 I guess). + let value = if is_integer { + let kind = match *canonical_ty.unwrap().kind() { + TypeKind::Int(kind) => kind, + _ => unreachable!(), + }; + + let mut val = cursor.evaluate().and_then(|v| v.as_int()); + if val.is_none() || !kind.signedness_matches(val.unwrap()) { + val = get_integer_literal_from_cursor(&cursor); + } + + val.map(|val| { + if kind == IntKind::Bool { + VarType::Bool(val != 0) + } else { + VarType::Int(val) + } + }) + } else if is_float { + cursor + .evaluate() + .and_then(|v| v.as_double()) + .map(VarType::Float) + } else { + cursor + .evaluate() + .and_then(|v| v.as_literal_string()) + .map(VarType::String) + }; + + let mangling = cursor_mangling(ctx, &cursor); + let var = Var::new(name, mangling, ty, value, is_const); + + Ok(ParseResult::New(var, Some(cursor))) + } + _ => { + /* TODO */ + Err(ParseError::Continue) + } + } + } +} + +/// Try and parse a macro using all the macros parsed until now. 
+fn parse_macro( + ctx: &BindgenContext, + cursor: &clang::Cursor, +) -> Option<(Vec<u8>, cexpr::expr::EvalResult)> { + use cexpr::expr; + + let cexpr_tokens = cursor.cexpr_tokens(); + + let parser = expr::IdentifierParser::new(ctx.parsed_macros()); + + match parser.macro_definition(&cexpr_tokens) { + Ok((_, (id, val))) => Some((id.into(), val)), + _ => None, + } +} + +fn parse_int_literal_tokens(cursor: &clang::Cursor) -> Option<i64> { + use cexpr::expr; + use cexpr::expr::EvalResult; + + let cexpr_tokens = cursor.cexpr_tokens(); + + // TODO(emilio): We can try to parse other kinds of literals. + match expr::expr(&cexpr_tokens) { + Ok((_, EvalResult::Int(Wrapping(val)))) => Some(val), + _ => None, + } +} + +fn get_integer_literal_from_cursor(cursor: &clang::Cursor) -> Option<i64> { + use clang_sys::*; + let mut value = None; + cursor.visit(|c| { + match c.kind() { + CXCursor_IntegerLiteral | CXCursor_UnaryOperator => { + value = parse_int_literal_tokens(&c); + } + CXCursor_UnexposedExpr => { + value = get_integer_literal_from_cursor(&c); + } + _ => (), + } + if value.is_some() { + CXChildVisit_Break + } else { + CXChildVisit_Continue + } + }); + value +} |