diff options
Diffstat (limited to 'src/arrow/experimental/computeir')
-rw-r--r-- | src/arrow/experimental/computeir/Expression.fbs | 222 | ||||
-rw-r--r-- | src/arrow/experimental/computeir/Literal.fbs | 184 | ||||
-rw-r--r-- | src/arrow/experimental/computeir/Plan.fbs | 28 | ||||
-rw-r--r-- | src/arrow/experimental/computeir/Relation.fbs | 221 |
4 files changed, 655 insertions, 0 deletions
diff --git a/src/arrow/experimental/computeir/Expression.fbs b/src/arrow/experimental/computeir/Expression.fbs new file mode 100644 index 000000000..e3a7fb4eb --- /dev/null +++ b/src/arrow/experimental/computeir/Expression.fbs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "../../format/Schema.fbs"; +include "Literal.fbs"; + +namespace org.apache.arrow.computeir.flatbuf; + +/// Access a value for a given map key +table MapKey { + /// Any expression can be a map key. + key: Expression (required); +} + +/// Struct field access +table StructField { + /// The position of the field in the struct schema + position: uint32; +} + +/// Zero-based array index +table ArraySubscript { + position: uint32; +} + +/// Zero-based range of elements in an array +table ArraySlice { + /// The start of an array slice, inclusive + start_inclusive: uint32; + /// The end of an array slice, exclusive + end_exclusive: uint32; +} + +/// Field name in a relation, in ordinal position of the relation's schema. +table FieldIndex { + position: uint32; +} + +/// A union of possible dereference operations +union Deref { + /// Access a value for a given map key + MapKey, + /// Access the value at a struct field + StructField, + /// Access the element at a given index in an array + ArraySubscript, + /// Access a range of elements in an array + ArraySlice, + /// Access a field of a relation + FieldIndex, +} + +/// Access the data of a field +table FieldRef { + ref: Deref (required); + /// For Expressions which might reference fields in multiple Relations, + /// this index may be provided to indicate which Relation's fields + /// `ref` points into. For example in the case of a join, + /// 0 refers to the left relation and 1 to the right relation. + relation_index: int = 0; +} + +/// A function call expression +table Call { + /// The function to call + name: string (required); + + /// The arguments passed to `name`. + arguments: [Expression] (required); + + /// Possible ordering of input. These are useful + /// in aggregates where ordering in meaningful such as + /// string concatenation + orderings: [SortKey]; +} + +/// A single WHEN x THEN y fragment. +table CaseFragment { + match: Expression (required); + result: Expression (required); +} + +/// Conditional case statement expression +table ConditionalCase { + /// List of conditions to evaluate + conditions: [CaseFragment] (required); + /// The default value if no cases match. This is typically NULL in SQL + /// implementations. + /// + /// Defaulting to NULL is a frontend choice, so producers must specify NULL + /// if that's their desired behavior. + else: Expression (required); +} + +/// Switch-style case expression +table SimpleCase { + /// The expression whose value will be matched + expression: Expression (required); + /// Matches for `expression` + matches: [CaseFragment] (required); + /// The default value if no cases match + else: Expression (required); +} + +/// Whether lesser values should precede greater or vice versa, +/// also whether nulls should preced or follow values +enum Ordering : uint8 { + ASCENDING_THEN_NULLS, + DESCENDING_THEN_NULLS, + NULLS_THEN_ASCENDING, + NULLS_THEN_DESCENDING, +} + +/// An expression with an order +table SortKey { + expression: Expression (required); + ordering: Ordering = ASCENDING_THEN_NULLS; +} + +/// An unbounded window bound +table Unbounded {} + +/// A concrete bound, which can be an expression or unbounded +union ConcreteBoundImpl { + Expression, + Unbounded, +} + +/// Boundary is preceding rows, determined by the contained expression +table Preceding { + impl: ConcreteBoundImpl (required); +} + +/// Boundary is following rows, determined by the contained expression +table Following { + impl: ConcreteBoundImpl (required); +} + +/// Boundary is the current row +table CurrentRow {} + +union Bound { + Preceding, + Following, + CurrentRow, +} + +/// The kind of window function to be executed +enum Frame : uint8 { + Rows, + Range, +} + +/// An expression representing a window function call. +table WindowCall { + /// The expression to operate over + expression: Expression (required); + /// The kind of window frame + kind: Frame; + /// Partition keys + partitions: [Expression] (required); + /// Sort keys + orderings: [SortKey] (required); + /// Lower window bound + lower_bound: Bound (required); + /// Upper window bound + upper_bound: Bound (required); +} + +/// A cast expression +table Cast { + /// The expression to cast + operand: Expression (required); + /// The type to cast to. This value is a `Field` to allow complete representation + /// of arrow types. + /// + /// `Type` is unable to completely represent complex types like lists and + /// maps. + to: org.apache.arrow.flatbuf.Field (required); +} + +/// Various expression types +/// +/// WindowCall is a separate variant +/// due to special options for each that don't apply to generic +/// function calls. Again this is done to make it easier +/// for consumers to deal with the structure of the operation +union ExpressionImpl { + Literal, + FieldRef, + Call, + ConditionalCase, + SimpleCase, + WindowCall, + Cast, +} + +/// Expression types +/// +/// Expressions have a concrete `impl` value, which is a specific operation. +/// +/// This is a workaround for flatbuffers' lack of support for direct use of +/// union types. +table Expression { + impl: ExpressionImpl (required); +} + +root_type Expression; diff --git a/src/arrow/experimental/computeir/Literal.fbs b/src/arrow/experimental/computeir/Literal.fbs new file mode 100644 index 000000000..a966b6ece --- /dev/null +++ b/src/arrow/experimental/computeir/Literal.fbs @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "../../format/Schema.fbs"; + +namespace org.apache.arrow.computeir.flatbuf; + +table ListLiteral { + values: [Literal] (required); +} + +table StructLiteral { + /// Values for each struct field; the order must match the order of fields + /// in the `type` field of `Literal`. + values: [Literal] (required); +} + +table KeyValue { + key: Literal (required); + value: Literal (required); +} + +table MapLiteral { + values: [KeyValue] (required); +} + +table Int8Literal { + value: int8; +} + +table Int16Literal { + value: int16; +} + +table Int32Literal { + value: int32; +} + +table Int64Literal { + value: int64; +} + +table UInt8Literal { + value: uint8; +} + +table UInt16Literal { + value: uint16; +} + +table UInt32Literal { + value: uint32; +} + +table UInt64Literal { + value: uint64; +} + +table Float16Literal { + value: uint16; +} + +table Float32Literal { + value: float32; +} + +table Float64Literal { + value: float64; +} + +table DecimalLiteral { + /// Bytes of a Decimal value; bytes must be in little-endian order. + value: [byte] (required); +} + +table BooleanLiteral { + value: bool; +} + +table DateLiteral { + value: int64; +} + +table TimeLiteral { + value: int64; +} + +table TimestampLiteral { + value: int64; +} + +table IntervalLiteralMonths { + months: int32; +} + +table IntervalLiteralDaysMilliseconds { + days: int32; + milliseconds: int32; +} + +union IntervalLiteralImpl { + IntervalLiteralMonths, + IntervalLiteralDaysMilliseconds, +} + +table IntervalLiteral { + value: IntervalLiteralImpl (required); +} + +table DurationLiteral { + value: int64; +} + +table BinaryLiteral { + value: [byte] (required); +} + +table FixedSizeBinaryLiteral { + value: [byte] (required); +} + +table StringLiteral { + value: string (required); +} + +// no union literal is defined as only one branch of a union can be resolved. +// no literals for large string/binary types as flatbuffer is limited to 2gb. + +union LiteralImpl { + BooleanLiteral, + + Int8Literal, + Int16Literal, + Int32Literal, + Int64Literal, + + UInt8Literal, + UInt16Literal, + UInt32Literal, + UInt64Literal, + + DateLiteral, + TimeLiteral, + TimestampLiteral, + IntervalLiteral, + DurationLiteral, + + DecimalLiteral, + + Float16Literal, + Float32Literal, + Float64Literal, + + ListLiteral, + StructLiteral, + MapLiteral, + + StringLiteral, + BinaryLiteral, + FixedSizeBinaryLiteral, +} + +table Literal { + /// Literal value data; for null literals do not include this field. + impl: LiteralImpl; + /// Type of the literal value. This must match `impl`. + type: org.apache.arrow.flatbuf.Field (required); +} + +root_type Literal; diff --git a/src/arrow/experimental/computeir/Plan.fbs b/src/arrow/experimental/computeir/Plan.fbs new file mode 100644 index 000000000..779974ac9 --- /dev/null +++ b/src/arrow/experimental/computeir/Plan.fbs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "Relation.fbs"; + +namespace org.apache.arrow.computeir.flatbuf; + +/// A specification of a query. +table Plan { + /// One or more output relations. + sinks: [Relation] (required); +} + +root_type Plan; diff --git a/src/arrow/experimental/computeir/Relation.fbs b/src/arrow/experimental/computeir/Relation.fbs new file mode 100644 index 000000000..ab0156e0f --- /dev/null +++ b/src/arrow/experimental/computeir/Relation.fbs @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +include "../../format/Schema.fbs"; +include "Literal.fbs"; +include "Expression.fbs"; + +namespace org.apache.arrow.computeir.flatbuf; + +/// A data type indicating that a different mapping of columns +/// should occur in the output. +/// +/// For example: +/// +/// Given a query `SELECT b, a FROM t` where `t` has columns a, b, c +/// the mapping value for the projection would equal [1, 0]. +table Remap { + mapping: [FieldIndex] (required); +} + +// Pass through indicates that no output remapping should occur. +table PassThrough {} + +/// A union for the different colum remapping variants +union Emit { + Remap, + PassThrough, +} + +/// An identifier for relations in a query. +/// +/// A table is used here to allow plan implementations optionality. +table RelId { + id: uint64; +} + +/// Fields common to every relational operator +table RelBase { + /// Output remapping of ordinal columns for a given operation + output_mapping: Emit (required); + + /// An identifiier for a relation. The identifier should be unique over the + /// entire plan. Optional. + id: RelId; +} + +/// Filter operation +table Filter { + /// Common options + base: RelBase (required); + /// Child relation + rel: Relation (required); + /// The expression which will be evaluated against input rows + /// to determine whether they should be excluded from the + /// filter relation's output. + predicate: Expression (required); +} + +/// Projection +table Project { + /// Common options + base: RelBase (required); + /// Child relation + rel: Relation (required); + /// Expressions which will be evaluated to produce to + /// the rows of the project relation's output. + expressions: [Expression] (required); +} + +/// A set of grouping keys +table Grouping { + /// Expressions to group by + keys: [Expression] (required); +} + +/// Aggregate operation +table Aggregate { + /// Common options + base: RelBase (required); + /// Child relation + rel: Relation (required); + /// Expressions which will be evaluated to produce to + /// the rows of the aggregate relation's output. + measures: [Expression] (required); + /// Keys by which `aggregations` will be grouped. + /// + /// The nested list here is to support grouping sets + /// eg + /// + /// SELECT a, b, c, sum(d) + /// FROM t + /// GROUP BY + /// GROUPING SETS ( + /// (a, b, c), + /// (a, b), + /// (a), + /// () + /// ); + groupings: [Grouping] (required); +} + +enum JoinKind : uint8 { + Anti, + Cross, + FullOuter, + Inner, + LeftOuter, + LeftSemi, + RightOuter, +} + +/// Join between two tables +table Join { + /// Common options + base: RelBase (required); + /// Left relation + left: Relation (required); + /// Right relation + right: Relation (required); + /// The expression which will be evaluated against rows from each + /// input to determine whether they should be included in the + /// join relation's output. + on_expression: Expression (required); + /// The kind of join to use. + join_kind: JoinKind; +} + +/// Order by relation +table OrderBy { + /// Common options + base: RelBase (required); + /// Child relation + rel: Relation (required); + /// Define sort order for rows of output. + /// Keys with higher precedence are ordered ahead of other keys. + keys: [SortKey] (required); +} + +/// Limit operation +table Limit { + /// Common options + base: RelBase (required); + /// Child relation + rel: Relation (required); + /// Starting index of rows + offset: uint32; + /// The maximum number of rows of output. + count: uint32; +} + +/// The kind of set operation being performed. +enum SetOpKind : uint8 { + Union, + Intersection, + Difference, +} + +/// A set operation on two or more relations +table SetOperation { + /// Common options + base: RelBase (required); + /// Child relations + rels: [Relation] (required); + /// The kind of set operation + set_op: SetOpKind; +} + +/// A single column of literal values. +table LiteralColumn { + /// The literal values of the column + elements: [Literal] (required); +} + +/// Literal relation +table LiteralRelation { + /// Common options + base: RelBase (required); + /// The columns of this literal relation. + columns: [LiteralColumn] (required); +} + +/// An external source of tabular data +table Source { + base: RelBase (required); + name: string (required); + schema: org.apache.arrow.flatbuf.Schema (required); +} + +/// The varieties of relations +union RelationImpl { + Aggregate, + Filter, + Join, + Limit, + LiteralRelation, + OrderBy, + Project, + SetOperation, + Source, +} + +/// A table holding an instance of the possible relation types. +table Relation { + impl: RelationImpl (required); +} + +root_type Relation; |