summaryrefslogtreecommitdiffstats
path: root/src/arrow/experimental/computeir
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/arrow/experimental/computeir
parentInitial commit. (diff)
downloadceph-upstream/18.2.2.tar.xz
ceph-upstream/18.2.2.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/arrow/experimental/computeir')
-rw-r--r--src/arrow/experimental/computeir/Expression.fbs222
-rw-r--r--src/arrow/experimental/computeir/Literal.fbs184
-rw-r--r--src/arrow/experimental/computeir/Plan.fbs28
-rw-r--r--src/arrow/experimental/computeir/Relation.fbs221
4 files changed, 655 insertions, 0 deletions
diff --git a/src/arrow/experimental/computeir/Expression.fbs b/src/arrow/experimental/computeir/Expression.fbs
new file mode 100644
index 000000000..e3a7fb4eb
--- /dev/null
+++ b/src/arrow/experimental/computeir/Expression.fbs
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+include "../../format/Schema.fbs";
+include "Literal.fbs";
+
+namespace org.apache.arrow.computeir.flatbuf;
+
+/// Access a value for a given map key
+table MapKey {
+ /// Any expression can be a map key.
+ key: Expression (required);
+}
+
+/// Struct field access
+table StructField {
+ /// The position of the field in the struct schema
+ position: uint32;
+}
+
+/// Zero-based array index
+table ArraySubscript {
+ position: uint32;
+}
+
+/// Zero-based range of elements in an array
+table ArraySlice {
+ /// The start of an array slice, inclusive
+ start_inclusive: uint32;
+ /// The end of an array slice, exclusive
+ end_exclusive: uint32;
+}
+
+/// Field name in a relation, in ordinal position of the relation's schema.
+table FieldIndex {
+ position: uint32;
+}
+
+/// A union of possible dereference operations
+union Deref {
+ /// Access a value for a given map key
+ MapKey,
+ /// Access the value at a struct field
+ StructField,
+ /// Access the element at a given index in an array
+ ArraySubscript,
+ /// Access a range of elements in an array
+ ArraySlice,
+ /// Access a field of a relation
+ FieldIndex,
+}
+
+/// Access the data of a field
+table FieldRef {
+ ref: Deref (required);
+ /// For Expressions which might reference fields in multiple Relations,
+ /// this index may be provided to indicate which Relation's fields
+ /// `ref` points into. For example in the case of a join,
+ /// 0 refers to the left relation and 1 to the right relation.
+ relation_index: int = 0;
+}
+
+/// A function call expression
+table Call {
+ /// The function to call
+ name: string (required);
+
+ /// The arguments passed to `name`.
+ arguments: [Expression] (required);
+
+ /// Possible ordering of input. These are useful
+ /// in aggregates where ordering in meaningful such as
+ /// string concatenation
+ orderings: [SortKey];
+}
+
+/// A single WHEN x THEN y fragment.
+table CaseFragment {
+ match: Expression (required);
+ result: Expression (required);
+}
+
+/// Conditional case statement expression
+table ConditionalCase {
+ /// List of conditions to evaluate
+ conditions: [CaseFragment] (required);
+ /// The default value if no cases match. This is typically NULL in SQL
+ /// implementations.
+ ///
+ /// Defaulting to NULL is a frontend choice, so producers must specify NULL
+ /// if that's their desired behavior.
+ else: Expression (required);
+}
+
+/// Switch-style case expression
+table SimpleCase {
+ /// The expression whose value will be matched
+ expression: Expression (required);
+ /// Matches for `expression`
+ matches: [CaseFragment] (required);
+ /// The default value if no cases match
+ else: Expression (required);
+}
+
+/// Whether lesser values should precede greater or vice versa,
+/// also whether nulls should preced or follow values
+enum Ordering : uint8 {
+ ASCENDING_THEN_NULLS,
+ DESCENDING_THEN_NULLS,
+ NULLS_THEN_ASCENDING,
+ NULLS_THEN_DESCENDING,
+}
+
+/// An expression with an order
+table SortKey {
+ expression: Expression (required);
+ ordering: Ordering = ASCENDING_THEN_NULLS;
+}
+
+/// An unbounded window bound
+table Unbounded {}
+
+/// A concrete bound, which can be an expression or unbounded
+union ConcreteBoundImpl {
+ Expression,
+ Unbounded,
+}
+
+/// Boundary is preceding rows, determined by the contained expression
+table Preceding {
+ impl: ConcreteBoundImpl (required);
+}
+
+/// Boundary is following rows, determined by the contained expression
+table Following {
+ impl: ConcreteBoundImpl (required);
+}
+
+/// Boundary is the current row
+table CurrentRow {}
+
+union Bound {
+ Preceding,
+ Following,
+ CurrentRow,
+}
+
+/// The kind of window function to be executed
+enum Frame : uint8 {
+ Rows,
+ Range,
+}
+
+/// An expression representing a window function call.
+table WindowCall {
+ /// The expression to operate over
+ expression: Expression (required);
+ /// The kind of window frame
+ kind: Frame;
+ /// Partition keys
+ partitions: [Expression] (required);
+ /// Sort keys
+ orderings: [SortKey] (required);
+ /// Lower window bound
+ lower_bound: Bound (required);
+ /// Upper window bound
+ upper_bound: Bound (required);
+}
+
+/// A cast expression
+table Cast {
+ /// The expression to cast
+ operand: Expression (required);
+ /// The type to cast to. This value is a `Field` to allow complete representation
+ /// of arrow types.
+ ///
+ /// `Type` is unable to completely represent complex types like lists and
+ /// maps.
+ to: org.apache.arrow.flatbuf.Field (required);
+}
+
+/// Various expression types
+///
+/// WindowCall is a separate variant
+/// due to special options for each that don't apply to generic
+/// function calls. Again this is done to make it easier
+/// for consumers to deal with the structure of the operation
+union ExpressionImpl {
+ Literal,
+ FieldRef,
+ Call,
+ ConditionalCase,
+ SimpleCase,
+ WindowCall,
+ Cast,
+}
+
+/// Expression types
+///
+/// Expressions have a concrete `impl` value, which is a specific operation.
+///
+/// This is a workaround for flatbuffers' lack of support for direct use of
+/// union types.
+table Expression {
+ impl: ExpressionImpl (required);
+}
+
+root_type Expression;
diff --git a/src/arrow/experimental/computeir/Literal.fbs b/src/arrow/experimental/computeir/Literal.fbs
new file mode 100644
index 000000000..a966b6ece
--- /dev/null
+++ b/src/arrow/experimental/computeir/Literal.fbs
@@ -0,0 +1,184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+include "../../format/Schema.fbs";
+
+namespace org.apache.arrow.computeir.flatbuf;
+
+table ListLiteral {
+ values: [Literal] (required);
+}
+
+table StructLiteral {
+ /// Values for each struct field; the order must match the order of fields
+ /// in the `type` field of `Literal`.
+ values: [Literal] (required);
+}
+
+table KeyValue {
+ key: Literal (required);
+ value: Literal (required);
+}
+
+table MapLiteral {
+ values: [KeyValue] (required);
+}
+
+table Int8Literal {
+ value: int8;
+}
+
+table Int16Literal {
+ value: int16;
+}
+
+table Int32Literal {
+ value: int32;
+}
+
+table Int64Literal {
+ value: int64;
+}
+
+table UInt8Literal {
+ value: uint8;
+}
+
+table UInt16Literal {
+ value: uint16;
+}
+
+table UInt32Literal {
+ value: uint32;
+}
+
+table UInt64Literal {
+ value: uint64;
+}
+
+table Float16Literal {
+ value: uint16;
+}
+
+table Float32Literal {
+ value: float32;
+}
+
+table Float64Literal {
+ value: float64;
+}
+
+table DecimalLiteral {
+ /// Bytes of a Decimal value; bytes must be in little-endian order.
+ value: [byte] (required);
+}
+
+table BooleanLiteral {
+ value: bool;
+}
+
+table DateLiteral {
+ value: int64;
+}
+
+table TimeLiteral {
+ value: int64;
+}
+
+table TimestampLiteral {
+ value: int64;
+}
+
+table IntervalLiteralMonths {
+ months: int32;
+}
+
+table IntervalLiteralDaysMilliseconds {
+ days: int32;
+ milliseconds: int32;
+}
+
+union IntervalLiteralImpl {
+ IntervalLiteralMonths,
+ IntervalLiteralDaysMilliseconds,
+}
+
+table IntervalLiteral {
+ value: IntervalLiteralImpl (required);
+}
+
+table DurationLiteral {
+ value: int64;
+}
+
+table BinaryLiteral {
+ value: [byte] (required);
+}
+
+table FixedSizeBinaryLiteral {
+ value: [byte] (required);
+}
+
+table StringLiteral {
+ value: string (required);
+}
+
+// no union literal is defined as only one branch of a union can be resolved.
+// no literals for large string/binary types as flatbuffer is limited to 2gb.
+
+union LiteralImpl {
+ BooleanLiteral,
+
+ Int8Literal,
+ Int16Literal,
+ Int32Literal,
+ Int64Literal,
+
+ UInt8Literal,
+ UInt16Literal,
+ UInt32Literal,
+ UInt64Literal,
+
+ DateLiteral,
+ TimeLiteral,
+ TimestampLiteral,
+ IntervalLiteral,
+ DurationLiteral,
+
+ DecimalLiteral,
+
+ Float16Literal,
+ Float32Literal,
+ Float64Literal,
+
+ ListLiteral,
+ StructLiteral,
+ MapLiteral,
+
+ StringLiteral,
+ BinaryLiteral,
+ FixedSizeBinaryLiteral,
+}
+
+table Literal {
+ /// Literal value data; for null literals do not include this field.
+ impl: LiteralImpl;
+ /// Type of the literal value. This must match `impl`.
+ type: org.apache.arrow.flatbuf.Field (required);
+}
+
+root_type Literal;
diff --git a/src/arrow/experimental/computeir/Plan.fbs b/src/arrow/experimental/computeir/Plan.fbs
new file mode 100644
index 000000000..779974ac9
--- /dev/null
+++ b/src/arrow/experimental/computeir/Plan.fbs
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+include "Relation.fbs";
+
+namespace org.apache.arrow.computeir.flatbuf;
+
+/// A specification of a query.
+table Plan {
+ /// One or more output relations.
+ sinks: [Relation] (required);
+}
+
+root_type Plan;
diff --git a/src/arrow/experimental/computeir/Relation.fbs b/src/arrow/experimental/computeir/Relation.fbs
new file mode 100644
index 000000000..ab0156e0f
--- /dev/null
+++ b/src/arrow/experimental/computeir/Relation.fbs
@@ -0,0 +1,221 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+include "../../format/Schema.fbs";
+include "Literal.fbs";
+include "Expression.fbs";
+
+namespace org.apache.arrow.computeir.flatbuf;
+
+/// A data type indicating that a different mapping of columns
+/// should occur in the output.
+///
+/// For example:
+///
+/// Given a query `SELECT b, a FROM t` where `t` has columns a, b, c
+/// the mapping value for the projection would equal [1, 0].
+table Remap {
+ mapping: [FieldIndex] (required);
+}
+
+// Pass through indicates that no output remapping should occur.
+table PassThrough {}
+
+/// A union for the different colum remapping variants
+union Emit {
+ Remap,
+ PassThrough,
+}
+
+/// An identifier for relations in a query.
+///
+/// A table is used here to allow plan implementations optionality.
+table RelId {
+ id: uint64;
+}
+
+/// Fields common to every relational operator
+table RelBase {
+ /// Output remapping of ordinal columns for a given operation
+ output_mapping: Emit (required);
+
+ /// An identifiier for a relation. The identifier should be unique over the
+ /// entire plan. Optional.
+ id: RelId;
+}
+
+/// Filter operation
+table Filter {
+ /// Common options
+ base: RelBase (required);
+ /// Child relation
+ rel: Relation (required);
+ /// The expression which will be evaluated against input rows
+ /// to determine whether they should be excluded from the
+ /// filter relation's output.
+ predicate: Expression (required);
+}
+
+/// Projection
+table Project {
+ /// Common options
+ base: RelBase (required);
+ /// Child relation
+ rel: Relation (required);
+ /// Expressions which will be evaluated to produce to
+ /// the rows of the project relation's output.
+ expressions: [Expression] (required);
+}
+
+/// A set of grouping keys
+table Grouping {
+ /// Expressions to group by
+ keys: [Expression] (required);
+}
+
+/// Aggregate operation
+table Aggregate {
+ /// Common options
+ base: RelBase (required);
+ /// Child relation
+ rel: Relation (required);
+ /// Expressions which will be evaluated to produce to
+ /// the rows of the aggregate relation's output.
+ measures: [Expression] (required);
+ /// Keys by which `aggregations` will be grouped.
+ ///
+ /// The nested list here is to support grouping sets
+ /// eg
+ ///
+ /// SELECT a, b, c, sum(d)
+ /// FROM t
+ /// GROUP BY
+ /// GROUPING SETS (
+ /// (a, b, c),
+ /// (a, b),
+ /// (a),
+ /// ()
+ /// );
+ groupings: [Grouping] (required);
+}
+
+enum JoinKind : uint8 {
+ Anti,
+ Cross,
+ FullOuter,
+ Inner,
+ LeftOuter,
+ LeftSemi,
+ RightOuter,
+}
+
+/// Join between two tables
+table Join {
+ /// Common options
+ base: RelBase (required);
+ /// Left relation
+ left: Relation (required);
+ /// Right relation
+ right: Relation (required);
+ /// The expression which will be evaluated against rows from each
+ /// input to determine whether they should be included in the
+ /// join relation's output.
+ on_expression: Expression (required);
+ /// The kind of join to use.
+ join_kind: JoinKind;
+}
+
+/// Order by relation
+table OrderBy {
+ /// Common options
+ base: RelBase (required);
+ /// Child relation
+ rel: Relation (required);
+ /// Define sort order for rows of output.
+ /// Keys with higher precedence are ordered ahead of other keys.
+ keys: [SortKey] (required);
+}
+
+/// Limit operation
+table Limit {
+ /// Common options
+ base: RelBase (required);
+ /// Child relation
+ rel: Relation (required);
+ /// Starting index of rows
+ offset: uint32;
+ /// The maximum number of rows of output.
+ count: uint32;
+}
+
+/// The kind of set operation being performed.
+enum SetOpKind : uint8 {
+ Union,
+ Intersection,
+ Difference,
+}
+
+/// A set operation on two or more relations
+table SetOperation {
+ /// Common options
+ base: RelBase (required);
+ /// Child relations
+ rels: [Relation] (required);
+ /// The kind of set operation
+ set_op: SetOpKind;
+}
+
+/// A single column of literal values.
+table LiteralColumn {
+ /// The literal values of the column
+ elements: [Literal] (required);
+}
+
+/// Literal relation
+table LiteralRelation {
+ /// Common options
+ base: RelBase (required);
+ /// The columns of this literal relation.
+ columns: [LiteralColumn] (required);
+}
+
+/// An external source of tabular data
+table Source {
+ base: RelBase (required);
+ name: string (required);
+ schema: org.apache.arrow.flatbuf.Schema (required);
+}
+
+/// The varieties of relations
+union RelationImpl {
+ Aggregate,
+ Filter,
+ Join,
+ Limit,
+ LiteralRelation,
+ OrderBy,
+ Project,
+ SetOperation,
+ Source,
+}
+
+/// A table holding an instance of the possible relation types.
+table Relation {
+ impl: RelationImpl (required);
+}
+
+root_type Relation;